From bf2949df100d43f3d54ca74a028aa59678ba76c8 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 16 May 2021 17:54:43 -0300
Subject: [PATCH] glasm: Improve texture sampling instructions

---
 .../backend/glasm/emit_glasm_image.cpp        | 66 ++++++++++++-------
 .../backend/glasm/emit_glasm_instructions.h   | 54 +++++++--------
 2 files changed, 70 insertions(+), 50 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index 4d146d34e5..7f2cf052af 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -112,24 +112,46 @@ static std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInf
 }
 
 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                Register coords, Register bias_lc,
+                                const IR::Value& coord, Register bias_lc,
                                 [[maybe_unused]] const IR::Value& offset) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
-    const std::string_view op{info.has_bias ? "TXB" : "TEX"};
-    const std::string_view lod_clamp{info.has_lod_clamp ? ".LODCLAMP" : ""};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+    const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
+    const std::string_view type{"2D"}; // FIXME: hard-coded, presumably should follow info.type
     const std::string texture{Texture(ctx, info, index)};
+
+    std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
+    if (coord.InstRecursive()->HasUses()) {
+        // Copy still-live coords to RC, since their .w component may be overwritten below.
+        // This should never happen: vectors are only assembled for immediate texture instructions
+        ctx.Add("MOV.F RC,{};", coord_vec);
+        coord_vec = "RC";
+    }
     const Register ret{ctx.reg_alloc.Define(inst)};
-    // FIXME
-    const bool separate{info.type == TextureType::ColorArrayCube};
-    if (separate) {
-        ctx.Add("{}.F{}{} {},{},{},{},2D;", op, lod_clamp, sparse_mod, ret, coords, bias_lc,
-                texture);
+    if (info.has_bias) {
+        if (info.type == TextureType::ColorArrayCube) {
+            ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE;", lod_clamp_mod, sparse_mod, ret, coord_vec,
+                    bias_lc, texture);
+        } else {
+            if (info.has_lod_clamp) {
+                ctx.Add("MOV.F {}.w,{}.x;"
+                        "TXB.F.LODCLAMP{} {},{},{}.y,{},{};",
+                        coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type);
+            } else {
+                ctx.Add("MOV.F {}.w,{}.x;"
+                        "TXB.F{} {},{},{},{};",
+                        coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type);
+            }
+        }
     } else {
-        ctx.Add("MOV.F {}.w,{}.x;"
-                "{}.F{}{} {},{},{},2D;",
-                coords, bias_lc, op, lod_clamp, sparse_mod, ret, coords, texture);
+        if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
+            ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE;", sparse_mod, ret, coord_vec, bias_lc,
+                    texture);
+        } else {
+            ctx.Add("TEX.F{}{} {},{},{},{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
+                    type);
+        }
     }
     if (sparse_inst) {
         const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
@@ -142,7 +164,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu
 
 void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
                                 [[maybe_unused]] const IR::Value& index,
-                                [[maybe_unused]] Register coords, [[maybe_unused]] Register lod_lc,
+                                [[maybe_unused]] Register coord, [[maybe_unused]] Register lod_lc,
                                 [[maybe_unused]] const IR::Value& offset) {
     throw NotImplementedException("GLASM instruction");
 }
@@ -150,8 +172,7 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse
 void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx,
                                     [[maybe_unused]] IR::Inst& inst,
                                     [[maybe_unused]] const IR::Value& index,
-                                    [[maybe_unused]] Register coords,
-                                    [[maybe_unused]] Register dref,
+                                    [[maybe_unused]] Register coord, [[maybe_unused]] Register dref,
                                     [[maybe_unused]] Register bias_lc,
                                     [[maybe_unused]] const IR::Value& offset) {
     throw NotImplementedException("GLASM instruction");
@@ -160,22 +181,21 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx,
 void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx,
                                     [[maybe_unused]] IR::Inst& inst,
                                     [[maybe_unused]] const IR::Value& index,
-                                    [[maybe_unused]] Register coords,
-                                    [[maybe_unused]] Register dref,
+                                    [[maybe_unused]] Register coord, [[maybe_unused]] Register dref,
                                     [[maybe_unused]] Register lod_lc,
                                     [[maybe_unused]] const IR::Value& offset) {
     throw NotImplementedException("GLASM instruction");
 }
 
 void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                     [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords,
+                     [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord,
                      [[maybe_unused]] const IR::Value& offset,
                      [[maybe_unused]] const IR::Value& offset2) {
     throw NotImplementedException("GLASM instruction");
 }
 
 void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                         [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords,
+                         [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord,
                          [[maybe_unused]] const IR::Value& offset,
                          [[maybe_unused]] const IR::Value& offset2,
                          [[maybe_unused]] Register dref) {
@@ -183,7 +203,7 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR:
 }
 
 void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                    [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords,
+                    [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord,
                     [[maybe_unused]] Register offset, [[maybe_unused]] Register lod,
                     [[maybe_unused]] Register ms) {
     throw NotImplementedException("GLASM instruction");
@@ -196,24 +216,24 @@ void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]
 }
 
 void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                       [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords) {
+                       [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord) {
     throw NotImplementedException("GLASM instruction");
 }
 
 void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                       [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords,
+                       [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord,
                        [[maybe_unused]] Register derivates, [[maybe_unused]] Register offset,
                        [[maybe_unused]] Register lod_clamp) {
     throw NotImplementedException("GLASM instruction");
 }
 
 void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                   [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords) {
+                   [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord) {
     throw NotImplementedException("GLASM instruction");
 }
 
 void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                    [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords,
+                    [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord,
                     [[maybe_unused]] Register color) {
     throw NotImplementedException("GLASM instruction");
 }
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index ad640bcb99..a128f9ac44 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -525,28 +525,28 @@ void EmitBoundImageGradient(EmitContext&);
 void EmitBoundImageRead(EmitContext&);
 void EmitBoundImageWrite(EmitContext&);
 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                Register coords, Register bias_lc, const IR::Value& offset);
+                                const IR::Value& coord, Register bias_lc, const IR::Value& offset);
 void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                Register coords, Register lod_lc, const IR::Value& offset);
+                                Register coord, Register lod_lc, const IR::Value& offset);
 void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                    Register coords, Register dref, Register bias_lc,
+                                    Register coord, Register dref, Register bias_lc,
                                     const IR::Value& offset);
 void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                    Register coords, Register dref, Register lod_lc,
+                                    Register coord, Register dref, Register lod_lc,
                                     const IR::Value& offset);
-void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                      const IR::Value& offset, const IR::Value& offset2);
-void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                          const IR::Value& offset, const IR::Value& offset2, Register dref);
-void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                     Register offset, Register lod, Register ms);
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                               Register lod);
-void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords);
-void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                        Register derivates, Register offset, Register lod_clamp);
-void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords);
-void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                     Register color);
 void EmitBindlessImageAtomicIAdd32(EmitContext&);
 void EmitBindlessImageAtomicSMin32(EmitContext&);
@@ -570,28 +570,28 @@ void EmitBoundImageAtomicAnd32(EmitContext&);
 void EmitBoundImageAtomicOr32(EmitContext&);
 void EmitBoundImageAtomicXor32(EmitContext&);
 void EmitBoundImageAtomicExchange32(EmitContext&);
-void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           Register coords, ScalarU32 value);
-void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           Register coords, ScalarS32 value);
-void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           Register coords, ScalarU32 value);
-void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           Register coords, ScalarS32 value);
-void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           Register coords, ScalarU32 value);
-void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarU32 value);
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarS32 value);
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarU32 value);
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarS32 value);
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarU32 value);
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                           ScalarU32 value);
-void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                           ScalarU32 value);
-void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                           ScalarU32 value);
-void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                          ScalarU32 value);
-void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords,
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
                           ScalarU32 value);
 void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                               Register coords, ScalarU32 value);
+                               Register coord, ScalarU32 value);
 void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
 void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
 void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);