From 9293c3a0f21b0729ed64fbc417f4102e5e27d009 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 29 Oct 2019 20:48:18 -0400
Subject: [PATCH] Shader_IR: Fix TLD4 and add Bindless Variant.

This commit fixes several issues with TLD4: not all 4 results were being
written and the color component always defaulted to red, among other things.
It also implements the bindless variant (TLD4_B).
---
 src/video_core/engines/shader_bytecode.h | 30 +++++++++++++++++++++-
 src/video_core/shader/decode/texture.cpp | 32 ++++++++++++++++++------
 src/video_core/shader/shader_ir.h        |  4 +--
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d3d05a8665..8f6bc76eb3 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1237,6 +1237,32 @@ union Instruction {
         }
     } tld4;
 
+    union { // Field layout and misc-mode decoding used for the bindless TLD4 variant (TLD4_B)
+        BitField<35, 1, u64> ndv_flag;   // Non-zero when TextureMiscMode::NDV is requested
+        BitField<49, 1, u64> nodep_flag; // Non-zero when TextureMiscMode::NODEP is requested
+        BitField<50, 1, u64> dc_flag;    // Non-zero when TextureMiscMode::DC is requested
+        BitField<33, 2, u64> info;       // 1 selects AOFFI, 2 selects PTP (see UsesMiscMode)
+        BitField<37, 2, u64> component;  // Passed by GetTld4Code into the gather's MetaTexture
+
+        bool UsesMiscMode(TextureMiscMode mode) const { // True if this encoding enables `mode`
+            switch (mode) {
+            case TextureMiscMode::NDV:
+                return ndv_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::DC:
+                return dc_flag != 0;
+            case TextureMiscMode::AOFFI:
+                return info == 1; // AOFFI and PTP share `info`, so at most one of them is reported
+            case TextureMiscMode::PTP:
+                return info == 2;
+            default: // Every other misc mode falls through to the `return false` below
+                break;
+            }
+            return false; // Mode has no corresponding field in this encoding
+        }
+    } tld4_b; // NOTE(review): assumes BitField<position, bits, type> ordering; confirm in BitField
+
     union {
         BitField<49, 1, u64> nodep_flag;
         BitField<50, 1, u64> dc_flag;
@@ -1590,7 +1616,8 @@ public:
         TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
         TLD,    // Texture Load
         TLDS,   // Texture Load with scalar/non-vec4 source/destinations
-        TLD4,   // Texture Load 4
+        TLD4,   // Texture Gather 4
+        TLD4_B, // Texture Gather 4 Bindless
         TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
         TMML_B, // Texture Mip Map Level
         TMML,   // Texture Mip Map Level
@@ -1881,6 +1908,7 @@ private:
             INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
             INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
             INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
+            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
             INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
             INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
             INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index d61e656b75..0599ef34f1 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -96,6 +96,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::TLD4_B: {
+        is_bindless = true;
+        [[fallthrough]];
+    }
     case OpCode::Id::TLD4: {
         ASSERT(instr.tld4.array == 0);
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
@@ -108,11 +112,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
 
         const auto texture_type = instr.tld4.texture_type.Value();
-        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
+                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
         const bool is_array = instr.tld4.array != 0;
-        const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
+                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
         WriteTexInstructionFloat(
-            bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
+            bb, instr,
+            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless), true);
         break;
     }
     case OpCode::Id::TLD4S: {
@@ -359,10 +366,11 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
     return *used_samplers.emplace(entry).first;
 }
 
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+                                        bool is_tld4) {
     u32 dest_elem = 0;
     for (u32 elem = 0; elem < 4; ++elem) {
-        if (!instr.tex.IsComponentEnabled(elem)) {
+        if (!is_tld4 && !instr.tex.IsComponentEnabled(elem)) {
             // Skip disabled components
             continue;
         }
@@ -583,7 +591,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 }
 
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                            bool is_array, bool is_aoffi) {
+                            bool is_array, bool is_aoffi, bool is_bindless) {
     const std::size_t coord_count = GetCoordCount(texture_type);
 
     // If enabled arrays index is always stored in the gpr8 field
@@ -597,6 +605,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     }
 
     u64 parameter_register = instr.gpr20.Value();
+
+    const auto& sampler =
+        is_bindless
+            ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}})
+            : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+
     std::vector<Node> aoffi;
     if (is_aoffi) {
         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
@@ -607,12 +621,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
         dc = GetRegister(parameter_register++);
     }
 
-    const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+    const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
+                                       : Immediate(static_cast<u32>(instr.tld4.component));
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
+        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
+                         element};
         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
     }
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 1fd44bde1d..7582999a51 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -326,7 +326,7 @@ private:
     Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
 
     void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
-                                  const Node4& components);
+                                  const Node4& components, bool is_tld4 = false);
 
     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                    const Node4& components, bool ignore_mask = false);
@@ -343,7 +343,7 @@ private:
                       bool is_array);
 
     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                      bool depth_compare, bool is_array, bool is_aoffi);
+                      bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
 
     Node4 GetTldCode(Tegra::Shader::Instruction instr);