From e28fd3d0a533695242d17350dd929ad3bb56c429 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 26 Mar 2019 17:05:23 -0400
Subject: [PATCH] Implement Bindless Samplers and TEX_B in the IR.

---
 src/video_core/engines/shader_bytecode.h      |  2 +
 .../renderer_opengl/gl_shader_disk_cache.cpp  |  2 +-
 src/video_core/shader/decode/texture.cpp      | 58 +++++++++++++++++--
 src/video_core/shader/shader_ir.h             | 31 +++++++---
 4 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7f613370b6..2edd3245e1 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1309,6 +1309,7 @@ public:
         LDG, // Load from global memory
         STG, // Store in global memory
         TEX,
+        TEX_B,  // Texture Load Bindless
         TXQ,    // Texture Query
         TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
         TLDS,   // Texture Load with scalar/non-vec4 source/destinations
@@ -1577,6 +1578,7 @@ private:
             INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
             INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
+            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
             INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
             INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
             INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 8a43eb1576..6a95af6f65 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -328,7 +328,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
         }
         entry.entries.samplers.emplace_back(
             static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
-            static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
+            static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0, false);
     }
 
     u32 global_memory_count{};
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a775b402b8..23f2ad9997 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -57,6 +57,23 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
         break;
     }
+    case OpCode::Id::TEX_B: {
+        // Bindless variant of TEX; gpr20 is forwarded as the register used to find the sampler.
+        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
+        }
+
+        const TextureType texture_type{instr.tex.texture_type};
+        const bool is_array = instr.tex.array != 0;
+        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex.GetTextureProcessMode();
+        // GetTexCode takes (..., is_aoffi, is_bindless, bindless_reg); AOFFI is unsupported here.
+        WriteTexInstructionFloat(bb, instr,
+                                 GetTexCode(instr, texture_type, process_mode, depth_compare,
+                                            is_array, false, true, instr.gpr20));
+        break;
+    }
     case OpCode::Id::TEXS: {
         const TextureType texture_type{instr.texs.GetTextureType()};
         const bool is_array{instr.texs.IsArrayTexture()};
@@ -250,10 +267,36 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
 
     // Otherwise create a new mapping for this sampler
     const std::size_t next_index = used_samplers.size();
-    const Sampler entry{offset, next_index, type, is_array, is_shadow};
+    const Sampler entry{offset, next_index, type, is_array, is_shadow, false};
     return *used_samplers.emplace(entry).first;
 }
 
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
+                                            TextureType type, bool is_array, bool is_shadow) {
+    // Resolve the bindless sampler by tracking `reg` back to the const buffer read feeding it.
+    const Node sampler_register = GetRegister(reg);
+    const Node base_sampler =
+        TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
+    const auto cbuf = std::get_if<CbufNode>(base_sampler);
+    ASSERT(cbuf != nullptr); // get_if yields nullptr when the tracked node is not a CbufNode
+    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+    ASSERT(cbuf_offset_imm != nullptr);
+    const std::pair<u32, u32> cbuf_pair = {cbuf->GetIndex(), cbuf_offset_imm->GetValue()};
+
+    // If this sampler has already been used, return the existing mapping.
+    if (const auto it = used_bindless_samplers.find(cbuf_pair);
+        it != used_bindless_samplers.end()) {
+        ASSERT(it->second.GetType() == type && it->second.IsArray() == is_array &&
+               it->second.IsShadow() == is_shadow);
+        return it->second;
+    }
+
+    // Otherwise create a new mapping for this sampler, keyed by (cbuf index, cbuf offset).
+    const std::size_t next_index = used_bindless_samplers.size();
+    const Sampler entry{0, next_index, type, is_array, is_shadow, true};
+    return used_bindless_samplers.emplace(cbuf_pair, entry).first->second;
+}
+
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
     u32 dest_elem = 0;
     for (u32 elem = 0; elem < 4; ++elem) {
@@ -325,8 +368,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
 
 Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                                TextureProcessMode process_mode, std::vector<Node> coords,
-                               Node array, Node depth_compare, u32 bias_offset,
-                               std::vector<Node> aoffi) {
+                               Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, bool is_bindless,
+                               Register bindless_reg) {
     const bool is_array = array;
     const bool is_shadow = depth_compare;
 
@@ -334,7 +377,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                              (texture_type == TextureType::TextureCube && is_array && is_shadow),
                          "This method is not supported.");
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+    const auto& sampler = !is_bindless
+                              ? GetSampler(instr.sampler, texture_type, is_array, is_shadow)
+                              : GetBindlessSampler(bindless_reg, texture_type, is_array, is_shadow);
 
     const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                             process_mode == TextureProcessMode::LL ||
@@ -384,7 +429,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
 
 Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array,
-                           bool is_aoffi) {
+                           bool is_aoffi, bool is_bindless, Register bindless_reg) {
     const bool lod_bias_enabled{
         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
 
@@ -423,7 +468,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
         dc = GetRegister(parameter_register++);
     }
 
-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi, is_bindless,
+                          bindless_reg);
 }
 
 Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 4888998d34..712dc3ddb3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -196,9 +196,12 @@ enum class ExitMethod {
 
 class Sampler {
 public:
+    Sampler() = default;
     explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
-                     bool is_array, bool is_shadow)
-        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
+                     bool is_array, bool is_shadow, bool is_bindless)
+        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_bindless{is_bindless} {}
+
+    ~Sampler() = default;
 
     std::size_t GetOffset() const {
         return offset;
@@ -233,6 +236,7 @@ private:
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
     bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 };
 
 class ConstBuffer {
@@ -730,6 +734,10 @@ private:
     const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
 
+    // Accesses a texture sampler for a bindless texture.
+    const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, Tegra::Shader::TextureType type,
+                                      bool is_array, bool is_shadow);
+
     /// Extracts a sequence of bits from a node
     Node BitfieldExtract(Node value, u32 offset, u32 bits);
 
@@ -741,9 +749,11 @@ private:
     void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                        const Node4& components);
 
-    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
-                     bool is_array, bool is_aoffi);
+    Node4 GetTexCode(
+        Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
+        Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, bool is_array,
+        bool is_aoffi, bool is_bindless = false,
+        Tegra::Shader::Register bindless_reg = static_cast<Tegra::Shader::Register>(0));
 
     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -760,10 +770,12 @@ private:
         bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
 
     std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
-
-    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                         Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
-                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
+    
+    Node4 GetTextureCode(
+        Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
+        Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, Node array,
+        Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, bool is_bindless = false,
+        Tegra::Shader::Register bindless_reg = static_cast<Tegra::Shader::Register>(0));
 
     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                          u64 byte_height);
@@ -833,6 +845,7 @@ private:
     std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
     std::map<u32, ConstBuffer> used_cbufs;
     std::set<Sampler> used_samplers;
+    std::map<std::pair<u32, u32>, Sampler> used_bindless_samplers;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
     std::set<GlobalMemoryBase> used_global_memory_bases;