From abefe293986f42b8912a561b7978e952eca7bfee Mon Sep 17 00:00:00 2001
From: FernandoS27 <fsahmkow27@gmail.com>
Date: Fri, 19 Oct 2018 19:47:06 -0400
Subject: [PATCH 1/2] Implement Shader Local Memory

---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 81ffb24e42..c1a86755aa 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -279,6 +279,7 @@ public:
                         const Tegra::Shader::Header& header)
         : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
           fixed_pipeline_output_attributes_used{} {
+        local_memory_size = 0;
         BuildRegisterList();
         BuildInputList();
     }
@@ -436,6 +437,24 @@ public:
         shader.AddLine(dest + " = " + src + ';');
     }
 
+    std::string GetLocalMemoryAsFloat(const std::string index) {
+        return "lmem[" + index + "]";
+    }
+
+    std::string GetLocalMemoryAsInteger(const std::string index, bool is_signed = false) {
+        const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"};
+        return func + "(lmem[" + index + "])";
+    }
+
+    void SetLocalMemoryAsFloat(const std::string index, const std::string value) {
+        shader.AddLine("lmem[" + index + "] = " + value);
+    }
+
+    void SetLocalMemoryAsInteger(const std::string index, const std::string value, bool is_signed = false) {
+        const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
+        shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ')');
+    }
+
     std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
         switch (cc) {
         case Tegra::Shader::ControlCode::NEU:
@@ -533,6 +552,7 @@ public:
     void GenerateDeclarations(const std::string& suffix) {
         GenerateVertex();
         GenerateRegisters(suffix);
+        GenerateLocalMemory();
         GenerateInternalFlags();
         GenerateInputAttrs();
         GenerateOutputAttrs();
@@ -578,6 +598,10 @@ public:
         return entry.GetName();
     }
 
+    void SetLocalMemory(u64 lmem) {
+        local_memory_size = lmem;
+    }
+
 private:
     /// Generates declarations for registers.
     void GenerateRegisters(const std::string& suffix) {
@@ -588,6 +612,14 @@ private:
         declarations.AddNewLine();
     }
 
+    /// Generates declarations for local memory.
+    void GenerateLocalMemory() {
+        if (local_memory_size > 0) {
+            declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + "];");
+            declarations.AddNewLine();
+        }
+    }
+
     /// Generates declarations for internal flags.
     void GenerateInternalFlags() {
         for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
@@ -895,6 +927,7 @@ private:
     const std::string& suffix;
     const Tegra::Shader::Header& header;
     std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
+    u64 local_memory_size;
 };
 
 class GLSLGenerator {
@@ -904,6 +937,9 @@ public:
         : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
           stage(stage), suffix(suffix) {
         std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
+        local_memory_size = (header.common2.shader_local_memory_high_size << 24) |
+                            header.common1.shader_local_memory_low_size;
+        regs.SetLocalMemory(local_memory_size);
         Generate(suffix);
     }
 
@@ -3575,6 +3611,7 @@ private:
     const u32 main_offset;
     Maxwell3D::Regs::ShaderStage stage;
     const std::string& suffix;
+    u64 local_memory_size;
 
     ShaderWriter shader;
     ShaderWriter declarations;

From ca142f35c0f15e0d7e68e592b916660d9ee7a743 Mon Sep 17 00:00:00 2001
From: FernandoS27 <fsahmkow27@gmail.com>
Date: Fri, 19 Oct 2018 21:48:21 -0400
Subject: [PATCH 2/2] Implemented LD_L and ST_L

---
 src/video_core/engines/shader_bytecode.h      | 31 +++++++
 src/video_core/engines/shader_header.h        |  5 ++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 88 ++++++++++++++++---
 3 files changed, 112 insertions(+), 12 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index af77562662..141b9159b7 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -208,6 +208,16 @@ enum class UniformType : u64 {
     Double = 5,
 };
 
+enum class StoreType : u64 {
+    Unsigned8 = 0,
+    Signed8 = 1,
+    Unsigned16 = 2,
+    Signed16 = 3,
+    Bytes32 = 4,
+    Bytes64 = 5,
+    Bytes128 = 6,
+};
+
 enum class IMinMaxExchange : u64 {
     None = 0,
     XLo = 1,
@@ -747,6 +757,18 @@ union Instruction {
         BitField<44, 2, u64> unknown;
     } ld_c;
 
+    union {
+        BitField<48, 3, StoreType> type;
+    } ldst_sl;
+
+    union {
+        BitField<44, 2, u64> unknown;
+    } ld_l;
+
+    union {
+        BitField<44, 2, u64> unknown;
+    } st_l;
+
     union {
         BitField<0, 3, u64> pred0;
         BitField<3, 3, u64> pred3;
@@ -1209,6 +1231,7 @@ union Instruction {
     BitField<61, 1, u64> is_b_imm;
     BitField<60, 1, u64> is_b_gpr;
     BitField<59, 1, u64> is_c_gpr;
+    BitField<20, 24, s64> smem_imm;
 
     Attribute attribute;
     Sampler sampler;
@@ -1232,8 +1255,12 @@ public:
         BRA,
         PBK,
         LD_A,
+        LD_L,
+        LD_S,
         LD_C,
         ST_A,
+        ST_L,
+        ST_S,
         LDG, // Load from global memory
         STG, // Store in global memory
         TEX,
@@ -1490,8 +1517,12 @@ private:
             INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
             INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+            INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
+            INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
             INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
             INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+            INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
+            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
             INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
             INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a885ee3cf0..a0e015c4bc 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -96,6 +96,13 @@ struct Header {
             }
         } ps;
     };
+
+    /// Returns the shader's local memory size, assembled from the low part stored in common1
+    /// and the high part stored in common2 (which sits above the low 24 bits).
+    u64 GetLocalMemorySize() const {
+        return (common1.shader_local_memory_low_size |
+                (static_cast<u64>(common2.shader_local_memory_high_size) << 24));
+    }
 };
 
 static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c1a86755aa..dec291a7d9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -278,8 +278,7 @@ public:
                         const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
                         const Tegra::Shader::Header& header)
         : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
-          fixed_pipeline_output_attributes_used{} {
-        local_memory_size = 0;
+          fixed_pipeline_output_attributes_used{}, local_memory_size{0} {
         BuildRegisterList();
         BuildInputList();
     }
@@ -437,22 +436,23 @@ public:
         shader.AddLine(dest + " = " + src + ';');
     }
 
-    std::string GetLocalMemoryAsFloat(const std::string index) {
-        return "lmem[" + index + "]";
+    std::string GetLocalMemoryAsFloat(const std::string& index) {
+        return "lmem[" + index + ']';
     }
 
-    std::string GetLocalMemoryAsInteger(const std::string index, bool is_signed = false) {
-        const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"};
+    std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
+        const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
         return func + "(lmem[" + index + "])";
     }
 
-    void SetLocalMemoryAsFloat(const std::string index, const std::string value) {
-        shader.AddLine("lmem[" + index + "] = " + value);
+    void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) {
+        shader.AddLine("lmem[" + index + "] = " + value + ';');
     }
 
-    void SetLocalMemoryAsInteger(const std::string index, const std::string value, bool is_signed = false) {
+    void SetLocalMemoryAsInteger(const std::string& index, const std::string& value,
+                                 bool is_signed = false) {
         const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
-        shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ')');
+        shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
     }
 
     std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
@@ -615,7 +615,8 @@ private:
     /// Generates declarations for local memory.
     void GenerateLocalMemory() {
         if (local_memory_size > 0) {
-            declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + "];");
+            declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) +
+                                 "];");
             declarations.AddNewLine();
         }
     }
@@ -937,8 +938,7 @@ public:
         : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
           stage(stage), suffix(suffix) {
         std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
-        local_memory_size = (header.common2.shader_local_memory_high_size << 24) |
-                            header.common1.shader_local_memory_low_size;
+        local_memory_size = header.GetLocalMemorySize();
         regs.SetLocalMemory(local_memory_size);
         Generate(suffix);
     }
@@ -2360,6 +2360,39 @@ private:
                 shader.AddLine("}");
                 break;
             }
+            case OpCode::Id::LD_L: {
+                // Add an extra scope and declare the index register inside to prevent
+                // overwriting it in case it is used as an output of the LD instruction.
+                shader.AddLine('{');
+                ++shader.scope;
+
+                std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
+                                 std::to_string(instr.smem_imm.Value()) + ')';
+
+                shader.AddLine("uint index = (" + op + " / 4);");
+
+                const std::string op_a = regs.GetLocalMemoryAsFloat("index");
+
+                if (instr.ld_l.unknown != 1) {
+                    LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}",
+                                 static_cast<unsigned>(instr.ld_l.unknown.Value()));
+                    UNREACHABLE();
+                }
+
+                switch (instr.ldst_sl.type.Value()) {
+                case Tegra::Shader::StoreType::Bytes32:
+                    regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+                    break;
+                default:
+                    LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}",
+                                 static_cast<unsigned>(instr.ldst_sl.type.Value()));
+                    UNREACHABLE();
+                }
+
+                --shader.scope;
+                shader.AddLine('}');
+                break;
+            }
             case OpCode::Id::ST_A: {
                 ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
                            "Indirect attribute loads are not supported");
@@ -2388,6 +2421,37 @@ private:
 
                 break;
             }
+            case OpCode::Id::ST_L: {
+                // Add an extra scope so the temporary "index" declared below stays local to this
+                // store; unlike LD_L, ST_L writes no destination register that could alias it.
+                shader.AddLine('{');
+                ++shader.scope;
+
+                std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
+                                 std::to_string(instr.smem_imm.Value()) + ')';
+
+                shader.AddLine("uint index = (" + op + " / 4);");
+
+                if (instr.st_l.unknown != 0) {
+                    LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}",
+                                 static_cast<unsigned>(instr.st_l.unknown.Value()));
+                    UNREACHABLE();
+                }
+
+                switch (instr.ldst_sl.type.Value()) {
+                case Tegra::Shader::StoreType::Bytes32:
+                    regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0));
+                    break;
+                default:
+                    LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}",
+                                 static_cast<unsigned>(instr.ldst_sl.type.Value()));
+                    UNREACHABLE();
+                }
+
+                --shader.scope;
+                shader.AddLine('}');
+                break;
+            }
             case OpCode::Id::TEX: {
                 Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
                 std::string coord;