From ada79fa8ad94952c4ddee766a3bb9091dc6282e0 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 3 May 2019 02:59:25 -0300
Subject: [PATCH 1/2] gl_shader_decompiler: Make GetSwizzle constexpr

---
 .../renderer_opengl/gl_shader_decompiler.cpp       | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4bff54a599..713eded6d2 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -31,6 +31,8 @@ using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
 using Tegra::Shader::IpaSampleMode;
 using Tegra::Shader::Register;
+
+using namespace std::string_literals;
 using namespace VideoCommon::Shader;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -96,11 +98,9 @@ private:
 };
 
 /// Generates code to use for a swizzle operation.
-std::string GetSwizzle(u32 elem) {
-    ASSERT(elem <= 3);
-    std::string swizzle = ".";
-    swizzle += "xyzw"[elem];
-    return swizzle;
+constexpr const char* GetSwizzle(u32 element) { // Maps 0..3 to ".x", ".y", ".z", ".w"
+    constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"};
+    return swizzle.at(element); // at() is bounds-checked: element > 3 throws std::out_of_range
 }
 
 /// Translate topology
@@ -622,7 +622,7 @@ private:
             if (stage != ShaderStage::Fragment) {
                 return GeometryPass("position") + GetSwizzle(element);
             } else {
-                return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
+                return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
             }
         case Attribute::Index::PointCoord:
             switch (element) {
@@ -909,7 +909,7 @@ private:
             target = [&]() -> std::string {
                 switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
                 case Attribute::Index::Position:
-                    return "position" + GetSwizzle(abuf->GetElement());
+                    return "position"s + GetSwizzle(abuf->GetElement());
                 case Attribute::Index::PointSize:
                     return "gl_PointSize";
                 case Attribute::Index::ClipDistances0123:

From 9c3461604cff25ef11ecb6937f904eac37090ee7 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 3 May 2019 03:00:51 -0300
Subject: [PATCH 2/2] shader: Implement S2R Tid{XYZ} and CtaId{XYZ}

---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 16 +++++++
 .../renderer_vulkan/vk_shader_decompiler.cpp  | 18 ++++++++
 src/video_core/shader/decode/other.cpp        | 42 ++++++++++++-------
 src/video_core/shader/shader_ir.h             |  8 +++-
 4 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 713eded6d2..d437afad18 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1513,6 +1513,16 @@ private:
         return "uintBitsToFloat(config_pack[2])";
     }
 
+    template <u32 element> // Emits utof(gl_LocalInvocationID.x/.y/.z) for element 0/1/2
+    std::string LocalInvocationId(Operation) { // Operation argument is unnamed and unused
+        return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')';
+    }
+
+    template <u32 element> // Emits utof(gl_WorkGroupID.x/.y/.z) for element 0/1/2
+    std::string WorkGroupId(Operation) { // Operation argument is unnamed and unused
+        return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
+    }
+
     static constexpr OperationDecompilersArray operation_decompilers = {
         &GLSLDecompiler::Assign,
 
@@ -1652,6 +1662,12 @@ private:
         &GLSLDecompiler::EndPrimitive,
 
         &GLSLDecompiler::YNegate,
+        &GLSLDecompiler::LocalInvocationId<0>,
+        &GLSLDecompiler::LocalInvocationId<1>,
+        &GLSLDecompiler::LocalInvocationId<2>,
+        &GLSLDecompiler::WorkGroupId<0>,
+        &GLSLDecompiler::WorkGroupId<1>,
+        &GLSLDecompiler::WorkGroupId<2>,
     };
 
     std::string GetRegister(u32 index) const {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b61a6d170d..a5b25aeffb 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1035,6 +1035,18 @@ private:
         return {};
     }
 
+    template <u32 element> // element is not read by this stub
+    Id LocalInvocationId(Operation) { // Operation argument is unnamed and unused
+        UNIMPLEMENTED(); // Stub: only reports the operation as unimplemented
+        return {}; // Value-initialized Id
+    }
+
+    template <u32 element> // element is not read by this stub
+    Id WorkGroupId(Operation) { // Operation argument is unnamed and unused
+        UNIMPLEMENTED(); // Stub: only reports the operation as unimplemented
+        return {}; // Value-initialized Id
+    }
+
     Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
                       const std::string& name) {
         const Id id = OpVariable(type, storage);
@@ -1291,6 +1303,12 @@ private:
         &SPIRVDecompiler::EndPrimitive,
 
         &SPIRVDecompiler::YNegate,
+        &SPIRVDecompiler::LocalInvocationId<0>,
+        &SPIRVDecompiler::LocalInvocationId<1>,
+        &SPIRVDecompiler::LocalInvocationId<2>,
+        &SPIRVDecompiler::WorkGroupId<0>,
+        &SPIRVDecompiler::WorkGroupId<1>,
+        &SPIRVDecompiler::WorkGroupId<2>,
     };
 
     const ShaderIR& ir;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index fa17c45b50..77c6f99519 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -13,6 +13,7 @@ using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::SystemVariable;
 
 u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
@@ -58,20 +59,33 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::MOV_SYS: {
-        switch (instr.sys20) {
-        case Tegra::Shader::SystemVariable::InvocationInfo: {
-            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
-            SetRegister(bb, instr.gpr0, Immediate(0u));
-            break;
-        }
-        case Tegra::Shader::SystemVariable::Ydirection: {
-            // Config pack's third value is Y_NEGATE's state.
-            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
-            break;
-        }
-        default:
-            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
-        }
+        const Node value = [&]() { // Pick the IR node that supplies the requested system value
+            switch (instr.sys20) {
+            case SystemVariable::Ydirection:
+                return Operation(OperationCode::YNegate); // Y_NEGATE state from the config pack
+            case SystemVariable::InvocationInfo:
+                LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+                return Immediate(0u); // Incomplete: constant zero placeholder
+            case SystemVariable::TidX: // Tid{X,Y,Z} map to LocalInvocationId{X,Y,Z}
+                return Operation(OperationCode::LocalInvocationIdX);
+            case SystemVariable::TidY:
+                return Operation(OperationCode::LocalInvocationIdY);
+            case SystemVariable::TidZ:
+                return Operation(OperationCode::LocalInvocationIdZ);
+            case SystemVariable::CtaIdX: // CtaId{X,Y,Z} map to WorkGroupId{X,Y,Z}
+                return Operation(OperationCode::WorkGroupIdX);
+            case SystemVariable::CtaIdY:
+                return Operation(OperationCode::WorkGroupIdY);
+            case SystemVariable::CtaIdZ:
+                return Operation(OperationCode::WorkGroupIdZ);
+            default:
+                UNIMPLEMENTED_MSG("Unhandled system move: {}",
+                                  static_cast<u32>(instr.sys20.Value()));
+                return Immediate(0u); // Fallback so every path yields a node for SetRegister
+            }
+        }();
+        SetRegister(bb, instr.gpr0, value); // Write the selected node to gpr0
+
         break;
     }
     case OpCode::Id::BRA: {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0bf1242520..f99300c1c8 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -181,7 +181,13 @@ enum class OperationCode {
     EmitVertex,   /// () -> void
     EndPrimitive, /// () -> void
 
-    YNegate, /// () -> float
+    YNegate,            /// () -> float
+    LocalInvocationIdX, /// () -> uint
+    LocalInvocationIdY, /// () -> uint
+    LocalInvocationIdZ, /// () -> uint
+    WorkGroupIdX,       /// () -> uint
+    WorkGroupIdY,       /// () -> uint
+    WorkGroupIdZ,       /// () -> uint
 
     Amount,
 };