From 8335b2f115d8c2e082f1b085eedb54d334489f6d Mon Sep 17 00:00:00 2001
From: Subv <>
Date: Sat, 18 Aug 2018 14:36:37 -0500
Subject: [PATCH] Shader: Implemented the predicate and mode arguments of LOP.

The mode selects how the predicate is written from the result of the logic operation (for example, NotZero sets the predicate to true when the result is not zero). In some cases the result itself is discarded by writing it to register 0xFF (the Zero register).

This is used by Super Mario Odyssey.
 src/video_core/engines/shader_bytecode.h      |  7 ++-
 .../renderer_opengl/gl_shader_decompiler.cpp  | 43 ++++++++++++++-----
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index b038a9d922..6cfb9c5f8b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,11 @@ enum class FlowCondition : u64 {
     Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
+enum class PredicateResultMode : u64 {
+    None = 0x0,
+    NotZero = 0x3,
 union Instruction {
     Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -254,7 +259,7 @@ union Instruction {
             BitField<39, 1, u64> invert_a;
             BitField<40, 1, u64> invert_b;
             BitField<41, 2, LogicOperation> operation;
-            BitField<44, 2, u64> unk44;
+            BitField<44, 2, PredicateResultMode> pred_result_mode;
             BitField<48, 3, Pred> pred48;
         } lop;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bb01b3c274..77974b9d25 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -756,28 +756,51 @@ private:
     void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
-                             const std::string& op_b) {
+                             const std::string& op_b,
+                             Tegra::Shader::PredicateResultMode predicate_mode,
+                             Tegra::Shader::Pred predicate) {
+        std::string result{};
         switch (logic_op) {
         case LogicOperation::And: {
-            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+            result = '(' + op_a + " & " + op_b + ')';
         case LogicOperation::Or: {
-            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+            result = '(' + op_a + " | " + op_b + ')';
         case LogicOperation::Xor: {
-            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+            result = '(' + op_a + " ^ " + op_b + ')';
         case LogicOperation::PassB: {
-            regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+            result = op_b;
             LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
+        if (dest != Tegra::Shader::Register::ZeroIndex) {
+            regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+        }
+        using Tegra::Shader::PredicateResultMode;
+        // Write the predicate value depending on the predicate mode.
+        switch (predicate_mode) {
+        case PredicateResultMode::None:
+            // Do nothing.
+            return;
+        case PredicateResultMode::NotZero:
+            // Set the predicate to true if the result is not zero.
+            SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
+            break;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}",
+                         static_cast<u32>(predicate_mode));
+            UNREACHABLE();
+        }
     void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
@@ -1099,7 +1122,9 @@ private:
                 if (instr.alu.lop32i.invert_b)
                     op_b = "~(" + op_b + ')';
-                WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+                WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
+                                    Tegra::Shader::PredicateResultMode::None,
+                                    Tegra::Shader::Pred::UnusedIndex);
             default: {
@@ -1165,16 +1190,14 @@ private:
             case OpCode::Id::LOP_C:
             case OpCode::Id::LOP_R:
             case OpCode::Id::LOP_IMM: {
-                ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
-                ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
                 if (instr.alu.lop.invert_a)
                     op_a = "~(" + op_a + ')';
                 if (instr.alu.lop.invert_b)
                     op_b = "~(" + op_b + ')';
-                WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+                WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+                                    instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
             case OpCode::Id::IMNMX_C: