From e5d417213ce67bc23ac644132828d125a59c2455 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 19 Mar 2016 15:16:16 -0400
Subject: [PATCH 01/15] emitter: Support arbitrary FixupBranch targets.

---
 src/common/x64/emitter.cpp | 16 ++++++++++++++++
 src/common/x64/emitter.h   |  1 +
 2 files changed, 17 insertions(+)

diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 1dcf2416c2..6c8d10ea7c 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -531,6 +531,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch)
     }
 }
 
+void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target)
+{
+    if (branch.type == 0)
+    {
+        s64 distance = (s64)(target - branch.ptr);
+        ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
+        branch.ptr[-1] = (u8)(s8)distance;
+    }
+    else if (branch.type == 1)
+    {
+        s64 distance = (s64)(target - branch.ptr);
+        ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
+        ((s32*)branch.ptr)[-1] = (s32)distance;
+    }
+}
+
 //Single byte opcodes
 //There is no PUSHAD/POPAD in 64-bit mode.
 void XEmitter::INT3() {Write8(0xCC);}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 7c6548fb59..80dfa96d25 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -431,6 +431,7 @@ public:
     void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
 
     void SetJumpTarget(const FixupBranch& branch);
+    void SetJumpTarget(const FixupBranch& branch, const u8* target);
 
     void SETcc(CCFlags flag, OpArg dest);
     // Note: CMOV brings small if any benefit on current cpus.

From 135aec7beab9e484183565eea9d3cab03fe0b879 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 17 Mar 2016 19:51:43 -0400
Subject: [PATCH 02/15] shader_jit_x64: Fix strict memory aliasing issues.

---
 src/video_core/shader/shader_jit_x64.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index dffe051efe..d74b58d84b 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -741,7 +741,9 @@ void JitCompiler::Compile_Block(unsigned end) {
 void JitCompiler::Compile_NextInstr(unsigned* offset) {
     offset_ptr = offset;
 
-    Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++];
+    Instruction instr;
+    std::memcpy(&instr, &g_state.vs.program_code[(*offset_ptr)++], sizeof(Instruction));
+
     OpCode::Id opcode = instr.opcode.Value();
     auto instr_func = instr_table[static_cast<unsigned>(opcode)];
 

From 4632791a40f8ec5af7e166ff90fd4f8cd69b2745 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 17 Mar 2016 19:45:09 -0400
Subject: [PATCH 03/15] shader_jit_x64: Rewrite flow control to support
 arbitrary CALL and JMP instructions.

---
 src/video_core/shader/shader_jit_x64.cpp | 128 +++++++++++++++++------
 src/video_core/shader/shader_jit_x64.h   |  32 +++++-
 2 files changed, 122 insertions(+), 38 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index d74b58d84b..c798992ecc 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -137,6 +137,15 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
 /// Raw constant for the destination register enable mask that indicates all components are enabled
 static const u8 NO_DEST_REG_MASK = 0xf;
 
+/**
+ * Get the vertex shader instruction for a given offset in the current shader program
+ * @param offset Offset in the current shader program of the instruction
+ * @return Instruction at the specified offset
+ */
+static Instruction GetVertexShaderInstruction(size_t offset) {
+    return { g_state.vs.program_code[offset] };
+}
+
 /**
  * Loads and swizzles a source register into the specified XMM register.
  * @param instr VS instruction, used for determining how to load the source register
@@ -564,10 +573,23 @@ void JitCompiler::Compile_END(Instruction instr) {
 }
 
 void JitCompiler::Compile_CALL(Instruction instr) {
-    unsigned offset = instr.flow_control.dest_offset;
-    while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) {
-        Compile_NextInstr(&offset);
-    }
+    // Need to advance the return address past the proceeding instructions, this is the number of bytes to skip
+    constexpr unsigned SKIP = 21;
+    const uintptr_t start = reinterpret_cast<uintptr_t>(GetCodePtr());
+
+    // Push return address - not using CALL because we also want to push the offset of the return before jumping
+    MOV(64, R(RAX), ImmPtr(GetCodePtr() + SKIP));
+    PUSH(RAX);
+
+    // Push offset of the return
+    PUSH(32, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
+
+    // Jump
+    FixupBranch b = J(true);
+    fixup_branches.push_back({ b, instr.flow_control.dest_offset });
+
+    // Make sure that if the above code changes, SKIP gets updated
+    ASSERT(reinterpret_cast<uintptr_t>(GetCodePtr()) - start == SKIP);
 }
 
 void JitCompiler::Compile_CALLC(Instruction instr) {
@@ -645,8 +667,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
 }
 
 void JitCompiler::Compile_IF(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported",
-            *offset_ptr, instr.flow_control.dest_offset.Value());
+    ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements (%d -> %d) not supported",
+        last_program_counter, instr.flow_control.dest_offset.Value());
 
     // Evaluate the "IF" condition
     if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -677,8 +699,8 @@ void JitCompiler::Compile_IF(Instruction instr) {
 }
 
 void JitCompiler::Compile_LOOP(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported",
-            *offset_ptr, instr.flow_control.dest_offset.Value());
+    ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards loops (%d -> %d) not supported",
+        last_program_counter, instr.flow_control.dest_offset.Value());
     ASSERT_MSG(!looping, "Nested loops not supported");
 
     looping = true;
@@ -706,9 +728,6 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
 }
 
 void JitCompiler::Compile_JMP(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
-            *offset_ptr, instr.flow_control.dest_offset.Value());
-
     if (instr.opcode.Value() == OpCode::Id::JMPC)
         Compile_EvaluateCondition(instr);
     else if (instr.opcode.Value() == OpCode::Id::JMPU)
@@ -718,31 +737,42 @@ void JitCompiler::Compile_JMP(Instruction instr) {
 
     bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
         (instr.flow_control.num_instructions & 1);
+
     FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
-
-    Compile_Block(instr.flow_control.dest_offset);
-
-    SetJumpTarget(b);
+    fixup_branches.push_back({ b, instr.flow_control.dest_offset });
 }
 
 void JitCompiler::Compile_Block(unsigned end) {
-    // Save current offset pointer
-    unsigned* prev_offset_ptr = offset_ptr;
-    unsigned offset = *prev_offset_ptr;
-
-    while (offset < end)
-        Compile_NextInstr(&offset);
-
-    // Restore current offset pointer
-    offset_ptr = prev_offset_ptr;
-    *offset_ptr = offset;
+    while (program_counter < end) {
+        Compile_NextInstr();
+    }
 }
 
-void JitCompiler::Compile_NextInstr(unsigned* offset) {
-    offset_ptr = offset;
+void JitCompiler::Compile_Return() {
+    // Peek return offset on the stack and check if we're at that offset
+    MOV(64, R(RAX), MDisp(RSP, 0));
+    CMP(32, R(RAX), Imm32(program_counter));
 
-    Instruction instr;
-    std::memcpy(&instr, &g_state.vs.program_code[(*offset_ptr)++], sizeof(Instruction));
+    // If so, jump back to before CALL
+    FixupBranch b = J_CC(CC_NZ, true);
+    ADD(64, R(RSP), Imm32(8)); // Ignore return offset that's on the stack
+    POP(RAX); // Pop off return address
+    JMPptr(R(RAX));
+    SetJumpTarget(b);
+}
+
+void JitCompiler::Compile_NextInstr() {
+    last_program_counter = program_counter;
+
+    auto search = return_offsets.find(program_counter);
+    if (search != return_offsets.end()) {
+        Compile_Return();
+    }
+
+    ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
+    code_ptr[program_counter] = GetCodePtr();
+
+    Instruction instr = GetVertexShaderInstruction(program_counter++);
 
     OpCode::Id opcode = instr.opcode.Value();
     auto instr_func = instr_table[static_cast<unsigned>(opcode)];
@@ -757,9 +787,24 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
     }
 }
 
+void JitCompiler::FindReturnOffsets() {
+    return_offsets.clear();
+
+    for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
+        Instruction instr = GetVertexShaderInstruction(offset);
+
+        switch (instr.opcode.Value()) {
+        case OpCode::Id::CALL:
+        case OpCode::Id::CALLC:
+        case OpCode::Id::CALLU:
+            return_offsets.insert(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
+            break;
+        }
+    }
+}
+
 CompiledShader* JitCompiler::Compile() {
     const u8* start = GetCodePtr();
-    unsigned offset = g_state.regs.vs.main_offset;
 
     // The stack pointer is 8 modulo 16 at the entry of a procedure
     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
@@ -782,10 +827,27 @@ CompiledShader* JitCompiler::Compile() {
     MOV(PTRBITS, R(RAX), ImmPtr(&neg));
     MOVAPS(NEGBIT, MatR(RAX));
 
-    looping = false;
+    // Find all `CALL` instructions and identify return locations
+    FindReturnOffsets();
 
-    while (offset < g_state.vs.program_code.size()) {
-        Compile_NextInstr(&offset);
+    // Reset flow control state
+    last_program_counter = 0;
+    program_counter = 0;
+    looping = false;
+    code_ptr.fill(nullptr);
+    fixup_branches.clear();
+
+    // Jump to start of the shader program
+    if (g_state.regs.vs.main_offset != 0) {
+        fixup_branches.push_back({ J(true),  g_state.regs.vs.main_offset });
+    }
+
+    // Compile entire program
+    Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
+
+    // Set the target for any incomplete branches now that the entire shader program has been emitted
+    for (const auto& branch : fixup_branches) {
+        SetJumpTarget(branch.first, code_ptr[branch.second]);
     }
 
     return (CompiledShader*)start;
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5357c964bf..d6f03892da 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include <set>
+#include <utility>
+
 #include <nihstro/shader_bytecode.h>
 
 #include "common/x64/emitter.h"
@@ -66,8 +69,9 @@ public:
     void Compile_MAD(Instruction instr);
 
 private:
+
     void Compile_Block(unsigned end);
-    void Compile_NextInstr(unsigned* offset);
+    void Compile_NextInstr();
 
     void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
     void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
@@ -81,13 +85,31 @@ private:
     void Compile_EvaluateCondition(Instruction instr);
     void Compile_UniformCondition(Instruction instr);
 
+    /**
+     * Emits the code to conditionally return from a subroutine envoked by the `CALL` instruction.
+     */
+    void Compile_Return();
+
     BitSet32 PersistentCallerSavedRegs();
 
-    /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
-    unsigned* offset_ptr = nullptr;
+    /**
+     * Analyzes the entire shader program for `CALL` instructions before emitting any code,
+     * identifying the locations where a return needs to be inserted.
+     */
+    void FindReturnOffsets();
 
-    /// Set to true if currently in a loop, used to check for the existence of nested loops
-    bool looping = false;
+    /// Mapping of Pica VS instructions to pointers in the emitted code
+    std::array<const u8*, 1024> code_ptr;
+
+    /// Offsets in code where a return needs to be inserted
+    std::set<unsigned> return_offsets;
+
+    unsigned last_program_counter;  ///< Offset of the most recent instruction decoded
+    unsigned program_counter;       ///< Offset of the next instruction to decode
+    bool looping = false;           ///< True if compiling a loop, used to check for nested loops
+
+    /// Branches that need to be fixed up once the entire shader program is compiled
+    std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
 };
 
 } // Shader

From c9d10de644078a29e2310791ee221f3bc916e923 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 20 Mar 2016 00:37:05 -0400
Subject: [PATCH 04/15] shader_jit_x64: Allocate each program independently and
 persist for emu session.

---
 src/video_core/shader/shader.cpp         | 29 ++++++++----------------
 src/video_core/shader/shader_jit_x64.cpp | 17 +++++++-------
 src/video_core/shader/shader_jit_x64.h   | 20 ++++++++--------
 3 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 78d295c76a..e17368a4ae 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -28,15 +28,8 @@ namespace Pica {
 namespace Shader {
 
 #ifdef ARCHITECTURE_x86_64
-static std::unordered_map<u64, CompiledShader*> shader_map;
-static JitCompiler jit;
-static CompiledShader* jit_shader;
-
-static void ClearCache() {
-    shader_map.clear();
-    jit.Clear();
-    LOG_INFO(HW_GPU, "Shader JIT cache cleared");
-}
+static std::unordered_map<u64, std::unique_ptr<JitCompiler>> shader_map;
+static const JitCompiler* jit_shader;
 #endif // ARCHITECTURE_x86_64
 
 void Setup(UnitState<false>& state) {
@@ -48,16 +41,12 @@ void Setup(UnitState<false>& state) {
 
         auto iter = shader_map.find(cache_key);
         if (iter != shader_map.end()) {
-            jit_shader = iter->second;
+            jit_shader = iter->second.get();
         } else {
-            // Check if remaining JIT code space is enough for at least one more (massive) shader
-            if (jit.GetSpaceLeft() < jit_shader_size) {
-                // If not, clear the cache of all previously compiled shaders
-                ClearCache();
-            }
-
-            jit_shader = jit.Compile();
-            shader_map.emplace(cache_key, jit_shader);
+            auto shader = std::make_unique<JitCompiler>();
+            shader->Compile();
+            jit_shader = shader.get();
+            shader_map[cache_key] = std::move(shader);
         }
     }
 #endif // ARCHITECTURE_x86_64
@@ -65,7 +54,7 @@ void Setup(UnitState<false>& state) {
 
 void Shutdown() {
 #ifdef ARCHITECTURE_x86_64
-    ClearCache();
+    shader_map.clear();
 #endif // ARCHITECTURE_x86_64
 }
 
@@ -109,7 +98,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
 
 #ifdef ARCHITECTURE_x86_64
     if (VideoCore::g_shader_jit_enabled)
-        jit_shader(&state.registers);
+        jit_shader->Run(&state.registers);
     else
         RunInterpreter(state);
 #else
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index c798992ecc..3da4e51fa6 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -589,7 +589,7 @@ void JitCompiler::Compile_CALL(Instruction instr) {
     fixup_branches.push_back({ b, instr.flow_control.dest_offset });
 
     // Make sure that if the above code changes, SKIP gets updated
-    ASSERT(reinterpret_cast<uintptr_t>(GetCodePtr()) - start == SKIP);
+    ASSERT(reinterpret_cast<ptrdiff_t>(GetCodePtr()) - start == SKIP);
 }
 
 void JitCompiler::Compile_CALLC(Instruction instr) {
@@ -803,8 +803,8 @@ void JitCompiler::FindReturnOffsets() {
     }
 }
 
-CompiledShader* JitCompiler::Compile() {
-    const u8* start = GetCodePtr();
+void JitCompiler::Compile() {
+    program = (CompiledShader*)GetCodePtr();
 
     // The stack pointer is 8 modulo 16 at the entry of a procedure
     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
@@ -850,15 +850,14 @@ CompiledShader* JitCompiler::Compile() {
         SetJumpTarget(branch.first, code_ptr[branch.second]);
     }
 
-    return (CompiledShader*)start;
+    uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
+    ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
+
+    LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size);
 }
 
 JitCompiler::JitCompiler() {
-    AllocCodeSpace(jit_cache_size);
-}
-
-void JitCompiler::Clear() {
-    ClearCodeSpace();
+    AllocCodeSpace(MAX_SHADER_SIZE);
 }
 
 } // namespace Shader
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index d6f03892da..19f9bdb563 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -22,10 +22,8 @@ namespace Pica {
 
 namespace Shader {
 
-/// Memory needed to be available to compile the next shader (otherwise, clear the cache)
-constexpr size_t jit_shader_size = 1024 * 512;
-/// Memory allocated for the JIT code space cache
-constexpr size_t jit_cache_size = 1024 * 1024 * 8;
+/// Memory allocated for each compiled shader (64Kb)
+constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
 
 using CompiledShader = void(void* registers);
 
@@ -37,9 +35,11 @@ class JitCompiler : public Gen::XCodeBlock {
 public:
     JitCompiler();
 
-    CompiledShader* Compile();
+    void Run(void* registers) const {
+        program(registers);
+    }
 
-    void Clear();
+    void Compile();
 
     void Compile_ADD(Instruction instr);
     void Compile_DP3(Instruction instr);
@@ -104,12 +104,14 @@ private:
     /// Offsets in code where a return needs to be inserted
     std::set<unsigned> return_offsets;
 
-    unsigned last_program_counter;  ///< Offset of the most recent instruction decoded
-    unsigned program_counter;       ///< Offset of the next instruction to decode
-    bool looping = false;           ///< True if compiling a loop, used to check for nested loops
+    unsigned last_program_counter = 0;  ///< Offset of the most recent instruction decoded
+    unsigned program_counter = 0;       ///< Offset of the next instruction to decode
+    bool looping = false;               ///< True if compiling a loop, used to check for nested loops
 
     /// Branches that need to be fixed up once the entire shader program is compiled
     std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
+
+    CompiledShader* program = nullptr;
 };
 
 } // Shader

From a5a74eb121e0586706c3196d450c088280f996a5 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 26 Mar 2016 21:02:15 -0400
Subject: [PATCH 05/15] shader_jit_x64: Specify shader main offset at runtime.

---
 src/video_core/shader/shader.cpp         | 5 ++---
 src/video_core/shader/shader_jit_x64.cpp | 4 +---
 src/video_core/shader/shader_jit_x64.h   | 7 +++----
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index e17368a4ae..b354134882 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -36,8 +36,7 @@ void Setup(UnitState<false>& state) {
 #ifdef ARCHITECTURE_x86_64
     if (VideoCore::g_shader_jit_enabled) {
         u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
-            Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^
-            g_state.regs.vs.main_offset);
+            Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
 
         auto iter = shader_map.find(cache_key);
         if (iter != shader_map.end()) {
@@ -98,7 +97,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
 
 #ifdef ARCHITECTURE_x86_64
     if (VideoCore::g_shader_jit_enabled)
-        jit_shader->Run(&state.registers);
+        jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
     else
         RunInterpreter(state);
 #else
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 3da4e51fa6..cbdc1e40fd 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -838,9 +838,7 @@ void JitCompiler::Compile() {
     fixup_branches.clear();
 
     // Jump to start of the shader program
-    if (g_state.regs.vs.main_offset != 0) {
-        fixup_branches.push_back({ J(true),  g_state.regs.vs.main_offset });
-    }
+    JMPptr(R(ABI_PARAM2));
 
     // Compile entire program
     Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 19f9bdb563..1501d13bf9 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -25,8 +25,6 @@ namespace Shader {
 /// Memory allocated for each compiled shader (64Kb)
 constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
 
-using CompiledShader = void(void* registers);
-
 /**
  * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
  * code that can be executed on the host machine directly.
@@ -35,8 +33,8 @@ class JitCompiler : public Gen::XCodeBlock {
 public:
     JitCompiler();
 
-    void Run(void* registers) const {
-        program(registers);
+    void Run(void* registers, unsigned offset) const {
+        program(registers, code_ptr[offset]);
     }
 
     void Compile();
@@ -111,6 +109,7 @@ private:
     /// Branches that need to be fixed up once the entire shader program is compiled
     std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
 
+    using CompiledShader = void(void* registers, const u8* start_addr);
     CompiledShader* program = nullptr;
 };
 

From ffcf7ecee9f0b2843783e3678edaffbe1dda8ca2 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 1 Apr 2016 23:33:03 -0400
Subject: [PATCH 06/15] shader: Remove unused 'state' argument from 'Setup'
 function.

---
 src/video_core/command_processor.cpp | 4 ++--
 src/video_core/shader/shader.cpp     | 2 +-
 src/video_core/shader/shader.h       | 3 +--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 08ec2907a9..3abe79c09f 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -140,7 +140,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                         immediate_attribute_id = 0;
 
                         Shader::UnitState<false> shader_unit;
-                        Shader::Setup(shader_unit);
+                        Shader::Setup();
 
                         if (g_debug_context)
                             g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
@@ -300,7 +300,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
             vertex_cache_ids.fill(-1);
 
             Shader::UnitState<false> shader_unit;
-            Shader::Setup(shader_unit);
+            Shader::Setup();
 
             for (unsigned int index = 0; index < regs.num_vertices; ++index)
             {
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index b354134882..5214864ec8 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -32,7 +32,7 @@ static std::unordered_map<u64, std::unique_ptr<JitCompiler>> shader_map;
 static const JitCompiler* jit_shader;
 #endif // ARCHITECTURE_x86_64
 
-void Setup(UnitState<false>& state) {
+void Setup() {
 #ifdef ARCHITECTURE_x86_64
     if (VideoCore::g_shader_jit_enabled) {
         u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 7af8f1fa12..9c5bd97bd5 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -339,9 +339,8 @@ struct UnitState {
 /**
  * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
  * vertex, which would happen within the `Run` function).
- * @param state Shader unit state, must be setup per shader and per shader unit
  */
-void Setup(UnitState<false>& state);
+void Setup();
 
 /// Performs any cleanup when the emulator is shutdown
 void Shutdown();

From f3afe24594bad11d7e0fd28902d1ce1e6e22e3a2 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 2 Apr 2016 00:02:03 -0400
Subject: [PATCH 07/15] shader_jit_x64: Execute certain asserts at runtime.

- This is because we compile the full shader code space, and therefore it's common to compile malformed instructions.
---
 src/video_core/shader/shader_jit_x64.cpp | 18 +++++++++++++-----
 src/video_core/shader/shader_jit_x64.h   |  6 ++++++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index cbdc1e40fd..dda9bcef71 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -146,6 +146,16 @@ static Instruction GetVertexShaderInstruction(size_t offset) {
     return { g_state.vs.program_code[offset] };
 }
 
+static void LogCritical(const char* msg) {
+    LOG_CRITICAL(HW_GPU, msg);
+}
+
+void JitCompiler::RuntimeAssert(bool condition, const char* msg) {
+    if (!condition) {
+        ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg));
+    }
+}
+
 /**
  * Loads and swizzles a source register into the specified XMM register.
  * @param instr VS instruction, used for determining how to load the source register
@@ -667,8 +677,7 @@ void JitCompiler::Compile_MAD(Instruction instr) {
 }
 
 void JitCompiler::Compile_IF(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements (%d -> %d) not supported",
-        last_program_counter, instr.flow_control.dest_offset.Value());
+    RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements not supported");
 
     // Evaluate the "IF" condition
     if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -699,9 +708,8 @@ void JitCompiler::Compile_IF(Instruction instr) {
 }
 
 void JitCompiler::Compile_LOOP(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards loops (%d -> %d) not supported",
-        last_program_counter, instr.flow_control.dest_offset.Value());
-    ASSERT_MSG(!looping, "Nested loops not supported");
+    RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards loops not supported");
+    RuntimeAssert(!looping, "Nested loops not supported");
 
     looping = true;
 
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 1501d13bf9..159b902b25 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -90,6 +90,12 @@ private:
 
     BitSet32 PersistentCallerSavedRegs();
 
+    /**
+     * Assertion evaluated at compile-time, but only triggered if executed at runtime.
+     * @param msg Message to be logged if the assertion fails.
+     */
+    void RuntimeAssert(bool condition, const char* msg);
+
     /**
      * Analyzes the entire shader program for `CALL` instructions before emitting any code,
      * identifying the locations where a return needs to be inserted.

From 6e0319eec91341101505b944a652e0b635a51b6e Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 9 Apr 2016 11:24:48 -0400
Subject: [PATCH 08/15] shader_jit_x64: Get rid of unnecessary
 last_program_counter variable.

---
 src/video_core/shader/shader_jit_x64.cpp | 7 ++-----
 src/video_core/shader/shader_jit_x64.h   | 1 -
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index dda9bcef71..fae7e8b414 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -677,7 +677,7 @@ void JitCompiler::Compile_MAD(Instruction instr) {
 }
 
 void JitCompiler::Compile_IF(Instruction instr) {
-    RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements not supported");
+    RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
 
     // Evaluate the "IF" condition
     if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -708,7 +708,7 @@ void JitCompiler::Compile_IF(Instruction instr) {
 }
 
 void JitCompiler::Compile_LOOP(Instruction instr) {
-    RuntimeAssert(instr.flow_control.dest_offset > last_program_counter, "Backwards loops not supported");
+    RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
     RuntimeAssert(!looping, "Nested loops not supported");
 
     looping = true;
@@ -770,8 +770,6 @@ void JitCompiler::Compile_Return() {
 }
 
 void JitCompiler::Compile_NextInstr() {
-    last_program_counter = program_counter;
-
     auto search = return_offsets.find(program_counter);
     if (search != return_offsets.end()) {
         Compile_Return();
@@ -839,7 +837,6 @@ void JitCompiler::Compile() {
     FindReturnOffsets();
 
     // Reset flow control state
-    last_program_counter = 0;
     program_counter = 0;
     looping = false;
     code_ptr.fill(nullptr);
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 159b902b25..920a269e23 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -108,7 +108,6 @@ private:
     /// Offsets in code where a return needs to be inserted
     std::set<unsigned> return_offsets;
 
-    unsigned last_program_counter = 0;  ///< Offset of the most recent instruction decoded
     unsigned program_counter = 0;       ///< Offset of the next instruction to decode
     bool looping = false;               ///< True if compiling a loop, used to check for nested loops
 

From 1d45b57939b10bc1bc13ee33ad74e968850af703 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 9 Apr 2016 11:39:56 -0400
Subject: [PATCH 09/15] shader_jit_x64: Separate initialization and code
 generation for readability.

---
 src/video_core/shader/shader_jit_x64.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index fae7e8b414..efea55811e 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -810,7 +810,15 @@ void JitCompiler::FindReturnOffsets() {
 }
 
 void JitCompiler::Compile() {
+    // Reset flow control state
     program = (CompiledShader*)GetCodePtr();
+    program_counter = 0;
+    looping = false;
+    code_ptr.fill(nullptr);
+    fixup_branches.clear();
+
+    // Find all `CALL` instructions and identify return locations
+    FindReturnOffsets();
 
     // The stack pointer is 8 modulo 16 at the entry of a procedure
     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
@@ -833,15 +841,6 @@ void JitCompiler::Compile() {
     MOV(PTRBITS, R(RAX), ImmPtr(&neg));
     MOVAPS(NEGBIT, MatR(RAX));
 
-    // Find all `CALL` instructions and identify return locations
-    FindReturnOffsets();
-
-    // Reset flow control state
-    program_counter = 0;
-    looping = false;
-    code_ptr.fill(nullptr);
-    fixup_branches.clear();
-
     // Jump to start of the shader program
     JMPptr(R(ABI_PARAM2));
 

From 507e0b59896779d0276456c780ad2aefc3dbc28a Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 9 Apr 2016 17:42:48 -0400
Subject: [PATCH 10/15] emitter: Add CALL that can be fixed up.

---
 src/common/x64/emitter.cpp | 12 ++++++++++++
 src/common/x64/emitter.h   |  1 +
 2 files changed, 13 insertions(+)

diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 6c8d10ea7c..5662f7f867 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr)
     Write32(u32(distance));
 }
 
+FixupBranch XEmitter::CALL() // Emits a CALL with a placeholder target; returns the fixup to resolve later via SetJumpTarget
+{
+    FixupBranch branch;
+    branch.type = 1; // type 1: SetJumpTarget patches a 32-bit displacement just before branch.ptr
+    branch.ptr = code + 5; // end of the 5 bytes written below (1 opcode + 4 displacement), assuming `code` is the current write position
+
+    Write8(0xE8); // x86 CALL rel32 opcode
+    Write32(0); // placeholder displacement, to be overwritten when the branch is fixed up
+
+    return branch;
+}
+
 FixupBranch XEmitter::J(bool force5bytes)
 {
     FixupBranch branch;
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 80dfa96d25..a33724146d 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -425,6 +425,7 @@ public:
 #undef CALL
 #endif
     void CALL(const void* fnptr);
+    FixupBranch CALL();
     void CALLptr(OpArg arg);
 
     FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);

From 60749f2cda38f35a80a144f990d45c9b016ed0e2 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 9 Apr 2016 17:46:13 -0400
Subject: [PATCH 11/15] shader_jit_x64: Use CALL/RET instead of JMP for
 subroutines.

---
 src/video_core/shader/shader_jit_x64.cpp | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index efea55811e..503fad1586 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -583,23 +583,15 @@ void JitCompiler::Compile_END(Instruction instr) {
 }
 
 void JitCompiler::Compile_CALL(Instruction instr) {
-    // Need to advance the return address past the proceeding instructions, this is the number of bytes to skip
-    constexpr unsigned SKIP = 21;
-    const uintptr_t start = reinterpret_cast<uintptr_t>(GetCodePtr());
-
-    // Push return address - not using CALL because we also want to push the offset of the return before jumping
-    MOV(64, R(RAX), ImmPtr(GetCodePtr() + SKIP));
-    PUSH(RAX);
-
     // Push offset of the return
-    PUSH(32, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
+    PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
 
-    // Jump
-    FixupBranch b = J(true);
+    // Call the subroutine
+    FixupBranch b = CALL();
     fixup_branches.push_back({ b, instr.flow_control.dest_offset });
 
-    // Make sure that if the above code changes, SKIP gets updated
-    ASSERT(reinterpret_cast<ptrdiff_t>(GetCodePtr()) - start == SKIP);
+    // Skip over the return offset that's on the stack
+    ADD(64, R(RSP), Imm32(8));
 }
 
 void JitCompiler::Compile_CALLC(Instruction instr) {
@@ -758,14 +750,12 @@ void JitCompiler::Compile_Block(unsigned end) {
 
 void JitCompiler::Compile_Return() {
     // Peek return offset on the stack and check if we're at that offset
-    MOV(64, R(RAX), MDisp(RSP, 0));
+    MOV(64, R(RAX), MDisp(RSP, 8));
     CMP(32, R(RAX), Imm32(program_counter));
 
     // If so, jump back to before CALL
     FixupBranch b = J_CC(CC_NZ, true);
-    ADD(64, R(RSP), Imm32(8)); // Ignore return offset that's on the stack
-    POP(RAX); // Pop off return address
-    JMPptr(R(RAX));
+    RET();
     SetJumpTarget(b);
 }
 

From 60aa72e1177c436351c91be291ef869816df79e0 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 12 Apr 2016 23:24:34 -0400
Subject: [PATCH 12/15] shader_jit_x64: Use a sorted vector instead of a set
 for keeping track of return addresses.

---
 src/video_core/shader/shader_jit_x64.cpp | 9 ++++++---
 src/video_core/shader/shader_jit_x64.h   | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 503fad1586..e32a4e7204 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <smmintrin.h>
 
 #include "common/x64/abi.h"
@@ -760,8 +761,7 @@ void JitCompiler::Compile_Return() {
 }
 
 void JitCompiler::Compile_NextInstr() {
-    auto search = return_offsets.find(program_counter);
-    if (search != return_offsets.end()) {
+    if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
         Compile_Return();
     }
 
@@ -793,10 +793,13 @@ void JitCompiler::FindReturnOffsets() {
         case OpCode::Id::CALL:
         case OpCode::Id::CALLC:
         case OpCode::Id::CALLU:
-            return_offsets.insert(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
+            return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
             break;
         }
     }
+
+    // Sort for efficient binary search later
+    std::sort(return_offsets.begin(), return_offsets.end());
 }
 
 void JitCompiler::Compile() {
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 920a269e23..aa5060584d 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -4,8 +4,8 @@
 
 #pragma once
 
-#include <set>
 #include <utility>
+#include <vector>
 
 #include <nihstro/shader_bytecode.h>
 
@@ -106,7 +106,7 @@ private:
     std::array<const u8*, 1024> code_ptr;
 
     /// Offsets in code where a return needs to be inserted
-    std::set<unsigned> return_offsets;
+    std::vector<unsigned> return_offsets;
 
     unsigned program_counter = 0;       ///< Offset of the next instruction to decode
     bool looping = false;               ///< True if compiling a loop, used to check for nested loops

From 847fb951e29bb9bfb2735cf6bb1186e0374f3654 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 12 Apr 2016 23:29:25 -0400
Subject: [PATCH 13/15] shader_jit_x64: Free memory that's no longer needed
 after compilation.

---
 src/video_core/shader/shader_jit_x64.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index e32a4e7204..7735422839 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -845,6 +845,12 @@ void JitCompiler::Compile() {
         SetJumpTarget(branch.first, code_ptr[branch.second]);
     }
 
+    // Free memory that's no longer needed
+    return_offsets.clear();
+    return_offsets.shrink_to_fit();
+    fixup_branches.clear();
+    fixup_branches.shrink_to_fit();
+
     uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
     ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
 

From 3f623b2561eb829b5c9c3855cb24a612b12f7d6f Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 12 Apr 2016 23:34:03 -0400
Subject: [PATCH 14/15] shader_jit_x64.cpp: Rename JitCompiler to JitShader.

---
 src/video_core/shader/shader.cpp         |   6 +-
 src/video_core/shader/shader_jit_x64.cpp | 174 +++++++++++------------
 src/video_core/shader/shader_jit_x64.h   |   4 +-
 3 files changed, 92 insertions(+), 92 deletions(-)

diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 5214864ec8..75301accdd 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -28,8 +28,8 @@ namespace Pica {
 namespace Shader {
 
 #ifdef ARCHITECTURE_x86_64
-static std::unordered_map<u64, std::unique_ptr<JitCompiler>> shader_map;
-static const JitCompiler* jit_shader;
+static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
+static const JitShader* jit_shader;
 #endif // ARCHITECTURE_x86_64
 
 void Setup() {
@@ -42,7 +42,7 @@ void Setup() {
         if (iter != shader_map.end()) {
             jit_shader = iter->second.get();
         } else {
-            auto shader = std::make_unique<JitCompiler>();
+            auto shader = std::make_unique<JitShader>();
             shader->Compile();
             jit_shader = shader.get();
             shader_map[cache_key] = std::move(shader);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 7735422839..9369d2fe55 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -20,73 +20,73 @@ namespace Shader {
 
 using namespace Gen;
 
-typedef void (JitCompiler::*JitFunction)(Instruction instr);
+typedef void (JitShader::*JitFunction)(Instruction instr);
 
 const JitFunction instr_table[64] = {
-    &JitCompiler::Compile_ADD,      // add
-    &JitCompiler::Compile_DP3,      // dp3
-    &JitCompiler::Compile_DP4,      // dp4
-    &JitCompiler::Compile_DPH,      // dph
+    &JitShader::Compile_ADD,        // add
+    &JitShader::Compile_DP3,        // dp3
+    &JitShader::Compile_DP4,        // dp4
+    &JitShader::Compile_DPH,        // dph
     nullptr,                        // unknown
-    &JitCompiler::Compile_EX2,      // ex2
-    &JitCompiler::Compile_LG2,      // lg2
+    &JitShader::Compile_EX2,        // ex2
+    &JitShader::Compile_LG2,        // lg2
     nullptr,                        // unknown
-    &JitCompiler::Compile_MUL,      // mul
-    &JitCompiler::Compile_SGE,      // sge
-    &JitCompiler::Compile_SLT,      // slt
-    &JitCompiler::Compile_FLR,      // flr
-    &JitCompiler::Compile_MAX,      // max
-    &JitCompiler::Compile_MIN,      // min
-    &JitCompiler::Compile_RCP,      // rcp
-    &JitCompiler::Compile_RSQ,      // rsq
+    &JitShader::Compile_MUL,        // mul
+    &JitShader::Compile_SGE,        // sge
+    &JitShader::Compile_SLT,        // slt
+    &JitShader::Compile_FLR,        // flr
+    &JitShader::Compile_MAX,        // max
+    &JitShader::Compile_MIN,        // min
+    &JitShader::Compile_RCP,        // rcp
+    &JitShader::Compile_RSQ,        // rsq
     nullptr,                        // unknown
     nullptr,                        // unknown
-    &JitCompiler::Compile_MOVA,     // mova
-    &JitCompiler::Compile_MOV,      // mov
+    &JitShader::Compile_MOVA,       // mova
+    &JitShader::Compile_MOV,        // mov
     nullptr,                        // unknown
     nullptr,                        // unknown
     nullptr,                        // unknown
     nullptr,                        // unknown
-    &JitCompiler::Compile_DPH,      // dphi
+    &JitShader::Compile_DPH,        // dphi
     nullptr,                        // unknown
-    &JitCompiler::Compile_SGE,      // sgei
-    &JitCompiler::Compile_SLT,      // slti
+    &JitShader::Compile_SGE,        // sgei
+    &JitShader::Compile_SLT,        // slti
     nullptr,                        // unknown
     nullptr,                        // unknown
     nullptr,                        // unknown
     nullptr,                        // unknown
     nullptr,                        // unknown
-    &JitCompiler::Compile_NOP,      // nop
-    &JitCompiler::Compile_END,      // end
+    &JitShader::Compile_NOP,        // nop
+    &JitShader::Compile_END,        // end
     nullptr,                        // break
-    &JitCompiler::Compile_CALL,     // call
-    &JitCompiler::Compile_CALLC,    // callc
-    &JitCompiler::Compile_CALLU,    // callu
-    &JitCompiler::Compile_IF,       // ifu
-    &JitCompiler::Compile_IF,       // ifc
-    &JitCompiler::Compile_LOOP,     // loop
+    &JitShader::Compile_CALL,       // call
+    &JitShader::Compile_CALLC,      // callc
+    &JitShader::Compile_CALLU,      // callu
+    &JitShader::Compile_IF,         // ifu
+    &JitShader::Compile_IF,         // ifc
+    &JitShader::Compile_LOOP,       // loop
     nullptr,                        // emit
     nullptr,                        // sete
-    &JitCompiler::Compile_JMP,      // jmpc
-    &JitCompiler::Compile_JMP,      // jmpu
-    &JitCompiler::Compile_CMP,      // cmp
-    &JitCompiler::Compile_CMP,      // cmp
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // madi
-    &JitCompiler::Compile_MAD,      // mad
-    &JitCompiler::Compile_MAD,      // mad
-    &JitCompiler::Compile_MAD,      // mad
-    &JitCompiler::Compile_MAD,      // mad
-    &JitCompiler::Compile_MAD,      // mad
-    &JitCompiler::Compile_MAD,      // mad
-    &JitCompiler::Compile_MAD,      // mad
-    &JitCompiler::Compile_MAD,      // mad
+    &JitShader::Compile_JMP,        // jmpc
+    &JitShader::Compile_JMP,        // jmpu
+    &JitShader::Compile_CMP,        // cmp
+    &JitShader::Compile_CMP,        // cmp
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // madi
+    &JitShader::Compile_MAD,        // mad
+    &JitShader::Compile_MAD,        // mad
+    &JitShader::Compile_MAD,        // mad
+    &JitShader::Compile_MAD,        // mad
+    &JitShader::Compile_MAD,        // mad
+    &JitShader::Compile_MAD,        // mad
+    &JitShader::Compile_MAD,        // mad
+    &JitShader::Compile_MAD,        // mad
 };
 
 // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -151,7 +151,7 @@ static void LogCritical(const char* msg) {
     LOG_CRITICAL(HW_GPU, msg);
 }
 
-void JitCompiler::RuntimeAssert(bool condition, const char* msg) {
+void JitShader::RuntimeAssert(bool condition, const char* msg) {
     if (!condition) {
         ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg));
     }
@@ -164,7 +164,7 @@ void JitCompiler::RuntimeAssert(bool condition, const char* msg) {
  * @param src_reg SourceRegister object corresponding to the source register to load
  * @param dest Destination XMM register to store the loaded, swizzled source register
  */
-void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
+void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
     X64Reg src_ptr;
     size_t src_offset;
 
@@ -236,7 +236,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
     }
 }
 
-void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
+void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
     DestRegister dest;
     unsigned operand_desc_id;
     if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -283,7 +283,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
     }
 }
 
-void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
+void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
     MOVAPS(scratch, R(src1));
     CMPPS(scratch, R(src2), CMP_ORD);
 
@@ -296,7 +296,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::
     ANDPS(src1, R(scratch));
 }
 
-void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
+void JitShader::Compile_EvaluateCondition(Instruction instr) {
     // Note: NXOR is used below to check for equality
     switch (instr.flow_control.op) {
     case Instruction::FlowControlType::Or:
@@ -327,23 +327,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
     }
 }
 
-void JitCompiler::Compile_UniformCondition(Instruction instr) {
+void JitShader::Compile_UniformCondition(Instruction instr) {
     int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
     CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
 }
 
-BitSet32 JitCompiler::PersistentCallerSavedRegs() {
+BitSet32 JitShader::PersistentCallerSavedRegs() {
     return persistent_regs & ABI_ALL_CALLER_SAVED;
 }
 
-void JitCompiler::Compile_ADD(Instruction instr) {
+void JitShader::Compile_ADD(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
     ADDPS(SRC1, R(SRC2));
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_DP3(Instruction instr) {
+void JitShader::Compile_DP3(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
 
@@ -362,7 +362,7 @@ void JitCompiler::Compile_DP3(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_DP4(Instruction instr) {
+void JitShader::Compile_DP4(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
 
@@ -379,7 +379,7 @@ void JitCompiler::Compile_DP4(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_DPH(Instruction instr) {
+void JitShader::Compile_DPH(Instruction instr) {
     if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
         Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
         Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -411,7 +411,7 @@ void JitCompiler::Compile_DPH(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_EX2(Instruction instr) {
+void JitShader::Compile_EX2(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     MOVSS(XMM0, R(SRC1));
 
@@ -424,7 +424,7 @@ void JitCompiler::Compile_EX2(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_LG2(Instruction instr) {
+void JitShader::Compile_LG2(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     MOVSS(XMM0, R(SRC1));
 
@@ -437,14 +437,14 @@ void JitCompiler::Compile_LG2(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_MUL(Instruction instr) {
+void JitShader::Compile_MUL(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
     Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_SGE(Instruction instr) {
+void JitShader::Compile_SGE(Instruction instr) {
     if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
         Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
         Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -459,7 +459,7 @@ void JitCompiler::Compile_SGE(Instruction instr) {
     Compile_DestEnable(instr, SRC2);
 }
 
-void JitCompiler::Compile_SLT(Instruction instr) {
+void JitShader::Compile_SLT(Instruction instr) {
     if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
         Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
         Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -474,7 +474,7 @@ void JitCompiler::Compile_SLT(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_FLR(Instruction instr) {
+void JitShader::Compile_FLR(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
 
     if (Common::GetCPUCaps().sse4_1) {
@@ -487,7 +487,7 @@ void JitCompiler::Compile_FLR(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_MAX(Instruction instr) {
+void JitShader::Compile_MAX(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
     // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -495,7 +495,7 @@ void JitCompiler::Compile_MAX(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_MIN(Instruction instr) {
+void JitShader::Compile_MIN(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
     // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -503,7 +503,7 @@ void JitCompiler::Compile_MIN(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_MOVA(Instruction instr) {
+void JitShader::Compile_MOVA(Instruction instr) {
     SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
 
     if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
@@ -548,12 +548,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
     }
 }
 
-void JitCompiler::Compile_MOV(Instruction instr) {
+void JitShader::Compile_MOV(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_RCP(Instruction instr) {
+void JitShader::Compile_RCP(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
 
     // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
@@ -564,7 +564,7 @@ void JitCompiler::Compile_RCP(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_RSQ(Instruction instr) {
+void JitShader::Compile_RSQ(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
 
     // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
@@ -575,15 +575,15 @@ void JitCompiler::Compile_RSQ(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_NOP(Instruction instr) {
+void JitShader::Compile_NOP(Instruction instr) {
 }
 
-void JitCompiler::Compile_END(Instruction instr) {
+void JitShader::Compile_END(Instruction instr) {
     ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
     RET();
 }
 
-void JitCompiler::Compile_CALL(Instruction instr) {
+void JitShader::Compile_CALL(Instruction instr) {
     // Push offset of the return
     PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
 
@@ -595,21 +595,21 @@ void JitCompiler::Compile_CALL(Instruction instr) {
     ADD(64, R(RSP), Imm32(8));
 }
 
-void JitCompiler::Compile_CALLC(Instruction instr) {
+void JitShader::Compile_CALLC(Instruction instr) {
     Compile_EvaluateCondition(instr);
     FixupBranch b = J_CC(CC_Z, true);
     Compile_CALL(instr);
     SetJumpTarget(b);
 }
 
-void JitCompiler::Compile_CALLU(Instruction instr) {
+void JitShader::Compile_CALLU(Instruction instr) {
     Compile_UniformCondition(instr);
     FixupBranch b = J_CC(CC_Z, true);
     Compile_CALL(instr);
     SetJumpTarget(b);
 }
 
-void JitCompiler::Compile_CMP(Instruction instr) {
+void JitShader::Compile_CMP(Instruction instr) {
     using Op = Instruction::Common::CompareOpType::Op;
     Op op_x = instr.common.compare_op.x;
     Op op_y = instr.common.compare_op.y;
@@ -652,7 +652,7 @@ void JitCompiler::Compile_CMP(Instruction instr) {
     SHR(64, R(COND1), Imm8(63));
 }
 
-void JitCompiler::Compile_MAD(Instruction instr) {
+void JitShader::Compile_MAD(Instruction instr) {
     Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
 
     if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
@@ -669,7 +669,7 @@ void JitCompiler::Compile_MAD(Instruction instr) {
     Compile_DestEnable(instr, SRC1);
 }
 
-void JitCompiler::Compile_IF(Instruction instr) {
+void JitShader::Compile_IF(Instruction instr) {
     RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
 
     // Evaluate the "IF" condition
@@ -700,7 +700,7 @@ void JitCompiler::Compile_IF(Instruction instr) {
     SetJumpTarget(b2);
 }
 
-void JitCompiler::Compile_LOOP(Instruction instr) {
+void JitShader::Compile_LOOP(Instruction instr) {
     RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
     RuntimeAssert(!looping, "Nested loops not supported");
 
@@ -728,7 +728,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
     looping = false;
 }
 
-void JitCompiler::Compile_JMP(Instruction instr) {
+void JitShader::Compile_JMP(Instruction instr) {
     if (instr.opcode.Value() == OpCode::Id::JMPC)
         Compile_EvaluateCondition(instr);
     else if (instr.opcode.Value() == OpCode::Id::JMPU)
@@ -743,13 +743,13 @@ void JitCompiler::Compile_JMP(Instruction instr) {
     fixup_branches.push_back({ b, instr.flow_control.dest_offset });
 }
 
-void JitCompiler::Compile_Block(unsigned end) {
+void JitShader::Compile_Block(unsigned end) {
     while (program_counter < end) {
         Compile_NextInstr();
     }
 }
 
-void JitCompiler::Compile_Return() {
+void JitShader::Compile_Return() {
     // Peek return offset on the stack and check if we're at that offset
     MOV(64, R(RAX), MDisp(RSP, 8));
     CMP(32, R(RAX), Imm32(program_counter));
@@ -760,7 +760,7 @@ void JitCompiler::Compile_Return() {
     SetJumpTarget(b);
 }
 
-void JitCompiler::Compile_NextInstr() {
+void JitShader::Compile_NextInstr() {
     if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
         Compile_Return();
     }
@@ -783,7 +783,7 @@ void JitCompiler::Compile_NextInstr() {
     }
 }
 
-void JitCompiler::FindReturnOffsets() {
+void JitShader::FindReturnOffsets() {
     return_offsets.clear();
 
     for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
@@ -802,7 +802,7 @@ void JitCompiler::FindReturnOffsets() {
     std::sort(return_offsets.begin(), return_offsets.end());
 }
 
-void JitCompiler::Compile() {
+void JitShader::Compile() {
     // Reset flow control state
     program = (CompiledShader*)GetCodePtr();
     program_counter = 0;
@@ -857,7 +857,7 @@ void JitCompiler::Compile() {
     LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size);
 }
 
-JitCompiler::JitCompiler() {
+JitShader::JitShader() {
     AllocCodeSpace(MAX_SHADER_SIZE);
 }
 
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index aa5060584d..005fbdbe39 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -29,9 +29,9 @@ constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
  * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
  * code that can be executed on the host machine directly.
  */
-class JitCompiler : public Gen::XCodeBlock {
+class JitShader : public Gen::XCodeBlock {
 public:
-    JitCompiler();
+    JitShader();
 
     void Run(void* registers, unsigned offset) const {
         program(registers, code_ptr[offset]);

From d7fe2784cca9c13d1f79f4063691fc4ced1c4759 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 12 Apr 2016 23:35:36 -0400
Subject: [PATCH 15/15] shader_jit_x64: Rename RuntimeAssert to Compile_Assert.

---
 src/video_core/shader/shader_jit_x64.cpp | 8 ++++----
 src/video_core/shader/shader_jit_x64.h   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 9369d2fe55..b47d3beda6 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -151,7 +151,7 @@ static void LogCritical(const char* msg) {
     LOG_CRITICAL(HW_GPU, msg);
 }
 
-void JitShader::RuntimeAssert(bool condition, const char* msg) {
+void JitShader::Compile_Assert(bool condition, const char* msg) {
     if (!condition) {
         ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg));
     }
@@ -670,7 +670,7 @@ void JitShader::Compile_MAD(Instruction instr) {
 }
 
 void JitShader::Compile_IF(Instruction instr) {
-    RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
+    Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
 
     // Evaluate the "IF" condition
     if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -701,8 +701,8 @@ void JitShader::Compile_IF(Instruction instr) {
 }
 
 void JitShader::Compile_LOOP(Instruction instr) {
-    RuntimeAssert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
-    RuntimeAssert(!looping, "Nested loops not supported");
+    Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
+    Compile_Assert(!looping, "Nested loops not supported");
 
     looping = true;
 
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 005fbdbe39..cd6280adef 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -94,7 +94,7 @@ private:
      * Assertion evaluated at compile-time, but only triggered if executed at runtime.
      * @param msg Message to be logged if the assertion fails.
      */
-    void RuntimeAssert(bool condition, const char* msg);
+    void Compile_Assert(bool condition, const char* msg);
 
     /**
      * Analyzes the entire shader program for `CALL` instructions before emitting any code,