diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 66e11dd5b9..804c735a31 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -228,7 +228,7 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace&
 
 #define COMMAND_IN_RANGE(cmd_id, reg_name)   \
     (cmd_id >= PICA_REG_INDEX(reg_name) &&   \
-     cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::registers.reg_name)) / 4)
+     cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::g_state.regs.reg_name)) / 4)
 
 void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
     const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt();
@@ -244,8 +244,8 @@ void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
         } else {
             index = 2;
         }
-        auto config = Pica::registers.GetTextures()[index].config;
-        auto format = Pica::registers.GetTextures()[index].format;
+        auto config = Pica::g_state.regs.GetTextures()[index].config;
+        auto format = Pica::g_state.regs.GetTextures()[index].format;
         auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format);
 
         // TODO: Instead, emit a signal here to be caught by the main window widget.
@@ -270,8 +270,8 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) {
         } else {
             index = 2;
         }
-        auto config = Pica::registers.GetTextures()[index].config;
-        auto format = Pica::registers.GetTextures()[index].format;
+        auto config = Pica::g_state.regs.GetTextures()[index].config;
+        auto format = Pica::g_state.regs.GetTextures()[index].format;
 
         auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format);
         u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress());
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 0c1a3f47f4..e073445915 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -178,7 +178,7 @@ void GraphicsFramebufferWidget::OnUpdate()
     {
         // TODO: Store a reference to the registers in the debug context instead of accessing them directly...
 
-        const auto& framebuffer = Pica::registers.framebuffer;
+        const auto& framebuffer = Pica::g_state.regs.framebuffer;
 
         framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
         framebuffer_width = framebuffer.GetWidth();
@@ -191,7 +191,7 @@ void GraphicsFramebufferWidget::OnUpdate()
 
     case Source::DepthBuffer:
     {
-        const auto& framebuffer = Pica::registers.framebuffer;
+        const auto& framebuffer = Pica::g_state.regs.framebuffer;
 
         framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
         framebuffer_width = framebuffer.GetWidth();
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index 3b072d0151..14d3f8f398 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -253,13 +253,13 @@ void GraphicsVertexShaderModel::OnUpdate()
 
     info.Clear();
 
-    for (auto instr : Pica::VertexShader::GetShaderBinary())
+    for (auto instr : Pica::g_state.vs.program_code)
         info.code.push_back({instr});
 
-    for (auto pattern : Pica::VertexShader::GetSwizzlePatterns())
+    for (auto pattern : Pica::g_state.vs.swizzle_data)
         info.swizzle_info.push_back({pattern});
 
-    info.labels.insert({Pica::registers.vs_main_offset, "main"});
+    info.labels.insert({ Pica::g_state.regs.vs_main_offset, "main" });
 
     endResetModel();
 }
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 9866078d41..0258a3255e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,7 @@ set(SRCS
             debug_utils/debug_utils.cpp
             clipper.cpp
             command_processor.cpp
+            pica.cpp
             primitive_assembly.cpp
             rasterizer.cpp
             utils.cpp
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index ba3876a763..943f3eb35d 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -58,12 +58,13 @@ static void InitScreenCoordinates(OutputVertex& vtx)
         float24 offset_z;
     } viewport;
 
-    viewport.halfsize_x = float24::FromRawFloat24(registers.viewport_size_x);
-    viewport.halfsize_y = float24::FromRawFloat24(registers.viewport_size_y);
-    viewport.offset_x   = float24::FromFloat32(static_cast<float>(registers.viewport_corner.x));
-    viewport.offset_y   = float24::FromFloat32(static_cast<float>(registers.viewport_corner.y));
-    viewport.zscale     = float24::FromRawFloat24(registers.viewport_depth_range);
-    viewport.offset_z   = float24::FromRawFloat24(registers.viewport_depth_far_plane);
+    const auto& regs = g_state.regs;
+    viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x);
+    viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y);
+    viewport.offset_x   = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
+    viewport.offset_y   = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
+    viewport.zscale     = float24::FromRawFloat24(regs.viewport_depth_range);
+    viewport.offset_z   = float24::FromRawFloat24(regs.viewport_depth_far_plane);
 
     float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
     vtx.color *= inv_w;
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 5c4c044087..100d8c7c1b 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -21,8 +21,6 @@
 
 namespace Pica {
 
-Regs registers;
-
 namespace CommandProcessor {
 
 static int float_regs_counter = 0;
@@ -36,8 +34,9 @@ static u32 default_attr_write_buffer[3];
 Common::Profiling::TimingCategory category_drawing("Drawing");
 
 static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
+    auto& regs = g_state.regs;
 
-    if (id >= registers.NumIds())
+    if (id >= regs.NumIds())
         return;
 
     // If we're skipping this frame, only allow trigger IRQ
@@ -45,13 +44,13 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
         return;
 
     // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value
-    u32 old_value = registers[id];
-    registers[id] = (old_value & ~mask) | (value & mask);
+    u32 old_value = regs[id];
+    regs[id] = (old_value & ~mask) | (value & mask);
 
     if (g_debug_context)
         g_debug_context->OnEvent(DebugContext::Event::CommandLoaded, reinterpret_cast<void*>(&id));
 
-    DebugUtils::OnPicaRegWrite(id, registers[id]);
+    DebugUtils::OnPicaRegWrite(id, regs[id]);
 
     switch(id) {
         // Trigger IRQ
@@ -65,12 +64,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
         {
             Common::Profiling::ScopeTimer scope_timer(category_drawing);
 
-            DebugUtils::DumpTevStageConfig(registers.GetTevStages());
+            DebugUtils::DumpTevStageConfig(regs.GetTevStages());
 
             if (g_debug_context)
                 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
 
-            const auto& attribute_config = registers.vertex_attributes;
+            const auto& attribute_config = regs.vertex_attributes;
             const u32 base_address = attribute_config.GetPhysicalBaseAddress();
 
             // Information about internal vertex attributes
@@ -103,16 +102,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
             // Load vertices
             bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
 
-            const auto& index_info = registers.index_array;
+            const auto& index_info = regs.index_array;
             const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
             const u16* index_address_16 = (u16*)index_address_8;
             bool index_u16 = index_info.format != 0;
 
             DebugUtils::GeometryDumper geometry_dumper;
-            PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(registers.triangle_topology.Value());
-            PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(registers.triangle_topology.Value());
+            PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value());
+            PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value());
 
-            for (unsigned int index = 0; index < registers.num_vertices; ++index)
+            for (unsigned int index = 0; index < regs.num_vertices; ++index)
             {
                 unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
 
@@ -131,7 +130,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
                     // Load the default attribute if we're configured to do so, this data will be overwritten by the loader data if it's set
                     if (attribute_config.IsDefaultAttribute(i)) {
-                        input.attr[i] = VertexShader::GetDefaultAttribute(i);
+                        input.attr[i] = g_state.vs.default_attributes[i];
                         LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
                                   i, vertex, index,
                                   input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
@@ -216,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
 
         case PICA_REG_INDEX(vs_bool_uniforms):
             for (unsigned i = 0; i < 16; ++i)
-                VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i)) != 0;
+                g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0;
 
             break;
 
@@ -226,8 +225,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
         case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4):
         {
             int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1));
-            auto values = registers.vs_int_uniforms[index];
-            VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
+            auto values = regs.vs_int_uniforms[index];
+            g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
             LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
                       index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
             break;
@@ -242,7 +241,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7):
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8):
         {
-            auto& uniform_setup = registers.vs_uniform_setup;
+            auto& uniform_setup = regs.vs_uniform_setup;
 
             // TODO: Does actual hardware indeed keep an intermediate buffer or does
             //       it directly write the values?
@@ -255,7 +254,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
                 float_regs_counter = 0;
 
-                auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index);
+                auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
 
                 if (uniform_setup.index > 95) {
                     LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
@@ -299,14 +298,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
             if (default_attr_counter >= 3) {
                 default_attr_counter = 0;
 
-                auto& setup = registers.vs_default_attributes_setup;
+                auto& setup = regs.vs_default_attributes_setup;
 
                 if (setup.index >= 16) {
                     LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
                     break;
                 }
 
-                Math::Vec4<float24>& attribute = VertexShader::GetDefaultAttribute(setup.index);
+                Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index];
                 
                 // NOTE: The destination component order indeed is "backwards"
                 attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8);
@@ -334,8 +333,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2):
         case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3):
         {
-            VertexShader::SubmitShaderMemoryChange(registers.vs_program.offset, value);
-            registers.vs_program.offset++;
+            g_state.vs.program_code[regs.vs_program.offset] = value;
+            regs.vs_program.offset++;
             break;
         }
 
@@ -349,8 +348,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc):
         case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd):
         {
-            VertexShader::SubmitSwizzleDataChange(registers.vs_swizzle_patterns.offset, value);
-            registers.vs_swizzle_patterns.offset++;
+            g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value;
+            regs.vs_swizzle_patterns.offset++;
             break;
         }
 
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 9da44ccd6a..7987b922c5 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -632,7 +632,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
             info.width = texture_config.width;
             info.height = texture_config.height;
             info.stride = row_stride;
-            info.format = registers.texture0_format;
+            info.format = g_state.regs.texture0_format;
             Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info);
             buf[3 * x + y * row_stride    ] = texture_color.r();
             buf[3 * x + y * row_stride + 1] = texture_color.g();
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
new file mode 100644
index 0000000000..543d9c443d
--- /dev/null
+++ b/src/video_core/pica.cpp
@@ -0,0 +1,20 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string.h>
+
+#include "pica.h"
+
+namespace Pica {
+
+State g_state; ///< The one global Pica state instance; declared extern in pica.h
+
+void Init() { // Currently nothing to set up
+}
+
+void Shutdown() {
+    memset(&g_state, 0, sizeof(State)); // Zero every byte of the registers and vertex shader state
+}
+
+} // namespace Pica
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 503c09eca2..b67dce1a9d 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -16,6 +16,8 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 
+#include "math.h" 
+
 namespace Pica {
 
 // Returns index corresponding to the Regs member labeled by field_name
@@ -356,50 +358,50 @@ struct Regs {
                  tev_stage4, tev_stage5 };
     };
 
-    struct {
-        enum CompareFunc : u32 {
-            Never               = 0,
-            Always              = 1,
-            Equal               = 2,
-            NotEqual            = 3,
-            LessThan            = 4,
-            LessThanOrEqual     = 5,
-            GreaterThan         = 6,
-            GreaterThanOrEqual  = 7,
-        };
+    enum class BlendEquation : u32 { // Value type of output_merger's blend_equation_rgb / blend_equation_a fields
+        Add             = 0,
+        Subtract        = 1,
+        ReverseSubtract = 2,
+        Min             = 3,
+        Max             = 4,
+    };
 
+    enum class BlendFactor : u32 {
+        Zero                    = 0,
+        One                     = 1,
+        SourceColor             = 2,
+        OneMinusSourceColor     = 3,
+        DestColor               = 4,
+        OneMinusDestColor       = 5,
+        SourceAlpha             = 6,
+        OneMinusSourceAlpha     = 7,
+        DestAlpha               = 8,
+        OneMinusDestAlpha       = 9,
+        ConstantColor           = 10,
+        OneMinusConstantColor   = 11,
+        ConstantAlpha           = 12,
+        OneMinusConstantAlpha   = 13,
+        SourceAlphaSaturate     = 14,
+    };
+
+    enum class CompareFunc : u32 {
+        Never              = 0,
+        Always             = 1,
+        Equal              = 2,
+        NotEqual           = 3,
+        LessThan           = 4,
+        LessThanOrEqual    = 5,
+        GreaterThan        = 6,
+        GreaterThanOrEqual = 7,
+    };
+
+    struct {
         union {
             // If false, logic blending is used
             BitField<8, 1, u32> alphablend_enable;
         };
 
         union {
-            enum class BlendEquation : u32 {
-                Add             = 0,
-                Subtract        = 1,
-                ReverseSubtract = 2,
-                Min             = 3,
-                Max             = 4
-            };
-
-            enum BlendFactor : u32 {
-                Zero                    = 0,
-                One                     = 1,
-                SourceColor             = 2,
-                OneMinusSourceColor     = 3,
-                DestColor               = 4,
-                OneMinusDestColor       = 5,
-                SourceAlpha             = 6,
-                OneMinusSourceAlpha     = 7,
-                DestAlpha               = 8,
-                OneMinusDestAlpha       = 9,
-                ConstantColor           = 10,
-                OneMinusConstantColor   = 11,
-                ConstantAlpha           = 12,
-                OneMinusConstantAlpha   = 13,
-                SourceAlphaSaturate     = 14
-            };
-
             BitField< 0, 8, BlendEquation> blend_equation_rgb;
             BitField< 8, 8, BlendEquation> blend_equation_a;
 
@@ -454,49 +456,19 @@ struct Regs {
         INSERT_PADDING_WORDS(0x8);
     } output_merger;
 
-    enum DepthFormat : u32 {
-        D16    = 0,
-
-        D24    = 2,
-        D24S8  = 3
+    // Components are laid out in reverse byte order, most significant bits first.
+    enum class ColorFormat : u32 {
+        RGBA8  = 0,
+        RGB8   = 1,
+        RGB5A1 = 2,
+        RGB565 = 3,
+        RGBA4  = 4,
     };
 
-    // Returns the number of bytes in the specified depth format
-    static u32 BytesPerDepthPixel(DepthFormat format) {
-        switch (format) {
-        case DepthFormat::D16:
-            return 2;
-        case DepthFormat::D24:
-            return 3;
-        case DepthFormat::D24S8:
-            return 4;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
-            UNIMPLEMENTED();
-        }
-    }
-
-    // Returns the number of bits per depth component of the specified depth format
-    static u32 DepthBitsPerPixel(DepthFormat format) {
-        switch (format) {
-        case DepthFormat::D16:
-            return 16;
-        case DepthFormat::D24:
-        case DepthFormat::D24S8:
-            return 24;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
-            UNIMPLEMENTED();
-        }
-    }
-
-    // Components are laid out in reverse byte order, most significant bits first.
-    enum ColorFormat : u32 {
-        RGBA8    = 0,
-        RGB8     = 1,
-        RGB5A1   = 2,
-        RGB565   = 3,
-        RGBA4    = 4,
+    enum class DepthFormat : u32 {
+        D16   = 0, // 16 depth bits, 2 bytes per pixel
+        D24   = 2, // 24 depth bits, 3 bytes per pixel
+        D24S8 = 3, // 24 depth bits, 4 bytes per pixel
+    };
 
     // Returns the number of bytes in the specified color format
@@ -554,6 +526,39 @@ struct Regs {
         }
     } framebuffer;
 
+    // Returns the number of bytes per pixel in the specified depth format (0 if the format is unknown)
+    static u32 BytesPerDepthPixel(DepthFormat format) {
+        switch (format) {
+        case DepthFormat::D16:
+            return 2;
+        case DepthFormat::D24:
+            return 3;
+        case DepthFormat::D24S8:
+            return 4;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unknown depth format %u", static_cast<u32>(format));
+            UNIMPLEMENTED();
+            // Never run off the end of a non-void function if UNIMPLEMENTED() returns
+            return 0;
+        }
+    }
+
+    // Returns the number of bits per depth component of the specified depth format (0 if the format is unknown)
+    static u32 DepthBitsPerPixel(DepthFormat format) {
+        switch (format) {
+        case DepthFormat::D16:
+            return 16;
+        case DepthFormat::D24:
+        case DepthFormat::D24S8:
+            return 24;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unknown depth format %u", static_cast<u32>(format));
+            UNIMPLEMENTED();
+            // Never run off the end of a non-void function if UNIMPLEMENTED() returns
+            return 0;
+        }
+    }
+
     INSERT_PADDING_WORDS(0xe0);
 
     enum class VertexAttributeFormat : u64 {
@@ -953,9 +954,6 @@ ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5);
 static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
 static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
 
-extern Regs registers; // TODO: Not sure if we want to have one global instance for this
-
-
 struct float24 {
     static float24 FromFloat32(float val) {
         float24 ret;
@@ -1066,4 +1064,30 @@ union CommandHeader {
     BitField<31,  1, u32> group_commands;
 };
 
+/// Struct used to describe current Pica state
+struct State {
+    Regs regs; ///< Raw register set, written by the command processor
+
+    struct {
+        struct {
+            Math::Vec4<float24> f[96]; ///< Float uniforms, selected by regs.vs_uniform_setup.index
+            std::array<bool, 16> b; ///< Bool uniforms, one per low bit of regs.vs_bool_uniforms
+            std::array<Math::Vec4<u8>, 4> i; ///< Integer uniforms, copied from regs.vs_int_uniforms
+        } uniforms;
+
+        Math::Vec4<float24> default_attributes[16]; ///< Default vertex attributes, selected by regs.vs_default_attributes_setup.index
+
+        std::array<u32, 1024> program_code; ///< Shader words, written at regs.vs_program.offset (NOTE(review): no bounds check visible at the write site - confirm offset < 1024)
+        std::array<u32, 1024> swizzle_data; ///< Swizzle words, written at regs.vs_swizzle_patterns.offset (same caveat)
+    } vs; ///< Vertex shader state
+};
+
+/// Initialize Pica state
+void Init();
+
+/// Shutdown Pica state
+void Shutdown();
+
+extern State g_state; ///< Current Pica state
+
 } // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 362efe52e9..767ff42058 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -24,72 +24,74 @@ namespace Pica {
 namespace Rasterizer {
 
 static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
-    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    const auto& framebuffer = g_state.regs.framebuffer;
+    const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
 
     // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
     // NOTE: The framebuffer height register contains the actual FB height minus one.
-    y = (registers.framebuffer.height - y);
+    y = framebuffer.height - y;
 
     const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
-    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
+    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
+    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel;
     u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
 
-    switch (registers.framebuffer.color_format) {
-    case Pica::Regs::ColorFormat::RGBA8:
+    switch (framebuffer.color_format) {
+    case Regs::ColorFormat::RGBA8:
         Color::EncodeRGBA8(color, dst_pixel);
         break;
 
-    case Pica::Regs::ColorFormat::RGB8:
+    case Regs::ColorFormat::RGB8:
         Color::EncodeRGB8(color, dst_pixel);
         break;
 
-    case Pica::Regs::ColorFormat::RGB5A1:
+    case Regs::ColorFormat::RGB5A1:
         Color::EncodeRGB5A1(color, dst_pixel);
         break;
 
-    case Pica::Regs::ColorFormat::RGB565:
+    case Regs::ColorFormat::RGB565:
         Color::EncodeRGB565(color, dst_pixel);
         break;
 
-    case Pica::Regs::ColorFormat::RGBA4:
+    case Regs::ColorFormat::RGBA4:
         Color::EncodeRGBA4(color, dst_pixel);
         break;
 
     default:
-        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
+        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", static_cast<u32>(framebuffer.color_format.Value()));
         UNIMPLEMENTED();
     }
 }
 
 static const Math::Vec4<u8> GetPixel(int x, int y) {
-    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    const auto& framebuffer = g_state.regs.framebuffer;
+    const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
 
-    y = (registers.framebuffer.height - y);
+    y = framebuffer.height - y;
 
     const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value()));
-    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel;
+    u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel;
     u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
 
-    switch (registers.framebuffer.color_format) {
-    case Pica::Regs::ColorFormat::RGBA8:
+    switch (framebuffer.color_format) {
+    case Regs::ColorFormat::RGBA8:
         return Color::DecodeRGBA8(src_pixel);
 
-    case Pica::Regs::ColorFormat::RGB8:
+    case Regs::ColorFormat::RGB8:
         return Color::DecodeRGB8(src_pixel);
 
-    case Pica::Regs::ColorFormat::RGB5A1:
+    case Regs::ColorFormat::RGB5A1:
         return Color::DecodeRGB5A1(src_pixel);
 
-    case Pica::Regs::ColorFormat::RGB565:
+    case Regs::ColorFormat::RGB565:
         return Color::DecodeRGB565(src_pixel);
 
-    case Pica::Regs::ColorFormat::RGBA4:
+    case Regs::ColorFormat::RGBA4:
         return Color::DecodeRGBA4(src_pixel);
 
     default:
-        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value());
+        LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value());
         UNIMPLEMENTED();
     }
 
@@ -97,58 +99,60 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
 }
 
 static u32 GetDepth(int x, int y) {
-    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    const auto& framebuffer = g_state.regs.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
     u8* depth_buffer = Memory::GetPhysicalPointer(addr);
 
-    y = (registers.framebuffer.height - y);
+    y = framebuffer.height - y;
     
     const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
-    u32 stride = registers.framebuffer.width * bytes_per_pixel;
+    u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
 
     u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
     u8* src_pixel = depth_buffer + src_offset;
 
-    switch (registers.framebuffer.depth_format) {
-        case Pica::Regs::DepthFormat::D16:
+    switch (framebuffer.depth_format) {
+        case Regs::DepthFormat::D16:
             return Color::DecodeD16(src_pixel);
-        case Pica::Regs::DepthFormat::D24:
+        case Regs::DepthFormat::D24:
             return Color::DecodeD24(src_pixel);
-        case Pica::Regs::DepthFormat::D24S8:
+        case Regs::DepthFormat::D24S8:
             return Color::DecodeD24S8(src_pixel).x;
         default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", static_cast<u32>(static_cast<Regs::DepthFormat>(framebuffer.depth_format)));
             UNIMPLEMENTED();
             return 0;
     }
 }
 
 static void SetDepth(int x, int y, u32 value) {
-    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    const auto& framebuffer = g_state.regs.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
     u8* depth_buffer = Memory::GetPhysicalPointer(addr);
 
-    y = (registers.framebuffer.height - y);
+    y = framebuffer.height - y;
 
     const u32 coarse_y = y & ~7;
-    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
-    u32 stride = registers.framebuffer.width * bytes_per_pixel;
+    u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
 
     u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
     u8* dst_pixel = depth_buffer + dst_offset;
 
-    switch (registers.framebuffer.depth_format) {
-        case Pica::Regs::DepthFormat::D16:
+    switch (framebuffer.depth_format) {
+        case Regs::DepthFormat::D16:
             Color::EncodeD16(value, dst_pixel);
             break;
-        case Pica::Regs::DepthFormat::D24:
+        case Regs::DepthFormat::D24:
             Color::EncodeD24(value, dst_pixel);
             break;
-        case Pica::Regs::DepthFormat::D24S8:
+        case Regs::DepthFormat::D24S8:
             // TODO(Subv): Implement the stencil buffer
             Color::EncodeD24S8(value, 0, dst_pixel);
             break;
         default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+            LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
             UNIMPLEMENTED();
             break;
     }
@@ -200,6 +204,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                                     const VertexShader::OutputVertex& v2,
                                     bool reversed = false)
 {
+    const auto& regs = g_state.regs;
     Common::Profiling::ScopeTimer timer(rasterization_category);
 
     // vertex positions in rasterizer coordinates
@@ -216,14 +221,14 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                                    ScreenToRasterizerCoordinates(v1.screenpos),
                                    ScreenToRasterizerCoordinates(v2.screenpos) };
 
-    if (registers.cull_mode == Regs::CullMode::KeepAll) {
+    if (regs.cull_mode == Regs::CullMode::KeepAll) {
         // Make sure we always end up with a triangle wound counter-clockwise
         if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
             ProcessTriangleInternal(v0, v2, v1, true);
             return;
         }
     } else {
-        if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) {
+        if (!reversed && regs.cull_mode == Regs::CullMode::KeepClockWise) {
             // Reverse vertex order and use the CCW code path.
             ProcessTriangleInternal(v0, v2, v1, true);
             return;
@@ -268,8 +273,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
 
     auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
 
-    auto textures = registers.GetTextures();
-    auto tev_stages = registers.GetTevStages();
+    auto textures = regs.GetTextures();
+    auto tev_stages = regs.GetTevStages();
 
     // Enter rasterization loop, starting at the center of the topleft bounding box corner.
     // TODO: Not sure if looping through x first might be faster
@@ -384,8 +389,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
             // analogously.
             Math::Vec4<u8> combiner_output;
             Math::Vec4<u8> combiner_buffer = {
-                registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g,
-                registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a
+                regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g,
+                regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a
             };
 
             for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
@@ -609,51 +614,52 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
                 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
 
-                if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
+                if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
                     combiner_buffer.r() = combiner_output.r();
                     combiner_buffer.g() = combiner_output.g();
                     combiner_buffer.b() = combiner_output.b();
                 }
 
-                if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
+                if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
                     combiner_buffer.a() = combiner_output.a();
                 }
             }
 
-            if (registers.output_merger.alpha_test.enable) {
+            const auto& output_merger = regs.output_merger;
+            if (output_merger.alpha_test.enable) {
                 bool pass = false;
 
-                switch (registers.output_merger.alpha_test.func) {
-                case registers.output_merger.Never:
+                switch (output_merger.alpha_test.func) {
+                case Regs::CompareFunc::Never:
                     pass = false;
                     break;
 
-                case registers.output_merger.Always:
+                case Regs::CompareFunc::Always:
                     pass = true;
                     break;
 
-                case registers.output_merger.Equal:
-                    pass = combiner_output.a() == registers.output_merger.alpha_test.ref;
+                case Regs::CompareFunc::Equal:
+                    pass = combiner_output.a() == output_merger.alpha_test.ref;
                     break;
 
-                case registers.output_merger.NotEqual:
-                    pass = combiner_output.a() != registers.output_merger.alpha_test.ref;
+                case Regs::CompareFunc::NotEqual:
+                    pass = combiner_output.a() != output_merger.alpha_test.ref;
                     break;
 
-                case registers.output_merger.LessThan:
-                    pass = combiner_output.a() < registers.output_merger.alpha_test.ref;
+                case Regs::CompareFunc::LessThan:
+                    pass = combiner_output.a() < output_merger.alpha_test.ref;
                     break;
 
-                case registers.output_merger.LessThanOrEqual:
-                    pass = combiner_output.a() <= registers.output_merger.alpha_test.ref;
+                case Regs::CompareFunc::LessThanOrEqual:
+                    pass = combiner_output.a() <= output_merger.alpha_test.ref;
                     break;
 
-                case registers.output_merger.GreaterThan:
-                    pass = combiner_output.a() > registers.output_merger.alpha_test.ref;
+                case Regs::CompareFunc::GreaterThan:
+                    pass = combiner_output.a() > output_merger.alpha_test.ref;
                     break;
 
-                case registers.output_merger.GreaterThanOrEqual:
-                    pass = combiner_output.a() >= registers.output_merger.alpha_test.ref;
+                case Regs::CompareFunc::GreaterThanOrEqual:
+                    pass = combiner_output.a() >= output_merger.alpha_test.ref;
                     break;
                 }
 
@@ -662,8 +668,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
             }
 
             // TODO: Does depth indeed only get written even if depth testing is enabled?
-            if (registers.output_merger.depth_test_enable) {
-                unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format);
+            if (output_merger.depth_test_enable) {
+                unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
                 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 +
                                v1.screenpos[2].ToFloat32() * w1 +
                                v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
@@ -671,36 +677,36 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
 
                 bool pass = false;
 
-                switch (registers.output_merger.depth_test_func) {
-                case registers.output_merger.Never:
+                switch (output_merger.depth_test_func) {
+                case Regs::CompareFunc::Never:
                     pass = false;
                     break;
 
-                case registers.output_merger.Always:
+                case Regs::CompareFunc::Always:
                     pass = true;
                     break;
 
-                case registers.output_merger.Equal:
+                case Regs::CompareFunc::Equal:
                     pass = z == ref_z;
                     break;
 
-                case registers.output_merger.NotEqual:
+                case Regs::CompareFunc::NotEqual:
                     pass = z != ref_z;
                     break;
 
-                case registers.output_merger.LessThan:
+                case Regs::CompareFunc::LessThan:
                     pass = z < ref_z;
                     break;
 
-                case registers.output_merger.LessThanOrEqual:
+                case Regs::CompareFunc::LessThanOrEqual:
                     pass = z <= ref_z;
                     break;
 
-                case registers.output_merger.GreaterThan:
+                case Regs::CompareFunc::GreaterThan:
                     pass = z > ref_z;
                     break;
 
-                case registers.output_merger.GreaterThanOrEqual:
+                case Regs::CompareFunc::GreaterThanOrEqual:
                     pass = z >= ref_z;
                     break;
                 }
@@ -708,59 +714,59 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                 if (!pass)
                     continue;
 
-                if (registers.output_merger.depth_write_enable)
+                if (output_merger.depth_write_enable)
                     SetDepth(x >> 4, y >> 4, z);
             }
 
             auto dest = GetPixel(x >> 4, y >> 4);
             Math::Vec4<u8> blend_output = combiner_output;
 
-            if (registers.output_merger.alphablend_enable) {
-                auto params = registers.output_merger.alpha_blending;
+            if (output_merger.alphablend_enable) {
+                auto params = output_merger.alpha_blending;
 
-                auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
+                auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> {
                     switch (factor) {
-                    case params.Zero:
+                    case Regs::BlendFactor::Zero :
                         return Math::Vec3<u8>(0, 0, 0);
 
-                    case params.One:
+                    case Regs::BlendFactor::One :
                         return Math::Vec3<u8>(255, 255, 255);
 
-                    case params.SourceColor:
+                    case Regs::BlendFactor::SourceColor:
                         return combiner_output.rgb();
 
-                    case params.OneMinusSourceColor:
+                    case Regs::BlendFactor::OneMinusSourceColor:
                         return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b());
 
-                    case params.DestColor:
+                    case Regs::BlendFactor::DestColor:
                         return dest.rgb();
 
-                    case params.OneMinusDestColor:
+                    case Regs::BlendFactor::OneMinusDestColor:
                         return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b());
 
-                    case params.SourceAlpha:
+                    case Regs::BlendFactor::SourceAlpha:
                         return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a());
 
-                    case params.OneMinusSourceAlpha:
+                    case Regs::BlendFactor::OneMinusSourceAlpha:
                         return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a());
 
-                    case params.DestAlpha:
+                    case Regs::BlendFactor::DestAlpha:
                         return Math::Vec3<u8>(dest.a(), dest.a(), dest.a());
 
-                    case params.OneMinusDestAlpha:
+                    case Regs::BlendFactor::OneMinusDestAlpha:
                         return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a());
 
-                    case params.ConstantColor:
-                        return Math::Vec3<u8>(registers.output_merger.blend_const.r, registers.output_merger.blend_const.g, registers.output_merger.blend_const.b);
+                    case Regs::BlendFactor::ConstantColor:
+                        return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b);
 
-                    case params.OneMinusConstantColor:
-                        return Math::Vec3<u8>(255 - registers.output_merger.blend_const.r, 255 - registers.output_merger.blend_const.g, 255 - registers.output_merger.blend_const.b);
+                    case Regs::BlendFactor::OneMinusConstantColor:
+                        return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b);
 
-                    case params.ConstantAlpha:
-                        return Math::Vec3<u8>(registers.output_merger.blend_const.a, registers.output_merger.blend_const.a, registers.output_merger.blend_const.a);
+                    case Regs::BlendFactor::ConstantAlpha:
+                        return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a);
 
-                    case params.OneMinusConstantAlpha:
-                        return Math::Vec3<u8>(255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a);
+                    case Regs::BlendFactor::OneMinusConstantAlpha:
+                        return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a);
 
                     default:
                         LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
@@ -769,31 +775,31 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
+                auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 {
                     switch (factor) {
-                    case params.Zero:
+                    case Regs::BlendFactor::Zero:
                         return 0;
 
-                    case params.One:
+                    case Regs::BlendFactor::One:
                         return 255;
 
-                    case params.SourceAlpha:
+                    case Regs::BlendFactor::SourceAlpha:
                         return combiner_output.a();
 
-                    case params.OneMinusSourceAlpha:
+                    case Regs::BlendFactor::OneMinusSourceAlpha:
                         return 255 - combiner_output.a();
 
-                    case params.DestAlpha:
+                    case Regs::BlendFactor::DestAlpha:
                         return dest.a();
 
-                    case params.OneMinusDestAlpha:
+                    case Regs::BlendFactor::OneMinusDestAlpha:
                         return 255 - dest.a();
 
-                    case params.ConstantAlpha:
-                        return registers.output_merger.blend_const.a;
+                    case Regs::BlendFactor::ConstantAlpha:
+                        return output_merger.blend_const.a;
 
-                    case params.OneMinusConstantAlpha:
-                        return 255 - registers.output_merger.blend_const.a;
+                    case Regs::BlendFactor::OneMinusConstantAlpha:
+                        return 255 - output_merger.blend_const.a;
 
                     default:
                         LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
@@ -802,7 +808,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                using BlendEquation = decltype(params)::BlendEquation;
+                using BlendEquation = Regs::BlendEquation;
                 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
                                                        const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
                                                        BlendEquation equation) {
@@ -812,29 +818,29 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                     auto dst_result = (dest * destfactor).Cast<int>();
 
                     switch (equation) {
-                    case BlendEquation::Add:
+                    case Regs::BlendEquation::Add:
                         result = (src_result + dst_result) / 255;
                         break;
 
-                    case BlendEquation::Subtract:
+                    case Regs::BlendEquation::Subtract:
                         result = (src_result - dst_result) / 255;
                         break;
 
-                    case BlendEquation::ReverseSubtract:
+                    case Regs::BlendEquation::ReverseSubtract:
                         result = (dst_result - src_result) / 255;
                         break;
 
                     // TODO: How do these two actually work?
                     //       OpenGL doesn't include the blend factors in the min/max computations,
                     //       but is this what the 3DS actually does?
-                    case BlendEquation::Min:
+                    case Regs::BlendEquation::Min:
                         result.r() = std::min(src.r(), dest.r());
                         result.g() = std::min(src.g(), dest.g());
                         result.b() = std::min(src.b(), dest.b());
                         result.a() = std::min(src.a(), dest.a());
                         break;
 
-                    case BlendEquation::Max:
+                    case Regs::BlendEquation::Max:
                         result.r() = std::max(src.r(), dest.r());
                         result.g() = std::max(src.g(), dest.g());
                         result.b() = std::max(src.b(), dest.b());
@@ -860,15 +866,15 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
                 blend_output     = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
                 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
             } else {
-                LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op);
+                LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op);
                 UNIMPLEMENTED();
             }
 
             const Math::Vec4<u8> result = {
-                registers.output_merger.red_enable   ? blend_output.r() : dest.r(),
-                registers.output_merger.green_enable ? blend_output.g() : dest.g(),
-                registers.output_merger.blue_enable  ? blend_output.b() : dest.b(),
-                registers.output_merger.alpha_enable ? blend_output.a() : dest.a()
+                output_merger.red_enable   ? blend_output.r() : dest.r(),
+                output_merger.green_enable ? blend_output.g() : dest.g(),
+                output_merger.blue_enable  ? blend_output.b() : dest.b(),
+                output_merger.alpha_enable ? blend_output.a() : dest.a()
             };
 
             DrawPixel(x >> 4, y >> 4, result);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e44375547e..4b7d099a5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -46,7 +46,7 @@ void RasterizerOpenGL::InitObjects() {
 
     uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color");
 
-    const auto tev_stages = Pica::registers.GetTevStages();
+    const auto tev_stages = Pica::g_state.regs.GetTevStages();
     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
         auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index];
 
@@ -128,6 +128,8 @@ void RasterizerOpenGL::InitObjects() {
 }
 
 void RasterizerOpenGL::Reset() {
+    const auto& regs = Pica::g_state.regs;
+
     SyncCullMode();
     SyncBlendEnabled();
     SyncBlendFuncs();
@@ -137,46 +139,46 @@ void RasterizerOpenGL::Reset() {
     SyncDepthTest();
 
     // TEV stage 0
-    SyncTevSources(0, Pica::registers.tev_stage0);
-    SyncTevModifiers(0, Pica::registers.tev_stage0);
-    SyncTevOps(0, Pica::registers.tev_stage0);
-    SyncTevColor(0, Pica::registers.tev_stage0);
-    SyncTevMultipliers(0, Pica::registers.tev_stage0);
+    SyncTevSources(0, regs.tev_stage0);
+    SyncTevModifiers(0, regs.tev_stage0);
+    SyncTevOps(0, regs.tev_stage0);
+    SyncTevColor(0, regs.tev_stage0);
+    SyncTevMultipliers(0, regs.tev_stage0);
 
     // TEV stage 1
-    SyncTevSources(1, Pica::registers.tev_stage1);
-    SyncTevModifiers(1, Pica::registers.tev_stage1);
-    SyncTevOps(1, Pica::registers.tev_stage1);
-    SyncTevColor(1, Pica::registers.tev_stage1);
-    SyncTevMultipliers(1, Pica::registers.tev_stage1);
+    SyncTevSources(1, regs.tev_stage1);
+    SyncTevModifiers(1, regs.tev_stage1);
+    SyncTevOps(1, regs.tev_stage1);
+    SyncTevColor(1, regs.tev_stage1);
+    SyncTevMultipliers(1, regs.tev_stage1);
 
     // TEV stage 2
-    SyncTevSources(2, Pica::registers.tev_stage2);
-    SyncTevModifiers(2, Pica::registers.tev_stage2);
-    SyncTevOps(2, Pica::registers.tev_stage2);
-    SyncTevColor(2, Pica::registers.tev_stage2);
-    SyncTevMultipliers(2, Pica::registers.tev_stage2);
+    SyncTevSources(2, regs.tev_stage2);
+    SyncTevModifiers(2, regs.tev_stage2);
+    SyncTevOps(2, regs.tev_stage2);
+    SyncTevColor(2, regs.tev_stage2);
+    SyncTevMultipliers(2, regs.tev_stage2);
 
     // TEV stage 3
-    SyncTevSources(3, Pica::registers.tev_stage3);
-    SyncTevModifiers(3, Pica::registers.tev_stage3);
-    SyncTevOps(3, Pica::registers.tev_stage3);
-    SyncTevColor(3, Pica::registers.tev_stage3);
-    SyncTevMultipliers(3, Pica::registers.tev_stage3);
+    SyncTevSources(3, regs.tev_stage3);
+    SyncTevModifiers(3, regs.tev_stage3);
+    SyncTevOps(3, regs.tev_stage3);
+    SyncTevColor(3, regs.tev_stage3);
+    SyncTevMultipliers(3, regs.tev_stage3);
 
     // TEV stage 4
-    SyncTevSources(4, Pica::registers.tev_stage4);
-    SyncTevModifiers(4, Pica::registers.tev_stage4);
-    SyncTevOps(4, Pica::registers.tev_stage4);
-    SyncTevColor(4, Pica::registers.tev_stage4);
-    SyncTevMultipliers(4, Pica::registers.tev_stage4);
+    SyncTevSources(4, regs.tev_stage4);
+    SyncTevModifiers(4, regs.tev_stage4);
+    SyncTevOps(4, regs.tev_stage4);
+    SyncTevColor(4, regs.tev_stage4);
+    SyncTevMultipliers(4, regs.tev_stage4);
 
     // TEV stage 5
-    SyncTevSources(5, Pica::registers.tev_stage5);
-    SyncTevModifiers(5, Pica::registers.tev_stage5);
-    SyncTevOps(5, Pica::registers.tev_stage5);
-    SyncTevColor(5, Pica::registers.tev_stage5);
-    SyncTevMultipliers(5, Pica::registers.tev_stage5);
+    SyncTevSources(5, regs.tev_stage5);
+    SyncTevModifiers(5, regs.tev_stage5);
+    SyncTevOps(5, regs.tev_stage5);
+    SyncTevColor(5, regs.tev_stage5);
+    SyncTevMultipliers(5, regs.tev_stage5);
 
     SyncCombinerColor();
     SyncCombinerWriteFlags();
@@ -210,6 +212,8 @@ void RasterizerOpenGL::CommitFramebuffer() {
 }
 
 void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
+    const auto& regs = Pica::g_state.regs;
+
     if (!Settings::values.use_hw_renderer)
         return;
 
@@ -247,104 +251,104 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
 
     // TEV stage 0
     case PICA_REG_INDEX(tev_stage0.color_source1):
-        SyncTevSources(0, Pica::registers.tev_stage0);
+        SyncTevSources(0, regs.tev_stage0);
         break;
     case PICA_REG_INDEX(tev_stage0.color_modifier1):
-        SyncTevModifiers(0, Pica::registers.tev_stage0);
+        SyncTevModifiers(0, regs.tev_stage0);
         break;
     case PICA_REG_INDEX(tev_stage0.color_op):
-        SyncTevOps(0, Pica::registers.tev_stage0);
+        SyncTevOps(0, regs.tev_stage0);
         break;
     case PICA_REG_INDEX(tev_stage0.const_r):
-        SyncTevColor(0, Pica::registers.tev_stage0);
+        SyncTevColor(0, regs.tev_stage0);
         break;
     case PICA_REG_INDEX(tev_stage0.color_scale):
-        SyncTevMultipliers(0, Pica::registers.tev_stage0);
+        SyncTevMultipliers(0, regs.tev_stage0);
         break;
 
     // TEV stage 1
     case PICA_REG_INDEX(tev_stage1.color_source1):
-        SyncTevSources(1, Pica::registers.tev_stage1);
+        SyncTevSources(1, regs.tev_stage1);
         break;
     case PICA_REG_INDEX(tev_stage1.color_modifier1):
-        SyncTevModifiers(1, Pica::registers.tev_stage1);
+        SyncTevModifiers(1, regs.tev_stage1);
         break;
     case PICA_REG_INDEX(tev_stage1.color_op):
-        SyncTevOps(1, Pica::registers.tev_stage1);
+        SyncTevOps(1, regs.tev_stage1);
         break;
     case PICA_REG_INDEX(tev_stage1.const_r):
-        SyncTevColor(1, Pica::registers.tev_stage1);
+        SyncTevColor(1, regs.tev_stage1);
         break;
     case PICA_REG_INDEX(tev_stage1.color_scale):
-        SyncTevMultipliers(1, Pica::registers.tev_stage1);
+        SyncTevMultipliers(1, regs.tev_stage1);
         break;
 
     // TEV stage 2
     case PICA_REG_INDEX(tev_stage2.color_source1):
-        SyncTevSources(2, Pica::registers.tev_stage2);
+        SyncTevSources(2, regs.tev_stage2);
         break;
     case PICA_REG_INDEX(tev_stage2.color_modifier1):
-        SyncTevModifiers(2, Pica::registers.tev_stage2);
+        SyncTevModifiers(2, regs.tev_stage2);
         break;
     case PICA_REG_INDEX(tev_stage2.color_op):
-        SyncTevOps(2, Pica::registers.tev_stage2);
+        SyncTevOps(2, regs.tev_stage2);
         break;
     case PICA_REG_INDEX(tev_stage2.const_r):
-        SyncTevColor(2, Pica::registers.tev_stage2);
+        SyncTevColor(2, regs.tev_stage2);
         break;
     case PICA_REG_INDEX(tev_stage2.color_scale):
-        SyncTevMultipliers(2, Pica::registers.tev_stage2);
+        SyncTevMultipliers(2, regs.tev_stage2);
         break;
 
     // TEV stage 3
     case PICA_REG_INDEX(tev_stage3.color_source1):
-        SyncTevSources(3, Pica::registers.tev_stage3);
+        SyncTevSources(3, regs.tev_stage3);
         break;
     case PICA_REG_INDEX(tev_stage3.color_modifier1):
-        SyncTevModifiers(3, Pica::registers.tev_stage3);
+        SyncTevModifiers(3, regs.tev_stage3);
         break;
     case PICA_REG_INDEX(tev_stage3.color_op):
-        SyncTevOps(3, Pica::registers.tev_stage3);
+        SyncTevOps(3, regs.tev_stage3);
         break;
     case PICA_REG_INDEX(tev_stage3.const_r):
-        SyncTevColor(3, Pica::registers.tev_stage3);
+        SyncTevColor(3, regs.tev_stage3);
         break;
     case PICA_REG_INDEX(tev_stage3.color_scale):
-        SyncTevMultipliers(3, Pica::registers.tev_stage3);
+        SyncTevMultipliers(3, regs.tev_stage3);
         break;
 
     // TEV stage 4
     case PICA_REG_INDEX(tev_stage4.color_source1):
-        SyncTevSources(4, Pica::registers.tev_stage4);
+        SyncTevSources(4, regs.tev_stage4);
         break;
     case PICA_REG_INDEX(tev_stage4.color_modifier1):
-        SyncTevModifiers(4, Pica::registers.tev_stage4);
+        SyncTevModifiers(4, regs.tev_stage4);
         break;
     case PICA_REG_INDEX(tev_stage4.color_op):
-        SyncTevOps(4, Pica::registers.tev_stage4);
+        SyncTevOps(4, regs.tev_stage4);
         break;
     case PICA_REG_INDEX(tev_stage4.const_r):
-        SyncTevColor(4, Pica::registers.tev_stage4);
+        SyncTevColor(4, regs.tev_stage4);
         break;
     case PICA_REG_INDEX(tev_stage4.color_scale):
-        SyncTevMultipliers(4, Pica::registers.tev_stage4);
+        SyncTevMultipliers(4, regs.tev_stage4);
         break;
 
     // TEV stage 5
     case PICA_REG_INDEX(tev_stage5.color_source1):
-        SyncTevSources(5, Pica::registers.tev_stage5);
+        SyncTevSources(5, regs.tev_stage5);
         break;
     case PICA_REG_INDEX(tev_stage5.color_modifier1):
-        SyncTevModifiers(5, Pica::registers.tev_stage5);
+        SyncTevModifiers(5, regs.tev_stage5);
         break;
     case PICA_REG_INDEX(tev_stage5.color_op):
-        SyncTevOps(5, Pica::registers.tev_stage5);
+        SyncTevOps(5, regs.tev_stage5);
         break;
     case PICA_REG_INDEX(tev_stage5.const_r):
-        SyncTevColor(5, Pica::registers.tev_stage5);
+        SyncTevColor(5, regs.tev_stage5);
         break;
     case PICA_REG_INDEX(tev_stage5.color_scale):
-        SyncTevMultipliers(5, Pica::registers.tev_stage5);
+        SyncTevMultipliers(5, regs.tev_stage5);
         break;
     
     // TEV combiner buffer color
@@ -360,16 +364,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
 }
 
 void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) {
+    const auto& regs = Pica::g_state.regs;
+
     if (!Settings::values.use_hw_renderer)
         return;
 
-    PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress();
-    u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format)
-                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight();
+    PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
+    u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format)
+                            * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
 
-    PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress();
-    u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format)
-                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight();
+    PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
+    u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format)
+                            * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
 
     // If source memory region overlaps 3DS framebuffers, commit them before the copy happens
     if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size))
@@ -380,16 +386,18 @@ void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) {
 }
 
 void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) {
+    const auto& regs = Pica::g_state.regs;
+
     if (!Settings::values.use_hw_renderer)
         return;
 
-    PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress();
-    u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format)
-                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight();
+    PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
+    u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format)
+                            * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
 
-    PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress();
-    u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format)
-                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight();
+    PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
+    u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format)
+                            * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
 
     // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL
     if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size))
@@ -501,14 +509,16 @@ void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::
 }
 
 void RasterizerOpenGL::SyncFramebuffer() {
-    PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress();
-    Pica::Regs::ColorFormat new_fb_color_format = Pica::registers.framebuffer.color_format;
+    const auto& regs = Pica::g_state.regs;
 
-    PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress();
-    Pica::Regs::DepthFormat new_fb_depth_format = Pica::registers.framebuffer.depth_format;
+    PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
+    Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
 
-    bool fb_size_changed = fb_color_texture.width != Pica::registers.framebuffer.GetWidth() ||
-                           fb_color_texture.height != Pica::registers.framebuffer.GetHeight();
+    PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
+    Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
+
+    bool fb_size_changed = fb_color_texture.width != regs.framebuffer.GetWidth() ||
+                           fb_color_texture.height != regs.framebuffer.GetHeight();
 
     bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
                                  fb_size_changed;
@@ -532,12 +542,12 @@ void RasterizerOpenGL::SyncFramebuffer() {
     // Reconfigure framebuffer textures if any property has changed
     if (color_fb_prop_changed) {
         ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
-                                Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight());
+                                regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
     }
 
     if (depth_fb_prop_changed) {
         ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
-                                Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight());
+                                regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
 
         // Only attach depth buffer as stencil if it supports stencil
         switch (new_fb_depth_format) {
@@ -572,7 +582,9 @@ void RasterizerOpenGL::SyncFramebuffer() {
 }
 
 void RasterizerOpenGL::SyncCullMode() {
-    switch (Pica::registers.cull_mode) {
+    const auto& regs = Pica::g_state.regs;
+
+    switch (regs.cull_mode) {
     case Pica::Regs::CullMode::KeepAll:
         state.cull.enabled = false;
         break;
@@ -588,25 +600,26 @@ void RasterizerOpenGL::SyncCullMode() {
         break;
 
     default:
-        LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", Pica::registers.cull_mode.Value());
+        LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", regs.cull_mode.Value());
         UNIMPLEMENTED();
         break;
     }
 }
 
 void RasterizerOpenGL::SyncBlendEnabled() {
-    state.blend.enabled = Pica::registers.output_merger.alphablend_enable;
+    state.blend.enabled = (Pica::g_state.regs.output_merger.alphablend_enable == 1);
 }
 
 void RasterizerOpenGL::SyncBlendFuncs() {
-    state.blend.src_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_rgb);
-    state.blend.dst_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_rgb);
-    state.blend.src_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_a);
-    state.blend.dst_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_a);
+    const auto& regs = Pica::g_state.regs;
+    state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
+    state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
+    state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
+    state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a);
 }
 
 void RasterizerOpenGL::SyncBlendColor() {
-    auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.output_merger.blend_const.r);
+    auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::g_state.regs.output_merger.blend_const.r);
     state.blend.color.red = blend_color[0];
     state.blend.color.green = blend_color[1];
     state.blend.color.blue = blend_color[2];
@@ -614,9 +627,10 @@ void RasterizerOpenGL::SyncBlendColor() {
 }
 
 void RasterizerOpenGL::SyncAlphaTest() {
-    glUniform1i(uniform_alphatest_enabled, Pica::registers.output_merger.alpha_test.enable);
-    glUniform1i(uniform_alphatest_func, Pica::registers.output_merger.alpha_test.func);
-    glUniform1f(uniform_alphatest_ref, Pica::registers.output_merger.alpha_test.ref / 255.0f);
+    const auto& regs = Pica::g_state.regs;
+    glUniform1i(uniform_alphatest_enabled, regs.output_merger.alpha_test.enable);
+    glUniform1i(uniform_alphatest_func, (GLint)regs.output_merger.alpha_test.func.Value());
+    glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f);
 }
 
 void RasterizerOpenGL::SyncStencilTest() {
@@ -624,9 +638,10 @@ void RasterizerOpenGL::SyncStencilTest() {
 }
 
 void RasterizerOpenGL::SyncDepthTest() {
-    state.depth.test_enabled = Pica::registers.output_merger.depth_test_enable;
-    state.depth.test_func = PicaToGL::CompareFunc(Pica::registers.output_merger.depth_test_func);
-    state.depth.write_mask = Pica::registers.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE;
+    const auto& regs = Pica::g_state.regs;
+    state.depth.test_enabled = (regs.output_merger.depth_test_enable == 1);
+    state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func);
+    state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE;
 }
 
 void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) {
@@ -667,34 +682,37 @@ void RasterizerOpenGL::SyncTevMultipliers(unsigned stage_index, const Pica::Regs
 }
 
 void RasterizerOpenGL::SyncCombinerColor() {
-    auto combiner_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.tev_combiner_buffer_color.r);
+    auto combiner_color = PicaToGL::ColorRGBA8((u8*)&Pica::g_state.regs.tev_combiner_buffer_color.r);
     glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data());
 }
 
 void RasterizerOpenGL::SyncCombinerWriteFlags() {
-    const auto tev_stages = Pica::registers.GetTevStages();
+    const auto& regs = Pica::g_state.regs;
+    const auto tev_stages = regs.GetTevStages();
     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
         glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha,
-                    Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index),
-                    Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index));
+                    regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index),
+                    regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index));
     }
 }
 
 void RasterizerOpenGL::SyncDrawState() {
+    const auto& regs = Pica::g_state.regs;
+
     // Sync the viewport
-    GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_x).ToFloat32() * 2;
-    GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_y).ToFloat32() * 2;
+    GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2;
+    GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2;
 
     // OpenGL uses different y coordinates, so negate corner offset and flip origin
     // TODO: Ensure viewport_corner.x should not be negated or origin flipped
     // TODO: Use floating-point viewports for accuracy if supported
-    glViewport((GLsizei)static_cast<float>(Pica::registers.viewport_corner.x),
-                -(GLsizei)static_cast<float>(Pica::registers.viewport_corner.y)
-                    + Pica::registers.framebuffer.GetHeight() - viewport_height,
+    glViewport((GLsizei)static_cast<float>(regs.viewport_corner.x),
+                -(GLsizei)static_cast<float>(regs.viewport_corner.y)
+                    + regs.framebuffer.GetHeight() - viewport_height,
                 viewport_width, viewport_height);
 
     // Sync bound texture(s), upload if not cached
-    const auto pica_textures = Pica::registers.GetTextures();
+    const auto pica_textures = regs.GetTextures();
     for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
         const auto& texture = pica_textures[texture_index];
 
@@ -707,7 +725,7 @@ void RasterizerOpenGL::SyncDrawState() {
     }
 
     // Skip processing TEV stages that simply pass the previous stage results through
-    const auto tev_stages = Pica::registers.GetTevStages();
+    const auto tev_stages = regs.GetTevStages();
     for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
         glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index]));
     }
@@ -716,7 +734,7 @@ void RasterizerOpenGL::SyncDrawState() {
 }
 
 void RasterizerOpenGL::ReloadColorBuffer() {
-    u8* color_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetColorBufferPhysicalAddress());
+    u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress());
 
     if (color_buffer == nullptr)
         return;
@@ -748,7 +766,7 @@ void RasterizerOpenGL::ReloadColorBuffer() {
 
 void RasterizerOpenGL::ReloadDepthBuffer() {
     // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
-    u8* depth_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetDepthBufferPhysicalAddress());
+    u8* depth_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress());
 
     if (depth_buffer == nullptr) {
         return;
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 8369c649e6..f8763e71bb 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -41,7 +41,7 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
     return gl_mode;
 }
 
-inline GLenum BlendFunc(u32 factor) {
+inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
     static const GLenum blend_func_table[] = {
         GL_ZERO,                     // BlendFactor::Zero
         GL_ONE,                      // BlendFactor::One
@@ -61,17 +61,17 @@ inline GLenum BlendFunc(u32 factor) {
     };
 
     // Range check table for input
-    if (factor >= ARRAY_SIZE(blend_func_table)) {
-        LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", factor);
+    if ((unsigned)factor >= ARRAY_SIZE(blend_func_table)) {
+        LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", (int)factor);
         UNREACHABLE();
 
         return GL_ONE;
     }
 
-    return blend_func_table[factor];
+    return blend_func_table[(unsigned)factor];
 }
 
-inline GLenum CompareFunc(u32 func) {
+inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
     static const GLenum compare_func_table[] = {
         GL_NEVER,    // CompareFunc::Never
         GL_ALWAYS,   // CompareFunc::Always
@@ -84,14 +84,14 @@ inline GLenum CompareFunc(u32 func) {
     };
 
     // Range check table for input
-    if (func >= ARRAY_SIZE(compare_func_table)) {
-        LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func);
+    if ((unsigned)func >= ARRAY_SIZE(compare_func_table)) {
+        LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", (int)func);
         UNREACHABLE();
 
         return GL_ALWAYS;
     }
 
-    return compare_func_table[func];
+    return compare_func_table[(unsigned)func];
 }
 
 inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) {
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 981d1a356e..7d68998f12 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -26,55 +26,8 @@ namespace Pica {
 
 namespace VertexShader {
 
-static struct {
-    Math::Vec4<float24> f[96];
-
-    std::array<bool,16> b;
-
-    std::array<Math::Vec4<u8>,4> i;
-} shader_uniforms;
-
-static Math::Vec4<float24> vs_default_attributes[16];
-
-// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
-// For now, we just keep these local arrays around.
-static std::array<u32, 1024> shader_memory;
-static std::array<u32, 1024> swizzle_data;
-
-void SubmitShaderMemoryChange(u32 addr, u32 value) {
-    shader_memory[addr] = value;
-}
-
-void SubmitSwizzleDataChange(u32 addr, u32 value) {
-    swizzle_data[addr] = value;
-}
-
-Math::Vec4<float24>& GetFloatUniform(u32 index) {
-    return shader_uniforms.f[index];
-}
-
-bool& GetBoolUniform(u32 index) {
-    return shader_uniforms.b[index];
-}
-
-Math::Vec4<u8>& GetIntUniform(u32 index) {
-    return shader_uniforms.i[index];
-}
-
-Math::Vec4<float24>& GetDefaultAttribute(u32 index) {
-    return vs_default_attributes[index];
-}
-
-const std::array<u32, 1024>& GetShaderBinary() {
-    return shader_memory;
-}
-
-const std::array<u32, 1024>& GetSwizzlePatterns() {
-    return swizzle_data;
-}
-
 struct VertexShaderState {
-    u32* program_counter;
+    const u32* program_counter;
 
     const float24* input_register_table[16];
     Math::Vec4<float24> output_registers[16];
@@ -109,6 +62,9 @@ struct VertexShaderState {
 };
 
 static void ProcessShaderCode(VertexShaderState& state) {
+    const auto& uniforms = g_state.vs.uniforms;
+    const auto& swizzle_data = g_state.vs.swizzle_data;
+    const auto& program_code = g_state.vs.program_code;
 
     // Placeholder for invalid inputs
     static float24 dummy_vec4_float24[4];
@@ -116,14 +72,14 @@ static void ProcessShaderCode(VertexShaderState& state) {
     while (true) {
         if (!state.call_stack.empty()) {
             auto& top = state.call_stack.top();
-            if (state.program_counter - shader_memory.data() == top.final_address) {
+            if (state.program_counter - program_code.data() == top.final_address) {
                 state.address_registers[2] += top.loop_increment;
 
                 if (top.repeat_counter-- == 0) {
-                    state.program_counter = &shader_memory[top.return_address];
+                    state.program_counter = &program_code[top.return_address];
                     state.call_stack.pop();
                 } else {
-                    state.program_counter = &shader_memory[top.loop_address];
+                    state.program_counter = &program_code[top.loop_address];
                 }
 
                 // TODO: Is "trying again" accurate to hardware?
@@ -135,12 +91,12 @@ static void ProcessShaderCode(VertexShaderState& state) {
         const Instruction& instr = *(const Instruction*)state.program_counter;
         const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
 
-        static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions,
+        auto call = [&program_code](VertexShaderState& state, u32 offset, u32 num_instructions,
                               u32 return_offset, u8 repeat_count, u8 loop_increment) {
-            state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
+            state.program_counter = &program_code[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
             state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
         };
-        u32 binary_offset = state.program_counter - shader_memory.data();
+        u32 binary_offset = state.program_counter - program_code.data();
 
         state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset);
 
@@ -153,7 +109,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
                 return &state.temporary_registers[source_reg.GetIndex()].x;
 
             case RegisterType::FloatUniform:
-                return &shader_uniforms.f[source_reg.GetIndex()].x;
+                return &uniforms.f[source_reg.GetIndex()].x;
 
             default:
                 return dummy_vec4_float24;
@@ -471,13 +427,13 @@ static void ProcessShaderCode(VertexShaderState& state) {
 
             case OpCode::Id::JMPC:
                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
-                    state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1;
+                    state.program_counter = &program_code[instr.flow_control.dest_offset] - 1;
                 }
                 break;
 
             case OpCode::Id::JMPU:
-                if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
-                    state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1;
+                if (uniforms.b[instr.flow_control.bool_uniform_id]) {
+                    state.program_counter = &program_code[instr.flow_control.dest_offset] - 1;
                 }
                 break;
 
@@ -489,7 +445,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
                 break;
 
             case OpCode::Id::CALLU:
-                if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
+                if (uniforms.b[instr.flow_control.bool_uniform_id]) {
                     call(state,
                         instr.flow_control.dest_offset,
                         instr.flow_control.num_instructions,
@@ -510,7 +466,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
                 break;
 
             case OpCode::Id::IFU:
-                if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
+                if (uniforms.b[instr.flow_control.bool_uniform_id]) {
                     call(state,
                          binary_offset + 1,
                          instr.flow_control.dest_offset - binary_offset - 1,
@@ -545,14 +501,14 @@ static void ProcessShaderCode(VertexShaderState& state) {
 
             case OpCode::Id::LOOP:
             {
-                state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y;
+                state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y;
 
                 call(state,
                      binary_offset + 1,
                      instr.flow_control.dest_offset - binary_offset + 1,
                      instr.flow_control.dest_offset + 1,
-                     shader_uniforms.i[instr.flow_control.int_uniform_id].x,
-                     shader_uniforms.i[instr.flow_control.int_uniform_id].z);
+                     uniforms.i[instr.flow_control.int_uniform_id].x,
+                     uniforms.i[instr.flow_control.int_uniform_id].z);
                 break;
             }
 
@@ -578,15 +534,17 @@ static Common::Profiling::TimingCategory shader_category("Vertex Shader");
 OutputVertex RunShader(const InputVertex& input, int num_attributes) {
     Common::Profiling::ScopeTimer timer(shader_category);
 
+    const auto& regs = g_state.regs;
+    const auto& vs = g_state.vs;
     VertexShaderState state;
 
-    const u32* main = &shader_memory[registers.vs_main_offset];
-    state.program_counter = (u32*)main;
+    const u32* main = &vs.program_code[regs.vs_main_offset];
+    state.program_counter = main;
     state.debug.max_offset = 0;
     state.debug.max_opdesc_id = 0;
 
     // Setup input register table
-    const auto& attribute_register_map = registers.vs_input_register_map;
+    const auto& attribute_register_map = regs.vs_input_register_map;
     float24 dummy_register;
     boost::fill(state.input_register_table, &dummy_register);
     
@@ -611,16 +569,16 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
     state.conditional_code[1] = false;
 
     ProcessShaderCode(state);
-    DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
-                           state.debug.max_opdesc_id, registers.vs_main_offset,
-                           registers.vs_output_attributes);
+    DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(),
+                           state.debug.max_opdesc_id, regs.vs_main_offset,
+                           regs.vs_output_attributes);
 
     // Setup output data
     OutputVertex ret;
     // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
     // figure out what those circumstances are and enable the remaining outputs then.
     for (int i = 0; i < 7; ++i) {
-        const auto& output_register_map = registers.vs_output_attributes[i];
+        const auto& output_register_map = regs.vs_output_attributes[i];
 
         u32 semantics[4] = {
             output_register_map.map_x, output_register_map.map_y,
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
index c26709bbc4..7471a6de8e 100644
--- a/src/video_core/vertex_shader.h
+++ b/src/video_core/vertex_shader.h
@@ -66,19 +66,8 @@ struct OutputVertex {
 static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
 static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
 
-void SubmitShaderMemoryChange(u32 addr, u32 value);
-void SubmitSwizzleDataChange(u32 addr, u32 value);
-
 OutputVertex RunShader(const InputVertex& input, int num_attributes);
 
-Math::Vec4<float24>& GetFloatUniform(u32 index);
-bool& GetBoolUniform(u32 index);
-Math::Vec4<u8>& GetIntUniform(u32 index);
-Math::Vec4<float24>& GetDefaultAttribute(u32 index);
-
-const std::array<u32, 1024>& GetShaderBinary();
-const std::array<u32, 1024>& GetSwizzlePatterns();
-
 } // namespace
 
 } // namespace
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index d4d907d5eb..3becc42615 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -8,9 +8,11 @@
 #include "core/core.h"
 #include "core/settings.h"
 
-#include "video_core/video_core.h"
-#include "video_core/renderer_base.h"
-#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core.h"
+#include "renderer_base.h"
+#include "renderer_opengl/renderer_opengl.h"
+
+#include "pica.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Video Core namespace
@@ -24,6 +26,8 @@ std::atomic<bool> g_hw_renderer_enabled;
 
 /// Initialize the video core
 void Init(EmuWindow* emu_window) {
+    Pica::Init();
+
     g_emu_window = emu_window;
     g_renderer = new RendererOpenGL();
     g_renderer->SetWindow(g_emu_window);
@@ -34,7 +38,10 @@ void Init(EmuWindow* emu_window) {
 
 /// Shutdown the video core
 void Shutdown() {
+    Pica::Shutdown();
+
     delete g_renderer;
+
     LOG_DEBUG(Render, "shutdown OK");
 }