From 82210ab480c1e508de69722b7391ed2916ae2fe5 Mon Sep 17 00:00:00 2001
From: jphalimi <jeanphilippe.halimi@gmail.com>
Date: Wed, 23 Nov 2016 20:10:34 -0800
Subject: [PATCH] Cache Vertices instead of Output registers (#2165)

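This patch improves performance by 3% on average. The vertex cache now
stores the converted OutputVertex instead of the raw output registers,
so ToVertex() is only called when the vertex shader is actually run
rather than for every vertex. This removes ToVertex() as an important
hotspot of the emulator.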
---
 src/video_core/command_processor.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 6bd5b281c6..b7c32035ef 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -235,7 +235,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
         // The size has been tuned for optimal balance between hit-rate and the cost of lookup
         const size_t VERTEX_CACHE_SIZE = 32;
         std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
-        std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache;
+        std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
+        Shader::OutputVertex output_vertex;
 
         unsigned int vertex_cache_pos = 0;
         vertex_cache_ids.fill(-1);
@@ -265,7 +266,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 
                 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
                     if (vertex == vertex_cache_ids[i]) {
-                        output_registers = vertex_cache[i];
+                        output_vertex = vertex_cache[i];
                         vertex_cache_hit = true;
                         break;
                     }
@@ -284,16 +285,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                 g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
                 output_registers = shader_unit.output_registers;
 
+                // Retrieve vertex from register data
+                output_vertex = output_registers.ToVertex(regs.vs);
+
                 if (is_indexed) {
-                    vertex_cache[vertex_cache_pos] = output_registers;
+                    vertex_cache[vertex_cache_pos] = output_vertex;
                     vertex_cache_ids[vertex_cache_pos] = vertex;
                     vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
                 }
             }
 
-            // Retrieve vertex from register data
-            Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs);
-
             // Send to renderer
             using Pica::Shader::OutputVertex;
             auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,