From 4e6c64bf8d3622c6296f70ab64018c8c85855b0b Mon Sep 17 00:00:00 2001
From: Rodolfo Bogado <rodolfoosvaldobogado@gmail.com>
Date: Wed, 7 Nov 2018 22:27:47 -0300
Subject: [PATCH] Improve state management by splitting some of the states into
 separate functions to avoid a full apply overhead

---
 .../renderer_opengl/gl_rasterizer.cpp         | 54 ++++++++-----------
 .../renderer_opengl/gl_rasterizer.h           |  6 +--
 .../renderer_opengl/gl_rasterizer_cache.cpp   |  2 +-
 src/video_core/renderer_opengl/gl_state.cpp   |  8 ++-
 src/video_core/renderer_opengl/gl_state.h     |  4 ++
 .../renderer_opengl/maxwell_to_gl.h           |  5 +-
 6 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8dd7bd5144..c4fe86b490 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -140,7 +140,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
     if (is_cache_miss) {
         VAO.Create();
         state.draw.vertex_array = VAO.handle;
-        state.Apply();
+        state.ApplyVertexBufferState();
 
         // The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
         // around.
@@ -182,7 +182,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
         }
     }
     state.draw.vertex_array = VAO.handle;
-    state.Apply();
+    state.ApplyVertexBufferState();
 }
 
 void RasterizerOpenGL::SetupVertexBuffer() {
@@ -342,8 +342,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             index++;
         }
     }
-
-    state.Apply();
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -412,8 +410,8 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
         cached_pages.add({pages_interval, delta});
 }
 
-void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
-                                             bool preserve_contents,
+void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
+                                             bool using_depth_fb, bool preserve_contents,
                                              std::optional<std::size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -429,9 +427,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
     ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
 
     // Bind the framebuffer surfaces
-    state.draw.draw_framebuffer = framebuffer.handle;
-    state.Apply();
-    state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
+    current_state.draw.draw_framebuffer = framebuffer.handle;
+    current_state.ApplyFramebufferState();
+    current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
 
     if (using_color_fb) {
         if (single_color_target) {
@@ -509,10 +507,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                                0);
     }
-
-    SyncViewport();
-
-    state.Apply();
+    SyncViewport(current_state);
 }
 
 void RasterizerOpenGL::Clear() {
@@ -525,22 +520,23 @@ void RasterizerOpenGL::Clear() {
     bool use_stencil{};
 
     OpenGLState clear_state;
-    clear_state.draw.draw_framebuffer = framebuffer.handle;
-    clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
-    clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
-    clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
-    clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
-
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
         use_color = true;
     }
+    if (use_color) {
+        clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
+        clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
+        clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
+        clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
+    }
     if (regs.clear_buffers.Z) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
         use_depth = true;
 
         // Always enable the depth write when clearing the depth buffer. The depth write mask is
-        // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
+        // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to
+        // true.
         clear_state.depth.test_enabled = true;
         clear_state.depth.test_func = GL_ALWAYS;
     }
@@ -557,11 +553,8 @@ void RasterizerOpenGL::Clear() {
 
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+    ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
                           regs.clear_buffers.RT.Value());
-    // Copy the sRGB setting to the clear state to avoid problem with
-    // specific driver implementations
-    clear_state.framebuffer_srgb.enabled = state.framebuffer_srgb.enabled;
     clear_state.Apply();
 
     if (use_color) {
@@ -587,7 +580,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    ConfigureFramebuffers();
+    ConfigureFramebuffers(state);
     SyncColorMask();
     SyncDepthTestState();
     SyncStencilTestState();
@@ -608,7 +601,7 @@ void RasterizerOpenGL::DrawArrays() {
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
 
     state.draw.vertex_buffer = buffer_cache.GetHandle();
-    state.Apply();
+    state.ApplyVertexBufferState();
 
     std::size_t buffer_size = CalculateVertexArraysSize();
 
@@ -923,11 +916,11 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
     return current_unit + static_cast<u32>(entries.size());
 }
 
-void RasterizerOpenGL::SyncViewport() {
+void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
     for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
         const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
-        auto& viewport = state.viewports[i];
+        auto& viewport = current_state.viewports[i];
         viewport.x = viewport_rect.left;
         viewport.y = viewport_rect.bottom;
         viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
@@ -1131,9 +1124,8 @@ void RasterizerOpenGL::CheckAlphaTests() {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
     if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
-        LOG_CRITICAL(
-            Render_OpenGL,
-            "Alpha Testing is enabled with Multiple Render Targets, this behavior is undefined.");
+        LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
+                                    "this behavior is undefined.");
         UNREACHABLE();
     }
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index aa793caf2d..8ef0f6c129 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -109,8 +109,8 @@ private:
      * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
      * @param single_color_target Specifies if a single color buffer target should be used.
      */
-    void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
-                               bool preserve_contents = true,
+    void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
+                               bool using_depth_fb = true, bool preserve_contents = true,
                                std::optional<std::size_t> single_color_target = {});
 
     /*
@@ -134,7 +134,7 @@ private:
                       GLenum primitive_mode, u32 current_unit);
 
     /// Syncs the viewport and depth range to match the guest state
-    void SyncViewport();
+    void SyncViewport(OpenGLState& current_state);
 
     /// Syncs the clip enabled status to match the guest state
     void SyncClipEnabled();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index c8864cce89..894f4f2944 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -580,7 +580,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
     state.draw.draw_framebuffer = draw_fb_handle;
     // Set sRGB enabled if the destination surfaces need it
     state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
-    state.Apply();
+    state.ApplyFramebufferState();
 
     u32 buffers{};
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 9517285e56..2635f2b0cd 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -427,7 +427,7 @@ void OpenGLState::ApplySamplers() const {
     }
 }
 
-void OpenGLState::Apply() const {
+void OpenGLState::ApplyFramebufferState() const {
     // Framebuffer
     if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
         glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@@ -435,7 +435,9 @@ void OpenGLState::Apply() const {
     if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
     }
+}
 
+void OpenGLState::ApplyVertexBufferState() const {
     // Vertex array
     if (draw.vertex_array != cur_state.draw.vertex_array) {
         glBindVertexArray(draw.vertex_array);
@@ -445,7 +447,11 @@ void OpenGLState::Apply() const {
     if (draw.vertex_buffer != cur_state.draw.vertex_buffer) {
         glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer);
     }
+}
 
+void OpenGLState::Apply() const {
+    ApplyFramebufferState();
+    ApplyVertexBufferState();
     // Uniform buffer
     if (draw.uniform_buffer != cur_state.draw.uniform_buffer) {
         glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b8cf1f637c..eacca0b9cd 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -181,6 +181,10 @@ public:
     }
     /// Apply this state as the current OpenGL state
     void Apply() const;
+    /// Apply only the state affecting the framebuffer
+    void ApplyFramebufferState() const;
+    /// Apply only the state affecting the vertex buffer
+    void ApplyVertexBufferState() const;
     /// Set the initial OpenGL state
     static void ApplyDefaultState();
     /// Resets any references to the given resource
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 32dc158e4e..3ce2cc6d27 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -159,8 +159,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
         }
     }
     }
-    LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}",
-                 static_cast<u32>(filter_mode));
+    LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
     return GL_LINEAR;
 }
 
@@ -206,7 +205,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
         return GL_ALWAYS;
     }
     LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
-                 static_cast<u32>(func));
+              static_cast<u32>(func));
     return GL_GREATER;
 }