From 5d31bab69a9cdc720347dfd69a9f5011b361e17a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 9 Jul 2019 18:02:03 -0400
Subject: [PATCH 1/7] Texture_Cache: Correct Linear Structural Match.

---
 src/video_core/texture_cache/surface_base.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 7a0fdb19bc..6af9044cad 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
 
     // Linear Surface check
     if (!params.is_tiled) {
-        if (std::tie(params.width, params.height, params.pitch) ==
-            std::tie(rhs.width, rhs.height, rhs.pitch)) {
-            return MatchStructureResult::FullMatch;
+        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
+            if (params.width == rhs.width) {
+                return MatchStructureResult::FullMatch;
+            } else {
+                return MatchStructureResult::SemiMatch;
+            }
         }
         return MatchStructureResult::None;
     }

From 5c1e1a148ee6be4f8c33264d210467da92702c6a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 9 Jul 2019 19:49:53 -0400
Subject: [PATCH 2/7] Gl_Texture_Cache: Measure Buffer Copy Times

---
 src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 08ae1a429b..c6c76de081 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -31,6 +31,7 @@ using VideoCore::Surface::SurfaceType;
 
 MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
 MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
+MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", MP_RGB(128, 192, 128));
 
 namespace {
 
@@ -535,6 +536,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
 }
 
 void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
+    MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
     const auto& src_params = src_surface->GetSurfaceParams();
     const auto& dst_params = dst_surface->GetSurfaceParams();
     UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);

From a9943222f2afce6255b635091099925b3e451c8b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 9 Jul 2019 20:58:30 -0400
Subject: [PATCH 3/7] GL_State: Add a microprofile timer to OpenGL state.

---
 src/video_core/renderer_opengl/gl_state.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index d86e137ac1..0eae98afef 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -6,8 +6,11 @@
 #include <glad/glad.h>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "video_core/renderer_opengl/gl_state.h"
 
+MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
+
 namespace OpenGL {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const {
 }
 
 void OpenGLState::Apply() const {
+    MICROPROFILE_SCOPE(OpenGL_State);
     ApplyFramebufferState();
     ApplyVertexArrayState();
     ApplyShaderProgram();

From 913b7a6872a67a0dd689bb19bc4ecfef7fb9cdcd Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 9 Jul 2019 21:27:27 -0400
Subject: [PATCH 4/7] GPU: Add a microprofile for macro interpreter

---
 src/video_core/macro_interpreter.cpp                | 4 ++++
 src/video_core/renderer_opengl/gl_texture_cache.cpp | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index c766ed692b..9f59a2dc1f 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -4,14 +4,18 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/macro_interpreter.h"
 
+MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
+
 namespace Tegra {
 
 MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
 
 void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
+    MICROPROFILE_SCOPE(MacroInterp);
     Reset();
     registers[1] = parameters[0];
     this->parameters = std::move(parameters);
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index c6c76de081..b1f6bc7c20 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -31,7 +31,8 @@ using VideoCore::Surface::SurfaceType;
 
 MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
 MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", MP_RGB(128, 192, 128));
+MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
+                    MP_RGB(128, 192, 128));
 
 namespace {
 

From 5818959e543041fdff8965e71e52d55a05ee22de Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 11 Jul 2019 15:15:21 -0400
Subject: [PATCH 5/7] Texture_Cache: Force Framebuffer reset if an active
 render target is unregistered.

---
 src/video_core/texture_cache/surface_base.h   |  8 +++++-
 .../texture_cache/surface_params.cpp          | 13 +++++++---
 src/video_core/texture_cache/texture_cache.h  | 25 ++++++++++++++-----
 3 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 8ba386a8ac..fb6378bc73 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -200,8 +200,9 @@ public:
         modification_tick = tick;
     }
 
-    void MarkAsRenderTarget(const bool is_target) {
+    void MarkAsRenderTarget(const bool is_target, const u32 index) {
         this->is_target = is_target;
+        this->index = index;
     }
 
     void MarkAsPicked(const bool is_picked) {
@@ -221,6 +222,10 @@ public:
         return is_target;
     }
 
+    u32 GetRenderTarget() const {
+        return index;
+    }
+
     bool IsRegistered() const {
         return is_registered;
     }
@@ -311,6 +316,7 @@ private:
     bool is_target{};
     bool is_registered{};
     bool is_picked{};
+    u32 index{0xFFFFFFFF};
     u64 modification_tick{};
 };
 
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9c56e2b4f1..df92608599 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
 
 std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
                                                     bool uncompressed) const {
-    const bool tiled{as_host_size ? false : is_tiled};
     const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
     const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
     const u32 depth{is_layered ? 1U : GetMipDepth(level)};
-    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
-                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
+    if (is_tiled) {
+        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, depth,
+                                             GetMipBlockHeight(level), GetMipBlockDepth(level));
+    } else {
+        if (as_host_size || IsBuffer()) {
+            return GetBytesPerPixel()*width*height*depth;
+        } else {
+            return pitch*height*depth;
+        }
+    }
 }
 
 bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c9e72531a5..5e9812bb93 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -133,11 +133,11 @@ public:
             regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
         auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(false);
+            depth_buffer.target->MarkAsRenderTarget(false, -1);
         depth_buffer.target = surface_view.first;
         depth_buffer.view = surface_view.second;
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(true);
+            depth_buffer.target->MarkAsRenderTarget(true, 8);
         return surface_view.second;
     }
 
@@ -167,11 +167,11 @@ public:
         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                        preserve_contents, true);
         if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(false);
+            render_targets[index].target->MarkAsRenderTarget(false, -1);
         render_targets[index].target = surface_view.first;
         render_targets[index].view = surface_view.second;
         if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(true);
+            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
         return surface_view.second;
     }
 
@@ -191,7 +191,7 @@ public:
         if (depth_buffer.target == nullptr) {
             return;
         }
-        depth_buffer.target->MarkAsRenderTarget(false);
+        depth_buffer.target->MarkAsRenderTarget(false, -1);
         depth_buffer.target = nullptr;
         depth_buffer.view = nullptr;
     }
@@ -200,7 +200,7 @@ public:
         if (render_targets[index].target == nullptr) {
             return;
         }
-        render_targets[index].target->MarkAsRenderTarget(false);
+        render_targets[index].target->MarkAsRenderTarget(false, -1);
         render_targets[index].target = nullptr;
         render_targets[index].view = nullptr;
     }
@@ -270,6 +270,16 @@ protected:
     // and reading it from a sepparate buffer.
     virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
 
+    void ManageRenderTargetUnregister(TSurface& surface) {
+        auto& maxwell3d = system.GPU().Maxwell3D();
+        u32 index = surface->GetRenderTarget();
+        if (index == 8) {
+            maxwell3d.dirty_flags.zeta_buffer = true;
+        } else {
+            maxwell3d.dirty_flags.color_buffer.set(index, true);
+        }
+    }
+
     void Register(TSurface surface) {
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
         const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@@ -294,6 +304,9 @@ protected:
         if (guard_render_targets && surface->IsProtected()) {
             return;
         }
+        if (!guard_render_targets && surface->IsRenderTarget()) {
+            ManageRenderTargetUnregister(surface);
+        }
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
         const CacheAddr cache_ptr = surface->GetCacheAddr();
         const std::size_t size = surface->GetSizeInBytes();

From 0f54b541f4877eda87ad968708fa38ce604c3a80 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 14 Jul 2019 08:41:06 -0400
Subject: [PATCH 6/7] Texture_Cache: Remove an imprecise fallback case and
 apply clang-format

---
 src/video_core/texture_cache/surface_params.cpp | 9 +++++----
 src/video_core/texture_cache/texture_cache.h    | 9 ---------
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index df92608599..33c94daa89 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -294,13 +294,14 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size
     const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
     const u32 depth{is_layered ? 1U : GetMipDepth(level)};
     if (is_tiled) {
-        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, depth,
-                                             GetMipBlockHeight(level), GetMipBlockDepth(level));
+        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
+                                             depth, GetMipBlockHeight(level),
+                                             GetMipBlockDepth(level));
     } else {
         if (as_host_size || IsBuffer()) {
-            return GetBytesPerPixel()*width*height*depth;
+            return GetBytesPerPixel() * width * height * depth;
         } else {
-            return pitch*height*depth;
+            return pitch * height * depth;
         }
     }
 }
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 5e9812bb93..6d3d2da7db 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -662,15 +662,6 @@ private:
                 }
                 return {current_surface, *view};
             }
-            // The next case is unsafe, so if we r in accurate GPU, just skip it
-            if (Settings::values.use_accurate_gpu_emulation) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      MatchTopologyResult::FullMatch);
-            }
-            // This is the case the texture is a part of the parent.
-            if (current_surface->MatchesSubTexture(params, gpu_addr)) {
-                return RebuildSurface(current_surface, params, is_render);
-            }
         } else {
             // If there are many overlaps, odds are they are subtextures of the candidate
             // surface. We try to construct a new surface based on the candidate parameters,

From 2ac7472d3f94f1adb33c0a1d7748e922c515f6a8 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 14 Jul 2019 17:42:39 -0400
Subject: [PATCH 7/7] Texture_Cache: Address Feedback

---
 src/video_core/texture_cache/surface_base.h     |  4 +++-
 src/video_core/texture_cache/surface_params.cpp |  9 ++++-----
 src/video_core/texture_cache/texture_cache.h    | 17 ++++++++++-------
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index fb6378bc73..bcce8d8634 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -312,11 +312,13 @@ private:
         return view;
     }
 
+    static constexpr u32 NO_RT = 0xFFFFFFFF;
+
     bool is_modified{};
     bool is_target{};
     bool is_registered{};
     bool is_picked{};
-    u32 index{0xFFFFFFFF};
+    u32 index{NO_RT};
     u64 modification_tick{};
 };
 
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 33c94daa89..fd54724513 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -297,12 +297,11 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size
         return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
                                              depth, GetMipBlockHeight(level),
                                              GetMipBlockDepth(level));
+    } else if (as_host_size || IsBuffer()) {
+        return GetBytesPerPixel() * width * height * depth;
     } else {
-        if (as_host_size || IsBuffer()) {
-            return GetBytesPerPixel() * width * height * depth;
-        } else {
-            return pitch * height * depth;
-        }
+        // Linear Texture Case
+        return pitch * height * depth;
     }
 }
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6d3d2da7db..7f9623c623 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -133,11 +133,11 @@ public:
             regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
         auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(false, -1);
+            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
         depth_buffer.target = surface_view.first;
         depth_buffer.view = surface_view.second;
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(true, 8);
+            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
         return surface_view.second;
     }
 
@@ -167,7 +167,7 @@ public:
         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                        preserve_contents, true);
         if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(false, -1);
+            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
         render_targets[index].target = surface_view.first;
         render_targets[index].view = surface_view.second;
         if (render_targets[index].target)
@@ -191,7 +191,7 @@ public:
         if (depth_buffer.target == nullptr) {
             return;
         }
-        depth_buffer.target->MarkAsRenderTarget(false, -1);
+        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
         depth_buffer.target = nullptr;
         depth_buffer.view = nullptr;
     }
@@ -200,7 +200,7 @@ public:
         if (render_targets[index].target == nullptr) {
             return;
         }
-        render_targets[index].target->MarkAsRenderTarget(false, -1);
+        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
         render_targets[index].target = nullptr;
         render_targets[index].view = nullptr;
     }
@@ -272,8 +272,8 @@ protected:
 
     void ManageRenderTargetUnregister(TSurface& surface) {
         auto& maxwell3d = system.GPU().Maxwell3D();
-        u32 index = surface->GetRenderTarget();
-        if (index == 8) {
+        const u32 index = surface->GetRenderTarget();
+        if (index == DEPTH_RT) {
             maxwell3d.dirty_flags.zeta_buffer = true;
         } else {
             maxwell3d.dirty_flags.color_buffer.set(index, true);
@@ -797,6 +797,9 @@ private:
     static constexpr u64 registry_page_size{1 << registry_page_bits};
     std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
 
+    static constexpr u32 DEPTH_RT = 8;
+    static constexpr u32 NO_RT = 0xFFFFFFFF;
+
     // The L1 Cache is used for fast texture lookup before checking the overlaps
     // This avoids calculating size and other stuffs.
     std::unordered_map<CacheAddr, TSurface> l1_cache;