From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 8 May 2019 18:27:29 -0400
Subject: [PATCH] Reduce amount of size calculations.

---
 src/common/common_funcs.h                     | 11 ++++
 .../renderer_opengl/gl_texture_cache.cpp      |  1 -
 .../renderer_opengl/gl_texture_cache.h        |  2 +-
 src/video_core/texture_cache/surface_base.cpp | 22 +++++---
 src/video_core/texture_cache/surface_base.h   | 28 ++++-------
 .../texture_cache/surface_params.cpp          | 31 +-----------
 src/video_core/texture_cache/surface_params.h | 50 +++++++++++++++----
 src/video_core/texture_cache/texture_cache.h  | 40 +++++++--------
 8 files changed, 97 insertions(+), 88 deletions(-)

diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 8b0d34da6e..00a5698f3e 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <algorithm>
 #include <string>
 
 #if !defined(ARCHITECTURE_x86_64)
@@ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) {
     return a | b << 8 | c << 16 | d << 24;
 }
 
+template <class ForwardIt, class T, class Compare = std::less<>>
+ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) {
+    // Binary-searches the range [first, last), which must already be ordered by comp, and
+    // returns an iterator to the first element equivalent to value, or last if none exists.
+    // comp is invoked both as comp(element, value) and as comp(value, element).
+
+    first = std::lower_bound(first, last, value, comp);
+    return first != last && !comp(value, *first) ? first : last;
+}
+
 } // namespace Common
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index a58e3a816e..32cb08963e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -240,7 +240,6 @@ CachedSurface::~CachedSurface() {
 }
 
 void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
-    LOG_CRITICAL(Render_OpenGL, "Flushing");
     MICROPROFILE_SCOPE(OpenGL_Texture_Download);
 
     // TODO(Rodrigo): Optimize alignment
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1ad01137b4..0a1b57014d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -133,7 +133,7 @@ protected:
                    const VideoCommon::CopyParams& copy_params) override;
 
     void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle<u32>& src_rect,
-                           const Common::Rectangle<u32>& dst_rect) override;
+                   const Common::Rectangle<u32>& dst_rect) override;
 
 private:
     OGLFramebuffer src_framebuffer;
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index dc50132403..36ca72b4a2 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -19,19 +19,27 @@ using Tegra::Texture::ConvertFromGuestToHost;
 using VideoCore::MortonSwizzleMode;
 
 SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
-    : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()},
-      guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{
-                                                           params.GetHostSizeInBytes()} {
-    mipmap_offsets.reserve(params.num_levels);
-    mipmap_sizes.reserve(params.num_levels);
+    : params{params}, mipmap_sizes(params.num_levels),
+      mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{
+                                                                 params.GetHostSizeInBytes()} {
 
     std::size_t offset = 0;
     for (u32 level = 0; level < params.num_levels; ++level) {
         const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
-        mipmap_sizes.push_back(mipmap_size);
-        mipmap_offsets.push_back(offset);
+        mipmap_sizes[level] = mipmap_size;
+        mipmap_offsets[level] = offset;
         offset += mipmap_size;
     }
+    layer_size = offset;
+    if (params.is_layered) {
+        if (params.is_tiled) {
+            layer_size =
+                SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
+        }
+        guest_memory_size = layer_size * params.depth;
+    } else {
+        guest_memory_size = layer_size;
+    }
 }
 
 void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 179e80ddba..095deb6026 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include "common/assert.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
 #include "video_core/morton.h"
@@ -16,16 +17,6 @@
 #include "video_core/texture_cache/surface_params.h"
 #include "video_core/texture_cache/surface_view.h"
 
-template <class ForwardIt, class T, class Compare = std::less<>>
-ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) {
-    // Note: BOTH type T and the type after ForwardIt is dereferenced
-    // must be implicitly convertible to BOTH Type1 and Type2, used in Compare.
-    // This is stricter than lower_bound requirement (see above)
-
-    first = std::lower_bound(first, last, value, comp);
-    return first != last && !comp(value, *first) ? first : last;
-}
-
 namespace Tegra {
 class MemoryManager;
 }
@@ -153,7 +144,7 @@ public:
         const auto layer{static_cast<u32>(relative_address / layer_size)};
         const GPUVAddr mipmap_address = relative_address - layer_size * layer;
         const auto mipmap_it =
-            binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
+            Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
         if (mipmap_it == mipmap_offsets.end()) {
             return {};
         }
@@ -172,8 +163,8 @@ protected:
     virtual void DecorateSurfaceName() = 0;
 
     const SurfaceParams params;
-    const std::size_t layer_size;
-    const std::size_t guest_memory_size;
+    std::size_t layer_size;
+    std::size_t guest_memory_size;
     const std::size_t host_memory_size;
     GPUVAddr gpu_addr{};
     CacheAddr cache_addr{};
@@ -268,9 +259,11 @@ public:
         return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
     }
 
-    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) {
-        if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D ||
-            params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) {
+    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
+                                     const std::size_t candidate_size) {
+        if (params.target == SurfaceTarget::Texture3D ||
+            (params.num_levels == 1 && !params.is_layered) ||
+            view_params.target == SurfaceTarget::Texture3D) {
             return {};
         }
         const auto layer_mipmap{GetLayerMipmap(view_addr)};
@@ -279,8 +272,7 @@ public:
         }
         const u32 layer{layer_mipmap->first};
         const u32 mipmap{layer_mipmap->second};
-        const std::size_t size{view_params.GetGuestSizeInBytes()};
-        if (GetMipmapSize(mipmap) != size) {
+        if (GetMipmapSize(mipmap) != candidate_size) {
             // TODO: The view may cover many mimaps, this case can still go on.
             // This edge-case can be safely be ignored since it will just result in worse
             // performance.
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index d9052152cc..b537b26e2d 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -4,13 +4,12 @@
 
 #include <map>
 
-#include "common/cityhash.h"
 #include "common/alignment.h"
+#include "common/cityhash.h"
 #include "core/core.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/surface.h"
 #include "video_core/texture_cache/surface_params.h"
-#include "video_core/textures/decoders.h"
 
 namespace VideoCommon {
 
@@ -169,18 +168,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
     return params;
 }
 
-u32 SurfaceParams::GetMipWidth(u32 level) const {
-    return std::max(1U, width >> level);
-}
-
-u32 SurfaceParams::GetMipHeight(u32 level) const {
-    return std::max(1U, height >> level);
-}
-
-u32 SurfaceParams::GetMipDepth(u32 level) const {
-    return is_layered ? depth : std::max(1U, depth >> level);
-}
-
 bool SurfaceParams::IsLayered() const {
     switch (target) {
     case SurfaceTarget::Texture1DArray:
@@ -275,22 +262,6 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
     return GetInnerMipmapMemorySize(level, true, false);
 }
 
-u32 SurfaceParams::GetDefaultBlockWidth() const {
-    return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
-}
-
-u32 SurfaceParams::GetDefaultBlockHeight() const {
-    return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
-}
-
-u32 SurfaceParams::GetBitsPerPixel() const {
-    return VideoCore::Surface::GetFormatBpp(pixel_format);
-}
-
-u32 SurfaceParams::GetBytesPerPixel() const {
-    return VideoCore::Surface::GetBytesPerPixel(pixel_format);
-}
-
 bool SurfaceParams::IsPixelFormatZeta() const {
     return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
            pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index ec8efa210e..e0ec1be0ee 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -10,8 +10,9 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/surface.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/surface.h"
+#include "video_core/textures/decoders.h"
 
 namespace VideoCommon {
 
@@ -50,10 +51,17 @@ public:
     std::size_t GetHostSizeInBytes() const {
         std::size_t host_size_in_bytes;
         if (IsPixelFormatASTC(pixel_format)) {
+            constexpr std::size_t rgb8_bpp = 4ULL;
             // ASTC is uncompressed in software, in emulated as RGBA8
-            host_size_in_bytes = static_cast<std::size_t>(Common::AlignUp(width, GetDefaultBlockWidth())) *
-                                 static_cast<std::size_t>(Common::AlignUp(height, GetDefaultBlockHeight())) *
-                                 static_cast<std::size_t>(depth) * 4ULL;
+            host_size_in_bytes = 0;
+            for (u32 level = 0; level < num_levels; level++) {
+                // Locals are prefixed with mip_ so they do not shadow the members they read.
+                const std::size_t mip_width =
+                    Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth());
+                const std::size_t mip_height =
+                    Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight());
+                host_size_in_bytes += mip_width * mip_height * GetMipDepth(level) * rgb8_bpp;
+            }
         } else {
             host_size_in_bytes = GetInnerMemorySize(true, false, false);
         }
@@ -65,13 +73,19 @@ public:
     }
 
     /// Returns the width of a given mipmap level.
-    u32 GetMipWidth(u32 level) const;
+    u32 GetMipWidth(u32 level) const {
+        return std::max(1U, width >> level);
+    }
 
     /// Returns the height of a given mipmap level.
-    u32 GetMipHeight(u32 level) const;
+    u32 GetMipHeight(u32 level) const {
+        return std::max(1U, height >> level);
+    }
 
     /// Returns the depth of a given mipmap level.
-    u32 GetMipDepth(u32 level) const;
+    u32 GetMipDepth(u32 level) const {
+        return is_layered ? depth : std::max(1U, depth >> level);
+    }
 
     /// Returns the block height of a given mipmap level.
     u32 GetMipBlockHeight(u32 level) const;
@@ -79,6 +93,12 @@ public:
     /// Returns the block depth of a given mipmap level.
     u32 GetMipBlockDepth(u32 level) const;
 
+    /// Out-of-class helper: aligns out_size up to GOB size * block_height * block_depth.
+    static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
+                                    const u32 block_depth) {
+        return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth);
+    }
+
     /// Returns the offset in bytes in guest memory of a given mipmap level.
     std::size_t GetGuestMipmapLevelOffset(u32 level) const;
 
@@ -98,16 +118,24 @@ public:
     std::size_t GetHostLayerSize(u32 level) const;
 
     /// Returns the default block width.
-    u32 GetDefaultBlockWidth() const;
+    u32 GetDefaultBlockWidth() const {
+        return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
+    }
 
     /// Returns the default block height.
-    u32 GetDefaultBlockHeight() const;
+    u32 GetDefaultBlockHeight() const {
+        return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
+    }
 
     /// Returns the bits per pixel.
-    u32 GetBitsPerPixel() const;
+    u32 GetBitsPerPixel() const {
+        return VideoCore::Surface::GetFormatBpp(pixel_format);
+    }
 
     /// Returns the bytes per pixel.
-    u32 GetBytesPerPixel() const;
+    u32 GetBytesPerPixel() const {
+        return VideoCore::Surface::GetBytesPerPixel(pixel_format);
+    }
 
     /// Returns true if the pixel format is a depth and/or stencil format.
     bool IsPixelFormatZeta() const;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 96d1081476..fbfd1ff0ba 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -120,10 +120,6 @@ public:
             return {};
         }
 
-        if (regs.color_mask[index].raw == 0) {
-            return {};
-        }
-
         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                        preserve_contents);
         if (render_targets[index].target)
@@ -165,7 +161,9 @@ public:
                      const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                      const Common::Rectangle<u32>& src_rect,
                      const Common::Rectangle<u32>& dst_rect) {
-        ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect);
+        TSurface dst_surface = GetFermiSurface(dst_config);
+        ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect);
+        dst_surface->MarkAsModified(true, Tick());
     }
 
     TSurface TryFindFramebufferSurface(const u8* host_ptr) {
@@ -270,10 +268,6 @@ private:
 
     RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
                                  const GPUVAddr gpu_addr, const bool untopological) {
-        // Untopological decision
-        if (untopological) {
-            return RecycleStrategy::Ignore;
-        }
         // 3D Textures decision
         if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
             return RecycleStrategy::Flush;
@@ -284,12 +278,16 @@ private:
                 return RecycleStrategy::Flush;
             }
         }
+        // Untopological decision (currently the same result as the default below)
+        if (untopological) {
+            return RecycleStrategy::Ignore;
+        }
         return RecycleStrategy::Ignore;
     }
 
     std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                               const SurfaceParams& params, const GPUVAddr gpu_addr,
-                                              const u8* host_ptr, const bool preserve_contents,
+                                              const bool preserve_contents,
                                               const bool untopological) {
         for (auto surface : overlaps) {
             Unregister(surface);
@@ -328,6 +326,7 @@ private:
         }
         Unregister(current_surface);
         Register(new_surface);
+        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
         return {new_surface, new_surface->GetMainView()};
     }
 
@@ -351,6 +350,7 @@ private:
         if (params.target == SurfaceTarget::Texture3D) {
             return {};
         }
+        bool modified = false;
         TSurface new_surface = GetUncachedSurface(gpu_addr, params);
         for (auto surface : overlaps) {
             const SurfaceParams& src_params = surface->GetSurfaceParams();
@@ -358,7 +358,7 @@ private:
                 // We send this cases to recycle as they are more complex to handle
                 return {};
             }
-            const std::size_t candidate_size = src_params.GetGuestSizeInBytes();
+            const std::size_t candidate_size = surface->GetSizeInBytes();
             auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
             if (!mipmap_layer) {
                 return {};
@@ -368,6 +368,7 @@ private:
             if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
                 return {};
             }
+            modified |= surface->IsModified();
             // Now we got all the data set up
             const u32 dst_width{params.GetMipWidth(mipmap)};
             const u32 dst_height{params.GetMipHeight(mipmap)};
@@ -381,6 +382,7 @@ private:
             force_reconfiguration |= surface->IsProtected();
             Unregister(surface, true);
         }
+        new_surface->MarkAsModified(modified, Tick());
         Register(new_surface);
         return {{new_surface, new_surface->GetMainView()}};
     }
@@ -399,8 +401,7 @@ private:
 
         for (auto surface : overlaps) {
             if (!surface->MatchesTopology(params)) {
-                return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents,
-                                      true);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true);
             }
         }
 
@@ -418,27 +419,26 @@ private:
                 }
             }
             if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
-                return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents,
-                                      false);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
             }
-            std::optional<TView> view = current_surface->EmplaceView(params, gpu_addr);
+            std::optional<TView> view =
+                current_surface->EmplaceView(params, gpu_addr, candidate_size);
             if (view.has_value()) {
                 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
                 if (is_mirage) {
                     LOG_CRITICAL(HW_GPU, "Mirage View Unsupported");
-                    return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents,
-                                          false);
+                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
                 }
                 return {current_surface, *view};
             }
-            return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false);
+            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
         } else {
             std::optional<std::pair<TSurface, TView>> view =
                 ReconstructSurface(overlaps, params, gpu_addr, host_ptr);
             if (view.has_value()) {
                 return *view;
             }
-            return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false);
+            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
         }
     }