From 165ae823f522aa981129927f42e76763a9fa6006 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 17 Feb 2020 22:29:04 -0400
Subject: [PATCH] ThreadManager: Sync async reads on accurate gpu.

---
 src/video_core/buffer_cache/buffer_cache.h       | 12 ++++++++++++
 src/video_core/gpu.cpp                           |  2 +-
 src/video_core/gpu_thread.cpp                    |  7 ++++++-
 src/video_core/rasterizer_interface.h            | 10 ++++------
 src/video_core/renderer_opengl/gl_rasterizer.cpp |  4 ++++
 src/video_core/renderer_opengl/gl_rasterizer.h   |  1 +
 src/video_core/renderer_vulkan/vk_rasterizer.cpp |  4 ++++
 src/video_core/renderer_vulkan/vk_rasterizer.h   |  1 +
 src/video_core/texture_cache/texture_cache.h     | 15 +++++++++++++++
 9 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 5b14d52e2b..df4c0211e3 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -145,6 +145,18 @@ public:
         }
     }
 
+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+        // True if any map from GetMapsInRange(addr, size) is both modified and registered.
+        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        for (auto& object : objects) {
+            if (object->IsModified() && object->IsRegistered()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     /// Mark the specified region as being invalidated
     void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index d05b6a9d2f..19d3bd3052 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {
 }
 
 void GPU::OnCommandListEnd() {
-    renderer.Rasterizer().ReleaseFences();
+    renderer->Rasterizer().ReleaseFences();
 }
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 251a9d911f..672f8d2fab 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
+#include "core/settings.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -80,7 +81,11 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    PushCommand(FlushRegionCommand(addr, size));
+    if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
+        u64 fence = PushCommand(FlushRegionCommand(addr, size));
+        while (fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+        } // Busy-wait until signaled_fence catches up with the flush command's fence.
+    }
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 72f65b166c..2287521314 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,13 +49,9 @@ public:
     /// Records a GPU query and caches it
     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 
-    virtual void SignalFence(GPUVAddr addr, u32 value) {
+    virtual void SignalFence(GPUVAddr addr, u32 value) {}
 
-    }
-
-    virtual void ReleaseFences() {
-
-    }
+    virtual void ReleaseFences() {}
 
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
@@ -63,6 +59,8 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
+    virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
+
     /// Notify rasterizer that any caches of the specified region should be invalidated
     virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 35bed444f8..bbf37a00d5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -650,6 +650,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
     query_cache.FlushRegion(addr, size);
 }
 
+bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { // true if either cache must flush
+    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+}
+
 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     if (addr == 0 || size == 0) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6d173a9220..5c0f88e6f1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -67,6 +67,7 @@ public:
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
+    bool MustFlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 4d0c90aa39..9437a4aa11 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -514,6 +514,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
     query_cache.FlushRegion(addr, size);
 }
 
+bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { // true if either cache must flush
+    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+}
+
 void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 285f731bce..7002a4fa39 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -118,6 +118,7 @@ public:
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
+    bool MustFlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 04fe69c11d..e251a30c30 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,6 +116,21 @@ public:
         }
     }
 
+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+        // True if any surface from GetSurfacesInRegion(addr, size) reports IsModified().
+        auto surfaces = GetSurfacesInRegion(addr, size);
+        if (surfaces.empty()) {
+            return false;
+        }
+        for (const auto& surface : surfaces) {
+            if (surface->IsModified()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
                             const VideoCommon::Shader::Sampler& entry) {
         std::lock_guard lock{mutex};