From 084ceb925acad470b69467d64e4dfbb3bd7ef3f1 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 18 Feb 2020 16:51:42 -0400
Subject: [PATCH 01/31] UI: Replace accurate GPU option with GPU Accuracy Level

---
 src/core/settings.cpp                         |  2 +-
 src/core/settings.h                           | 16 ++++++++-
 src/core/telemetry_session.cpp                | 16 +++++++--
 src/video_core/engines/maxwell_dma.cpp        |  2 +-
 .../renderer_opengl/gl_rasterizer.cpp         |  2 +-
 src/video_core/texture_cache/texture_cache.h  |  8 ++---
 src/yuzu/configuration/config.cpp             |  7 ++--
 .../configure_graphics_advanced.cpp           |  5 +--
 .../configure_graphics_advanced.ui            | 33 ++++++++++++++++---
 src/yuzu_cmd/config.cpp                       |  4 +--
 src/yuzu_cmd/default_ini.h                    |  6 ++--
 src/yuzu_tester/config.cpp                    |  4 +--
 12 files changed, 77 insertions(+), 28 deletions(-)

diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index c1282cb804..445047469e 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -92,7 +92,7 @@ void LogSettings() {
     LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
     LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
     LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
-    LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
+    LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
     LogSetting("Renderer_UseAsynchronousGpuEmulation",
                Settings::values.use_asynchronous_gpu_emulation);
     LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
diff --git a/src/core/settings.h b/src/core/settings.h
index c73d1c5967..b54a0d4ea9 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -376,6 +376,12 @@ enum class RendererBackend {
     Vulkan = 1,
 };
 
+enum class GPUAccuracy : u32 {
+    Normal = 0,
+    High = 1,
+    Extreme = 2,
+};
+
 struct Values {
     // System
     bool use_docked_mode;
@@ -436,7 +442,7 @@ struct Values {
     bool use_frame_limit;
     u16 frame_limit;
     bool use_disk_shader_cache;
-    bool use_accurate_gpu_emulation;
+    GPUAccuracy gpu_accuracy;
     bool use_asynchronous_gpu_emulation;
     bool use_vsync;
     bool force_30fps_mode;
@@ -480,6 +486,14 @@ struct Values {
     std::map<u64, std::vector<std::string>> disabled_addons;
 } extern values;
 
+constexpr bool IsGPULevelExtreme() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme;
+}
+
+constexpr bool IsGPULevelHigh() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
+}
+
 void Apply();
 void LogSettings();
 } // namespace Settings
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index fd5a3ee9f1..1c3b03a1c2 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
     return "Unknown";
 }
 
+static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
+    switch (backend) {
+    case Settings::GPUAccuracy::Normal:
+        return "Normal";
+    case Settings::GPUAccuracy::High:
+        return "High";
+    case Settings::GPUAccuracy::Extreme:
+        return "Extreme";
+    }
+    return "Unknown";
+}
+
 u64 GetTelemetryId() {
     u64 telemetry_id{};
     const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
     AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
     AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
     AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
-    AddField(field_type, "Renderer_UseAccurateGpuEmulation",
-             Settings::values.use_accurate_gpu_emulation);
+    AddField(field_type, "Renderer_GPUAccuracyLevel",
+             TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
     AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
              Settings::values.use_asynchronous_gpu_emulation);
     AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c2610f992a..32b04e31ea 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -136,7 +136,7 @@ void MaxwellDMA::HandleCopy() {
             write_buffer.resize(dst_size);
         }
 
-        if (Settings::values.use_accurate_gpu_emulation) {
+        if (Settings::IsGPULevelExtreme()) {
             memory_manager.ReadBlock(source, read_buffer.data(), src_size);
             memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
         } else {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 175374f0df..ac4485a18a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -661,7 +661,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
 }
 
 void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
-    if (Settings::values.use_accurate_gpu_emulation) {
+    if (Settings::IsGPULevelExtreme()) {
         FlushRegion(addr, size);
     }
     InvalidateRegion(addr, size);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 69ca08fd10..7432691d1f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -417,7 +417,7 @@ private:
      **/
     RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
                                  const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
-        if (Settings::values.use_accurate_gpu_emulation) {
+        if (Settings::IsGPULevelExtreme()) {
             return RecycleStrategy::Flush;
         }
         // 3D Textures decision
@@ -461,7 +461,7 @@ private:
         }
         switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
         case RecycleStrategy::Ignore: {
-            return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
+            return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
         }
         case RecycleStrategy::Flush: {
             std::sort(overlaps.begin(), overlaps.end(),
@@ -598,7 +598,7 @@ private:
         if (passed_tests == 0) {
             return {};
             // In Accurate GPU all tests should pass, else we recycle
-        } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
+        } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
             return {};
         }
         for (const auto& surface : overlaps) {
@@ -668,7 +668,7 @@ private:
             for (const auto& surface : overlaps) {
                 if (!surface->MatchTarget(params.target)) {
                     if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
-                        if (Settings::values.use_accurate_gpu_emulation) {
+                        if (Settings::IsGPULevelExtreme()) {
                             return std::nullopt;
                         }
                         Unregister(surface);
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 7f6dfac849..da1fa4e029 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -639,8 +639,8 @@ void Config::ReadRendererValues() {
     Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
     Settings::values.use_disk_shader_cache =
         ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
-    Settings::values.use_accurate_gpu_emulation =
-        ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool();
+    const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
+    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
     Settings::values.use_asynchronous_gpu_emulation =
         ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
     Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
@@ -1080,8 +1080,7 @@ void Config::SaveRendererValues() {
     WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
     WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
                  true);
-    WriteSetting(QStringLiteral("use_accurate_gpu_emulation"),
-                 Settings::values.use_accurate_gpu_emulation, false);
+    WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy), 0);
     WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
                  Settings::values.use_asynchronous_gpu_emulation, false);
     WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index b9f429f840..0a3f47339d 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
 
 void ConfigureGraphicsAdvanced::SetConfiguration() {
     const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
-    ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
+    ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
     ui->use_vsync->setEnabled(runtime_lock);
     ui->use_vsync->setChecked(Settings::values.use_vsync);
     ui->force_30fps_mode->setEnabled(runtime_lock);
@@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
 }
 
 void ConfigureGraphicsAdvanced::ApplyConfiguration() {
-    Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
+    auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
+    Settings::values.gpu_accuracy = gpu_accuracy;
     Settings::values.use_vsync = ui->use_vsync->isChecked();
     Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
     Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 42eec278ed..0c7b383e00 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -23,11 +23,34 @@
        </property>
        <layout class="QVBoxLayout" name="verticalLayout_3">
         <item>
-         <widget class="QCheckBox" name="use_accurate_gpu_emulation">
-          <property name="text">
-           <string>Use accurate GPU emulation (slow)</string>
-          </property>
-         </widget>
+         <layout class="QHBoxLayout" name="horizontalLayout_2">
+          <item>
+           <widget class="QLabel" name="label_gpu_accuracy">
+            <property name="text">
+             <string>Accuracy Level:</string>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QComboBox" name="gpu_accuracy">
+            <item>
+             <property name="text">
+              <string notr="true">Normal</string>
+             </property>
+            </item>
+            <item>
+             <property name="text">
+              <string notr="true">High</string>
+             </property>
+            </item>
+            <item>
+             <property name="text">
+              <string notr="true">Extreme(very slow)</string>
+             </property>
+            </item>
+           </widget>
+          </item>
+         </layout>
         </item>
         <item>
          <widget class="QCheckBox" name="use_vsync">
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 80341747f9..d1ac354bf1 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -388,8 +388,8 @@ void Config::ReadValues() {
         static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
     Settings::values.use_disk_shader_cache =
         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
-    Settings::values.use_accurate_gpu_emulation =
-        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
+    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
+    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
     Settings::values.use_asynchronous_gpu_emulation =
         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
     Settings::values.use_vsync =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 171d16fa00..60b1a62fad 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -146,9 +146,9 @@ frame_limit =
 # 0 (default): Off, 1 : On
 use_disk_shader_cache =
 
-# Whether to use accurate GPU emulation
-# 0 (default): Off (fast), 1 : On (slow)
-use_accurate_gpu_emulation =
+# Which gpu accuracy level to use
+# 0 (Normal), 1 (High), 2 (Extreme)
+gpu_accuracy =
 
 # Whether to use asynchronous GPU emulation
 # 0 : Off (slow), 1 (default): On (fast)
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index ee2591c8fb..c0325cc3ca 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -126,8 +126,8 @@ void Config::ReadValues() {
     Settings::values.frame_limit = 100;
     Settings::values.use_disk_shader_cache =
         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
-    Settings::values.use_accurate_gpu_emulation =
-        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
+    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
+    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
     Settings::values.use_asynchronous_gpu_emulation =
         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
 

From a60a22d9c284537be2e3a6a89b8afb37e6f0c510 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 16 Feb 2020 09:33:03 -0400
Subject: [PATCH 02/31] Texture Cache: Implement OnCPUWrite and SyncGuestHost

---
 src/video_core/texture_cache/surface_base.h  | 18 ++++++++
 src/video_core/texture_cache/texture_cache.h | 48 ++++++++++++++++++--
 2 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index c5ab21f567..79e10ffbb1 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -192,6 +192,22 @@ public:
         index = index_;
     }
 
+    void SetMemoryMarked(bool is_memory_marked_) {
+        is_memory_marked = is_memory_marked_;
+    }
+
+    bool IsMemoryMarked() const {
+        return is_memory_marked;
+    }
+
+    void SetSyncPending(bool is_sync_pending_) {
+        is_sync_pending = is_sync_pending_;
+    }
+
+    bool IsSyncPending() const {
+        return is_sync_pending;
+    }
+
     void MarkAsPicked(bool is_picked_) {
         is_picked = is_picked_;
     }
@@ -303,6 +319,8 @@ private:
     bool is_target{};
     bool is_registered{};
     bool is_picked{};
+    bool is_memory_marked{};
+    bool is_sync_pending{};
     u32 index{NO_RT};
     u64 modification_tick{};
 };
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7432691d1f..441fda53d8 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,6 +6,7 @@
 
 #include <algorithm>
 #include <array>
+#include <list>
 #include <memory>
 #include <mutex>
 #include <set>
@@ -62,6 +63,30 @@ public:
         }
     }
 
+    void OnCPUWrite(CacheAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
+            if (surface->IsMemoryMarked()) {
+                Unmark(surface);
+                surface->SetSyncPending(true);
+                marked_for_unregister.emplace_back(surface);
+            }
+        }
+    }
+
+    void SyncGuestHost() {
+        std::lock_guard lock{mutex};
+
+        for (const auto& surface : marked_for_unregister) {
+            if (surface->IsRegistered()) {
+                surface->SetSyncPending(false);
+                Unregister(surface);
+            }
+        }
+        marked_for_unregister.clear();
+    }
+
     /**
      * Guarantees that rendertargets don't unregister themselves if the
      * collide. Protection is currently only done on 3D slices.
@@ -85,7 +110,9 @@ public:
             return a->GetModificationTick() < b->GetModificationTick();
         });
         for (const auto& surface : surfaces) {
+            mutex.unlock();
             FlushSurface(surface);
+            mutex.lock();
         }
     }
 
@@ -345,9 +372,20 @@ protected:
         surface->SetCpuAddr(*cpu_addr);
         RegisterInnerCache(surface);
         surface->MarkAsRegistered(true);
+        surface->SetMemoryMarked(true);
         rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
     }
 
+    void Unmark(TSurface surface) {
+        if (!surface->IsMemoryMarked()) {
+            return;
+        }
+        const std::size_t size = surface->GetSizeInBytes();
+        const VAddr cpu_addr = surface->GetCpuAddr();
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+        surface->SetMemoryMarked(false);
+    }
+
     void Unregister(TSurface surface) {
         if (guard_render_targets && surface->IsProtected()) {
             return;
@@ -355,9 +393,11 @@ protected:
         if (!guard_render_targets && surface->IsRenderTarget()) {
             ManageRenderTargetUnregister(surface);
         }
-        const std::size_t size = surface->GetSizeInBytes();
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+        Unmark(surface);
+        if (surface->IsSyncPending()) {
+            marked_for_unregister.remove(surface);
+            surface->SetSyncPending(false);
+        }
         UnregisterInnerCache(surface);
         surface->MarkAsRegistered(false);
         ReserveSurface(surface->GetSurfaceParams(), surface);
@@ -1150,6 +1190,8 @@ private:
     std::unordered_map<u32, TSurface> invalid_cache;
     std::vector<u8> invalid_memory;
 
+    std::list<TSurface> marked_for_unregister;
+
     StagingCache staging_cache;
     std::recursive_mutex mutex;
 };

From da8f17715dbdc7eec92f5f0c11c968a51b86cab4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 16 Feb 2020 09:51:37 -0400
Subject: [PATCH 03/31] GPU: Refactor synchronization on Async GPU

---
 src/video_core/dma_pusher.cpp                    |  1 +
 src/video_core/engines/maxwell_3d.cpp            |  8 ++++++--
 src/video_core/gpu.cpp                           |  3 +++
 src/video_core/gpu.h                             |  1 +
 src/video_core/gpu_thread.cpp                    |  6 +++---
 src/video_core/rasterizer_interface.h            |  6 ++++++
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 ++++++++++++++++
 src/video_core/renderer_opengl/gl_rasterizer.h   |  2 ++
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 14 ++++++++++++++
 src/video_core/renderer_vulkan/vk_rasterizer.h   |  2 ++
 src/video_core/texture_cache/texture_cache.h     |  4 ++--
 11 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 0b77afc717..2516ea9935 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -32,6 +32,7 @@ void DmaPusher::DispatchCalls() {
         }
     }
     gpu.FlushCommands();
+    gpu.SyncGuestHost();
 }
 
 bool DmaPusher::Step() {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index baa74ad4ca..2298a62738 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -403,9 +403,13 @@ void Maxwell3D::ProcessQueryGet() {
                "Units other than CROP are unimplemented");
 
     switch (regs.query.query_get.operation) {
-    case Regs::QueryOperation::Release:
-        StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+    case Regs::QueryOperation::Release: {
+        rasterizer.FlushCommands();
+        rasterizer.SyncGuestHost();
+        const u64 result = regs.query.query_sequence;
+        StampQueryResult(result, regs.query.query_get.short_query == 0);
         break;
+    }
     case Regs::QueryOperation::Acquire:
         // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
         // matches the current payload.
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index a606f4abdb..13bca5a78d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -142,6 +142,9 @@ void GPU::FlushCommands() {
     renderer->Rasterizer().FlushCommands();
 }
 
+void GPU::SyncGuestHost() {
+    renderer->Rasterizer().SyncGuestHost();
+}
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
 // So the values you see in docs might be multiplied by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 1a2d747bef..99ed190bcd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -156,6 +156,7 @@ public:
     void CallMethod(const MethodCall& method_call);
 
     void FlushCommands();
+    void SyncGuestHost();
 
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 10cda686b9..1994d3bb45 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -40,7 +40,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
             renderer.Rasterizer().FlushRegion(data->addr, data->size);
         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+            renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
             return;
         } else {
@@ -82,12 +82,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    system.Renderer().Rasterizer().InvalidateRegion(addr, size);
+    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
 }
 
 void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
-    InvalidateRegion(addr, size);
+    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
 }
 
 void ThreadManager::WaitIdle() const {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 8ae5b9c4e9..0d05a3fc71 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -58,6 +58,12 @@ public:
     /// Notify rasterizer that any caches of the specified region should be invalidated
     virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
+    /// Notify rasterizer that any caches of the specified region are desync with guest
+    virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
+
+    /// Sync memory between guest and host.
+    virtual void SyncGuestHost() = 0;
+
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ac4485a18a..5379127454 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -660,6 +660,22 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
     query_cache.InvalidateRegion(addr, size);
 }
 
+void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (!addr || !size) {
+        return;
+    }
+    texture_cache.OnCPUWrite(addr, size);
+    shader_cache.InvalidateRegion(addr, size);
+    buffer_cache.InvalidateRegion(addr, size);
+}
+
+void RasterizerOpenGL::SyncGuestHost() {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    texture_cache.SyncGuestHost();
+    // buffer_cache.SyncGuestHost();
+}
+
 void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     if (Settings::IsGPULevelExtreme()) {
         FlushRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index caea174d2c..a870024c63 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -67,6 +67,8 @@ public:
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
+    void OnCPUWrite(VAddr addr, u64 size) override;
+    void SyncGuestHost() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 71007bbe85..ad59f558de 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -524,6 +524,20 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
     query_cache.InvalidateRegion(addr, size);
 }
 
+void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+    if (!addr || !size) {
+        return;
+    }
+    texture_cache.OnCPUWrite(addr, size);
+    pipeline_cache.InvalidateRegion(addr, size);
+    buffer_cache.InvalidateRegion(addr, size);
+}
+
+void RasterizerVulkan::SyncGuestHost() {
+    texture_cache.SyncGuestHost();
+    // buffer_cache.SyncGuestHost();
+}
+
 void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index d9108f862a..285f731bce 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -119,6 +119,8 @@ public:
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
+    void OnCPUWrite(VAddr addr, u64 size) override;
+    void SyncGuestHost() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 441fda53d8..c23b9f9b9f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -63,7 +63,7 @@ public:
         }
     }
 
-    void OnCPUWrite(CacheAddr addr, std::size_t size) {
+    void OnCPUWrite(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@@ -549,7 +549,7 @@ private:
         }
         const auto& final_params = new_surface->GetSurfaceParams();
         if (cr_params.type != final_params.type) {
-            if (Settings::values.use_accurate_gpu_emulation) {
+            if (Settings::IsGPULevelExtreme()) {
                 BufferCopy(current_surface, new_surface);
             }
         } else {

From 8b1eb44b3ed5483071dc6754662a277b45e4a809 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 16 Feb 2020 10:08:07 -0400
Subject: [PATCH 04/31] BufferCache: Implement OnCPUWrite and SyncGuestHost

---
 src/video_core/buffer_cache/buffer_cache.h    | 45 ++++++++++++++++++-
 src/video_core/buffer_cache/map_interval.h    | 18 ++++++++
 src/video_core/dma_pusher.cpp                 |  1 +
 src/video_core/gpu_thread.cpp                 |  2 +-
 .../renderer_opengl/gl_rasterizer.cpp         |  4 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |  4 +-
 6 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83e7a1cde1..5b14d52e2b 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <list>
 #include <memory>
 #include <mutex>
 #include <unordered_map>
@@ -137,7 +138,9 @@ public:
         });
         for (auto& object : objects) {
             if (object->IsModified() && object->IsRegistered()) {
+                mutex.unlock();
                 FlushMap(object);
+                mutex.lock();
             }
         }
     }
@@ -154,6 +157,30 @@ public:
         }
     }
 
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : GetMapsInRange(addr, size)) {
+            if (object->IsMemoryMarked() && object->IsRegistered()) {
+                Unmark(object);
+                object->SetSyncPending(true);
+                marked_for_unregister.emplace_back(object);
+            }
+        }
+    }
+
+    void SyncGuestHost() {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : marked_for_unregister) {
+            if (object->IsRegistered()) {
+                object->SetSyncPending(false);
+                Unregister(object);
+            }
+        }
+        marked_for_unregister.clear();
+    }
+
     virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
 
 protected:
@@ -196,17 +223,30 @@ protected:
         const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
         mapped_addresses.insert({interval, new_map});
         rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+        new_map->SetMemoryMarked(true);
         if (inherit_written) {
             MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
             new_map->MarkAsWritten(true);
         }
     }
 
-    /// Unregisters an object from the cache
-    void Unregister(MapInterval& map) {
+    void Unmark(const MapInterval& map) {
+        if (!map->IsMemoryMarked()) {
+            return;
+        }
         const std::size_t size = map->GetEnd() - map->GetStart();
         rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
+        map->SetMemoryMarked(false);
+    }
+
+    /// Unregisters an object from the cache
+    void Unregister(const MapInterval& map) {
+        Unmark(map);
         map->MarkAsRegistered(false);
+        if (map->IsSyncPending()) {
+            marked_for_unregister.remove(map);
+            map->SetSyncPending(false);
+        }
         if (map->IsWritten()) {
             UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
         }
@@ -479,6 +519,7 @@ private:
     u64 modified_ticks = 0;
 
     std::vector<u8> staging_buffer;
+    std::list<MapInterval> marked_for_unregister;
 
     std::recursive_mutex mutex;
 };
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index b0956029d3..29d8b26f3d 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -46,6 +46,22 @@ public:
         return is_registered;
     }
 
+    void SetMemoryMarked(bool is_memory_marked_) {
+        is_memory_marked = is_memory_marked_;
+    }
+
+    bool IsMemoryMarked() const {
+        return is_memory_marked;
+    }
+
+    void SetSyncPending(bool is_sync_pending_) {
+        is_sync_pending = is_sync_pending_;
+    }
+
+    bool IsSyncPending() const {
+        return is_sync_pending;
+    }
+
     VAddr GetStart() const {
         return start;
     }
@@ -83,6 +99,8 @@ private:
     bool is_written{};
     bool is_modified{};
     bool is_registered{};
+    bool is_memory_marked{};
+    bool is_sync_pending{};
     u64 ticks{};
 };
 
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 2516ea9935..31627b8121 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
 void DmaPusher::DispatchCalls() {
     MICROPROFILE_SCOPE(DispatchCalls);
 
+    gpu.SyncGuestHost();
     // On entering GPU code, assume all memory may be touched by the ARM core.
     gpu.Maxwell3D().OnMemoryWrite();
 
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 1994d3bb45..0a8123cfef 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -78,7 +78,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    PushCommand(FlushRegionCommand(addr, size));
+    system.Renderer().Rasterizer().FlushRegion(addr, size);
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 5379127454..988eaeaa5a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -667,13 +667,13 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
     }
     texture_cache.OnCPUWrite(addr, size);
     shader_cache.InvalidateRegion(addr, size);
-    buffer_cache.InvalidateRegion(addr, size);
+    buffer_cache.OnCPUWrite(addr, size);
 }
 
 void RasterizerOpenGL::SyncGuestHost() {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     texture_cache.SyncGuestHost();
-    // buffer_cache.SyncGuestHost();
+    buffer_cache.SyncGuestHost();
 }
 
 void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ad59f558de..4d0c90aa39 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -530,12 +530,12 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
     }
     texture_cache.OnCPUWrite(addr, size);
     pipeline_cache.InvalidateRegion(addr, size);
-    buffer_cache.InvalidateRegion(addr, size);
+    buffer_cache.OnCPUWrite(addr, size);
 }
 
 void RasterizerVulkan::SyncGuestHost() {
     texture_cache.SyncGuestHost();
-    // buffer_cache.SyncGuestHost();
+    buffer_cache.SyncGuestHost();
 }
 
 void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {

From 339d0d9d6c02cf79d6025dae7c60d8635fa4ea3b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 16 Feb 2020 16:24:37 -0400
Subject: [PATCH 05/31] GPU: Delay fence writes until the end of the command list.

---
 src/video_core/dma_pusher.cpp         |  1 +
 src/video_core/engines/maxwell_3d.cpp | 10 +++++++++-
 src/video_core/engines/maxwell_3d.h   |  4 ++++
 src/video_core/gpu.cpp                |  4 ++++
 src/video_core/gpu.h                  |  1 +
 src/video_core/gpu_thread.cpp         |  2 +-
 6 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 31627b8121..324dafdcda 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -34,6 +34,7 @@ void DmaPusher::DispatchCalls() {
     }
     gpu.FlushCommands();
     gpu.SyncGuestHost();
+    gpu.OnCommandListEnd();
 }
 
 bool DmaPusher::Step() {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2298a62738..2605c3b429 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -397,6 +397,14 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
     }
 }
 
+void Maxwell3D::ReleaseFences() {
+    for (const auto pair : delay_fences) {
+        const auto [addr, payload] = pair;
+        memory_manager.Write<u32>(addr, static_cast<u32>(payload));
+    }
+    delay_fences.clear();
+}
+
 void Maxwell3D::ProcessQueryGet() {
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -407,7 +415,7 @@ void Maxwell3D::ProcessQueryGet() {
         rasterizer.FlushCommands();
         rasterizer.SyncGuestHost();
         const u64 result = regs.query.query_sequence;
-        StampQueryResult(result, regs.query.query_get.short_query == 0);
+        delay_fences.emplace_back(regs.query.QueryAddress(), result);
         break;
     }
     case Regs::QueryOperation::Acquire:
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 59d5752d26..0a93827ec8 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1427,6 +1427,8 @@ public:
         Tables tables{};
     } dirty;
 
+    void ReleaseFences();
+
 private:
     void InitializeRegisterDefaults();
 
@@ -1467,6 +1469,8 @@ private:
 
     std::array<u8, Regs::NUM_REGS> dirty_pointers{};
 
+    std::vector<std::pair<GPUVAddr, u64>> delay_fences;
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 13bca5a78d..71ddfbd26e 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -145,6 +145,10 @@ void GPU::FlushCommands() {
 void GPU::SyncGuestHost() {
     renderer->Rasterizer().SyncGuestHost();
 }
+
+void GPU::OnCommandListEnd() {
+    maxwell_3d->ReleaseFences();
+}
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
 // So the values you see in docs might be multiplied by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 99ed190bcd..b884456349 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -157,6 +157,7 @@ public:
 
     void FlushCommands();
     void SyncGuestHost();
+    void OnCommandListEnd();
 
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 0a8123cfef..1994d3bb45 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -78,7 +78,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    system.Renderer().Rasterizer().FlushRegion(addr, size);
+    PushCommand(FlushRegionCommand(addr, size));
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {

From ed7e9657120faea849af2933e539c72bc961c2a9 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 16 Feb 2020 17:13:19 -0400
Subject: [PATCH 06/31] TextureCache: Flush linear textures after finishing
 rendering.

---
 src/video_core/texture_cache/texture_cache.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c23b9f9b9f..d8c8390bb9 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -233,8 +233,14 @@ public:
 
         auto surface_view = GetSurface(gpu_addr, *cpu_addr,
                                        SurfaceParams::CreateForFramebuffer(system, index), true);
-        if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
+        if (render_targets[index].target) {
+            auto& surface = render_targets[index].target;
+            surface->MarkAsRenderTarget(false, NO_RT);
+            const auto& cr_params = surface->GetSurfaceParams();
+            if (!cr_params.is_tiled) {
+                FlushSurface(surface);
+            }
+        }
         render_targets[index].target = surface_view.first;
         render_targets[index].view = surface_view.second;
         if (render_targets[index].target)

From 487379c593bcaf3787ede187c5d44f7923b54dc9 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 17 Feb 2020 18:10:23 -0400
Subject: [PATCH 07/31] OpenGL: Implement Fencing backend.

---
 src/video_core/engines/maxwell_3d.cpp         | 16 +++-------
 src/video_core/engines/maxwell_3d.h           |  4 ---
 src/video_core/gpu.cpp                        |  2 +-
 src/video_core/gpu.h                          |  2 +-
 src/video_core/gpu_asynch.cpp                 |  4 +++
 src/video_core/gpu_asynch.h                   |  2 ++
 src/video_core/gpu_thread.cpp                 |  6 ++++
 src/video_core/gpu_thread.h                   |  7 +++-
 src/video_core/rasterizer_interface.h         |  8 +++++
 .../renderer_opengl/gl_rasterizer.cpp         | 28 ++++++++++++++++
 .../renderer_opengl/gl_rasterizer.h           |  2 ++
 src/video_core/texture_cache/texture_cache.h  | 32 ++++++++++++++++++-
 12 files changed, 94 insertions(+), 19 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2605c3b429..c297bc31ba 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
     }
 }
 
-void Maxwell3D::ReleaseFences() {
-    for (const auto pair : delay_fences) {
-        const auto [addr, payload] = pair;
-        memory_manager.Write<u32>(addr, static_cast<u32>(payload));
-    }
-    delay_fences.clear();
-}
-
 void Maxwell3D::ProcessQueryGet() {
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() {
 
     switch (regs.query.query_get.operation) {
     case Regs::QueryOperation::Release: {
-        rasterizer.FlushCommands();
-        rasterizer.SyncGuestHost();
         const u64 result = regs.query.query_sequence;
-        delay_fences.emplace_back(regs.query.QueryAddress(), result);
+        if (regs.query.query_get.fence == 1) {
+            rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result));
+        } else {
+            StampQueryResult(result, regs.query.query_get.short_query == 0);
+        }
         break;
     }
     case Regs::QueryOperation::Acquire:
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0a93827ec8..59d5752d26 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1427,8 +1427,6 @@ public:
         Tables tables{};
     } dirty;
 
-    void ReleaseFences();
-
 private:
     void InitializeRegisterDefaults();
 
@@ -1469,8 +1467,6 @@ private:
 
     std::array<u8, Regs::NUM_REGS> dirty_pointers{};
 
-    std::vector<std::pair<GPUVAddr, u64>> delay_fences;
-
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 71ddfbd26e..d05b6a9d2f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {
 }
 
 void GPU::OnCommandListEnd() {
-    maxwell_3d->ReleaseFences();
+    renderer.Rasterizer().ReleaseFences();
 }
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b884456349..fa9991c871 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -157,7 +157,7 @@ public:
 
     void FlushCommands();
     void SyncGuestHost();
-    void OnCommandListEnd();
+    virtual void OnCommandListEnd();
 
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 20e73a37e4..53305ab436 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
     gpu_thread.WaitIdle();
 }
 
+void GPUAsynch::OnCommandListEnd() {
+    gpu_thread.OnCommandListEnd();
+}
+
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 03fd0eef0a..5176586127 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -32,6 +32,8 @@ public:
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override;
 
+    void OnCommandListEnd() override;
+
 protected:
     void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
 
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 1994d3bb45..251a9d911f 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
             dma_pusher.DispatchCalls();
         } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
+        } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
+            renderer.Rasterizer().ReleaseFences();
         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
             renderer.Rasterizer().FlushRegion(data->addr, data->size);
         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const {
     }
 }
 
+void ThreadManager::OnCommandListEnd() {
+    PushCommand(OnCommandListEndCommand());
+}
+
 u64 ThreadManager::PushCommand(CommandData&& command_data) {
     const u64 fence{++state.last_fence};
     state.queue.Push(CommandDataContainer(std::move(command_data), fence));
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cd74ad330a..9d08779216 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final {
     u64 size;
 };
 
+/// Command to signal to the GPU thread that processing has ended
+struct OnCommandListEndCommand final {};
+
 using CommandData =
     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
 
 struct CommandDataContainer {
     CommandDataContainer() = default;
@@ -122,6 +125,8 @@ public:
     // Wait until the gpu thread is idle.
     void WaitIdle() const;
 
+    void OnCommandListEnd();
+
 private:
     /// Pushes a command to be executed by the GPU thread
     u64 PushCommand(CommandData&& command_data);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0d05a3fc71..72f65b166c 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,6 +49,14 @@ public:
     /// Records a GPU query and caches it
     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 
+    virtual void SignalFence(GPUVAddr addr, u32 value) {
+
+    }
+
+    virtual void ReleaseFences() {
+
+    }
+
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 988eaeaa5a..93bb33e8cc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() {
     buffer_cache.SyncGuestHost();
 }
 
+void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) {
+    if (!fences.empty()) {
+        const std::pair<GPUVAddr, u32>& current_fence = fences.front();
+        const auto [address, payload] = current_fence;
+        texture_cache.PopAsyncFlushes();
+        auto& gpu{system.GPU()};
+        auto& memory_manager{gpu.MemoryManager()};
+        memory_manager.Write<u32>(address, payload);
+        fences.pop_front();
+    }
+    fences.emplace_back(addr, value);
+    texture_cache.CommitAsyncFlushes();
+    FlushCommands();
+    SyncGuestHost();
+}
+
+void RasterizerOpenGL::ReleaseFences() {
+    while (!fences.empty()) {
+        const std::pair<GPUVAddr, u32>& current_fence = fences.front();
+        const auto [address, payload] = current_fence;
+        texture_cache.PopAsyncFlushes();
+        auto& gpu{system.GPU()};
+        auto& memory_manager{gpu.MemoryManager()};
+        memory_manager.Write<u32>(address, payload);
+        fences.pop_front();
+    }
+}
+
 void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     if (Settings::IsGPULevelExtreme()) {
         FlushRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a870024c63..486a154ad6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -69,6 +69,8 @@ public:
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
+    void SignalFence(GPUVAddr addr, u32 value) override;
+    void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d8c8390bb9..6629c59ed2 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -238,7 +238,7 @@ public:
             surface->MarkAsRenderTarget(false, NO_RT);
             const auto& cr_params = surface->GetSurfaceParams();
             if (!cr_params.is_tiled) {
-                FlushSurface(surface);
+                AsyncFlushSurface(surface);
             }
         }
         render_targets[index].target = surface_view.first;
@@ -317,6 +317,26 @@ public:
         return ++ticks;
     }
 
+    void CommitAsyncFlushes() {
+        commited_flushes.push_back(uncommited_flushes);
+        uncommited_flushes.reset();
+    }
+
+    void PopAsyncFlushes() {
+        if (commited_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = commited_flushes.front();
+        if (!flush_list) {
+            commited_flushes.pop_front();
+            return;
+        }
+        for (TSurface& surface : *flush_list) {
+            FlushSurface(surface);
+        }
+        commited_flushes.pop_front();
+    }
+
 protected:
     explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           bool is_astc_supported)
@@ -1152,6 +1172,13 @@ private:
         TView view;
     };
 
+    void AsyncFlushSurface(TSurface& surface) {
+        if (!uncommited_flushes) {
+            uncommited_flushes = std::make_shared<std::list<TSurface>>();
+        }
+        uncommited_flushes->push_back(surface);
+    }
+
     VideoCore::RasterizerInterface& rasterizer;
 
     FormatLookupTable format_lookup_table;
@@ -1198,6 +1225,9 @@ private:
 
     std::list<TSurface> marked_for_unregister;
 
+    std::shared_ptr<std::list<TSurface>> uncommited_flushes{};
+    std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes;
+
     StagingCache staging_cache;
     std::recursive_mutex mutex;
 };

From 1f345ebe3a5501b50f26f0c5c21cac5d55dd79c1 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 17 Feb 2020 20:19:26 -0400
Subject: [PATCH 08/31] GPU: Implement a Fence Manager.

---
 src/video_core/CMakeLists.txt                 |  3 +
 src/video_core/fence_manager.h                | 97 +++++++++++++++++++
 .../renderer_opengl/gl_fence_manager.cpp      | 55 +++++++++++
 .../renderer_opengl/gl_fence_manager.h        | 47 +++++++++
 .../renderer_opengl/gl_rasterizer.cpp         | 27 +-----
 .../renderer_opengl/gl_rasterizer.h           |  2 +
 6 files changed, 208 insertions(+), 23 deletions(-)
 create mode 100644 src/video_core/fence_manager.h
 create mode 100644 src/video_core/renderer_opengl/gl_fence_manager.cpp
 create mode 100644 src/video_core/renderer_opengl/gl_fence_manager.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 258d58eba7..9a3f568f9c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(video_core STATIC
     engines/shader_bytecode.h
     engines/shader_header.h
     engines/shader_type.h
+    fence_manager.h
     gpu.cpp
     gpu.h
     gpu_asynch.cpp
@@ -51,6 +52,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_buffer_cache.h
     renderer_opengl/gl_device.cpp
     renderer_opengl/gl_device.h
+    renderer_opengl/gl_fence_manager.cpp
+    renderer_opengl/gl_fence_manager.h
     renderer_opengl/gl_framebuffer_cache.cpp
     renderer_opengl/gl_framebuffer_cache.h
     renderer_opengl/gl_rasterizer.cpp
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
new file mode 100644
index 0000000000..19cec0f66a
--- /dev/null
+++ b/src/video_core/fence_manager.h
@@ -0,0 +1,97 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <queue>
+#include <memory>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "core/settings.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+class FenceBase {
+public:
+    FenceBase(GPUVAddr address, u32 payload) : address{address}, payload{payload} {}
+
+    constexpr GPUVAddr GetAddress() const {
+        return address;
+    }
+
+    constexpr u32 GetPayload() const {
+        return payload;
+    }
+
+private:
+    GPUVAddr address;
+    u32 payload;
+};
+
+template <typename TFence, typename TTextureCache>
+class FenceManager {
+public:
+    void SignalFence(GPUVAddr addr, u32 value) {
+        TryReleasePendingFences();
+        TFence new_fence = CreateFence(addr, value);
+        QueueFence(new_fence);
+        fences.push(new_fence);
+        texture_cache.CommitAsyncFlushes();
+        rasterizer.FlushCommands();
+        rasterizer.SyncGuestHost();
+    }
+
+    void WaitPendingFences() {
+        while (!fences.empty()) {
+            TFence& current_fence = fences.front();
+            WaitFence(current_fence);
+            texture_cache.PopAsyncFlushes();
+            auto& gpu{system.GPU()};
+            auto& memory_manager{gpu.MemoryManager()};
+            memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            fences.pop();
+        }
+    }
+
+protected:
+    FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                 TTextureCache& texture_cache)
+        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache} {}
+
+    virtual TFence CreateFence(GPUVAddr addr, u32 value) = 0;
+    virtual void QueueFence(TFence& fence) = 0;
+    virtual bool IsFenceSignaled(TFence& fence) = 0;
+    virtual void WaitFence(TFence& fence) = 0;
+
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
+    TTextureCache& texture_cache;
+
+private:
+    void TryReleasePendingFences() {
+        while (!fences.empty()) {
+            TFence& current_fence = fences.front();
+            if (!IsFenceSignaled(current_fence)) {
+                return;
+            }
+            texture_cache.PopAsyncFlushes();
+            auto& gpu{system.GPU()};
+            auto& memory_manager{gpu.MemoryManager()};
+            memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            fences.pop();
+        }
+    }
+
+    std::queue<TFence> fences;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
new file mode 100644
index 0000000000..4517ef1507
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -0,0 +1,55 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+
+#include "video_core/renderer_opengl/gl_fence_manager.h"
+
+namespace OpenGL {
+
+GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload)
+    : VideoCommon::FenceBase(address, payload), sync_object{} {}
+
+GLInnerFence::~GLInnerFence() = default;
+
+void GLInnerFence::Queue() {
+    ASSERT(sync_object.handle == 0);
+    sync_object.Create();
+}
+
+bool GLInnerFence::IsSignaled() const {
+    ASSERT(sync_object.handle != 0);
+    GLsizei length;
+    GLint sync_status;
+    glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
+    return sync_status == GL_SIGNALED;
+}
+
+void GLInnerFence::Wait() {
+    ASSERT(sync_object.handle != 0);
+    while (glClientWaitSync(sync_object.handle, 0, 1000) == GL_TIMEOUT_EXPIRED)
+        ;
+}
+
+FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                               TextureCacheOpenGL& texture_cache)
+    : GenericFenceManager(system, rasterizer, texture_cache) {}
+
+Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value) {
+    return std::make_shared<GLInnerFence>(addr, value);
+}
+
+void FenceManagerOpenGL::QueueFence(Fence& fence) {
+    fence->Queue();
+}
+
+bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) {
+    return fence->IsSignaled();
+}
+
+void FenceManagerOpenGL::WaitFence(Fence& fence) {
+    fence->Wait();
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
new file mode 100644
index 0000000000..3cfa8b1d01
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -0,0 +1,47 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/fence_manager.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+
+namespace OpenGL {
+
+class GLInnerFence : public VideoCommon::FenceBase {
+public:
+    GLInnerFence(GPUVAddr address, u32 payload);
+    ~GLInnerFence();
+
+    void Queue();
+
+    bool IsSignaled() const;
+
+    void Wait();
+
+private:
+    OGLSync sync_object;
+};
+
+using Fence = std::shared_ptr<GLInnerFence>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCacheOpenGL>;
+
+class FenceManagerOpenGL final : public GenericFenceManager {
+public:
+    FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                   TextureCacheOpenGL& texture_cache);
+
+protected:
+    Fence CreateFence(GPUVAddr addr, u32 value) override;
+    void QueueFence(Fence& fence) override;
+    bool IsFenceSignaled(Fence& fence) override;
+    void WaitFence(Fence& fence) override;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 93bb33e8cc..35bed444f8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -101,7 +101,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
     : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
       shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
       screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
-      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this,
+                                                                             texture_cache} {
     CheckExtensions();
 }
 
@@ -677,31 +678,11 @@ void RasterizerOpenGL::SyncGuestHost() {
 }
 
 void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) {
-    if (!fences.empty()) {
-        const std::pair<GPUVAddr, u32>& current_fence = fences.front();
-        const auto [address, payload] = current_fence;
-        texture_cache.PopAsyncFlushes();
-        auto& gpu{system.GPU()};
-        auto& memory_manager{gpu.MemoryManager()};
-        memory_manager.Write<u32>(address, payload);
-        fences.pop_front();
-    }
-    fences.emplace_back(addr, value);
-    texture_cache.CommitAsyncFlushes();
-    FlushCommands();
-    SyncGuestHost();
+    fence_manager.SignalFence(addr, value);
 }
 
 void RasterizerOpenGL::ReleaseFences() {
-    while (!fences.empty()) {
-        const std::pair<GPUVAddr, u32>& current_fence = fences.front();
-        const auto [address, payload] = current_fence;
-        texture_cache.PopAsyncFlushes();
-        auto& gpu{system.GPU()};
-        auto& memory_manager{gpu.MemoryManager()};
-        memory_manager.Write<u32>(address, payload);
-        fences.pop_front();
-    }
+    fence_manager.WaitPendingFences();
 }
 
 void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 486a154ad6..6d173a9220 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,6 +23,7 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_fence_manager.h"
 #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 #include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -226,6 +227,7 @@ private:
     SamplerCacheOpenGL sampler_cache;
     FramebufferCacheOpenGL framebuffer_cache;
     QueryCache query_cache;
+    FenceManagerOpenGL fence_manager;
 
     Core::System& system;
     ScreenInfo& screen_info;

From 57fdbd9b8992de4eaf2b262e6a2cece43c141894 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 17 Feb 2020 22:15:43 -0400
Subject: [PATCH 09/31] FenceManager: Implement should wait.

---
 src/video_core/fence_manager.h               |  8 ++++++--
 src/video_core/texture_cache/texture_cache.h | 11 +++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 19cec0f66a..036f3996c3 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -53,7 +53,10 @@ public:
     void WaitPendingFences() {
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
-            WaitFence(current_fence);
+            bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
+            if (should_wait) {
+                WaitFence(current_fence);
+            }
             texture_cache.PopAsyncFlushes();
             auto& gpu{system.GPU()};
             auto& memory_manager{gpu.MemoryManager()};
@@ -80,7 +83,8 @@ private:
     void TryReleasePendingFences() {
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
-            if (!IsFenceSignaled(current_fence)) {
+            bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
+            if (should_wait && !IsFenceSignaled(current_fence)) {
                 return;
             }
             texture_cache.PopAsyncFlushes();
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6629c59ed2..04fe69c11d 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -322,6 +322,17 @@ public:
         uncommited_flushes.reset();
     }
 
+    bool ShouldWaitAsyncFlushes() {
+        if (commited_flushes.empty()) {
+            return false;
+        }
+        auto& flush_list = commited_flushes.front();
+        if (!flush_list) {
+            return false;
+        }
+        return true;
+    }
+
     void PopAsyncFlushes() {
         if (commited_flushes.empty()) {
             return;

From 165ae823f522aa981129927f42e76763a9fa6006 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 17 Feb 2020 22:29:04 -0400
Subject: [PATCH 10/31] ThreadManager: Sync async reads on accurate gpu.

---
 src/video_core/buffer_cache/buffer_cache.h       | 12 ++++++++++++
 src/video_core/gpu.cpp                           |  2 +-
 src/video_core/gpu_thread.cpp                    |  7 ++++++-
 src/video_core/rasterizer_interface.h            | 10 ++++------
 src/video_core/renderer_opengl/gl_rasterizer.cpp |  4 ++++
 src/video_core/renderer_opengl/gl_rasterizer.h   |  1 +
 src/video_core/renderer_vulkan/vk_rasterizer.cpp |  4 ++++
 src/video_core/renderer_vulkan/vk_rasterizer.h   |  1 +
 src/video_core/texture_cache/texture_cache.h     | 15 +++++++++++++++
 9 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 5b14d52e2b..df4c0211e3 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -145,6 +145,18 @@ public:
         }
     }
 
+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        for (auto& object : objects) {
+            if (object->IsModified() && object->IsRegistered()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     /// Mark the specified region as being invalidated
     void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index d05b6a9d2f..19d3bd3052 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {
 }
 
 void GPU::OnCommandListEnd() {
-    renderer.Rasterizer().ReleaseFences();
+    renderer->Rasterizer().ReleaseFences();
 }
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 251a9d911f..672f8d2fab 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
+#include "core/settings.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -80,7 +81,11 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    PushCommand(FlushRegionCommand(addr, size));
+    if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
+        u64 fence = PushCommand(FlushRegionCommand(addr, size));
+        while (fence < state.signaled_fence.load(std::memory_order_relaxed)) {
+        }
+    }
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 72f65b166c..2287521314 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,13 +49,9 @@ public:
     /// Records a GPU query and caches it
     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 
-    virtual void SignalFence(GPUVAddr addr, u32 value) {
+    virtual void SignalFence(GPUVAddr addr, u32 value) {}
 
-    }
-
-    virtual void ReleaseFences() {
-
-    }
+    virtual void ReleaseFences() {}
 
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
@@ -63,6 +59,8 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
+    virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
+
     /// Notify rasterizer that any caches of the specified region should be invalidated
     virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 35bed444f8..bbf37a00d5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -650,6 +650,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
     query_cache.FlushRegion(addr, size);
 }
 
+bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+}
+
 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     if (addr == 0 || size == 0) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6d173a9220..5c0f88e6f1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -67,6 +67,7 @@ public:
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
+    bool MustFlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 4d0c90aa39..9437a4aa11 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -514,6 +514,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
     query_cache.FlushRegion(addr, size);
 }
 
+bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+}
+
 void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 285f731bce..7002a4fa39 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -118,6 +118,7 @@ public:
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
+    bool MustFlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 04fe69c11d..e251a30c30 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,6 +116,21 @@ public:
         }
     }
 
+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        auto surfaces = GetSurfacesInRegion(addr, size);
+        if (surfaces.empty()) {
+            return false;
+        }
+        for (const auto& surface : surfaces) {
+            if (surface->IsModified()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
                             const VideoCommon::Shader::Sampler& entry) {
         std::lock_guard lock{mutex};

From e84eb64e511cd4699cc2371744fccd24628c0749 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 18 Feb 2020 11:26:31 -0400
Subject: [PATCH 11/31] Rasterizer: Disable fence management in synchronous GPU mode.

---
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 ++++++++++
 src/video_core/texture_cache/texture_cache.h     |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bbf37a00d5..ffa0a04292 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -682,10 +682,20 @@ void RasterizerOpenGL::SyncGuestHost() {
 }
 
 void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        auto& memory_manager{gpu.MemoryManager()};
+        memory_manager.Write<u32>(addr, value);
+        return;
+    }
     fence_manager.SignalFence(addr, value);
 }
 
 void RasterizerOpenGL::ReleaseFences() {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        return;
+    }
     fence_manager.WaitPendingFences();
 }
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index e251a30c30..e1a1edbd24 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -252,7 +252,7 @@ public:
             auto& surface = render_targets[index].target;
             surface->MarkAsRenderTarget(false, NO_RT);
             const auto& cr_params = surface->GetSurfaceParams();
-            if (!cr_params.is_tiled) {
+            if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) {
                 AsyncFlushSurface(surface);
             }
         }

From a081a7c8558500249f4871f3a1b8c9046cb1b6f1 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 18 Feb 2020 13:19:24 -0400
Subject: [PATCH 12/31] GPU: Fix rebase errors.

---
 src/video_core/engines/maxwell_3d.cpp            | 7 +++----
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 +
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c297bc31ba..cff90bc1d9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -403,12 +403,11 @@ void Maxwell3D::ProcessQueryGet() {
                "Units other than CROP are unimplemented");
 
     switch (regs.query.query_get.operation) {
-    case Regs::QueryOperation::Release: {
-        const u64 result = regs.query.query_sequence;
+    case Regs::QueryOperation::Release:
         if (regs.query.query_get.fence == 1) {
-            rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result));
+            rasterizer.SignalFence(regs.query.QueryAddress(), regs.query.query_sequence);
         } else {
-            StampQueryResult(result, regs.query.query_get.short_query == 0);
+            StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
         }
         break;
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ffa0a04292..db7eae065c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -673,6 +673,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
     texture_cache.OnCPUWrite(addr, size);
     shader_cache.InvalidateRegion(addr, size);
     buffer_cache.OnCPUWrite(addr, size);
+    query_cache.InvalidateRegion(addr, size);
 }
 
 void RasterizerOpenGL::SyncGuestHost() {

From 4adfc9bb0870296b372dc96296436538d6aa6c32 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 18 Feb 2020 13:24:38 -0400
Subject: [PATCH 13/31] Rasterizer: Document SignalFence & ReleaseFences and
 set up skeletons on Vulkan.

---
 src/video_core/engines/maxwell_3d.cpp         |  1 -
 src/video_core/gpu_thread.cpp                 |  5 +++-
 src/video_core/rasterizer_interface.h         |  6 +++--
 .../renderer_vulkan/vk_rasterizer.cpp         | 25 +++++++++++++++++++
 .../renderer_vulkan/vk_rasterizer.h           |  2 ++
 5 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index cff90bc1d9..a7e9514330 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -410,7 +410,6 @@ void Maxwell3D::ProcessQueryGet() {
             StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
         }
         break;
-    }
     case Regs::QueryOperation::Acquire:
         // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
         // matches the current payload.
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 672f8d2fab..1c3ab2145d 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -81,9 +81,12 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelExtreme()) {
+        return;
+    }
     if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
         u64 fence = PushCommand(FlushRegionCommand(addr, size));
-        while (fence < state.signaled_fence.load(std::memory_order_relaxed)) {
+        while (fence > state.signaled_fence.load(std::memory_order_relaxed)) {
         }
     }
 }
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 2287521314..b49f15df2e 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,9 +49,11 @@ public:
     /// Records a GPU query and caches it
     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 
-    virtual void SignalFence(GPUVAddr addr, u32 value) {}
+    /// Signal a GPU based fence
+    virtual void SignalFence(GPUVAddr addr, u32 value) = 0;
 
-    virtual void ReleaseFences() {}
+    /// Release all pending fences.
+    virtual void ReleaseFences() = 0;
 
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 9437a4aa11..1d75a4766a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -535,6 +535,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
     texture_cache.OnCPUWrite(addr, size);
     pipeline_cache.InvalidateRegion(addr, size);
     buffer_cache.OnCPUWrite(addr, size);
+    query_cache.InvalidateRegion(addr, size);
 }
 
 void RasterizerVulkan::SyncGuestHost() {
@@ -542,6 +543,30 @@ void RasterizerVulkan::SyncGuestHost() {
     buffer_cache.SyncGuestHost();
 }
 
+void RasterizerVulkan::SignalFence(GPUVAddr addr, u32 value) {
+    auto& gpu{system.GPU()};
+    auto& memory_manager{gpu.MemoryManager()};
+    memory_manager.Write<u32>(addr, value);
+    /*
+    if (!gpu.IsAsync()) {
+        auto& memory_manager{gpu.MemoryManager()};
+        memory_manager.Write<u32>(addr, value);
+        return;
+    }
+    fence_manager.SignalFence(addr, value);
+    */
+}
+
+void RasterizerVulkan::ReleaseFences() {
+    /*
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        return;
+    }
+    fence_manager.WaitPendingFences();
+    */
+}
+
 void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 7002a4fa39..08a9af401c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -122,6 +122,8 @@ public:
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
+    void SignalFence(GPUVAddr addr, u32 value) override;
+    void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;

From b10db7e4a5f43414679b7969ea309b1829937a37 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 18 Feb 2020 17:20:39 -0400
Subject: [PATCH 14/31] FenceManager: Implement async buffer cache flushes on
 High settings

---
 src/video_core/buffer_cache/buffer_cache.h    | 50 +++++++++++++++++++
 src/video_core/fence_manager.h                | 15 ++++--
 src/video_core/gpu_thread.cpp                 |  2 +-
 .../renderer_opengl/gl_fence_manager.cpp      |  4 +-
 .../renderer_opengl/gl_fence_manager.h        |  5 +-
 .../renderer_opengl/gl_rasterizer.cpp         |  3 +-
 6 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index df4c0211e3..d72df90eff 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -21,6 +21,7 @@
 #include "common/common_types.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -80,6 +81,9 @@ public:
         auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
+            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
+                AsyncFlushMap(map);
+            }
             if (!map->IsWritten()) {
                 map->MarkAsWritten(true);
                 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -193,6 +197,39 @@ public:
         marked_for_unregister.clear();
     }
 
+    void CommitAsyncFlushes() {
+        commited_flushes.push_back(uncommited_flushes);
+        uncommited_flushes.reset();
+    }
+
+    bool ShouldWaitAsyncFlushes() {
+        if (commited_flushes.empty()) {
+            return false;
+        }
+        auto& flush_list = commited_flushes.front();
+        if (!flush_list) {
+            return false;
+        }
+        return true;
+    }
+
+    void PopAsyncFlushes() {
+        if (commited_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = commited_flushes.front();
+        if (!flush_list) {
+            commited_flushes.pop_front();
+            return;
+        }
+        for (MapInterval& map : *flush_list) {
+            if (map->IsRegistered()) {
+                FlushMap(map);
+            }
+        }
+        commited_flushes.pop_front();
+    }
+
     virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
 
 protected:
@@ -316,6 +353,9 @@ private:
         MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
         if (modified_inheritance) {
             new_map->MarkAsModified(true, GetModifiedTicks());
+            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
+                AsyncFlushMap(new_map);
+            }
         }
         Register(new_map, write_inheritance);
         return new_map;
@@ -502,6 +542,13 @@ private:
         return false;
     }
 
+    void AsyncFlushMap(MapInterval& map) {
+        if (!uncommited_flushes) {
+            uncommited_flushes = std::make_shared<std::list<MapInterval>>();
+        }
+        uncommited_flushes->push_back(map);
+    }
+
     VideoCore::RasterizerInterface& rasterizer;
     Core::System& system;
 
@@ -533,6 +580,9 @@ private:
     std::vector<u8> staging_buffer;
     std::list<MapInterval> marked_for_unregister;
 
+    std::shared_ptr<std::list<MapInterval>> uncommited_flushes{};
+    std::list<std::shared_ptr<std::list<MapInterval>>> commited_flushes;
+
     std::recursive_mutex mutex;
 };
 
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 036f3996c3..c4b190503f 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -6,8 +6,8 @@
 
 #include <algorithm>
 #include <array>
-#include <queue>
 #include <memory>
+#include <queue>
 
 #include "common/assert.h"
 #include "common/common_types.h"
@@ -37,7 +37,7 @@ private:
     u32 payload;
 };
 
-template <typename TFence, typename TTextureCache>
+template <typename TFence, typename TTextureCache, typename TTBufferCache>
 class FenceManager {
 public:
     void SignalFence(GPUVAddr addr, u32 value) {
@@ -46,6 +46,7 @@ public:
         QueueFence(new_fence);
         fences.push(new_fence);
         texture_cache.CommitAsyncFlushes();
+        buffer_cache.CommitAsyncFlushes();
         rasterizer.FlushCommands();
         rasterizer.SyncGuestHost();
     }
@@ -54,10 +55,12 @@ public:
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
+            should_wait |= buffer_cache.ShouldWaitAsyncFlushes();
             if (should_wait) {
                 WaitFence(current_fence);
             }
             texture_cache.PopAsyncFlushes();
+            buffer_cache.PopAsyncFlushes();
             auto& gpu{system.GPU()};
             auto& memory_manager{gpu.MemoryManager()};
             memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
@@ -67,8 +70,9 @@ public:
 
 protected:
     FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                 TTextureCache& texture_cache)
-        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache} {}
+                 TTextureCache& texture_cache, TTBufferCache& buffer_cache)
+        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, buffer_cache{
+                                                                                    buffer_cache} {}
 
     virtual TFence CreateFence(GPUVAddr addr, u32 value) = 0;
     virtual void QueueFence(TFence& fence) = 0;
@@ -78,16 +82,19 @@ protected:
     Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
     TTextureCache& texture_cache;
+    TTBufferCache& buffer_cache;
 
 private:
     void TryReleasePendingFences() {
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
+            should_wait |= buffer_cache.ShouldWaitAsyncFlushes();
             if (should_wait && !IsFenceSignaled(current_fence)) {
                 return;
             }
             texture_cache.PopAsyncFlushes();
+            buffer_cache.PopAsyncFlushes();
             auto& gpu{system.GPU()};
             auto& memory_manager{gpu.MemoryManager()};
             memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 1c3ab2145d..3e2be00e9d 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -81,7 +81,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    if (!Settings::IsGPULevelExtreme()) {
+    if (!Settings::IsGPULevelHigh()) {
         return;
     }
     if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 4517ef1507..69dd3211b0 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -33,8 +33,8 @@ void GLInnerFence::Wait() {
 }
 
 FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                               TextureCacheOpenGL& texture_cache)
-    : GenericFenceManager(system, rasterizer, texture_cache) {}
+                               TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache)
+    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache) {}
 
 Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value) {
     return std::make_shared<GLInnerFence>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 3cfa8b1d01..b48d5eaa0d 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -9,6 +9,7 @@
 
 #include "common/common_types.h"
 #include "video_core/fence_manager.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 
@@ -30,12 +31,12 @@ private:
 };
 
 using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCacheOpenGL>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache>;
 
 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
     FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                   TextureCacheOpenGL& texture_cache);
+                       TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache);
 
 protected:
     Fence CreateFence(GPUVAddr addr, u32 value) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index db7eae065c..88914828ca 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,7 +102,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
       shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
       screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
       buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this,
-                                                                             texture_cache} {
+                                                                             texture_cache,
+                                                                             buffer_cache} {
     CheckExtensions();
 }
 

From 96bb961a6485c2f5c8b7fb91aa0dd7eb24fa5e5d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 19 Feb 2020 10:49:07 -0400
Subject: [PATCH 15/31] BufferCache: Refactor async managing.

---
 src/video_core/buffer_cache/buffer_cache.h | 31 +++++++++++++++++-----
 src/video_core/fence_manager.h             |  6 ++---
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d72df90eff..06fb931d75 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -82,7 +82,7 @@ public:
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
-                AsyncFlushMap(map);
+                MarkForAsyncFlush(map);
             }
             if (!map->IsWritten()) {
                 map->MarkAsWritten(true);
@@ -198,7 +198,23 @@ public:
     }
 
     void CommitAsyncFlushes() {
-        commited_flushes.push_back(uncommited_flushes);
+        if (uncommited_flushes) {
+            auto commit_list = std::make_shared<std::list<MapInterval>>();
+            for (auto& map : *uncommited_flushes) {
+                if (map->IsRegistered() && map->IsModified()) {
+                    // TODO(Blinkhawk): Implement backend asynchronous flushing
+                    // AsyncFlushMap(map)
+                    commit_list->push_back(map);
+                }
+            }
+            if (!commit_list->empty()) {
+                commited_flushes.push_back(commit_list);
+            } else {
+                commited_flushes.emplace_back();
+            }
+        } else {
+            commited_flushes.emplace_back();
+        }
         uncommited_flushes.reset();
     }
 
@@ -224,6 +240,7 @@ public:
         }
         for (MapInterval& map : *flush_list) {
             if (map->IsRegistered()) {
+                // TODO(Blinkhawk): Replace this for reading the asynchronous flush
                 FlushMap(map);
             }
         }
@@ -354,7 +371,7 @@ private:
         if (modified_inheritance) {
             new_map->MarkAsModified(true, GetModifiedTicks());
             if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
-                AsyncFlushMap(new_map);
+                MarkForAsyncFlush(new_map);
             }
         }
         Register(new_map, write_inheritance);
@@ -542,11 +559,11 @@ private:
         return false;
     }
 
-    void AsyncFlushMap(MapInterval& map) {
+    void MarkForAsyncFlush(MapInterval& map) {
         if (!uncommited_flushes) {
-            uncommited_flushes = std::make_shared<std::list<MapInterval>>();
+            uncommited_flushes = std::make_shared<std::unordered_set<MapInterval>>();
         }
-        uncommited_flushes->push_back(map);
+        uncommited_flushes->insert(map);
     }
 
     VideoCore::RasterizerInterface& rasterizer;
@@ -580,7 +597,7 @@ private:
     std::vector<u8> staging_buffer;
     std::list<MapInterval> marked_for_unregister;
 
-    std::shared_ptr<std::list<MapInterval>> uncommited_flushes{};
+    std::shared_ptr<std::unordered_set<MapInterval>> uncommited_flushes{};
     std::list<std::shared_ptr<std::list<MapInterval>>> commited_flushes;
 
     std::recursive_mutex mutex;
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index c4b190503f..72ee509550 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -42,11 +42,11 @@ class FenceManager {
 public:
     void SignalFence(GPUVAddr addr, u32 value) {
         TryReleasePendingFences();
-        TFence new_fence = CreateFence(addr, value);
-        QueueFence(new_fence);
-        fences.push(new_fence);
         texture_cache.CommitAsyncFlushes();
         buffer_cache.CommitAsyncFlushes();
+        TFence new_fence = CreateFence(addr, value);
+        fences.push(new_fence);
+        QueueFence(new_fence);
         rasterizer.FlushCommands();
         rasterizer.SyncGuestHost();
     }

From b7bc3c25496849661846f2fe42f591f2a81fbc87 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 19 Feb 2020 13:40:37 -0400
Subject: [PATCH 16/31] FenceManager: Manage syncpoints and rename fences to
 semaphores.

---
 src/video_core/buffer_cache/buffer_cache.h    |  7 +++
 src/video_core/engines/maxwell_3d.cpp         |  4 +-
 src/video_core/fence_manager.h                | 58 ++++++++++++++++---
 src/video_core/rasterizer_interface.h         |  7 ++-
 .../renderer_opengl/gl_fence_manager.cpp      | 24 ++++++--
 .../renderer_opengl/gl_fence_manager.h        |  6 +-
 .../renderer_opengl/gl_rasterizer.cpp         | 13 ++++-
 .../renderer_opengl/gl_rasterizer.h           |  3 +-
 .../renderer_vulkan/vk_rasterizer.cpp         | 16 ++++-
 .../renderer_vulkan/vk_rasterizer.h           |  3 +-
 src/video_core/texture_cache/texture_cache.h  |  7 +++
 11 files changed, 123 insertions(+), 25 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 06fb931d75..54c75ca4e1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -229,6 +229,13 @@ public:
         return true;
     }
 
+    bool HasUncommitedFlushes() {
+        if (uncommited_flushes) {
+            return true;
+        }
+        return false;
+    }
+
     void PopAsyncFlushes() {
         if (commited_flushes.empty()) {
             return;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a7e9514330..2824ed707d 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -405,7 +405,7 @@ void Maxwell3D::ProcessQueryGet() {
     switch (regs.query.query_get.operation) {
     case Regs::QueryOperation::Release:
         if (regs.query.query_get.fence == 1) {
-            rasterizer.SignalFence(regs.query.QueryAddress(), regs.query.query_sequence);
+            rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
         } else {
             StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
         }
@@ -487,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() {
     const u32 increment = regs.sync_info.increment.Value();
     [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
     if (increment) {
-        system.GPU().IncrementSyncPoint(sync_point);
+        rasterizer.SignalSyncPoint(sync_point);
     }
 }
 
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 72ee509550..417cb113fb 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -22,7 +22,11 @@ namespace VideoCommon {
 
 class FenceBase {
 public:
-    FenceBase(GPUVAddr address, u32 payload) : address{address}, payload{payload} {}
+    FenceBase(u32 payload, bool is_stubbed)
+        : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
+
+    FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
+        : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
 
     constexpr GPUVAddr GetAddress() const {
         return address;
@@ -32,22 +36,49 @@ public:
         return payload;
     }
 
+    constexpr bool IsSemaphore() const {
+        return is_semaphore;
+    }
+
 private:
     GPUVAddr address;
     u32 payload;
+    bool is_semaphore;
+
+protected:
+    bool is_stubbed;
 };
 
 template <typename TFence, typename TTextureCache, typename TTBufferCache>
 class FenceManager {
 public:
-    void SignalFence(GPUVAddr addr, u32 value) {
+    void SignalSemaphore(GPUVAddr addr, u32 value) {
         TryReleasePendingFences();
+        bool should_flush = texture_cache.HasUncommitedFlushes();
+        should_flush |= buffer_cache.HasUncommitedFlushes();
         texture_cache.CommitAsyncFlushes();
         buffer_cache.CommitAsyncFlushes();
-        TFence new_fence = CreateFence(addr, value);
+        TFence new_fence = CreateFence(addr, value, !should_flush);
         fences.push(new_fence);
         QueueFence(new_fence);
-        rasterizer.FlushCommands();
+        if (should_flush) {
+            rasterizer.FlushCommands();
+        }
+        rasterizer.SyncGuestHost();
+    }
+
+    void SignalSyncPoint(u32 value) {
+        TryReleasePendingFences();
+        bool should_flush = texture_cache.HasUncommitedFlushes();
+        should_flush |= buffer_cache.HasUncommitedFlushes();
+        texture_cache.CommitAsyncFlushes();
+        buffer_cache.CommitAsyncFlushes();
+        TFence new_fence = CreateFence(value, !should_flush);
+        fences.push(new_fence);
+        QueueFence(new_fence);
+        if (should_flush) {
+            rasterizer.FlushCommands();
+        }
         rasterizer.SyncGuestHost();
     }
 
@@ -62,8 +93,12 @@ public:
             texture_cache.PopAsyncFlushes();
             buffer_cache.PopAsyncFlushes();
             auto& gpu{system.GPU()};
-            auto& memory_manager{gpu.MemoryManager()};
-            memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            if (current_fence->IsSemaphore()) {
+                auto& memory_manager{gpu.MemoryManager()};
+                memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            } else {
+                gpu.IncrementSyncPoint(current_fence->GetPayload());
+            }
             fences.pop();
         }
     }
@@ -74,7 +109,8 @@ protected:
         : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, buffer_cache{
                                                                                     buffer_cache} {}
 
-    virtual TFence CreateFence(GPUVAddr addr, u32 value) = 0;
+    virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
+    virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
     virtual void QueueFence(TFence& fence) = 0;
     virtual bool IsFenceSignaled(TFence& fence) = 0;
     virtual void WaitFence(TFence& fence) = 0;
@@ -96,8 +132,12 @@ private:
             texture_cache.PopAsyncFlushes();
             buffer_cache.PopAsyncFlushes();
             auto& gpu{system.GPU()};
-            auto& memory_manager{gpu.MemoryManager()};
-            memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            if (current_fence->IsSemaphore()) {
+                auto& memory_manager{gpu.MemoryManager()};
+                memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            } else {
+                gpu.IncrementSyncPoint(current_fence->GetPayload());
+            }
             fences.pop();
         }
     }
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index b49f15df2e..4e9c8fb595 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,8 +49,11 @@ public:
     /// Records a GPU query and caches it
     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 
-    /// Signal a GPU based fence
-    virtual void SignalFence(GPUVAddr addr, u32 value) = 0;
+    /// Signal a GPU based semaphore as a fence
+    virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
+
+    /// Signal a GPU based syncpoint as a fence
+    virtual void SignalSyncPoint(u32 value) = 0;
 
     /// Release all pending fences.
     virtual void ReleaseFences() = 0;
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 69dd3211b0..579c03a1ea 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -8,17 +8,26 @@
 
 namespace OpenGL {
 
-GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload)
-    : VideoCommon::FenceBase(address, payload), sync_object{} {}
+GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
+
+GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
 
 GLInnerFence::~GLInnerFence() = default;
 
 void GLInnerFence::Queue() {
+    if (is_stubbed) {
+        return;
+    }
     ASSERT(sync_object.handle == 0);
     sync_object.Create();
 }
 
 bool GLInnerFence::IsSignaled() const {
+    if (is_stubbed) {
+        return true;
+    }
     ASSERT(sync_object.handle != 0);
     GLsizei length;
     GLint sync_status;
@@ -27,6 +36,9 @@ bool GLInnerFence::IsSignaled() const {
 }
 
 void GLInnerFence::Wait() {
+    if (is_stubbed) {
+        return;
+    }
     ASSERT(sync_object.handle != 0);
     while (glClientWaitSync(sync_object.handle, 0, 1000) == GL_TIMEOUT_EXPIRED)
         ;
@@ -36,8 +48,12 @@ FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, VideoCore::Rasteriz
                                TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache)
     : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache) {}
 
-Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value) {
-    return std::make_shared<GLInnerFence>(addr, value);
+Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
+    return std::make_shared<GLInnerFence>(value, is_stubbed);
+}
+
+Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+    return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
 }
 
 void FenceManagerOpenGL::QueueFence(Fence& fence) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index b48d5eaa0d..ba48d2f840 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,7 +17,8 @@ namespace OpenGL {
 
 class GLInnerFence : public VideoCommon::FenceBase {
 public:
-    GLInnerFence(GPUVAddr address, u32 payload);
+    GLInnerFence(u32 payload, bool is_stubbed);
+    GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
     ~GLInnerFence();
 
     void Queue();
@@ -39,7 +40,8 @@ public:
                        TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache);
 
 protected:
-    Fence CreateFence(GPUVAddr addr, u32 value) override;
+    Fence CreateFence(u32 value, bool is_stubbed) override;
+    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
     void QueueFence(Fence& fence) override;
     bool IsFenceSignaled(Fence& fence) override;
     void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 88914828ca..e52e5961f1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -683,14 +683,23 @@ void RasterizerOpenGL::SyncGuestHost() {
     buffer_cache.SyncGuestHost();
 }
 
-void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) {
+void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
     auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
         auto& memory_manager{gpu.MemoryManager()};
         memory_manager.Write<u32>(addr, value);
         return;
     }
-    fence_manager.SignalFence(addr, value);
+    fence_manager.SignalSemaphore(addr, value);
+}
+
+void RasterizerOpenGL::SignalSyncPoint(u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        gpu.IncrementSyncPoint(value);
+        return;
+    }
+    fence_manager.SignalSyncPoint(value);
 }
 
 void RasterizerOpenGL::ReleaseFences() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 5c0f88e6f1..15e9ff7d79 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -71,7 +71,8 @@ public:
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
-    void SignalFence(GPUVAddr addr, u32 value) override;
+    void SignalSemaphore(GPUVAddr addr, u32 value) override;
+    void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1d75a4766a..507262c8fe 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -543,7 +543,7 @@ void RasterizerVulkan::SyncGuestHost() {
     buffer_cache.SyncGuestHost();
 }
 
-void RasterizerVulkan::SignalFence(GPUVAddr addr, u32 value) {
+void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
     auto& gpu{system.GPU()};
     auto& memory_manager{gpu.MemoryManager()};
     memory_manager.Write<u32>(addr, value);
@@ -553,7 +553,19 @@ void RasterizerVulkan::SignalFence(GPUVAddr addr, u32 value) {
         memory_manager.Write<u32>(addr, value);
         return;
     }
-    fence_manager.SignalFence(addr, value);
+    fence_manager.SignalSemaphore(addr, value);
+    */
+}
+
+void RasterizerVulkan::SignalSyncPoint(u32 value) {
+    auto& gpu{system.GPU()};
+    gpu.IncrementSyncPoint(value);
+    /*
+    if (!gpu.IsAsync()) {
+        gpu.IncrementSyncPoint(value);
+        return;
+    }
+    fence_manager.SignalSyncPoint(value);
     */
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 08a9af401c..145bdf899a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -122,7 +122,8 @@ public:
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
-    void SignalFence(GPUVAddr addr, u32 value) override;
+    void SignalSemaphore(GPUVAddr addr, u32 value) override;
+    void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index e1a1edbd24..f3ca1ffd15 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -337,6 +337,13 @@ public:
         uncommited_flushes.reset();
     }
 
+    bool HasUncommitedFlushes() {
+        if (uncommited_flushes) {
+            return true;
+        }
+        return false;
+    }
+
     bool ShouldWaitAsyncFlushes() {
         if (commited_flushes.empty()) {
             return false;

From 1fb516cd979ed0dbf8fa7cb4f6a334932dfb6434 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 20 Feb 2020 11:55:32 -0400
Subject: [PATCH 17/31] GPU: Implement Flush Requests for Async mode.

---
 src/video_core/gpu.cpp                        | 22 +++++++++++++++++++
 src/video_core/gpu.h                          | 21 ++++++++++++++++++
 src/video_core/gpu_thread.cpp                 | 17 +++++++++-----
 src/video_core/gpu_thread.h                   |  8 +++++--
 .../renderer_opengl/gl_rasterizer.cpp         |  6 +++++
 .../renderer_vulkan/vk_rasterizer.cpp         |  4 ++++
 6 files changed, 70 insertions(+), 8 deletions(-)

diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 19d3bd3052..85a6c7bb5a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
     return true;
 }
 
+u64 GPU::RequestFlush(CacheAddr addr, std::size_t size) {
+    std::unique_lock lck{flush_request_mutex};
+    const u64 fence = ++last_flush_fence;
+    flush_requests.emplace_back(fence, addr, size);
+    return fence;
+}
+
+void GPU::TickWork() {
+    std::unique_lock lck{flush_request_mutex};
+    while (!flush_requests.empty()) {
+        auto& request = flush_requests.front();
+        const u64 fence = request.fence; // Copy the fields out before the entry is popped
+        const CacheAddr addr = request.addr;
+        const std::size_t size = request.size;
+        flush_requests.pop_front();
+        lck.unlock(); // Drop the lock while flushing; the guard keeps tracking ownership
+        renderer->Rasterizer().FlushRegion(addr, size);
+        current_flush_fence.store(fence); // Publish completion to threads polling the fence
+        lck.lock();
+    }
+}
+
 u64 GPU::GetTicks() const {
     // This values were reversed engineered by fincs from NVN
     // The gpu clock is reported in units of 385/625 nanoseconds
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fa9991c871..943a5b1100 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -159,6 +159,14 @@ public:
     void SyncGuestHost();
     virtual void OnCommandListEnd();
 
+    u64 RequestFlush(CacheAddr addr, std::size_t size);
+
+    u64 CurrentFlushRequestFence() const { // Acquire pairs with the store done in TickWork
+        return current_flush_fence.load(std::memory_order_acquire);
+    }
+
+    void TickWork();
+
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
 
@@ -327,6 +335,19 @@ private:
 
     std::condition_variable sync_cv;
 
+    struct FlushRequest {
+        FlushRequest(u64 fence, CacheAddr addr, std::size_t size)
+            : fence{fence}, addr{addr}, size{size} {}
+        u64 fence;
+        CacheAddr addr;
+        std::size_t size;
+    };
+
+    std::list<FlushRequest> flush_requests;
+    std::atomic<u64> current_flush_fence{};
+    u64 last_flush_fence{};
+    std::mutex flush_request_mutex;
+
     const bool is_async;
 };
 
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3e2be00e9d..9460364a3e 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -15,8 +15,9 @@
 namespace VideoCommon::GPUThread {
 
 /// Runs the GPU thread
-static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
+static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
+                      Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
     MicroProfileOnThreadCreate("GpuThread");
 
     // Wait for first GPU command before acquiring the window context
@@ -40,6 +41,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
         } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
             renderer.Rasterizer().ReleaseFences();
+        } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
+            system.GPU().TickWork();
         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
             renderer.Rasterizer().FlushRegion(data->addr, data->size);
         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -68,8 +71,8 @@ ThreadManager::~ThreadManager() {
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                 Core::Frontend::GraphicsContext& context,
                                 Tegra::DmaPusher& dma_pusher) {
-    thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
-                         std::ref(state)};
+    thread = std::thread{RunThread,         std::ref(system),     std::ref(renderer),
+                         std::ref(context), std::ref(dma_pusher), std::ref(state)};
 }
 
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -85,8 +88,10 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
         return;
     }
     if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
-        u64 fence = PushCommand(FlushRegionCommand(addr, size));
-        while (fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+        auto& gpu = system.GPU();
+        u64 fence = gpu.RequestFlush(addr, size);
+        PushCommand(GPUTickCommand());
+        while (fence > gpu.CurrentFlushRequestFence()) {
         }
     }
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 9d08779216..5a28335d61 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,12 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
     u64 size;
 };
 
-/// Command to signal to the GPU thread that processing has ended
+/// Command called within the gpu, to schedule actions after a command list end
 struct OnCommandListEndCommand final {};
 
+/// Command to make the gpu look into pending requests
+struct GPUTickCommand final {};
+
 using CommandData =
     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
+                 GPUTickCommand>;
 
 struct CommandDataContainer {
     CommandDataContainer() = default;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e52e5961f1..fbd81b895f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -601,6 +601,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     EndTransformFeedback();
 
     ++num_queued_commands;
+
+    system.GPU().TickWork();
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -628,6 +630,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
     ++num_queued_commands;
+    system.GPU().TickWork();
 }
 
 void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -652,6 +655,9 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
 }
 
 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelExtreme()) {
+        return buffer_cache.MustFlushRegion(addr, size);
+    }
     return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 507262c8fe..926ecf38ea 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -365,6 +365,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     });
 
     EndTransformFeedback();
+
+    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::Clear() {
@@ -492,6 +494,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
                                   descriptor_set, {});
         cmdbuf.Dispatch(grid_x, grid_y, grid_z);
     });
+
+    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {

From 131b342130f21cf66ca64ece8034951ad6cce1f7 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 20 Feb 2020 13:14:03 -0400
Subject: [PATCH 18/31] OpenGL: Guarantee writes to Buffers.

---
 src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3 ++-
 src/video_core/renderer_opengl/gl_rasterizer.cpp   | 1 -
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index cb5792407e..4efce0de77 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
 }
 
 void OGLBufferCache::WriteBarrier() {
-    glMemoryBarrier(GL_ALL_BARRIER_BITS);
+    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
 }
 
 GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
@@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
 void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                        u8* data) {
     MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
+    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
     glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
                             static_cast<GLsizeiptr>(size), data);
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index fbd81b895f..bc57d396ec 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -630,7 +630,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
     ++num_queued_commands;
-    system.GPU().TickWork();
 }
 
 void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 926ecf38ea..f8b5a5a929 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -494,8 +494,6 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
                                   descriptor_set, {});
         cmdbuf.Dispatch(grid_x, grid_y, grid_z);
     });
-
-    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {

From 0649f0590047e8cc0b16a10dec5eb74938fef718 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 15 Apr 2020 16:36:14 -0400
Subject: [PATCH 19/31] QueryCache: Implement Async Flushes.

---
 src/video_core/fence_manager.h                | 18 +++++--
 src/video_core/query_cache.h                  | 50 +++++++++++++++++++
 .../renderer_opengl/gl_fence_manager.cpp      |  8 +--
 .../renderer_opengl/gl_fence_manager.h        |  7 ++-
 .../renderer_opengl/gl_rasterizer.cpp         |  6 +--
 5 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 417cb113fb..99a138b5b7 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -49,15 +49,17 @@ protected:
     bool is_stubbed;
 };
 
-template <typename TFence, typename TTextureCache, typename TTBufferCache>
+template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
 class FenceManager {
 public:
     void SignalSemaphore(GPUVAddr addr, u32 value) {
         TryReleasePendingFences();
         bool should_flush = texture_cache.HasUncommitedFlushes();
         should_flush |= buffer_cache.HasUncommitedFlushes();
+        should_flush |= query_cache.HasUncommitedFlushes();
         texture_cache.CommitAsyncFlushes();
         buffer_cache.CommitAsyncFlushes();
+        query_cache.CommitAsyncFlushes();
         TFence new_fence = CreateFence(addr, value, !should_flush);
         fences.push(new_fence);
         QueueFence(new_fence);
@@ -71,8 +73,10 @@ public:
         TryReleasePendingFences();
         bool should_flush = texture_cache.HasUncommitedFlushes();
         should_flush |= buffer_cache.HasUncommitedFlushes();
+        should_flush |= query_cache.HasUncommitedFlushes();
         texture_cache.CommitAsyncFlushes();
         buffer_cache.CommitAsyncFlushes();
+        query_cache.CommitAsyncFlushes();
         TFence new_fence = CreateFence(value, !should_flush);
         fences.push(new_fence);
         QueueFence(new_fence);
@@ -87,11 +91,13 @@ public:
             TFence& current_fence = fences.front();
             bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
             should_wait |= buffer_cache.ShouldWaitAsyncFlushes();
+            should_wait |= query_cache.ShouldWaitAsyncFlushes();
             if (should_wait) {
                 WaitFence(current_fence);
             }
             texture_cache.PopAsyncFlushes();
             buffer_cache.PopAsyncFlushes();
+            query_cache.PopAsyncFlushes();
             auto& gpu{system.GPU()};
             if (current_fence->IsSemaphore()) {
                 auto& memory_manager{gpu.MemoryManager()};
@@ -105,9 +111,10 @@ public:
 
 protected:
     FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                 TTextureCache& texture_cache, TTBufferCache& buffer_cache)
-        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, buffer_cache{
-                                                                                    buffer_cache} {}
+                 TTextureCache& texture_cache, TTBufferCache& buffer_cache,
+                 TQueryCache& query_cache)
+        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
+          buffer_cache{buffer_cache}, query_cache{query_cache} {}
 
     virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
     virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
@@ -119,6 +126,7 @@ protected:
     VideoCore::RasterizerInterface& rasterizer;
     TTextureCache& texture_cache;
     TTBufferCache& buffer_cache;
+    TQueryCache& query_cache;
 
 private:
     void TryReleasePendingFences() {
@@ -126,11 +134,13 @@ private:
             TFence& current_fence = fences.front();
             bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
             should_wait |= buffer_cache.ShouldWaitAsyncFlushes();
+            should_wait |= query_cache.ShouldWaitAsyncFlushes();
             if (should_wait && !IsFenceSignaled(current_fence)) {
                 return;
             }
             texture_cache.PopAsyncFlushes();
             buffer_cache.PopAsyncFlushes();
+            query_cache.PopAsyncFlushes();
             auto& gpu{system.GPU()};
             if (current_fence->IsSemaphore()) {
                 auto& memory_manager{gpu.MemoryManager()};
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 5ea2b01f2a..1b1c23995f 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -12,6 +12,7 @@
 #include <mutex>
 #include <optional>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include "common/assert.h"
@@ -130,6 +131,7 @@ public:
         }
 
         query->BindCounter(Stream(type).Current(), timestamp);
+        AsyncFlushQuery(cpu_addr);
     }
 
     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -170,6 +172,44 @@ public:
         return streams[static_cast<std::size_t>(type)];
     }
 
+    void CommitAsyncFlushes() {
+        commited_flushes.push_back(uncommited_flushes);
+        uncommited_flushes.reset();
+    }
+
+    bool HasUncommitedFlushes() {
+        if (uncommited_flushes) {
+            return true;
+        }
+        return false;
+    }
+
+    bool ShouldWaitAsyncFlushes() {
+        if (commited_flushes.empty()) {
+            return false;
+        }
+        auto& flush_list = commited_flushes.front();
+        if (!flush_list) {
+            return false;
+        }
+        return true;
+    }
+
+    void PopAsyncFlushes() {
+        if (commited_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = commited_flushes.front();
+        if (!flush_list) {
+            commited_flushes.pop_front();
+            return;
+        }
+        for (VAddr query_address : *flush_list) {
+            FlushAndRemoveRegion(query_address, 4);
+        }
+        commited_flushes.pop_front();
+    }
+
 protected:
     std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
 
@@ -224,6 +264,13 @@ private:
         return found != std::end(contents) ? &*found : nullptr;
     }
 
+    void AsyncFlushQuery(VAddr addr) {
+        if (!uncommited_flushes) {
+            uncommited_flushes = std::make_shared<std::unordered_set<VAddr>>();
+        }
+        uncommited_flushes->insert(addr);
+    }
+
     static constexpr std::uintptr_t PAGE_SIZE = 4096;
     static constexpr unsigned PAGE_SHIFT = 12;
 
@@ -235,6 +282,9 @@ private:
     std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
 
     std::array<CounterStream, VideoCore::NumQueryTypes> streams;
+
+    std::shared_ptr<std::unordered_set<VAddr>> uncommited_flushes{};
+    std::list<std::shared_ptr<std::unordered_set<VAddr>>> commited_flushes;
 };
 
 template <class QueryCache, class HostCounter>
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 579c03a1ea..aa57a0ae01 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -44,9 +44,11 @@ void GLInnerFence::Wait() {
         ;
 }
 
-FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                               TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache)
-    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache) {}
+FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
+                                       VideoCore::RasterizerInterface& rasterizer,
+                                       TextureCacheOpenGL& texture_cache,
+                                       OGLBufferCache& buffer_cache, QueryCache& query_cache)
+    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
 
 Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
     return std::make_shared<GLInnerFence>(value, is_stubbed);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index ba48d2f840..c76e69cb8d 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -10,6 +10,7 @@
 #include "common/common_types.h"
 #include "video_core/fence_manager.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 
@@ -32,12 +33,14 @@ private:
 };
 
 using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache>;
+using GenericFenceManager =
+    VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
 
 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
     FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                       TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache);
+                       TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
+                       QueryCache& query_cache);
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bc57d396ec..6d3b5f3f40 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -101,9 +101,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
     : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
       shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
       screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
-      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this,
-                                                                             texture_cache,
-                                                                             buffer_cache} {
+      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{
+                                                                   system, *this, texture_cache,
+                                                                   buffer_cache, query_cache} {
     CheckExtensions();
 }
 

From b752faf2d3aae882a1a35a3aec393ef5765c035f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 16 Mar 2020 21:43:05 -0300
Subject: [PATCH 20/31] vk_fence_manager: Initial implementation

---
 src/video_core/CMakeLists.txt                 |   2 +
 src/video_core/buffer_cache/buffer_cache.h    |   1 +
 .../renderer_vulkan/vk_fence_manager.cpp      | 101 ++++++++++++++++++
 .../renderer_vulkan/vk_fence_manager.h        |  74 +++++++++++++
 .../renderer_vulkan/vk_rasterizer.cpp         |  16 +--
 .../renderer_vulkan/vk_rasterizer.h           |   2 +
 src/video_core/renderer_vulkan/wrapper.cpp    |  18 ++++
 src/video_core/renderer_vulkan/wrapper.h      |  20 ++++
 8 files changed, 222 insertions(+), 12 deletions(-)
 create mode 100644 src/video_core/renderer_vulkan/vk_fence_manager.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_fence_manager.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 9a3f568f9c..55047dde41 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -177,6 +177,8 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_descriptor_pool.h
         renderer_vulkan/vk_device.cpp
         renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_fence_manager.cpp
+        renderer_vulkan/vk_fence_manager.h
         renderer_vulkan/vk_graphics_pipeline.cpp
         renderer_vulkan/vk_graphics_pipeline.h
         renderer_vulkan/vk_image.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 54c75ca4e1..3725450803 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
 
 #include "common/alignment.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
 #include "core/settings.h"
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
new file mode 100644
index 0000000000..a2b2bc408f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -0,0 +1,101 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <thread>
+
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_fence_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
+
+InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
+                       u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
+
+InnerFence::~InnerFence() = default;
+
+void InnerFence::Queue() {
+    if (is_stubbed) {
+        return;
+    }
+    ASSERT(!event);
+
+    event = device.GetLogical().CreateEvent();
+    ticks = scheduler.Ticks();
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
+        cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+    });
+}
+
+bool InnerFence::IsSignaled() const {
+    if (is_stubbed) {
+        return true;
+    }
+    ASSERT(event);
+    return IsEventSignalled();
+}
+
+void InnerFence::Wait() {
+    if (is_stubbed) {
+        return;
+    }
+    ASSERT(event);
+
+    if (ticks >= scheduler.Ticks()) {
+        scheduler.Flush();
+    }
+    while (!IsEventSignalled()) {
+        std::this_thread::yield();
+    }
+}
+
+bool InnerFence::IsEventSignalled() const {
+    switch (const VkResult result = event.GetStatus()) {
+    case VK_EVENT_SET:
+        return true;
+    case VK_EVENT_RESET:
+        return false;
+    default:
+        throw vk::Exception(result);
+    }
+}
+
+VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                               const VKDevice& device, VKScheduler& scheduler,
+                               VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
+                               VKQueryCache& query_cache)
+    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
+      device{device}, scheduler{scheduler} {}
+
+Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
+    return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
+}
+
+Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+    return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
+}
+
+void VKFenceManager::QueueFence(Fence& fence) {
+    fence->Queue();
+}
+
+bool VKFenceManager::IsFenceSignaled(Fence& fence) {
+    return fence->IsSignaled();
+}
+
+void VKFenceManager::WaitFence(Fence& fence) {
+    fence->Wait();
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
new file mode 100644
index 0000000000..30651e9c72
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -0,0 +1,74 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "video_core/fence_manager.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+
+class VKBufferCache;
+class VKDevice;
+class VKQueryCache;
+class VKScheduler;
+class VKTextureCache;
+
+class InnerFence : public VideoCommon::FenceBase {
+public:
+    explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
+                        bool is_stubbed);
+    explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
+                        u32 payload, bool is_stubbed);
+    ~InnerFence();
+
+    void Queue();
+
+    bool IsSignaled() const;
+
+    void Wait();
+
+private:
+    bool IsEventSignalled() const;
+
+    const VKDevice& device;
+    VKScheduler& scheduler;
+    vk::Event event;
+    u64 ticks = 0;
+};
+using Fence = std::shared_ptr<InnerFence>;
+
+using GenericFenceManager =
+    VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
+
+class VKFenceManager final : public GenericFenceManager {
+public:
+    explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                            const VKDevice& device, VKScheduler& scheduler,
+                            VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
+                            VKQueryCache& query_cache);
+
+protected:
+    Fence CreateFence(u32 value, bool is_stubbed) override;
+    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+    void QueueFence(Fence& fence) override;
+    bool IsFenceSignaled(Fence& fence) override;
+    void WaitFence(Fence& fence) override;
+
+private:
+    const VKDevice& device;
+    VKScheduler& scheduler;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f8b5a5a929..4dc7555aa0 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -299,7 +299,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
                      renderpass_cache),
       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
-      sampler_cache(device), query_cache(system, *this, device, scheduler) {
+      sampler_cache(device),
+      fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
+      query_cache(system, *this, device, scheduler) {
     scheduler.SetQueryCache(query_cache);
 }
 
@@ -547,38 +549,28 @@ void RasterizerVulkan::SyncGuestHost() {
 
 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
     auto& gpu{system.GPU()};
-    auto& memory_manager{gpu.MemoryManager()};
-    memory_manager.Write<u32>(addr, value);
-    /*
     if (!gpu.IsAsync()) {
-        auto& memory_manager{gpu.MemoryManager()};
-        memory_manager.Write<u32>(addr, value);
+        gpu.MemoryManager().Write<u32>(addr, value);
         return;
     }
     fence_manager.SignalSemaphore(addr, value);
-    */
 }
 
 void RasterizerVulkan::SignalSyncPoint(u32 value) {
     auto& gpu{system.GPU()};
-    gpu.IncrementSyncPoint(value);
-    /*
     if (!gpu.IsAsync()) {
         gpu.IncrementSyncPoint(value);
         return;
     }
     fence_manager.SignalSyncPoint(value);
-    */
 }
 
 void RasterizerVulkan::ReleaseFences() {
-    /*
     auto& gpu{system.GPU()};
     if (!gpu.IsAsync()) {
         return;
     }
     fence_manager.WaitPendingFences();
-    */
 }
 
 void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 145bdf899a..2fa46b0cc4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,6 +21,7 @@
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_fence_manager.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
@@ -267,6 +268,7 @@ private:
     VKPipelineCache pipeline_cache;
     VKBufferCache buffer_cache;
     VKSamplerCache sampler_cache;
+    VKFenceManager fence_manager;
     VKQueryCache query_cache;
 
     std::array<View, Maxwell::NumRenderTargets> color_attachments;
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 9b94dfff18..5e24b8f052 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -64,6 +64,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCmdSetCheckpointNV);
     X(vkCmdSetDepthBias);
     X(vkCmdSetDepthBounds);
+    X(vkCmdSetEvent);
     X(vkCmdSetScissor);
     X(vkCmdSetStencilCompareMask);
     X(vkCmdSetStencilReference);
@@ -76,6 +77,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCreateDescriptorPool);
     X(vkCreateDescriptorSetLayout);
     X(vkCreateDescriptorUpdateTemplateKHR);
+    X(vkCreateEvent);
     X(vkCreateFence);
     X(vkCreateFramebuffer);
     X(vkCreateGraphicsPipelines);
@@ -94,6 +96,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkDestroyDescriptorPool);
     X(vkDestroyDescriptorSetLayout);
     X(vkDestroyDescriptorUpdateTemplateKHR);
+    X(vkDestroyEvent);
     X(vkDestroyFence);
     X(vkDestroyFramebuffer);
     X(vkDestroyImage);
@@ -113,6 +116,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkFreeMemory);
     X(vkGetBufferMemoryRequirements);
     X(vkGetDeviceQueue);
+    X(vkGetEventStatus);
     X(vkGetFenceStatus);
     X(vkGetImageMemoryRequirements);
     X(vkGetQueryPoolResults);
@@ -271,6 +275,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
     dld.vkFreeMemory(device, handle, nullptr);
 }
 
+void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyEvent(device, handle, nullptr);
+}
+
 void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
     dld.vkDestroyFence(device, handle, nullptr);
 }
@@ -613,6 +621,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
     return ShaderModule(object, handle, *dld);
 }
 
+Event Device::CreateEvent() const {
+    VkEventCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    VkEvent object;
+    Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
+    return Event(object, handle, *dld);
+}
+
 SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
     VkSwapchainKHR object;
     Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index fb36578198..cd2b8e6c9e 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -200,6 +200,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV;
     PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
     PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
+    PFN_vkCmdSetEvent vkCmdSetEvent;
     PFN_vkCmdSetScissor vkCmdSetScissor;
     PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
     PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
@@ -212,6 +213,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
     PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
     PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
+    PFN_vkCreateEvent vkCreateEvent;
     PFN_vkCreateFence vkCreateFence;
     PFN_vkCreateFramebuffer vkCreateFramebuffer;
     PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
@@ -230,6 +232,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
     PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
     PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
+    PFN_vkDestroyEvent vkDestroyEvent;
     PFN_vkDestroyFence vkDestroyFence;
     PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
     PFN_vkDestroyImage vkDestroyImage;
@@ -249,6 +252,7 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkFreeMemory vkFreeMemory;
     PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
     PFN_vkGetDeviceQueue vkGetDeviceQueue;
+    PFN_vkGetEventStatus vkGetEventStatus;
     PFN_vkGetFenceStatus vkGetFenceStatus;
     PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
     PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
@@ -281,6 +285,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
@@ -654,6 +659,15 @@ public:
     std::vector<VkImage> GetImages() const;
 };
 
+class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
+    using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
+
+public:
+    VkResult GetStatus() const noexcept {
+        return dld->vkGetEventStatus(owner, handle);
+    }
+};
+
 class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
     using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
 
@@ -702,6 +716,8 @@ public:
 
     ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
 
+    Event CreateEvent() const;
+
     SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
 
     DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
@@ -956,6 +972,10 @@ public:
         dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
     }
 
+    void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
+        dld->vkCmdSetEvent(handle, event, stage_flags);
+    }
+
     void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
                                          const VkDeviceSize* offsets,
                                          const VkDeviceSize* sizes) const noexcept {

From f4ab223ef0eca55666de32c7f9b9b591e6c17235 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 15 Apr 2020 20:07:32 -0400
Subject: [PATCH 21/31] Async GPU: Only do reactive flushing on Extreme Level.

---
 src/video_core/gpu_thread.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 9460364a3e..7df854a2f9 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -84,7 +84,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    if (!Settings::IsGPULevelHigh()) {
+    if (!Settings::IsGPULevelExtreme()) {
         return;
     }
     if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {

From b3e5f177ba3de381c4ad4e4b20a3bd17e4577e24 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 15 Apr 2020 21:03:30 -0400
Subject: [PATCH 22/31] QueryCache: Only do async flushes on async gpu.

---
 src/video_core/query_cache.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 1b1c23995f..98d956b68c 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -17,6 +17,7 @@
 
 #include "common/assert.h"
 #include "core/core.h"
+#include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
@@ -131,7 +132,9 @@ public:
         }
 
         query->BindCounter(Stream(type).Current(), timestamp);
-        AsyncFlushQuery(cpu_addr);
+        if (Settings::values.use_asynchronous_gpu_emulation) {
+            AsyncFlushQuery(cpu_addr);
+        }
     }
 
     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.

From d2d4a6cbcf089d6a56adb84358155dbc1bff5ee6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 15 Apr 2020 21:17:06 -0400
Subject: [PATCH 23/31] Clang format.

---
 src/yuzu/configuration/config.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index da1fa4e029..196a3a1168 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -1080,7 +1080,8 @@ void Config::SaveRendererValues() {
     WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
     WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
                  true);
-    WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy), 0);
+    WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy),
+                 0);
     WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
                  Settings::values.use_asynchronous_gpu_emulation, false);
     WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);

From 7f44f224517a853be945350e6f537a7977f7e41c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 15 Apr 2020 21:42:05 -0400
Subject: [PATCH 24/31] Correct Linux Compile Error.

---
 src/core/settings.cpp | 8 ++++++++
 src/core/settings.h   | 9 ++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 445047469e..cd6c257f5a 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -109,4 +109,12 @@ void LogSettings() {
     LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
 }
 
+bool IsGPULevelExtreme() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme;
+}
+
+bool IsGPULevelHigh() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
+}
+
 } // namespace Settings
diff --git a/src/core/settings.h b/src/core/settings.h
index b54a0d4ea9..7d09253f58 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -486,13 +486,8 @@ struct Values {
     std::map<u64, std::vector<std::string>> disabled_addons;
 } extern values;
 
-constexpr bool IsGPULevelExtreme() {
-    return values.gpu_accuracy == GPUAccuracy::Extreme;
-}
-
-constexpr bool IsGPULevelHigh() {
-    return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
-}
+bool IsGPULevelExtreme();
+bool IsGPULevelHigh();
 
 void Apply();
 void LogSettings();

From ec2f3e48e196508bb2e777511a6d2138e325e722 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 15 Apr 2020 22:59:29 -0400
Subject: [PATCH 25/31] Fix GCC error.

---
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 9 ++++-----
 src/video_core/renderer_opengl/gl_rasterizer.h   | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6d3b5f3f40..847d67159b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -99,11 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
                                    ScreenInfo& info, GLShader::ProgramManager& program_manager,
                                    StateTracker& state_tracker)
     : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
-      shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
-      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
-      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{
-                                                                   system, *this, texture_cache,
-                                                                   buffer_cache, query_cache} {
+      shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
+      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
+      fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
+      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
     CheckExtensions();
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 15e9ff7d79..ebd2173eba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -229,6 +229,7 @@ private:
     SamplerCacheOpenGL sampler_cache;
     FramebufferCacheOpenGL framebuffer_cache;
     QueryCache query_cache;
+    OGLBufferCache buffer_cache;
     FenceManagerOpenGL fence_manager;
 
     Core::System& system;
@@ -237,7 +238,6 @@ private:
     StateTracker& state_tracker;
 
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
-    OGLBufferCache buffer_cache;
 
     GLint vertex_binding = 0;
 

From f616dc0b591b783b3fb75ca89633f1c26cce05a9 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 16 Apr 2020 12:29:53 -0400
Subject: [PATCH 26/31] Address Feedback.

---
 src/video_core/buffer_cache/buffer_cache.h    | 56 ++++++---------
 src/video_core/fence_manager.h                | 72 +++++++++++--------
 src/video_core/gpu.cpp                        |  4 +-
 src/video_core/gpu.h                          | 12 +++-
 src/video_core/query_cache.h                  | 39 +++++-----
 src/video_core/rasterizer_interface.h         |  1 +
 .../renderer_opengl/gl_fence_manager.cpp      |  2 +-
 .../renderer_opengl/gl_fence_manager.h        |  2 +-
 .../renderer_opengl/gl_rasterizer.cpp         |  5 +-
 .../renderer_vulkan/vk_fence_manager.cpp      |  2 +-
 .../renderer_vulkan/vk_fence_manager.h        |  2 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |  2 +-
 src/video_core/texture_cache/texture_cache.h  | 50 +++++--------
 13 files changed, 117 insertions(+), 132 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 3725450803..f3aa352959 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -154,12 +154,9 @@ public:
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
-        for (auto& object : objects) {
-            if (object->IsModified() && object->IsRegistered()) {
-                return true;
-            }
-        }
-        return false;
+        return std::any_of(objects.begin(), objects.end(), [](const MapInterval& map) {
+            return map->IsModified() && map->IsRegistered();
+        });
     }
 
     /// Mark the specified region as being invalidated
@@ -199,9 +196,9 @@ public:
     }
 
     void CommitAsyncFlushes() {
-        if (uncommited_flushes) {
+        if (uncommitted_flushes) {
             auto commit_list = std::make_shared<std::list<MapInterval>>();
-            for (auto& map : *uncommited_flushes) {
+            for (auto& map : *uncommitted_flushes) {
                 if (map->IsRegistered() && map->IsModified()) {
                     // TODO(Blinkhawk): Implement backend asynchronous flushing
                     // AsyncFlushMap(map)
@@ -209,41 +206,34 @@ public:
                 }
             }
             if (!commit_list->empty()) {
-                commited_flushes.push_back(commit_list);
+                committed_flushes.push_back(commit_list);
             } else {
-                commited_flushes.emplace_back();
+                committed_flushes.emplace_back();
             }
         } else {
-            commited_flushes.emplace_back();
+            committed_flushes.emplace_back();
         }
-        uncommited_flushes.reset();
+        uncommitted_flushes.reset();
     }
 
-    bool ShouldWaitAsyncFlushes() {
-        if (commited_flushes.empty()) {
+    bool ShouldWaitAsyncFlushes() const {
+        if (committed_flushes.empty()) {
             return false;
         }
-        auto& flush_list = commited_flushes.front();
-        if (!flush_list) {
-            return false;
-        }
-        return true;
+        return committed_flushes.front() != nullptr;
     }
 
-    bool HasUncommitedFlushes() {
-        if (uncommited_flushes) {
-            return true;
-        }
-        return false;
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
     }
 
     void PopAsyncFlushes() {
-        if (commited_flushes.empty()) {
+        if (committed_flushes.empty()) {
             return;
         }
-        auto& flush_list = commited_flushes.front();
+        auto& flush_list = committed_flushes.front();
         if (!flush_list) {
-            commited_flushes.pop_front();
+            committed_flushes.pop_front();
             return;
         }
         for (MapInterval& map : *flush_list) {
@@ -252,7 +242,7 @@ public:
                 FlushMap(map);
             }
         }
-        commited_flushes.pop_front();
+        committed_flushes.pop_front();
     }
 
     virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
@@ -568,10 +558,10 @@ private:
     }
 
     void MarkForAsyncFlush(MapInterval& map) {
-        if (!uncommited_flushes) {
-            uncommited_flushes = std::make_shared<std::unordered_set<MapInterval>>();
+        if (!uncommitted_flushes) {
+            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
         }
-        uncommited_flushes->insert(map);
+        uncommitted_flushes->insert(map);
     }
 
     VideoCore::RasterizerInterface& rasterizer;
@@ -605,8 +595,8 @@ private:
     std::vector<u8> staging_buffer;
     std::list<MapInterval> marked_for_unregister;
 
-    std::shared_ptr<std::unordered_set<MapInterval>> uncommited_flushes{};
-    std::list<std::shared_ptr<std::list<MapInterval>>> commited_flushes;
+    std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
 
     std::recursive_mutex mutex;
 };
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 99a138b5b7..9fe9c1bf25 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -28,15 +28,15 @@ public:
     FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
         : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
 
-    constexpr GPUVAddr GetAddress() const {
+    GPUVAddr GetAddress() const {
         return address;
     }
 
-    constexpr u32 GetPayload() const {
+    u32 GetPayload() const {
         return payload;
     }
 
-    constexpr bool IsSemaphore() const {
+    bool IsSemaphore() const {
         return is_semaphore;
     }
 
@@ -54,12 +54,8 @@ class FenceManager {
 public:
     void SignalSemaphore(GPUVAddr addr, u32 value) {
         TryReleasePendingFences();
-        bool should_flush = texture_cache.HasUncommitedFlushes();
-        should_flush |= buffer_cache.HasUncommitedFlushes();
-        should_flush |= query_cache.HasUncommitedFlushes();
-        texture_cache.CommitAsyncFlushes();
-        buffer_cache.CommitAsyncFlushes();
-        query_cache.CommitAsyncFlushes();
+        bool should_flush = ShouldFlush();
+        CommitAsyncFlushes();
         TFence new_fence = CreateFence(addr, value, !should_flush);
         fences.push(new_fence);
         QueueFence(new_fence);
@@ -71,12 +67,8 @@ public:
 
     void SignalSyncPoint(u32 value) {
         TryReleasePendingFences();
-        bool should_flush = texture_cache.HasUncommitedFlushes();
-        should_flush |= buffer_cache.HasUncommitedFlushes();
-        should_flush |= query_cache.HasUncommitedFlushes();
-        texture_cache.CommitAsyncFlushes();
-        buffer_cache.CommitAsyncFlushes();
-        query_cache.CommitAsyncFlushes();
+        bool should_flush = ShouldFlush();
+        CommitAsyncFlushes();
         TFence new_fence = CreateFence(value, !should_flush);
         fences.push(new_fence);
         QueueFence(new_fence);
@@ -89,15 +81,10 @@ public:
     void WaitPendingFences() {
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
-            bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
-            should_wait |= buffer_cache.ShouldWaitAsyncFlushes();
-            should_wait |= query_cache.ShouldWaitAsyncFlushes();
-            if (should_wait) {
+            if (ShouldWait()) {
                 WaitFence(current_fence);
             }
-            texture_cache.PopAsyncFlushes();
-            buffer_cache.PopAsyncFlushes();
-            query_cache.PopAsyncFlushes();
+            PopAsyncFlushes();
             auto& gpu{system.GPU()};
             if (current_fence->IsSemaphore()) {
                 auto& memory_manager{gpu.MemoryManager()};
@@ -116,10 +103,18 @@ protected:
         : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
           buffer_cache{buffer_cache}, query_cache{query_cache} {}
 
+    virtual ~FenceManager() {}
+
+    /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
+    /// true
     virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
+    /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
     virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
+    /// Queues a fence into the backend if the fence isn't stubbed.
     virtual void QueueFence(TFence& fence) = 0;
-    virtual bool IsFenceSignaled(TFence& fence) = 0;
+    /// Returns whether the backend fence has been signaled/reached in host GPU.
+    virtual bool IsFenceSignaled(TFence& fence) const = 0;
+    /// Waits until a fence has been signaled by the host GPU.
     virtual void WaitFence(TFence& fence) = 0;
 
     Core::System& system;
@@ -132,15 +127,10 @@ private:
     void TryReleasePendingFences() {
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
-            bool should_wait = texture_cache.ShouldWaitAsyncFlushes();
-            should_wait |= buffer_cache.ShouldWaitAsyncFlushes();
-            should_wait |= query_cache.ShouldWaitAsyncFlushes();
-            if (should_wait && !IsFenceSignaled(current_fence)) {
+            if (ShouldWait() && !IsFenceSignaled(current_fence)) {
                 return;
             }
-            texture_cache.PopAsyncFlushes();
-            buffer_cache.PopAsyncFlushes();
-            query_cache.PopAsyncFlushes();
+            PopAsyncFlushes();
             auto& gpu{system.GPU()};
             if (current_fence->IsSemaphore()) {
                 auto& memory_manager{gpu.MemoryManager()};
@@ -152,6 +142,28 @@ private:
         }
     }
 
+    bool ShouldWait() const {
+        return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
+               query_cache.ShouldWaitAsyncFlushes();
+    }
+
+    bool ShouldFlush() const {
+        return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
+               query_cache.HasUncommittedFlushes();
+    }
+
+    void PopAsyncFlushes() {
+        texture_cache.PopAsyncFlushes();
+        buffer_cache.PopAsyncFlushes();
+        query_cache.PopAsyncFlushes();
+    }
+
+    void CommitAsyncFlushes() {
+        texture_cache.CommitAsyncFlushes();
+        buffer_cache.CommitAsyncFlushes();
+        query_cache.CommitAsyncFlushes();
+    }
+
     std::queue<TFence> fences;
 };
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 85a6c7bb5a..3b7572d61a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -125,7 +125,7 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
     return true;
 }
 
-u64 GPU::RequestFlush(CacheAddr addr, std::size_t size) {
+u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
     std::unique_lock lck{flush_request_mutex};
     const u64 fence = ++last_flush_fence;
     flush_requests.emplace_back(fence, addr, size);
@@ -137,7 +137,7 @@ void GPU::TickWork() {
     while (!flush_requests.empty()) {
         auto& request = flush_requests.front();
         const u64 fence = request.fence;
-        const CacheAddr addr = request.addr;
+        const VAddr addr = request.addr;
         const std::size_t size = request.size;
         flush_requests.pop_front();
         flush_request_mutex.unlock();
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 943a5b1100..5e3eb94e95 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -155,16 +155,22 @@ public:
     /// Calls a GPU method.
     void CallMethod(const MethodCall& method_call);
 
+    /// Flush all current written commands into the host GPU for execution.
     void FlushCommands();
+    /// Synchronizes CPU writes with Host GPU memory.
     void SyncGuestHost();
+    /// Signal the ending of command list.
     virtual void OnCommandListEnd();
 
-    u64 RequestFlush(CacheAddr addr, std::size_t size);
+    /// Request a host GPU memory flush from the CPU.
+    u64 RequestFlush(VAddr addr, std::size_t size);
 
+    /// Obtains current flush request fence id.
     u64 CurrentFlushRequestFence() const {
         return current_flush_fence.load(std::memory_order_relaxed);
     }
 
+    /// Tick pending requests within the GPU.
     void TickWork();
 
     /// Returns a reference to the Maxwell3D GPU engine.
@@ -336,10 +342,10 @@ private:
     std::condition_variable sync_cv;
 
     struct FlushRequest {
-        FlushRequest(u64 fence, CacheAddr addr, std::size_t size)
+        FlushRequest(u64 fence, VAddr addr, std::size_t size)
             : fence{fence}, addr{addr}, size{size} {}
         u64 fence;
-        CacheAddr addr;
+        VAddr addr;
         std::size_t size;
     };
 
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 98d956b68c..2f75f88015 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -176,41 +176,34 @@ public:
     }
 
     void CommitAsyncFlushes() {
-        commited_flushes.push_back(uncommited_flushes);
-        uncommited_flushes.reset();
+        committed_flushes.push_back(uncommitted_flushes);
+        uncommitted_flushes.reset();
     }
 
-    bool HasUncommitedFlushes() {
-        if (uncommited_flushes) {
-            return true;
-        }
-        return false;
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
     }
 
-    bool ShouldWaitAsyncFlushes() {
-        if (commited_flushes.empty()) {
+    bool ShouldWaitAsyncFlushes() const {
+        if (committed_flushes.empty()) {
             return false;
         }
-        auto& flush_list = commited_flushes.front();
-        if (!flush_list) {
-            return false;
-        }
-        return true;
+        return committed_flushes.front() != nullptr;
     }
 
     void PopAsyncFlushes() {
-        if (commited_flushes.empty()) {
+        if (committed_flushes.empty()) {
             return;
         }
-        auto& flush_list = commited_flushes.front();
+        auto& flush_list = committed_flushes.front();
         if (!flush_list) {
-            commited_flushes.pop_front();
+            committed_flushes.pop_front();
             return;
         }
         for (VAddr query_address : *flush_list) {
             FlushAndRemoveRegion(query_address, 4);
         }
-        commited_flushes.pop_front();
+        committed_flushes.pop_front();
     }
 
 protected:
@@ -268,10 +261,10 @@ private:
     }
 
     void AsyncFlushQuery(VAddr addr) {
-        if (!uncommited_flushes) {
-            uncommited_flushes = std::make_shared<std::unordered_set<VAddr>>();
+        if (!uncommitted_flushes) {
+            uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
         }
-        uncommited_flushes->insert(addr);
+        uncommitted_flushes->insert(addr);
     }
 
     static constexpr std::uintptr_t PAGE_SIZE = 4096;
@@ -286,8 +279,8 @@ private:
 
     std::array<CounterStream, VideoCore::NumQueryTypes> streams;
 
-    std::shared_ptr<std::unordered_set<VAddr>> uncommited_flushes{};
-    std::list<std::shared_ptr<std::unordered_set<VAddr>>> commited_flushes;
+    std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
 };
 
 template <class QueryCache, class HostCounter>
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 4e9c8fb595..603f619523 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -64,6 +64,7 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
+    /// Check if the specified memory area requires flushing to CPU Memory.
     virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index aa57a0ae01..476c899408 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -62,7 +62,7 @@ void FenceManagerOpenGL::QueueFence(Fence& fence) {
     fence->Queue();
 }
 
-bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) {
+bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
     return fence->IsSignaled();
 }
 
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index c76e69cb8d..c917b3343f 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -46,7 +46,7 @@ protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
     Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
     void QueueFence(Fence& fence) override;
-    bool IsFenceSignaled(Fence& fence) override;
+    bool IsFenceSignaled(Fence& fence) const override;
     void WaitFence(Fence& fence) override;
 };
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 847d67159b..d662657cf5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -653,9 +653,6 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
 }
 
 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
-    if (!Settings::IsGPULevelExtreme()) {
-        return buffer_cache.MustFlushRegion(addr, size);
-    }
     return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
 }
 
@@ -672,7 +669,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
 
 void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    if (!addr || !size) {
+    if (addr == 0 || size == 0) {
         return;
     }
     texture_cache.OnCPUWrite(addr, size);
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index a2b2bc408f..a02be5487a 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -90,7 +90,7 @@ void VKFenceManager::QueueFence(Fence& fence) {
     fence->Queue();
 }
 
-bool VKFenceManager::IsFenceSignaled(Fence& fence) {
+bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
     return fence->IsSignaled();
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 30651e9c72..04d07fe6af 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -63,7 +63,7 @@ protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
     Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
     void QueueFence(Fence& fence) override;
-    bool IsFenceSignaled(Fence& fence) override;
+    bool IsFenceSignaled(Fence& fence) const override;
     void WaitFence(Fence& fence) override;
 
 private:
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 4dc7555aa0..2350cd5f4c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -533,7 +533,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
 }
 
 void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
-    if (!addr || !size) {
+    if (addr == 0 || size == 0) {
         return;
     }
     texture_cache.OnCPUWrite(addr, size);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f3ca1ffd15..1148c3a340 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -120,15 +120,8 @@ public:
         std::lock_guard lock{mutex};
 
         auto surfaces = GetSurfacesInRegion(addr, size);
-        if (surfaces.empty()) {
-            return false;
-        }
-        for (const auto& surface : surfaces) {
-            if (surface->IsModified()) {
-                return true;
-            }
-        }
-        return false;
+        return std::any_of(surfaces.begin(), surfaces.end(),
+                           [](const TSurface& surface) { return surface->IsModified(); });
     }
 
     TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
@@ -333,41 +326,34 @@ public:
     }
 
     void CommitAsyncFlushes() {
-        commited_flushes.push_back(uncommited_flushes);
-        uncommited_flushes.reset();
+        committed_flushes.push_back(uncommitted_flushes);
+        uncommitted_flushes.reset();
     }
 
-    bool HasUncommitedFlushes() {
-        if (uncommited_flushes) {
-            return true;
-        }
-        return false;
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
     }
 
-    bool ShouldWaitAsyncFlushes() {
-        if (commited_flushes.empty()) {
+    bool ShouldWaitAsyncFlushes() const {
+        if (committed_flushes.empty()) {
             return false;
         }
-        auto& flush_list = commited_flushes.front();
-        if (!flush_list) {
-            return false;
-        }
-        return true;
+        return committed_flushes.front() != nullptr;
     }
 
     void PopAsyncFlushes() {
-        if (commited_flushes.empty()) {
+        if (committed_flushes.empty()) {
             return;
         }
-        auto& flush_list = commited_flushes.front();
+        auto& flush_list = committed_flushes.front();
         if (!flush_list) {
-            commited_flushes.pop_front();
+            committed_flushes.pop_front();
             return;
         }
         for (TSurface& surface : *flush_list) {
             FlushSurface(surface);
         }
-        commited_flushes.pop_front();
+        committed_flushes.pop_front();
     }
 
 protected:
@@ -1206,10 +1192,10 @@ private:
     };
 
     void AsyncFlushSurface(TSurface& surface) {
-        if (!uncommited_flushes) {
-            uncommited_flushes = std::make_shared<std::list<TSurface>>();
+        if (!uncommitted_flushes) {
+            uncommitted_flushes = std::make_shared<std::list<TSurface>>();
         }
-        uncommited_flushes->push_back(surface);
+        uncommitted_flushes->push_back(surface);
     }
 
     VideoCore::RasterizerInterface& rasterizer;
@@ -1258,8 +1244,8 @@ private:
 
     std::list<TSurface> marked_for_unregister;
 
-    std::shared_ptr<std::list<TSurface>> uncommited_flushes{};
-    std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes;
+    std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
 
     StagingCache staging_cache;
     std::recursive_mutex mutex;

From 644588fd883fb45bf6d1cb7895e98ec65120c7f1 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 16 Apr 2020 13:50:12 -0400
Subject: [PATCH 27/31] ShaderCache/PipelineCache: Cache null shaders.

---
 .../renderer_opengl/gl_shader_cache.cpp         | 17 +++++++++++++----
 .../renderer_opengl/gl_shader_cache.h           |  3 +++
 .../renderer_vulkan/vk_pipeline_cache.cpp       | 16 ++++++++++++----
 .../renderer_vulkan/vk_pipeline_cache.h         |  3 +++
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6d2ff20f96..f63156b8d9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     // Look up shader in the cache based on address
     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
-    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
+    Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
     if (shader) {
         return last_shaders[static_cast<std::size_t>(program)] = shader;
     }
@@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
         const std::size_t size_in_bytes = code.size() * sizeof(u64);
         shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
     }
-    Register(shader);
+
+    if (cpu_addr) {
+        Register(shader);
+    } else {
+        null_shader = shader;
+    }
 
     return last_shaders[static_cast<std::size_t>(program)] = shader;
 }
@@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     auto& memory_manager{system.GPU().MemoryManager()};
     const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
 
-    auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
+    auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
     if (kernel) {
         return kernel;
     }
@@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
         kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
     }
 
-    Register(kernel);
+    if (cpu_addr) {
+        Register(kernel);
+    } else {
+        null_kernel = kernel;
+    }
     return kernel;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index c836df5bd1..91690b4705 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -125,6 +125,9 @@ private:
     ShaderDiskCacheOpenGL disk_cache;
     std::unordered_map<u64, PrecompiledShader> runtime_cache;
 
+    Shader null_shader{};
+    Shader null_kernel{};
+
     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8fdc6400dc..c4b3bc6c87 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
         const GPUVAddr program_addr{GetShaderAddress(system, program)};
         const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
         ASSERT(cpu_addr);
-        auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
+        auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
         if (!shader) {
             const auto host_ptr{memory_manager.GetPointer(program_addr)};
 
@@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
 
             shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
                                                     std::move(code), stage_offset);
-            Register(shader);
+            if (cpu_addr) {
+                Register(shader);
+            } else {
+                null_shader = shader;
+            }
         }
         shaders[index] = std::move(shader);
     }
@@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
     ASSERT(cpu_addr);
 
-    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
+    auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
     if (!shader) {
         // No shader found - create a new one
         const auto host_ptr = memory_manager.GetPointer(program_addr);
@@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
         shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
                                                 program_addr, *cpu_addr, std::move(code),
                                                 kernel_main_offset);
-        Register(shader);
+        if (cpu_addr) {
+            Register(shader);
+        } else {
+            null_kernel = shader;
+        }
     }
 
     Specialization specialization;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 7ccdb7083d..602a0a3403 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -182,6 +182,9 @@ private:
     VKUpdateDescriptorQueue& update_descriptor_queue;
     VKRenderPassCache& renderpass_cache;
 
+    Shader null_shader{};
+    Shader null_kernel{};
+
     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 
     GraphicsPipelineCacheKey last_graphics_key;

From 1b3be8a8f86f0315004a4c60ef1828ba09f48b32 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 19 Apr 2020 13:27:56 -0400
Subject: [PATCH 28/31] MaxwellDMA: Correct copying on accuracy level.

---
 src/video_core/engines/maxwell_dma.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 32b04e31ea..3bfed6ab81 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() {
             write_buffer.resize(dst_size);
         }
 
-        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+        if (Settings::IsGPULevelExtreme()) {
+            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+        } else {
+            memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
+            memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
+        }
 
         Texture::UnswizzleSubrect(
             regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,

From 39e5b7294898c45cf247b61e46ef735bd16e96ae Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 19 Apr 2020 13:47:45 -0400
Subject: [PATCH 29/31] Async GPU: Correct flushing behavior to be similar to
 old async GPU behavior.

---
 src/video_core/gpu_thread.cpp                    | 4 ++++
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 +++
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++++
 3 files changed, 11 insertions(+)

diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 7df854a2f9..c3bb4fe062 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -84,6 +84,10 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }
 
 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelHigh()) {
+        PushCommand(FlushRegionCommand(addr, size));
+        return;
+    }
     if (!Settings::IsGPULevelExtreme()) {
         return;
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d662657cf5..4c16c89d29 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -653,6 +653,9 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
 }
 
 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelHigh()) {
+        return buffer_cache.MustFlushRegion(addr, size);
+    }
     return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2350cd5f4c..0dc9979165 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -17,6 +17,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@@ -519,6 +520,9 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
 }
 
 bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelHigh()) {
+        return buffer_cache.MustFlushRegion(addr, size);
+    }
     return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
 }
 

From 4e37f1b1130b083b42f21029155e5a2e4e9a9eb3 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 22 Apr 2020 11:14:40 -0400
Subject: [PATCH 30/31] Address Feedback.

---
 src/video_core/buffer_cache/buffer_cache.h   | 15 ++++++---------
 src/video_core/fence_manager.h               | 12 ++++++------
 src/video_core/texture_cache/texture_cache.h | 15 ++++++---------
 3 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f3aa352959..510f11089f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -153,8 +153,8 @@ public:
     bool MustFlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
-        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
-        return std::any_of(objects.begin(), objects.end(), [](const MapInterval& map) {
+        const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
             return map->IsModified() && map->IsRegistered();
         });
     }
@@ -176,7 +176,7 @@ public:
 
         for (const auto& object : GetMapsInRange(addr, size)) {
             if (object->IsMemoryMarked() && object->IsRegistered()) {
-                Unmark(object);
+                UnmarkMemory(object);
                 object->SetSyncPending(true);
                 marked_for_unregister.emplace_back(object);
             }
@@ -217,10 +217,7 @@ public:
     }
 
     bool ShouldWaitAsyncFlushes() const {
-        if (committed_flushes.empty()) {
-            return false;
-        }
-        return committed_flushes.front() != nullptr;
+        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
     }
 
     bool HasUncommittedFlushes() const {
@@ -294,7 +291,7 @@ protected:
         }
     }
 
-    void Unmark(const MapInterval& map) {
+    void UnmarkMemory(const MapInterval& map) {
         if (!map->IsMemoryMarked()) {
             return;
         }
@@ -305,7 +302,7 @@ protected:
 
     /// Unregisters an object from the cache
     void Unregister(const MapInterval& map) {
-        Unmark(map);
+        UnmarkMemory(map);
         map->MarkAsRegistered(false);
         if (map->IsSyncPending()) {
             marked_for_unregister.remove(map);
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 9fe9c1bf25..dabd1588ce 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -54,7 +54,7 @@ class FenceManager {
 public:
     void SignalSemaphore(GPUVAddr addr, u32 value) {
         TryReleasePendingFences();
-        bool should_flush = ShouldFlush();
+        const bool should_flush = ShouldFlush();
         CommitAsyncFlushes();
         TFence new_fence = CreateFence(addr, value, !should_flush);
         fences.push(new_fence);
@@ -67,7 +67,7 @@ public:
 
     void SignalSyncPoint(u32 value) {
         TryReleasePendingFences();
-        bool should_flush = ShouldFlush();
+        const bool should_flush = ShouldFlush();
         CommitAsyncFlushes();
         TFence new_fence = CreateFence(value, !should_flush);
         fences.push(new_fence);
@@ -79,15 +79,15 @@ public:
     }
 
     void WaitPendingFences() {
+        auto& gpu{system.GPU()};
+        auto& memory_manager{gpu.MemoryManager()};
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             if (ShouldWait()) {
                 WaitFence(current_fence);
             }
             PopAsyncFlushes();
-            auto& gpu{system.GPU()};
             if (current_fence->IsSemaphore()) {
-                auto& memory_manager{gpu.MemoryManager()};
                 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
             } else {
                 gpu.IncrementSyncPoint(current_fence->GetPayload());
@@ -125,15 +125,15 @@ protected:
 
 private:
     void TryReleasePendingFences() {
+        auto& gpu{system.GPU()};
+        auto& memory_manager{gpu.MemoryManager()};
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             if (ShouldWait() && !IsFenceSignaled(current_fence)) {
                 return;
             }
             PopAsyncFlushes();
-            auto& gpu{system.GPU()};
             if (current_fence->IsSemaphore()) {
-                auto& memory_manager{gpu.MemoryManager()};
                 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
             } else {
                 gpu.IncrementSyncPoint(current_fence->GetPayload());
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1148c3a340..cf6bd005aa 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -68,7 +68,7 @@ public:
 
         for (const auto& surface : GetSurfacesInRegion(addr, size)) {
             if (surface->IsMemoryMarked()) {
-                Unmark(surface);
+                UnmarkMemory(surface);
                 surface->SetSyncPending(true);
                 marked_for_unregister.emplace_back(surface);
             }
@@ -119,8 +119,8 @@ public:
     bool MustFlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
-        auto surfaces = GetSurfacesInRegion(addr, size);
-        return std::any_of(surfaces.begin(), surfaces.end(),
+        const auto surfaces = GetSurfacesInRegion(addr, size);
+        return std::any_of(surfaces.cbegin(), surfaces.cend(),
                            [](const TSurface& surface) { return surface->IsModified(); });
     }
 
@@ -335,10 +335,7 @@ public:
     }
 
     bool ShouldWaitAsyncFlushes() const {
-        if (committed_flushes.empty()) {
-            return false;
-        }
-        return committed_flushes.front() != nullptr;
+        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
     }
 
     void PopAsyncFlushes() {
@@ -421,7 +418,7 @@ protected:
         rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
     }
 
-    void Unmark(TSurface surface) {
+    void UnmarkMemory(TSurface surface) {
         if (!surface->IsMemoryMarked()) {
             return;
         }
@@ -438,7 +435,7 @@ protected:
         if (!guard_render_targets && surface->IsRenderTarget()) {
             ManageRenderTargetUnregister(surface);
         }
-        Unmark(surface);
+        UnmarkMemory(surface);
         if (surface->IsSyncPending()) {
             marked_for_unregister.remove(surface);
             surface->SetSyncPending(false);

From c043ac4f139be7cbf5b7edbf050aa84e1896f75d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 22 Apr 2020 20:34:32 -0400
Subject: [PATCH 31/31] GL_Fence_Manager: use GL_TIMEOUT_IGNORED instead of a
 loop

---
 src/video_core/renderer_opengl/gl_fence_manager.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 476c899408..99ddcb3f85 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -40,8 +40,7 @@ void GLInnerFence::Wait() {
         return;
     }
     ASSERT(sync_object.handle != 0);
-    while (glClientWaitSync(sync_object.handle, 0, 1000) == GL_TIMEOUT_EXPIRED)
-        ;
+    glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
 }
 
 FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,