diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index dc2b1f4473..3faa7e0d07 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -586,7 +586,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                     cpu_addr_base += u64(std::max<s64>(difference2, 0));
                     const u64 new_size = cpu_addr_end2 - cpu_addr_base;
                     const u64 new_offset = cpu_addr_base - buffer.CpuAddr();
-                    ASSERT(!IsRegionCpuModified(cpu_addr_base, new_size));
+                    if (IsRegionCpuModified(cpu_addr_base, new_size)) {
+                        return;
+                    }
                     downloads.push_back({
                         BufferCopy{
                             .src_offset = new_offset,
@@ -596,8 +598,15 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                         buffer_id,
                     });
                     total_size_bytes += new_size;
-                    buffer.UnmarkRegionAsGpuModified(cpu_addr_base, new_size);
                     largest_copy = std::max(largest_copy, new_size);
+                    constexpr u64 align_mask = ~(32ULL - 1);
+                    const VAddr align_up_address = (cpu_addr_base + 31) & align_mask;
+                    const u64 difference = align_up_address - cpu_addr_base;
+                    if (difference > new_size) {
+                        return;
+                    }
+                    const u64 fixed_size = new_size - difference;
+                    buffer.UnmarkRegionAsGpuModified(align_up_address, fixed_size & align_mask);
                 });
         });
     }
@@ -1380,7 +1389,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
     // Binding the whole map range would be technically correct, but games have large maps that make
     // this approach unaffordable for now.
     static constexpr u32 arbitrary_extra_bytes = 0xc000;
-    const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
+    const u32 bytes_to_map_end =
+        std::max(size, static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr)));
     const Binding binding{
         .cpu_addr = *cpu_addr,
         .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index f055b61e94..cd17667cb5 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -8,6 +8,7 @@
 #include <queue>
 
 #include "common/common_types.h"
+#include "common/settings.h"
 #include "core/core.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/gpu.h"
@@ -53,6 +54,23 @@ public:
         delayed_destruction_ring.Tick();
     }
 
+    void SignalReference() {
+        // Queue a fence for a reference signal; only done on High GPU accuracy
+        if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
+            return;
+        }
+        TryReleasePendingFences();
+        const bool should_flush = ShouldFlush(); // sampled before CommitAsyncFlushes()
+        CommitAsyncFlushes();
+        TFence new_fence = CreateFence(0, 0, !should_flush);
+        fences.push(new_fence);
+        QueueFence(new_fence);
+        if (should_flush) {
+            rasterizer.FlushCommands();
+        }
+        rasterizer.SyncGuestHost();
+    }
+
     void SignalSemaphore(GPUVAddr addr, u32 value) {
         TryReleasePendingFences();
         const bool should_flush = ShouldFlush();
@@ -87,8 +105,10 @@ public:
             }
             PopAsyncFlushes();
             if (current_fence->IsSemaphore()) {
-                gpu_memory.template Write<u32>(current_fence->GetAddress(),
-                                               current_fence->GetPayload());
+                if (current_fence->GetAddress() != 0) {
+                    gpu_memory.template Write<u32>(current_fence->GetAddress(),
+                                                   current_fence->GetPayload());
+                }
             } else {
                 gpu.IncrementSyncPoint(current_fence->GetPayload());
             }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 35cc561be1..f317ddc2bb 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -268,11 +268,13 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::SemaphoreAddressHigh:
     case BufferMethods::SemaphoreAddressLow:
     case BufferMethods::SemaphoreSequence:
-    case BufferMethods::RefCnt:
     case BufferMethods::UnkCacheFlush:
     case BufferMethods::WrcacheFlush:
     case BufferMethods::FenceValue:
         break;
+    case BufferMethods::RefCnt:
+        rasterizer->SignalReference();
+        break;
     case BufferMethods::FenceAction:
         ProcessFenceActionMethod();
         break;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0cec4225b0..67aef60000 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -63,6 +63,9 @@ public:
     /// Signal a GPU based syncpoint as a fence
     virtual void SignalSyncPoint(u32 value) = 0;
 
+    /// Signal a GPU based reference (RefCnt puller method) as a fence
+    virtual void SignalReference() = 0;
+
     /// Release all pending fences.
     virtual void ReleaseFences() = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 07ad0e205b..64869abf95 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -634,6 +634,13 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
     fence_manager.SignalSyncPoint(value);
 }
 
+// Reference signals are forwarded to the fence manager only when the GPU is asynchronous.
+void RasterizerOpenGL::SignalReference() {
+    if (gpu.IsAsync()) {
+        fence_manager.SignalReference();
+    }
+}
+
 void RasterizerOpenGL::ReleaseFences() {
     if (!gpu.IsAsync()) {
         return;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 482efed7a0..d8df71962e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -83,6 +83,7 @@ public:
     void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
+    void SignalReference() override;
     void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitForIdle() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index bd4d649cc7..687c13009d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -580,6 +580,13 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
     fence_manager.SignalSyncPoint(value);
 }
 
+// Reference signals are forwarded to the fence manager only when the GPU is asynchronous.
+void RasterizerVulkan::SignalReference() {
+    if (gpu.IsAsync()) {
+        fence_manager.SignalReference();
+    }
+}
+
 void RasterizerVulkan::ReleaseFences() {
     if (!gpu.IsAsync()) {
         return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 41459c5c5c..5450ccfb51 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -75,6 +75,7 @@ public:
     void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
+    void SignalReference() override;
     void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitForIdle() override;