From 3b582d5fb2ca19d1a45f9675244021a3d302c9ff Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 24 Nov 2022 14:02:58 +0100
Subject: [PATCH] GPU: Fix buffer cache issue, engine upload not inlining
 memory in multiline and pessimistic invalidation.

---
 src/video_core/buffer_cache/buffer_cache.h | 4 ++--
 src/video_core/engines/engine_upload.cpp   | 8 ++++----
 src/video_core/engines/maxwell_3d.cpp      | 8 +-------
 src/video_core/engines/puller.cpp          | 4 ++--
 4 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 5995510137..5d3a8293bd 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1742,12 +1742,12 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
     SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
 
     if constexpr (USE_MEMORY_MAPS) {
+        auto upload_staging = runtime.UploadStagingBuffer(copy_size);
         std::array copies{BufferCopy{
-            .src_offset = 0,
+            .src_offset = upload_staging.offset,
             .dst_offset = buffer.Offset(dest_address),
             .size = copy_size,
         }};
-        auto upload_staging = runtime.UploadStagingBuffer(copy_size);
         u8* const src_pointer = upload_staging.mapped_span.data();
         std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index a348192346..28aa85f32e 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -51,11 +51,11 @@ void State::ProcessData(std::span<const u8> read_buffer) {
         } else {
             for (u32 line = 0; line < regs.line_count; ++line) {
                 const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
-                memory_manager.WriteBlockUnsafe(
-                    dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
-                    regs.line_length_in);
+                std::span<const u8> buffer(read_buffer.data() +
+                                               static_cast<size_t>(line) * regs.line_length_in,
+                                           regs.line_length_in);
+                rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
             }
-            memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
         }
     } else {
         u32 width = regs.dest.width;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5bb1427c17..6d43e23eab 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -249,9 +249,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
         return;
     case MAXWELL3D_REG_INDEX(fragment_barrier):
         return rasterizer->FragmentBarrier();
-    case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
-        rasterizer->InvalidateGPUCache();
-        return rasterizer->WaitForIdle();
     case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
         return rasterizer->TiledCacheBarrier();
     }
@@ -511,10 +508,7 @@ void Maxwell3D::ProcessCounterReset() {
 
 void Maxwell3D::ProcessSyncPoint() {
     const u32 sync_point = regs.sync_info.sync_point.Value();
-    const u32 cache_flush = regs.sync_info.clean_l2.Value();
-    if (cache_flush != 0) {
-        rasterizer->InvalidateGPUCache();
-    }
+    [[maybe_unused]] const u32 cache_flush = regs.sync_info.clean_l2.Value();
     rasterizer->SignalSyncPoint(sync_point);
 }
 
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 4d22788114..c308ba3fc7 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -118,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() {
     std::function<void()> operation([this, sequence_address, payload] {
         memory_manager.Write<u32>(sequence_address, payload);
     });
-    rasterizer->SyncOperation(std::move(operation));
+    rasterizer->SignalFence(std::move(operation));
 }
 
 void Puller::ProcessSemaphoreAcquire() {
@@ -151,8 +151,8 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::SemaphoreAddressLow:
     case BufferMethods::SemaphoreSequencePayload:
     case BufferMethods::SyncpointPayload:
-        break;
     case BufferMethods::WrcacheFlush:
+        break;
     case BufferMethods::RefCnt:
         rasterizer->SignalReference();
         break;