From 70ea1c20002e8cb9a0f9e98b1992d4c731be0041 Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Sat, 12 Nov 2022 11:26:56 -0500
Subject: [PATCH 1/2] common: add cache management functions

---
 src/common/CMakeLists.txt       |  2 ++
 src/common/cache_management.cpp | 60 +++++++++++++++++++++++++++++++++
 src/common/cache_management.h   | 27 +++++++++++++++
 3 files changed, 89 insertions(+)
 create mode 100644 src/common/cache_management.cpp
 create mode 100644 src/common/cache_management.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c0555f840c..b7c15c1919 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -34,6 +34,8 @@ add_library(common STATIC
     bit_util.h
+    cache_management.cpp
+    cache_management.h
     cityhash.cpp
     cityhash.h
     common_funcs.h
     common_types.h
     concepts.h
diff --git a/src/common/cache_management.cpp b/src/common/cache_management.cpp
new file mode 100644
index 0000000000..57810b76a7
--- /dev/null
+++ b/src/common/cache_management.cpp
@@ -0,0 +1,90 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "alignment.h"
+#include "cache_management.h"
+#include "common_types.h"
+
+namespace Common {
+
+#if defined(ARCHITECTURE_x86_64)
+
+// The clean/invalidate operations are implemented as no-ops on x86-64; only the zeroing
+// operation does any work there.
+
+void DataCacheLineCleanByVAToPoU(void*, size_t) {}
+void DataCacheLineCleanAndInvalidateByVAToPoC(void*, size_t) {}
+void DataCacheLineCleanByVAToPoC(void*, size_t) {}
+
+// Zeroes exactly the bytes in [start, start + size).
+void DataCacheZeroByVA(void* start, size_t size) {
+    std::memset(start, 0, size);
+}
+
+#elif defined(ARCHITECTURE_arm64)
+
+// CTR_EL0.DminLine and DCZID_EL0.BS are log2 of a size counted in 4-byte words; adding 2 to the
+// exponent gives that size in bytes.
+#define EXTRACT_DMINLINE(ctr_el0) (1 << ((((ctr_el0) >> 16) & 0xf) + 2))
+#define EXTRACT_BS(dczid_el0) (1 << (((dczid_el0)&0xf) + 2))
+
+// DCZID_EL0.DZP (bit 4) is set when use of DC ZVA is prohibited.
+#define EXTRACT_DZP(dczid_el0) ((((dczid_el0) >> 4) & 0x1) != 0)
+
+// Defines function_name(start, size), which issues "dc <op_name>" once for each data cache line
+// overlapping [start, start + size). The first address is aligned down to a line boundary so
+// that a range beginning mid-line still reaches the line holding its last byte. An empty range
+// issues no instruction.
+#define DEFINE_DC_OP(op_name, function_name)                                                       \
+    void function_name(void* start, size_t size) {                                                 \
+        size_t ctr_el0;                                                                            \
+        asm volatile("mrs %[ctr_el0], ctr_el0\n\t" : [ctr_el0] "=r"(ctr_el0));                     \
+        const size_t cacheline_size = EXTRACT_DMINLINE(ctr_el0);                                   \
+        const uintptr_t va_begin = reinterpret_cast<uintptr_t>(start);                             \
+        const uintptr_t va_end = va_begin + size;                                                  \
+        const uintptr_t va_first_line = va_begin & ~(cacheline_size - 1);                          \
+        if (size == 0) {                                                                           \
+            return;                                                                                \
+        }                                                                                          \
+        for (uintptr_t va = va_first_line; va < va_end; va += cacheline_size) {                    \
+            asm volatile("dc " #op_name ", %[va]\n\t" : : [va] "r"(va) : "memory");                \
+        }                                                                                          \
+    }
+
+DEFINE_DC_OP(cvau, DataCacheLineCleanByVAToPoU);
+DEFINE_DC_OP(civac, DataCacheLineCleanAndInvalidateByVAToPoC);
+DEFINE_DC_OP(cvac, DataCacheLineCleanByVAToPoC);
+
+// Zeroes exactly the bytes in [start, start + size), matching the x86-64 version.
+// "dc zva" zeroes the whole DCZID_EL0.BS-sized block containing the given address, so it is only
+// issued for blocks lying entirely inside the range. Partial blocks at either end, and the whole
+// range when DCZID_EL0.DZP reports that DC ZVA is prohibited, are zeroed with memset instead.
+void DataCacheZeroByVA(void* start, size_t size) {
+    size_t dczid_el0;
+    asm volatile("mrs %[dczid_el0], dczid_el0\n\t" : [dczid_el0] "=r"(dczid_el0));
+    const size_t block_size = EXTRACT_BS(dczid_el0);
+    const uintptr_t va_begin = reinterpret_cast<uintptr_t>(start);
+    const uintptr_t va_end = va_begin + size;
+    // First and one-past-last block boundaries that fall inside the range.
+    const uintptr_t blocks_begin = (va_begin + block_size - 1) & ~(block_size - 1);
+    const uintptr_t blocks_end = va_end & ~(block_size - 1);
+
+    if (EXTRACT_DZP(dczid_el0) || blocks_begin >= blocks_end) {
+        std::memset(start, 0, size);
+        return;
+    }
+
+    std::memset(start, 0, blocks_begin - va_begin);
+    for (uintptr_t va = blocks_begin; va < blocks_end; va += block_size) {
+        asm volatile("dc zva, %[va]\n\t" : : [va] "r"(va) : "memory");
+    }
+    std::memset(reinterpret_cast<void*>(blocks_end), 0, va_end - blocks_end);
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/cache_management.h b/src/common/cache_management.h
new file mode 100644
index 0000000000..e467b87e4c
--- /dev/null
+++ b/src/common/cache_management.h
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <cstddef>
+
+namespace Common {
+
+// Data cache maintenance helpers operating on the range [start, start + size).
+// On arm64 each helper issues the "dc" instruction named in its comment; those data cache
+// instructions are enabled at EL0 by SCTLR_EL1.UCI. On x86-64 the clean/invalidate helpers are
+// no-ops and the zeroing helper uses memset.
+//
+// VA = virtual address
+// PoC = point of coherency
+// PoU = point of unification
+
+// dc cvau: clean by VA to the point of unification
+void DataCacheLineCleanByVAToPoU(void* start, size_t size);
+
+// dc civac: clean and invalidate by VA to the point of coherency
+void DataCacheLineCleanAndInvalidateByVAToPoC(void* start, size_t size);
+
+// dc cvac: clean by VA to the point of coherency
+void DataCacheLineCleanByVAToPoC(void* start, size_t size);
+
+// dc zva: zero by VA
+void DataCacheZeroByVA(void* start, size_t size);
+
+} // namespace Common

From 651f6598ac8a980700c330f382d711f7429571a8 Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Sat, 12 Nov 2022 11:02:07 -0500
Subject: [PATCH 2/2] kernel: implement FlushProcessDataCache

---
 src/core/hle/kernel/svc.cpp    | 26 +++++++++-----
 src/core/hle/kernel/svc_wrap.h |  8 +++++
 src/core/memory.cpp            | 65 ++++++++++++++++++++++++++++++++++
 src/core/memory.h              | 34 ++++++++++++++++++
 4 files changed, 125 insertions(+), 8 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 9962ad1711..e520cab470 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -2701,14 +2701,29 @@ static Result GetThreadList(Core::System& system, u32* out_num_threads, VAddr ou
     return ResultSuccess;
 }
 
-static Result FlushProcessDataCache32([[maybe_unused]] Core::System& system,
-                                      [[maybe_unused]] Handle handle, [[maybe_unused]] u32 address,
-                                      [[maybe_unused]] u32 size) {
-    // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a no-op,
-    // as all emulation is done in the same cache level in host architecture, thus data cache
-    // does not need flushing.
-    LOG_DEBUG(Kernel_SVC, "called");
-    return ResultSuccess;
+// Flushes the data cache for [address, address + size) in the process named by process_handle.
+// Fails with ResultInvalidSize for an empty range, ResultInvalidHandle when the handle does not
+// resolve to a process, and ResultInvalidCurrentMemory when address/size do not round-trip
+// through the host pointer/size types or the process page table does not contain the range.
+// Otherwise returns whatever Memory::FlushDataCache reports.
+static Result FlushProcessDataCache32(Core::System& system, Handle process_handle, u64 address,
+                                      u64 size) {
+    // Validate address/size.
+    R_UNLESS(size > 0, ResultInvalidSize);
+    R_UNLESS(address == static_cast<uintptr_t>(address), ResultInvalidCurrentMemory);
+    R_UNLESS(size == static_cast<size_t>(size), ResultInvalidCurrentMemory);
+
+    // Get the process from its handle.
+    KScopedAutoObject process =
+        system.Kernel().CurrentProcess()->GetHandleTable().GetObject<KProcess>(process_handle);
+    R_UNLESS(process.IsNotNull(), ResultInvalidHandle);
+
+    // Verify the region is within range.
+    auto& page_table = process->PageTable();
+    R_UNLESS(page_table.Contains(address, size), ResultInvalidCurrentMemory);
+
+    // Perform the operation.
+    R_RETURN(system.Memory().FlushDataCache(*process, address, size));
 }
 
 namespace {
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 272c54cf75..3730937fe3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -722,4 +722,19 @@ void SvcWrap32(Core::System& system) {
     FuncReturn(system, retval);
 }
 
+// Used by Invalidate/Store/FlushProcessDataCache32
+// Parameter 0 is the handle. The 64-bit address is assembled from parameter 2 (low half) and
+// parameter 3 (high half); the 64-bit size from parameter 1 (low half) and parameter 4 (high
+// half).
+template <Result func(Core::System&, Handle, u64, u64)>
+void SvcWrap32(Core::System& system) {
+    const auto address_lo = Param(system, 2);
+    const auto address_hi = Param(system, 3);
+    const auto size_lo = Param(system, 1);
+    const auto size_hi = Param(system, 4);
+    const u64 address = (address_hi << 32) | address_lo;
+    const u64 size = (size_hi << 32) | size_lo;
+    FuncReturn32(system, func(system, Param32(system, 0), address, size).raw);
+}
+
 } // namespace Kernel
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3ca80c8ff0..3141122f13 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -6,6 +6,7 @@
 
 #include "common/assert.h"
 #include "common/atomic_ops.h"
+#include "common/cache_management.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/page_table.h"
@@ -329,6 +330,55 @@ struct Memory::Impl {
             });
     }
 
+    template <typename Callback>
+    Result PerformCacheOperation(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size,
+                                 Callback&& cb) {
+        class InvalidMemoryException : public std::exception {};
+
+        try {
+            WalkBlock(
+                process, dest_addr, size,
+                [&](const std::size_t block_size, const VAddr current_vaddr) {
+                    LOG_ERROR(HW_Memory, "Unmapped cache maintenance @ {:#018X}", current_vaddr);
+                    throw InvalidMemoryException();
+                },
+                [&](const std::size_t block_size, u8* const host_ptr) { cb(block_size, host_ptr); },
+                [&](const VAddr current_vaddr, const std::size_t block_size, u8* const host_ptr) {
+                    system.GPU().FlushRegion(current_vaddr, block_size);
+                    cb(block_size, host_ptr);
+                },
+                [](const std::size_t block_size) {});
+        } catch (InvalidMemoryException&) {
+            return Kernel::ResultInvalidCurrentMemory;
+        }
+
+        return ResultSuccess;
+    }
+
+    // Walks the range without performing any host cache operation: the matching instruction
+    // (dc ivac) cannot be supported from EL0.
+    Result InvalidateDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
+        // Nothing to do per block.
+        auto no_op = [](const std::size_t block_size, u8* const host_ptr) {};
+        return PerformCacheOperation(process, dest_addr, size, no_op);
+    }
+
+    // Applies Common::DataCacheLineCleanByVAToPoC (dc cvac: store to point of coherency) per block.
+    Result StoreDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
+        return PerformCacheOperation(
+            process, dest_addr, size, [](const std::size_t block_size, u8* const host_ptr) {
+                Common::DataCacheLineCleanByVAToPoC(host_ptr, block_size);
+            });
+    }
+
+    // Applies dc civac (store to point of coherency, then invalidate from cache) per block.
+    Result FlushDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
+        return PerformCacheOperation(
+            process, dest_addr, size, [](const std::size_t block_size, u8* const host_ptr) {
+                Common::DataCacheLineCleanAndInvalidateByVAToPoC(host_ptr, block_size);
+            });
+    }
+
     void MarkRegionDebug(VAddr vaddr, u64 size, bool debug) {
         if (vaddr == 0) {
             return;
@@ -786,6 +836,21 @@ void Memory::ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, const s
     impl->ZeroBlock(process, dest_addr, size);
 }
 
+// The three data cache entry points below forward their arguments unchanged to Memory::Impl.
+
+Result Memory::InvalidateDataCache(const Kernel::KProcess& process, VAddr dest_addr,
+                                   std::size_t size) {
+    return impl->InvalidateDataCache(process, dest_addr, size);
+}
+
+Result Memory::StoreDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
+    return impl->StoreDataCache(process, dest_addr, size);
+}
+
+Result Memory::FlushDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
+    return impl->FlushDataCache(process, dest_addr, size);
+}
+
 void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
     impl->RasterizerMarkRegionCached(vaddr, size, cached);
 }
diff --git a/src/core/memory.h b/src/core/memory.h
index 81eac448b4..31fe699d80 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -7,6 +7,7 @@
 #include <memory>
 #include <string>
 #include "common/common_types.h"
+#include "core/hle/result.h"
 
 namespace Common {
 struct PageTable;
@@ -449,6 +450,39 @@ public:
      */
     void ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size);
 
+    /**
+     * Performs a data cache invalidate request over a range of bytes within the given process'
+     * address space. The implementation performs no host cache operation for this request.
+     *
+     * @param process   The process whose address space contains the range.
+     * @param dest_addr The virtual address at which the range starts.
+     * @param size      The size of the range to invalidate, in bytes.
+     * @returns Kernel::ResultInvalidCurrentMemory if unmapped memory is hit, else ResultSuccess.
+     */
+    Result InvalidateDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size);
+
+    /**
+     * Stores (cleans to the point of coherency) the data cache for a range of bytes within the
+     * given process' address space, starting at the specified virtual address.
+     *
+     * @param process   The process whose address space contains the range.
+     * @param dest_addr The virtual address at which the range starts.
+     * @param size      The size of the range to store, in bytes.
+     * @returns Kernel::ResultInvalidCurrentMemory if unmapped memory is hit, else ResultSuccess.
+     */
+    Result StoreDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size);
+
+    /**
+     * Flushes (cleans to the point of coherency, then invalidates) the data cache for a range of
+     * bytes within the given process' address space, starting at the specified virtual address.
+     *
+     * @param process   The process whose address space contains the range.
+     * @param dest_addr The virtual address at which the range starts.
+     * @param size      The size of the range to flush, in bytes.
+     * @returns Kernel::ResultInvalidCurrentMemory if unmapped memory is hit, else ResultSuccess.
+     */
+    Result FlushDataCache(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size);
+
     /**
      * Marks each page within the specified address range as cached or uncached.
      *