From 64e93dc9598eea324eee63df648b30b3d55acef3 Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Sun, 24 Apr 2022 18:55:44 -0400
Subject: [PATCH] service: jit: document and clean up

---
 src/core/hle/service/jit/jit.cpp         | 203 +++++++++++++++--------
 src/core/hle/service/jit/jit_context.cpp | 153 +++++++++--------
 src/core/hle/service/jit/jit_context.h   |   1 +
 3 files changed, 225 insertions(+), 132 deletions(-)

diff --git a/src/core/hle/service/jit/jit.cpp b/src/core/hle/service/jit/jit.cpp
index 185d0387b1..8f2920c515 100644
--- a/src/core/hle/service/jit/jit.cpp
+++ b/src/core/hle/service/jit/jit.cpp
@@ -21,9 +21,10 @@ struct CodeRange {
 
 class IJitEnvironment final : public ServiceFramework<IJitEnvironment> {
 public:
-    explicit IJitEnvironment(Core::System& system_, CodeRange user_rx, CodeRange user_ro)
+    explicit IJitEnvironment(Core::System& system_, Kernel::KProcess& process_, CodeRange user_rx,
+                             CodeRange user_ro)
         : ServiceFramework{system_, "IJitEnvironment", ServiceThreadType::CreateNew},
-          context{system_.Memory()} {
+          process{&process_}, context{system_.Memory()} {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, &IJitEnvironment::GenerateCode, "GenerateCode"},
@@ -43,54 +44,80 @@ public:
     }
 
     void GenerateCode(Kernel::HLERequestContext& ctx) {
-        struct Parameters {
+        LOG_DEBUG(Service_JIT, "called");
+
+        struct InputParameters {
             u32 data_size;
             u64 command;
-            CodeRange cr1;
-            CodeRange cr2;
+            std::array<CodeRange, 2> ranges;
             Struct32 data;
         };
 
+        struct OutputParameters {
+            s32 return_value;
+            std::array<CodeRange, 2> ranges;
+        };
+
         IPC::RequestParser rp{ctx};
-        const auto parameters{rp.PopRaw<Parameters>()};
+        const auto parameters{rp.PopRaw<InputParameters>()};
+
+        // Optional input/output buffers
         std::vector<u8> input_buffer{ctx.CanReadBuffer() ? ctx.ReadBuffer() : std::vector<u8>()};
         std::vector<u8> output_buffer(ctx.CanWriteBuffer() ? ctx.GetWriteBufferSize() : 0);
 
-        const VAddr return_ptr{context.AddHeap(0u)};
-        const VAddr cr1_in_ptr{context.AddHeap(parameters.cr1)};
-        const VAddr cr2_in_ptr{context.AddHeap(parameters.cr2)};
-        const VAddr cr1_out_ptr{
-            context.AddHeap(CodeRange{.offset = parameters.cr1.offset, .size = 0})};
-        const VAddr cr2_out_ptr{
-            context.AddHeap(CodeRange{.offset = parameters.cr2.offset, .size = 0})};
+        // Function call prototype:
+        // void GenerateCode(s32* ret, CodeRange* c0_out, CodeRange* c1_out, JITConfiguration* cfg,
+        //                   u64 cmd, u8* input_buf, size_t input_size, CodeRange* c0_in,
+        //                   CodeRange* c1_in, Struct32* data, size_t data_size, u8* output_buf,
+        //                   size_t output_size);
+        //
+        // The command argument is used to control the behavior of the plugin during code
+        // generation. The configuration allows the plugin to access the output code ranges, and the
+        // other arguments are used to transfer state between the game and the plugin.
+
+        const VAddr ret_ptr{context.AddHeap(0u)};
+        const VAddr c0_in_ptr{context.AddHeap(parameters.ranges[0])};
+        const VAddr c1_in_ptr{context.AddHeap(parameters.ranges[1])};
+        const VAddr c0_out_ptr{context.AddHeap(ClearSize(parameters.ranges[0]))};
+        const VAddr c1_out_ptr{context.AddHeap(ClearSize(parameters.ranges[1]))};
+
         const VAddr input_ptr{context.AddHeap(input_buffer.data(), input_buffer.size())};
         const VAddr output_ptr{context.AddHeap(output_buffer.data(), output_buffer.size())};
         const VAddr data_ptr{context.AddHeap(parameters.data)};
         const VAddr configuration_ptr{context.AddHeap(configuration)};
 
-        context.CallFunction(callbacks.GenerateCode, return_ptr, cr1_out_ptr, cr2_out_ptr,
+        // The callback does not directly return a value; it only writes to the output pointer
+        context.CallFunction(callbacks.GenerateCode, ret_ptr, c0_out_ptr, c1_out_ptr,
                              configuration_ptr, parameters.command, input_ptr, input_buffer.size(),
-                             cr1_in_ptr, cr2_in_ptr, data_ptr, parameters.data_size, output_ptr,
+                             c0_in_ptr, c1_in_ptr, data_ptr, parameters.data_size, output_ptr,
                              output_buffer.size());
 
-        const s32 return_value{context.GetHeap<s32>(return_ptr)};
+        const s32 return_value{context.GetHeap<s32>(ret_ptr)};
 
         if (return_value == 0) {
+            // The callback has written to the output executable code range,
+            // requiring an instruction cache invalidation
             system.InvalidateCpuInstructionCacheRange(configuration.user_rx_memory.offset,
                                                       configuration.user_rx_memory.size);
 
+            // Write back to the IPC output buffer, if provided
             if (ctx.CanWriteBuffer()) {
                 context.GetHeap(output_ptr, output_buffer.data(), output_buffer.size());
                 ctx.WriteBuffer(output_buffer.data(), output_buffer.size());
             }
-            const auto cr1_out{context.GetHeap<CodeRange>(cr1_out_ptr)};
-            const auto cr2_out{context.GetHeap<CodeRange>(cr2_out_ptr)};
+
+            const OutputParameters out{
+                .return_value = return_value,
+                .ranges =
+                    {
+                        context.GetHeap<CodeRange>(c0_out_ptr),
+                        context.GetHeap<CodeRange>(c1_out_ptr),
+                    },
+            };
 
             IPC::ResponseBuilder rb{ctx, 8};
             rb.Push(ResultSuccess);
-            rb.Push<u64>(return_value);
-            rb.PushRaw(cr1_out);
-            rb.PushRaw(cr2_out);
+            rb.PushRaw(out);
         } else {
             LOG_WARNING(Service_JIT, "plugin GenerateCode callback failed");
             IPC::ResponseBuilder rb{ctx, 2};
@@ -99,25 +126,40 @@ public:
     };
 
     void Control(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_JIT, "called");
+
         IPC::RequestParser rp{ctx};
         const auto command{rp.PopRaw<u64>()};
-        const auto input_buffer{ctx.ReadBuffer()};
+
+        // Optional input/output buffers
+        std::vector<u8> input_buffer{ctx.CanReadBuffer() ? ctx.ReadBuffer() : std::vector<u8>()};
         std::vector<u8> output_buffer(ctx.CanWriteBuffer() ? ctx.GetWriteBufferSize() : 0);
 
-        const VAddr return_ptr{context.AddHeap(0u)};
+        // Function call prototype:
+        // u64 Control(s32* ret, JITConfiguration* cfg, u64 cmd, u8* input_buf, size_t input_size,
+        //             u8* output_buf, size_t output_size);
+        //
+        // This function is used to set up the state of the plugin before code generation, generally
+        // passing objects like pointers to VM state from the game. It is usually called once.
+
+        const VAddr ret_ptr{context.AddHeap(0u)};
         const VAddr configuration_ptr{context.AddHeap(configuration)};
         const VAddr input_ptr{context.AddHeap(input_buffer.data(), input_buffer.size())};
         const VAddr output_ptr{context.AddHeap(output_buffer.data(), output_buffer.size())};
-        const u64 wrapper_value{
-            context.CallFunction(callbacks.Control, return_ptr, configuration_ptr, command,
-                                 input_ptr, input_buffer.size(), output_ptr, output_buffer.size())};
-        const s32 return_value{context.GetHeap<s32>(return_ptr)};
+
+        const u64 wrapper_value{context.CallFunction(callbacks.Control, ret_ptr, configuration_ptr,
+                                                     command, input_ptr, input_buffer.size(),
+                                                     output_ptr, output_buffer.size())};
+
+        const s32 return_value{context.GetHeap<s32>(ret_ptr)};
 
         if (wrapper_value == 0 && return_value == 0) {
+            // Write back to the IPC output buffer, if provided
             if (ctx.CanWriteBuffer()) {
                 context.GetHeap(output_ptr, output_buffer.data(), output_buffer.size());
                 ctx.WriteBuffer(output_buffer.data(), output_buffer.size());
             }
+
             IPC::ResponseBuilder rb{ctx, 3};
             rb.Push(ResultSuccess);
             rb.Push(return_value);
@@ -129,8 +171,13 @@ public:
     }
 
     void LoadPlugin(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_JIT, "called");
+
         IPC::RequestParser rp{ctx};
         const auto tmem_size{rp.PopRaw<u64>()};
+        const auto tmem_handle{ctx.GetCopyHandle(0)};
+        const auto nro_plugin{ctx.ReadBuffer(1)};
+
         if (tmem_size == 0) {
             LOG_ERROR(Service_JIT, "attempted to load plugin with empty transfer memory");
             IPC::ResponseBuilder rb{ctx, 2};
@@ -138,9 +185,7 @@ public:
             return;
         }
 
-        const auto tmem_handle{ctx.GetCopyHandle(0)};
-        auto tmem{system.CurrentProcess()->GetHandleTable().GetObject<Kernel::KTransferMemory>(
-            tmem_handle)};
+        auto tmem{process->GetHandleTable().GetObject<Kernel::KTransferMemory>(tmem_handle)};
         if (tmem.IsNull()) {
             LOG_ERROR(Service_JIT, "attempted to load plugin with invalid transfer memory handle");
             IPC::ResponseBuilder rb{ctx, 2};
@@ -148,24 +193,24 @@ public:
             return;
         }
 
-        configuration.work_memory.offset = tmem->GetSourceAddress();
-        configuration.work_memory.size = tmem_size;
+        // Set up the configuration with the required TransferMemory address
+        configuration.transfer_memory.offset = tmem->GetSourceAddress();
+        configuration.transfer_memory.size = tmem_size;
 
-        const auto nro_plugin{ctx.ReadBuffer(1)};
+        // Gather up all the callbacks from the loaded plugin
         auto symbols{Core::Symbols::GetSymbols(nro_plugin, true)};
-        const auto GetSymbol{[&](std::string name) { return symbols[name].first; }};
+        const auto GetSymbol{[&](const std::string& name) { return symbols[name].first; }};
 
-        callbacks =
-            GuestCallbacks{.rtld_fini = GetSymbol("_fini"),
-                           .rtld_init = GetSymbol("_init"),
-                           .Control = GetSymbol("nnjitpluginControl"),
-                           .ResolveBasicSymbols = GetSymbol("nnjitpluginResolveBasicSymbols"),
-                           .SetupDiagnostics = GetSymbol("nnjitpluginSetupDiagnostics"),
-                           .Configure = GetSymbol("nnjitpluginConfigure"),
-                           .GenerateCode = GetSymbol("nnjitpluginGenerateCode"),
-                           .GetVersion = GetSymbol("nnjitpluginGetVersion"),
-                           .Keeper = GetSymbol("nnjitpluginKeeper"),
-                           .OnPrepared = GetSymbol("nnjitpluginOnPrepared")};
+        callbacks.rtld_fini = GetSymbol("_fini");
+        callbacks.rtld_init = GetSymbol("_init");
+        callbacks.Control = GetSymbol("nnjitpluginControl");
+        callbacks.ResolveBasicSymbols = GetSymbol("nnjitpluginResolveBasicSymbols");
+        callbacks.SetupDiagnostics = GetSymbol("nnjitpluginSetupDiagnostics");
+        callbacks.Configure = GetSymbol("nnjitpluginConfigure");
+        callbacks.GenerateCode = GetSymbol("nnjitpluginGenerateCode");
+        callbacks.GetVersion = GetSymbol("nnjitpluginGetVersion");
+        callbacks.OnPrepared = GetSymbol("nnjitpluginOnPrepared");
+        callbacks.Keeper = GetSymbol("nnjitpluginKeeper");
 
         if (callbacks.GetVersion == 0 || callbacks.Configure == 0 || callbacks.GenerateCode == 0 ||
             callbacks.OnPrepared == 0) {
@@ -186,12 +231,16 @@ public:
                                  configuration.sys_ro_memory.size);
         context.MapProcessMemory(configuration.sys_rx_memory.offset,
                                  configuration.sys_rx_memory.size);
-        context.MapProcessMemory(configuration.work_memory.offset, configuration.work_memory.size);
+        context.MapProcessMemory(configuration.transfer_memory.offset,
+                                 configuration.transfer_memory.size);
 
+        // Run ELF constructors, if needed
         if (callbacks.rtld_init != 0) {
             context.CallFunction(callbacks.rtld_init);
         }
 
+        // Function prototype:
+        // u64 GetVersion();
         const auto version{context.CallFunction(callbacks.GetVersion)};
         if (version != 1) {
             LOG_ERROR(Service_JIT, "unknown plugin version {}", version);
@@ -200,16 +249,26 @@ public:
             return;
         }
 
+        // Function prototype:
+        // void ResolveBasicSymbols(void* (*resolver)(const char* name));
         const auto resolve{context.GetHelper("_resolve")};
         if (callbacks.ResolveBasicSymbols != 0) {
             context.CallFunction(callbacks.ResolveBasicSymbols, resolve);
         }
+
+        // Function prototype:
+        // void SetupDiagnostics(u32 enabled, void* (**resolver)(const char* name));
         const auto resolve_ptr{context.AddHeap(resolve)};
         if (callbacks.SetupDiagnostics != 0) {
             context.CallFunction(callbacks.SetupDiagnostics, 0u, resolve_ptr);
         }
 
-        context.CallFunction(callbacks.Configure, 0u);
+        // Function prototype:
+        // void Configure(u32* memory_flags);
+        context.CallFunction(callbacks.Configure, 0ull);
+
+        // Function prototype:
+        // void OnPrepared(JITConfiguration* cfg);
         const auto configuration_ptr{context.AddHeap(configuration)};
         context.CallFunction(callbacks.OnPrepared, configuration_ptr);
 
@@ -218,6 +277,8 @@ public:
     }
 
     void GetCodeAddress(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_JIT, "called");
+
         IPC::ResponseBuilder rb{ctx, 6};
         rb.Push(ResultSuccess);
         rb.Push(configuration.user_rx_memory.offset);
@@ -243,11 +304,17 @@ private:
     struct JITConfiguration {
         CodeRange user_rx_memory;
         CodeRange user_ro_memory;
-        CodeRange work_memory;
+        CodeRange transfer_memory;
         CodeRange sys_rx_memory;
         CodeRange sys_ro_memory;
     };
 
+    static CodeRange ClearSize(CodeRange in) {
+        in.size = 0;
+        return in;
+    }
+
+    Kernel::KScopedAutoObject<Kernel::KProcess> process;
     GuestCallbacks callbacks;
     JITConfiguration configuration;
     JITContext context;
@@ -275,8 +342,9 @@ public:
 
         IPC::RequestParser rp{ctx};
         const auto parameters{rp.PopRaw<Parameters>()};
-        const auto executable_mem_handle{ctx.GetCopyHandle(1)};
-        const auto readable_mem_handle{ctx.GetCopyHandle(2)};
+        const auto process_handle{ctx.GetCopyHandle(0)};
+        const auto rx_mem_handle{ctx.GetCopyHandle(1)};
+        const auto ro_mem_handle{ctx.GetCopyHandle(2)};
 
         if (parameters.rx_size == 0 || parameters.ro_size == 0) {
             LOG_ERROR(Service_JIT, "attempted to init with empty code regions");
@@ -285,42 +353,47 @@ public:
             return;
         }
 
-        // The copy handle at index 0 is the process handle, but handle tables are
-        // per-process, so there is no point reading it here until we are multiprocess
-        const auto& process{*system.CurrentProcess()};
+        // Look up the handles in the current process's handle table here,
+        // since we are not multiprocess yet.
+        const auto& handle_table{system.CurrentProcess()->GetHandleTable()};
 
-        auto executable_mem{
-            process.GetHandleTable().GetObject<Kernel::KCodeMemory>(executable_mem_handle)};
-        if (executable_mem.IsNull()) {
-            LOG_ERROR(Service_JIT, "executable_mem is null for handle=0x{:08X}",
-                      executable_mem_handle);
+        auto process{handle_table.GetObject<Kernel::KProcess>(process_handle)};
+        if (process.IsNull()) {
+            LOG_ERROR(Service_JIT, "process is null for handle=0x{:08X}", process_handle);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ResultUnknown);
             return;
         }
 
-        auto readable_mem{
-            process.GetHandleTable().GetObject<Kernel::KCodeMemory>(readable_mem_handle)};
-        if (readable_mem.IsNull()) {
-            LOG_ERROR(Service_JIT, "readable_mem is null for handle=0x{:08X}", readable_mem_handle);
+        auto rx_mem{handle_table.GetObject<Kernel::KCodeMemory>(rx_mem_handle)};
+        if (rx_mem.IsNull()) {
+            LOG_ERROR(Service_JIT, "rx_mem is null for handle=0x{:08X}", rx_mem_handle);
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ResultUnknown);
+            return;
+        }
+
+        auto ro_mem{handle_table.GetObject<Kernel::KCodeMemory>(ro_mem_handle)};
+        if (ro_mem.IsNull()) {
+            LOG_ERROR(Service_JIT, "ro_mem is null for handle=0x{:08X}", ro_mem_handle);
             IPC::ResponseBuilder rb{ctx, 2};
             rb.Push(ResultUnknown);
             return;
         }
 
         const CodeRange user_rx{
-            .offset = executable_mem->GetSourceAddress(),
+            .offset = rx_mem->GetSourceAddress(),
             .size = parameters.rx_size,
         };
 
         const CodeRange user_ro{
-            .offset = readable_mem->GetSourceAddress(),
+            .offset = ro_mem->GetSourceAddress(),
             .size = parameters.ro_size,
         };
 
         IPC::ResponseBuilder rb{ctx, 2, 0, 1};
         rb.Push(ResultSuccess);
-        rb.PushIpcInterface<IJitEnvironment>(system, user_rx, user_ro);
+        rb.PushIpcInterface<IJitEnvironment>(system, *process, user_rx, user_ro);
     }
 };
 
diff --git a/src/core/hle/service/jit/jit_context.cpp b/src/core/hle/service/jit/jit_context.cpp
index 17e58131c7..19bd85b6c2 100644
--- a/src/core/hle/service/jit/jit_context.cpp
+++ b/src/core/hle/service/jit/jit_context.cpp
@@ -17,61 +17,15 @@
 
 namespace Service::JIT {
 
-constexpr std::array<u8, 4> STOP_ARM64 = {
+constexpr std::array<u8, 8> SVC0_ARM64 = {
     0x01, 0x00, 0x00, 0xd4, // svc  #0
-};
-
-constexpr std::array<u8, 8> RESOLVE_ARM64 = {
-    0x21, 0x00, 0x00, 0xd4, // svc  #1
     0xc0, 0x03, 0x5f, 0xd6, // ret
 };
 
-constexpr std::array<u8, 4> PANIC_ARM64 = {
-    0x41, 0x00, 0x00, 0xd4, // svc  #2
+constexpr std::array HELPER_FUNCTIONS{
+    "_stop", "_resolve", "_panic", "memcpy", "memmove", "memset",
 };
 
-constexpr std::array<u8, 60> MEMMOVE_ARM64 = {
-    0x1f, 0x00, 0x01, 0xeb, // cmp  x0, x1
-    0x83, 0x01, 0x00, 0x54, // b.lo #+34
-    0x42, 0x04, 0x00, 0xd1, // sub  x2, x2, 1
-    0x22, 0x01, 0xf8, 0xb7, // tbnz x2, #63, #+36
-    0x23, 0x68, 0x62, 0x38, // ldrb w3, [x1, x2]
-    0x03, 0x68, 0x22, 0x38, // strb w3, [x0, x2]
-    0xfc, 0xff, 0xff, 0x17, // b    #-16
-    0x24, 0x68, 0x63, 0x38, // ldrb w4, [x1, x3]
-    0x04, 0x68, 0x23, 0x38, // strb w4, [x0, x3]
-    0x63, 0x04, 0x00, 0x91, // add  x3, x3, 1
-    0x7f, 0x00, 0x02, 0xeb, // cmp  x3, x2
-    0x8b, 0xff, 0xff, 0x54, // b.lt #-16
-    0xc0, 0x03, 0x5f, 0xd6, // ret
-    0x03, 0x00, 0x80, 0xd2, // mov  x3, 0
-    0xfc, 0xff, 0xff, 0x17, // b    #-16
-};
-
-constexpr std::array<u8, 28> MEMSET_ARM64 = {
-    0x03, 0x00, 0x80, 0xd2, // mov  x3, 0
-    0x7f, 0x00, 0x02, 0xeb, // cmp  x3, x2
-    0x4b, 0x00, 0x00, 0x54, // b.lt #+8
-    0xc0, 0x03, 0x5f, 0xd6, // ret
-    0x01, 0x68, 0x23, 0x38, // strb w1, [x0, x3]
-    0x63, 0x04, 0x00, 0x91, // add  x3, x3, 1
-    0xfb, 0xff, 0xff, 0x17, // b    #-20
-};
-
-struct HelperFunction {
-    const char* name;
-    const std::span<const u8> data;
-};
-
-constexpr std::array<HelperFunction, 6> HELPER_FUNCTIONS{{
-    {"_stop", STOP_ARM64},
-    {"_resolve", RESOLVE_ARM64},
-    {"_panic", PANIC_ARM64},
-    {"memcpy", MEMMOVE_ARM64},
-    {"memmove", MEMMOVE_ARM64},
-    {"memset", MEMSET_ARM64},
-}};
-
 struct Elf64_Dyn {
     u64 d_tag;
     u64 d_un;
@@ -224,17 +178,24 @@ public:
             InsertHelperFunctions();
             InsertStack();
             return true;
-        } else {
-            return false;
         }
+
+        return false;
     }
 
     bool FixupRelocations() {
+        // The loaded NRO file has ELF relocations that must be processed before it can run.
+        // Normally this would be processed by RTLD, but in HLE context, we don't have
+        // the linker available, so we have to do it ourselves.
+
         const VAddr mod_offset{callbacks->MemoryRead32(4)};
         if (callbacks->MemoryRead32(mod_offset) != Common::MakeMagic('M', 'O', 'D', '0')) {
             return false;
         }
 
+        // For more info about dynamic entries, see the ELF ABI specification:
+        // https://refspecs.linuxbase.org/elf/gabi4+/ch5.dynamic.html
+        // https://refspecs.linuxbase.org/elf/gabi4+/ch4.reloc.html
         VAddr dynamic_offset{mod_offset + callbacks->MemoryRead32(mod_offset + 4)};
         VAddr rela_dyn = 0;
         size_t num_rela = 0;
@@ -266,13 +227,15 @@ public:
     }
 
     void InsertHelperFunctions() {
-        for (const auto& [name, contents] : HELPER_FUNCTIONS) {
+        for (const auto& name : HELPER_FUNCTIONS) {
             helpers[name] = local_memory.size();
-            local_memory.insert(local_memory.end(), contents.begin(), contents.end());
+            local_memory.insert(local_memory.end(), SVC0_ARM64.begin(), SVC0_ARM64.end());
         }
     }
 
     void InsertStack() {
+        // Allocate enough space to avoid any reasonable risk of
+        // overflowing the stack during plugin execution
         const u64 pad_amount{Common::AlignUp(local_memory.size(), STACK_ALIGN) -
                              local_memory.size()};
         local_memory.insert(local_memory.end(), 0x10000 + pad_amount, 0);
@@ -292,9 +255,21 @@ public:
     }
 
     void SetupArguments() {
+        // The first 8 integer registers are used for the first 8 integer
+        // arguments. Floating-point arguments are not handled at this time.
+        //
+        // If a function takes more than 8 arguments, then stack space is reserved
+        // for the remaining arguments, and the remaining arguments are inserted in
+        // ascending memory order, each argument aligned to an 8-byte boundary. The
+        // stack pointer must remain aligned to 16 bytes.
+        //
+        // For more info, see the AArch64 ABI PCS:
+        // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
+
         for (size_t i = 0; i < 8 && i < argument_stack.size(); i++) {
             jit->SetRegister(i, argument_stack[i]);
         }
+
         if (argument_stack.size() > 8) {
             const VAddr new_sp = Common::AlignDown(
                 top_of_stack - (argument_stack.size() - 8) * sizeof(u64), STACK_ALIGN);
@@ -303,6 +278,8 @@ public:
             }
             jit->SetSP(new_sp);
         }
+
+        // Reset the call state for the next invocation
         argument_stack.clear();
         heap_pointer = top_of_stack;
     }
@@ -322,11 +299,16 @@ public:
     }
 
     VAddr AddHeap(const void* data, size_t size) {
+        // Require all heap data types to have the same alignment as the
+        // stack pointer, for compatibility
         const size_t num_bytes{Common::AlignUp(size, STACK_ALIGN)};
+
+        // Make additional memory space if required
         if (heap_pointer + num_bytes > local_memory.size()) {
             local_memory.insert(local_memory.end(),
                                 (heap_pointer + num_bytes) - local_memory.size(), 0);
         }
+
         const VAddr location{heap_pointer};
         std::memcpy(local_memory.data() + location, data, size);
         heap_pointer += num_bytes;
@@ -350,30 +332,67 @@ public:
 };
 
 void DynarmicCallbacks64::CallSVC(u32 swi) {
-    switch (swi) {
-    case 0:
-        parent.jit->HaltExecution();
-        break;
+    // Service calls are used to implement helper functionality.
+    //
+    // The most important of these is the _stop helper, which transfers control
+    // from the plugin back to HLE context to return a value. However, a few more
+    // are also implemented to reduce the need for direct ARM implementations of
+    // basic functionality, like memory operations.
+    //
+    // When we receive a helper request, the swi number will be zero, and the call
+    // will have originated from an address we know is a helper function. Otherwise,
+    // the plugin may be trying to issue a service call, which we shouldn't handle.
 
-    case 1: {
+    if (swi != 0) {
+        LOG_CRITICAL(Service_JIT, "plugin issued unknown service call {}", swi);
+        parent.jit->HaltExecution();
+        return;
+    }
+
+    u64 pc{parent.jit->GetPC() - 4};
+    auto& helpers{parent.helpers};
+
+    if (pc == helpers["memcpy"] || pc == helpers["memmove"]) {
+        const VAddr dest{parent.jit->GetRegister(0)};
+        const VAddr src{parent.jit->GetRegister(1)};
+        const size_t n{parent.jit->GetRegister(2)};
+
+        if (dest < src) {
+            for (size_t i = 0; i < n; i++) {
+                MemoryWrite8(dest + i, MemoryRead8(src + i));
+            }
+        } else {
+            for (size_t i = n; i > 0; i--) {
+                MemoryWrite8(dest + i - 1, MemoryRead8(src + i - 1));
+            }
+        }
+    } else if (pc == helpers["memset"]) {
+        const VAddr dest{parent.jit->GetRegister(0)};
+        const u64 c{parent.jit->GetRegister(1)};
+        const size_t n{parent.jit->GetRegister(2)};
+
+        for (size_t i = 0; i < n; i++) {
+            MemoryWrite8(dest + i, static_cast<u8>(c));
+        }
+    } else if (pc == helpers["_resolve"]) {
         // X0 contains a char* for a symbol to resolve
-        std::string name{MemoryReadCString(parent.jit->GetRegister(0))};
-        const auto helper{parent.helpers[name]};
+        const auto name{MemoryReadCString(parent.jit->GetRegister(0))};
+        const auto helper{helpers[name]};
 
         if (helper != 0) {
             parent.jit->SetRegister(0, helper);
         } else {
             LOG_WARNING(Service_JIT, "plugin requested unknown function {}", name);
-            parent.jit->SetRegister(0, parent.helpers["_panic"]);
+            parent.jit->SetRegister(0, helpers["_panic"]);
         }
-        break;
-    }
-
-    case 2:
-    default:
+    } else if (pc == helpers["_stop"]) {
+        parent.jit->HaltExecution();
+    } else if (pc == helpers["_panic"]) {
         LOG_CRITICAL(Service_JIT, "plugin panicked!");
         parent.jit->HaltExecution();
-        break;
+    } else {
+        LOG_CRITICAL(Service_JIT, "plugin issued syscall at unknown address 0x{:x}", pc);
+        parent.jit->HaltExecution();
     }
 }
 
diff --git a/src/core/hle/service/jit/jit_context.h b/src/core/hle/service/jit/jit_context.h
index 3cb935e3c3..f17fc5e244 100644
--- a/src/core/hle/service/jit/jit_context.h
+++ b/src/core/hle/service/jit/jit_context.h
@@ -28,6 +28,7 @@ public:
     template <typename T, typename... Ts>
     u64 CallFunction(VAddr func, T argument, Ts... rest) {
         static_assert(std::is_trivially_copyable_v<T>);
+        static_assert(!std::is_floating_point_v<T>);
         PushArgument(&argument, sizeof(argument));
 
         if constexpr (sizeof...(rest) > 0) {