From 9fdb311d6e2d636c4599ddc3d4cb9adad6cec540 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Wed, 11 Mar 2015 16:10:14 -0400
Subject: [PATCH] dyncom: Make Load/Store instructions support big endian

---
 .../arm/dyncom/arm_dyncom_interpreter.cpp     | 114 ++++++++++--------
 src/core/arm/interpreter/armsupp.cpp          |   6 +
 src/core/arm/skyeye_common/armdefs.h          |   3 +-
 src/core/arm/skyeye_common/armmmu.h           |  55 +++++++++
 src/core/arm/skyeye_common/vfp/vfpinstr.cpp   | 100 ++++++++++-----
 src/core/mem_map.h                            |   1 +
 src/core/mem_map_funcs.cpp                    |   6 +
 7 files changed, 204 insertions(+), 81 deletions(-)

diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 4dd5416563..cfa6de8fce 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -4362,30 +4362,30 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             if (BIT(inst, 22) && !BIT(inst, 15)) {
                 for (int i = 0; i < 13; i++) {
                     if(BIT(inst, i)) {
-                        cpu->Reg[i] = Memory::Read32(addr);
+                        cpu->Reg[i] = ReadMemory32(cpu, addr);
                         addr += 4;
                     }
                 }
                 if (BIT(inst, 13)) {
                     if (cpu->Mode == USER32MODE) 
-                        cpu->Reg[13] = Memory::Read32(addr);
+                        cpu->Reg[13] = ReadMemory32(cpu, addr);
                     else
-                        cpu->Reg_usr[0] = Memory::Read32(addr);
+                        cpu->Reg_usr[0] = ReadMemory32(cpu, addr);
 
                     addr += 4;
                 }
                 if (BIT(inst, 14)) {
                     if (cpu->Mode == USER32MODE) 
-                        cpu->Reg[14] = Memory::Read32(addr);
+                        cpu->Reg[14] = ReadMemory32(cpu, addr);
                     else
-                        cpu->Reg_usr[1] = Memory::Read32(addr);
+                        cpu->Reg_usr[1] = ReadMemory32(cpu, addr);
 
                     addr += 4;
                 }
             } else if (!BIT(inst, 22)) {
                 for(int i = 0; i < 16; i++ ){
                     if(BIT(inst, i)){
-                        unsigned int ret = Memory::Read32(addr);
+                        unsigned int ret = ReadMemory32(cpu, addr);
 
                         // For armv5t, should enter thumb when bits[0] is non-zero.
                         if(i == 15){
@@ -4400,7 +4400,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             } else if (BIT(inst, 22) && BIT(inst, 15)) {
                 for(int i = 0; i < 15; i++ ){
                     if(BIT(inst, i)){
-                        cpu->Reg[i] = Memory::Read32(addr);
+                        cpu->Reg[i] = ReadMemory32(cpu, addr);
                         addr += 4;
                      }
                  }
@@ -4411,7 +4411,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
                     LOAD_NZCVT;
                 }
 
-                cpu->Reg[15] = Memory::Read32(addr);
+                cpu->Reg[15] = ReadMemory32(cpu, addr);
             }
 
             if (BIT(inst, 15)) {
@@ -4445,20 +4445,18 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
     LDR_INST:
     {
         ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
-        //if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
-            inst_cream->get_addr(cpu, inst_cream->inst, addr, 1);
+        inst_cream->get_addr(cpu, inst_cream->inst, addr, 1);
 
-            unsigned int value = Memory::Read32(addr);
-            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
+        unsigned int value = ReadMemory32(cpu, addr);
+        cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
 
-            if (BITS(inst_cream->inst, 12, 15) == 15) {
-                // For armv5t, should enter thumb when bits[0] is non-zero.
-                cpu->TFlag = value & 0x1;
-                cpu->Reg[15] &= 0xFFFFFFFE;
-                INC_PC(sizeof(ldst_inst));
-                goto DISPATCH;
-            }
-        //}
+        if (BITS(inst_cream->inst, 12, 15) == 15) {
+            // For armv5t, should enter thumb when bits[0] is non-zero.
+            cpu->TFlag = value & 0x1;
+            cpu->Reg[15] &= 0xFFFFFFFE;
+            INC_PC(sizeof(ldst_inst));
+            goto DISPATCH;
+        }
 
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(ldst_inst));
@@ -4471,7 +4469,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             ldst_inst *inst_cream = (ldst_inst *)inst_base->component;
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 1);
 
-            unsigned int value = Memory::Read32(addr);
+            unsigned int value = ReadMemory32(cpu, addr);
             cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
 
             if (BITS(inst_cream->inst, 12, 15) == 15) {
@@ -4554,8 +4552,10 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             // Should check if RD is even-numbered, Rd != 14, addr[0:1] == 0, (CP15_reg1_U == 1 || addr[2] == 0)
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 1);
 
-            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = Memory::Read32(addr);
-            cpu->Reg[BITS(inst_cream->inst, 12, 15) + 1] = Memory::Read32(addr + 4);
+            // The 3DS doesn't have LPAE (Large Physical Address Extension), so it
+            // wouldn't do this as a single read.
+            cpu->Reg[BITS(inst_cream->inst, 12, 15) + 0] = ReadMemory32(cpu, addr);
+            cpu->Reg[BITS(inst_cream->inst, 12, 15) + 1] = ReadMemory32(cpu, addr + 4);
 
             // No dispatch since this operation should not modify R15
         }
@@ -4574,7 +4574,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             add_exclusive_addr(cpu, read_addr);
             cpu->exclusive_state = 1;
 
-            RD = Memory::Read32(read_addr);
+            RD = ReadMemory32(cpu, read_addr);
             if (inst_cream->Rd == 15) {
                 INC_PC(sizeof(generic_arm_inst));
                 goto DISPATCH;
@@ -4614,7 +4614,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             add_exclusive_addr(cpu, read_addr);
             cpu->exclusive_state = 1;
 
-            RD = Memory::Read16(read_addr);
+            RD = ReadMemory16(cpu, read_addr);
             if (inst_cream->Rd == 15) {
                 INC_PC(sizeof(generic_arm_inst));
                 goto DISPATCH;
@@ -4634,8 +4634,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             add_exclusive_addr(cpu, read_addr);
             cpu->exclusive_state = 1;
 
-            RD = Memory::Read32(read_addr);
-            RD2 = Memory::Read32(read_addr + 4);
+            RD  = ReadMemory32(cpu, read_addr);
+            RD2 = ReadMemory32(cpu, read_addr + 4);
 
             if (inst_cream->Rd == 15) {
                 INC_PC(sizeof(generic_arm_inst));
@@ -4652,7 +4652,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
             ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 1);
-            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = Memory::Read16(addr);
+
+            cpu->Reg[BITS(inst_cream->inst, 12, 15)] = ReadMemory16(cpu, addr);
             if (BITS(inst_cream->inst, 12, 15) == 15) {
                 INC_PC(sizeof(ldst_inst));
                 goto DISPATCH;
@@ -4688,7 +4689,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
         if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
             ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 1);
-            unsigned int value = Memory::Read16(addr);
+
+            unsigned int value = ReadMemory16(cpu, addr);
             if (BIT(value, 15)) {
                 value |= 0xffff0000;
             }
@@ -4709,7 +4711,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 1);
 
-            unsigned int value = Memory::Read32(addr);
+            unsigned int value = ReadMemory32(cpu, addr);
             cpu->Reg[BITS(inst_cream->inst, 12, 15)] = value;
 
             if (BITS(inst_cream->inst, 12, 15) == 15) {
@@ -6010,36 +6012,36 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             if (BIT(inst_cream->inst, 22) == 1) {
                 for (int i = 0; i < 13; i++) {
                     if (BIT(inst_cream->inst, i)) {
-                        Memory::Write32(addr, cpu->Reg[i]);
+                        WriteMemory32(cpu, addr, cpu->Reg[i]);
                         addr += 4;
                     }
                 }
                 if (BIT(inst_cream->inst, 13)) {
                     if (cpu->Mode == USER32MODE)
-                        Memory::Write32(addr, cpu->Reg[13]);
+                        WriteMemory32(cpu, addr, cpu->Reg[13]);
                     else
-                        Memory::Write32(addr, cpu->Reg_usr[0]);
+                        WriteMemory32(cpu, addr, cpu->Reg_usr[0]);
 
                     addr += 4;
                 }
                 if (BIT(inst_cream->inst, 14)) {
                     if (cpu->Mode == USER32MODE)
-                        Memory::Write32(addr, cpu->Reg[14]);
+                        WriteMemory32(cpu, addr, cpu->Reg[14]);
                     else
-                        Memory::Write32(addr, cpu->Reg_usr[1]);
+                        WriteMemory32(cpu, addr, cpu->Reg_usr[1]);
 
                     addr += 4;
                 }
                 if (BIT(inst_cream->inst, 15)) {
-                    Memory::Write32(addr, cpu->Reg_usr[1] + 8);
+                    WriteMemory32(cpu, addr, cpu->Reg_usr[1] + 8);
                 }
             } else {
                 for (int i = 0; i < 15; i++) {
                     if (BIT(inst_cream->inst, i)) {
                         if (i == Rn)
-                            Memory::Write32(addr, old_RN);
+                            WriteMemory32(cpu, addr, old_RN);
                         else
-                            Memory::Write32(addr, cpu->Reg[i]);
+                            WriteMemory32(cpu, addr, cpu->Reg[i]);
 
                         addr += 4;
                     }
@@ -6047,7 +6049,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
 
                 // Check PC reg
                 if (BIT(inst_cream->inst, 15))
-                    Memory::Write32(addr, cpu->Reg_usr[1] + 8);
+                    WriteMemory32(cpu, addr, cpu->Reg_usr[1] + 8);
             }
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
@@ -6080,7 +6082,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 0);
 
             unsigned int value = cpu->Reg[BITS(inst_cream->inst, 12, 15)];
-            Memory::Write32(addr, value);
+            WriteMemory32(cpu, addr, value);
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(ldst_inst));
@@ -6143,10 +6145,10 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 0);
 
-            unsigned int value = cpu->Reg[BITS(inst_cream->inst, 12, 15)];
-            Memory::Write32(addr, value);
-            value = cpu->Reg[BITS(inst_cream->inst, 12, 15) + 1];
-            Memory::Write32(addr + 4, value);
+            // The 3DS doesn't have the Large Physical Address Extension (LPAE)
+            // so STRD wouldn't store these as a single write.
+            WriteMemory32(cpu, addr + 0, cpu->Reg[BITS(inst_cream->inst, 12, 15)]);
+            WriteMemory32(cpu, addr + 4, cpu->Reg[BITS(inst_cream->inst, 12, 15) + 1]);
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(ldst_inst));
@@ -6163,7 +6165,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
                 remove_exclusive(cpu, write_addr);
                 cpu->exclusive_state = 0;
 
-                Memory::Write32(write_addr, cpu->Reg[inst_cream->Rm]);
+                WriteMemory32(cpu, write_addr, RM);
                 RD = 0;
             } else {
                 // Failed to write due to mutex access
@@ -6207,8 +6209,16 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
                 remove_exclusive(cpu, write_addr);
                 cpu->exclusive_state = 0;
 
-                Memory::Write32(write_addr, cpu->Reg[inst_cream->Rm]);
-                Memory::Write32(write_addr + 4, cpu->Reg[inst_cream->Rm + 1]);
+                const u32 rt  = cpu->Reg[inst_cream->Rm + 0];
+                const u32 rt2 = cpu->Reg[inst_cream->Rm + 1];
+                u64 value;
+
+                if (InBigEndianMode(cpu))
+                    value = (((u64)rt << 32) | rt2);
+                else
+                    value = (((u64)rt2 << 32) | rt);
+
+                WriteMemory64(cpu, write_addr, value);
                 RD = 0;
             }
             else {
@@ -6231,7 +6241,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
                 remove_exclusive(cpu, write_addr);
                 cpu->exclusive_state = 0;
 
-                Memory::Write16(write_addr, cpu->Reg[inst_cream->Rm]);
+                WriteMemory16(cpu, write_addr, RM);
                 RD = 0;
             } else {
                 // Failed to write due to mutex access
@@ -6250,7 +6260,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 0);
 
             unsigned int value = cpu->Reg[BITS(inst_cream->inst, 12, 15)] & 0xffff;
-            Memory::Write16(addr, value);
+            WriteMemory16(cpu, addr, value);
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(ldst_inst));
@@ -6264,7 +6274,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             inst_cream->get_addr(cpu, inst_cream->inst, addr, 0);
 
             unsigned int value = cpu->Reg[BITS(inst_cream->inst, 12, 15)];
-            Memory::Write32(addr, value);
+            WriteMemory32(cpu, addr, value);
         }
         cpu->Reg[15] += GET_INST_SIZE(cpu);
         INC_PC(sizeof(ldst_inst));
@@ -6323,8 +6333,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
             swp_inst* inst_cream = (swp_inst*)inst_base->component;
 
             addr = RN;
-            unsigned int value = Memory::Read32(addr);
-            Memory::Write32(addr, RM);
+            unsigned int value = ReadMemory32(cpu, addr);
+            WriteMemory32(cpu, addr, RM);
 
             RD = value;
         }
diff --git a/src/core/arm/interpreter/armsupp.cpp b/src/core/arm/interpreter/armsupp.cpp
index ed4f6c2a26..aca2bfbbd5 100644
--- a/src/core/arm/interpreter/armsupp.cpp
+++ b/src/core/arm/interpreter/armsupp.cpp
@@ -201,3 +201,9 @@ u32 ARMul_UnsignedSatQ(s32 value, u8 shift, bool* saturation_occurred)
     *saturation_occurred = false;
     return (u32)value;
 }
+
+// Whether or not the given CPU is in big endian mode (E bit is set)
+bool InBigEndianMode(ARMul_State* cpu)
+{
+    return (cpu->Cpsr & (1 << 9)) != 0;
+}
diff --git a/src/core/arm/skyeye_common/armdefs.h b/src/core/arm/skyeye_common/armdefs.h
index 16f3ac86c7..c1a19fecc4 100644
--- a/src/core/arm/skyeye_common/armdefs.h
+++ b/src/core/arm/skyeye_common/armdefs.h
@@ -18,7 +18,6 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "core/arm/skyeye_common/armmmu.h"
 #include "core/arm/skyeye_common/arm_regformat.h"
 #include "core/arm/skyeye_common/skyeye_defs.h"
 
@@ -356,3 +355,5 @@ extern u16 ARMul_UnsignedSaturatedSub16(u16, u16);
 extern u8 ARMul_UnsignedAbsoluteDifference(u8, u8);
 extern u32 ARMul_SignedSatQ(s32, u8, bool*);
 extern u32 ARMul_UnsignedSatQ(s32, u8, bool*);
+
+extern bool InBigEndianMode(ARMul_State*);
diff --git a/src/core/arm/skyeye_common/armmmu.h b/src/core/arm/skyeye_common/armmmu.h
index 6e54142ee5..0f9eadafa4 100644
--- a/src/core/arm/skyeye_common/armmmu.h
+++ b/src/core/arm/skyeye_common/armmmu.h
@@ -20,6 +20,9 @@
 
 #pragma once
 
+#include "core/mem_map.h"
+#include "core/arm/skyeye_common/armdefs.h"
+
 // Register numbers in the MMU
 enum
 {
@@ -54,3 +57,55 @@ enum
 	XSCALE_CP15_AUX_CONTROL = 1,
 	XSCALE_CP15_COPRO_ACCESS = 15,
 };
+
+// Reads data in big/little endian format based on the
+// state of the E (endian) bit in the emulated CPU's CPSR.
+inline u16 ReadMemory16(ARMul_State* cpu, u32 address) {
+    u16 data = Memory::Read16(address);
+
+    if (InBigEndianMode(cpu))
+        data = Common::swap16(data);
+
+    return data;
+}
+
+inline u32 ReadMemory32(ARMul_State* cpu, u32 address) {
+    u32 data = Memory::Read32(address);
+
+    if (InBigEndianMode(cpu))
+        data = Common::swap32(data);
+
+    return data;
+}
+
+inline u64 ReadMemory64(ARMul_State* cpu, u32 address) {
+    u64 data = Memory::Read64(address);
+
+    if (InBigEndianMode(cpu))
+        data = Common::swap64(data);
+
+    return data;
+}
+
+// Writes data in big/little endian format based on the
+// state of the E (endian) bit in the emulated CPU's CPSR.
+inline void WriteMemory16(ARMul_State* cpu, u32 address, u16 data) {
+    if (InBigEndianMode(cpu))
+        data = Common::swap16(data);
+
+    Memory::Write16(address, data);
+}
+
+inline void WriteMemory32(ARMul_State* cpu, u32 address, u32 data) {
+    if (InBigEndianMode(cpu))
+        data = Common::swap32(data);
+
+    Memory::Write32(address, data);
+}
+
+inline void WriteMemory64(ARMul_State* cpu, u32 address, u64 data) {
+    if (InBigEndianMode(cpu))
+        data = Common::swap64(data);
+
+    Memory::Write64(address, data);
+}
diff --git a/src/core/arm/skyeye_common/vfp/vfpinstr.cpp b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
index b9b96c3887..368b5a25da 100644
--- a/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp
@@ -1388,12 +1388,20 @@ VSTR_INST:
 
         if (inst_cream->single)
         {
-            Memory::Write32(addr, cpu->ExtReg[inst_cream->d]);
+            WriteMemory32(cpu, addr, cpu->ExtReg[inst_cream->d]);
         }
         else
         {
-            Memory::Write32(addr, cpu->ExtReg[inst_cream->d*2]);
-            Memory::Write32(addr + 4, cpu->ExtReg[inst_cream->d*2+1]);
+            const u32 word1 = cpu->ExtReg[inst_cream->d*2+0];
+            const u32 word2 = cpu->ExtReg[inst_cream->d*2+1];
+
+            if (InBigEndianMode(cpu)) {
+                WriteMemory32(cpu, addr + 0, word2);
+                WriteMemory32(cpu, addr + 4, word1);
+            } else {
+                WriteMemory32(cpu, addr + 0, word1);
+                WriteMemory32(cpu, addr + 4, word2);
+            }
         }
     }
     cpu->Reg[15] += GET_INST_SIZE(cpu);
@@ -1447,17 +1455,27 @@ VPUSH_INST:
         {
             if (inst_cream->single)
             {
-                Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
+                WriteMemory32(cpu, addr, cpu->ExtReg[inst_cream->d+i]);
                 addr += 4;
             }
             else
             {
-                Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
-                Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
+                const u32 word1 = cpu->ExtReg[(inst_cream->d+i)*2+0];
+                const u32 word2 = cpu->ExtReg[(inst_cream->d+i)*2+1];
+
+                if (InBigEndianMode(cpu)) {
+                    WriteMemory32(cpu, addr + 0, word2);
+                    WriteMemory32(cpu, addr + 4, word1);
+                } else {
+                    WriteMemory32(cpu, addr + 0, word1);
+                    WriteMemory32(cpu, addr + 4, word2);
+                }
+
                 addr += 8;
             }
         }
-        cpu->Reg[R13] = cpu->Reg[R13] - inst_cream->imm32;
+
+        cpu->Reg[R13] -= inst_cream->imm32;
     }
     cpu->Reg[15] += GET_INST_SIZE(cpu);
     INC_PC(sizeof(vpush_inst));
@@ -1516,13 +1534,22 @@ VSTM_INST: /* encoding 1 */
         {
             if (inst_cream->single)
             {
-                Memory::Write32(addr, cpu->ExtReg[inst_cream->d+i]);
+                WriteMemory32(cpu, addr, cpu->ExtReg[inst_cream->d+i]);
                 addr += 4;
             }
             else
             {
-                Memory::Write32(addr, cpu->ExtReg[(inst_cream->d+i)*2]);
-                Memory::Write32(addr + 4, cpu->ExtReg[(inst_cream->d+i)*2 + 1]);
+                const u32 word1 = cpu->ExtReg[(inst_cream->d+i)*2+0];
+                const u32 word2 = cpu->ExtReg[(inst_cream->d+i)*2+1];
+
+                if (InBigEndianMode(cpu)) {
+                    WriteMemory32(cpu, addr + 0, word2);
+                    WriteMemory32(cpu, addr + 4, word1);
+                } else {
+                    WriteMemory32(cpu, addr + 0, word1);
+                    WriteMemory32(cpu, addr + 4, word2);
+                }
+
                 addr += 8;
             }
         }
@@ -1575,8 +1602,6 @@ VPOP_INST:
     if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
         CHECK_VFP_ENABLED;
 
-        unsigned int value1, value2;
-
         vpop_inst *inst_cream = (vpop_inst *)inst_base->component;
 
         addr = cpu->Reg[R13];
@@ -1585,20 +1610,26 @@ VPOP_INST:
         {
             if (inst_cream->single)
             {
-                value1 = Memory::Read32(addr);
-                cpu->ExtReg[inst_cream->d+i] = value1;
+                cpu->ExtReg[inst_cream->d+i] = ReadMemory32(cpu, addr);
                 addr += 4;
             }
             else
             {
-                value1 = Memory::Read32(addr);
-                value2 = Memory::Read32(addr + 4);
-                cpu->ExtReg[(inst_cream->d+i)*2] = value1;
-                cpu->ExtReg[(inst_cream->d+i)*2 + 1] = value2;
+                const u32 word1 = ReadMemory32(cpu, addr + 0);
+                const u32 word2 = ReadMemory32(cpu, addr + 4);
+
+                if (InBigEndianMode(cpu)) {
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word2;
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word1;
+                } else {
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word1;
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word2;
+                }
+
                 addr += 8;
             }
         }
-        cpu->Reg[R13] = cpu->Reg[R13] + inst_cream->imm32;
+        cpu->Reg[R13] += inst_cream->imm32;
     }
     cpu->Reg[15] += GET_INST_SIZE(cpu);
     INC_PC(sizeof(vpop_inst));
@@ -1653,16 +1684,20 @@ VLDR_INST:
 
         if (inst_cream->single)
         {
-            cpu->ExtReg[inst_cream->d] = Memory::Read32(addr);
+            cpu->ExtReg[inst_cream->d] = ReadMemory32(cpu, addr);
         }
         else
         {
-            unsigned int word1, word2;
-            word1 = Memory::Read32(addr);
-            word2 = Memory::Read32(addr + 4);
+            const u32 word1 = ReadMemory32(cpu, addr + 0);
+            const u32 word2 = ReadMemory32(cpu, addr + 4);
 
-            cpu->ExtReg[inst_cream->d*2] = word1;
-            cpu->ExtReg[inst_cream->d*2+1] = word2;
+            if (InBigEndianMode(cpu)) {
+                cpu->ExtReg[inst_cream->d*2+0] = word2;
+                cpu->ExtReg[inst_cream->d*2+1] = word1;
+            } else {
+                cpu->ExtReg[inst_cream->d*2+0] = word1;
+                cpu->ExtReg[inst_cream->d*2+1] = word2;
+            }
         }
     }
     cpu->Reg[15] += GET_INST_SIZE(cpu);
@@ -1722,13 +1757,22 @@ VLDM_INST:
         {
             if (inst_cream->single)
             {
-                cpu->ExtReg[inst_cream->d+i] = Memory::Read32(addr);
+                cpu->ExtReg[inst_cream->d+i] = ReadMemory32(cpu, addr);
                 addr += 4;
             }
             else
             {
-                cpu->ExtReg[(inst_cream->d+i)*2] = Memory::Read32(addr);
-                cpu->ExtReg[(inst_cream->d+i)*2 + 1] = Memory::Read32(addr + 4);
+                const u32 word1 = ReadMemory32(cpu, addr + 0);
+                const u32 word2 = ReadMemory32(cpu, addr + 4);
+
+                if (InBigEndianMode(cpu)) {
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word2;
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word1;
+                } else {
+                    cpu->ExtReg[(inst_cream->d+i)*2+0] = word1;
+                    cpu->ExtReg[(inst_cream->d+i)*2+1] = word2;
+                }
+
                 addr += 8;
             }
         }
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
index 8f4f21fecf..bce99dffa6 100644
--- a/src/core/mem_map.h
+++ b/src/core/mem_map.h
@@ -147,6 +147,7 @@ inline void Write(VAddr addr, T data);
 u8 Read8(VAddr addr);
 u16 Read16(VAddr addr);
 u32 Read32(VAddr addr);
+u64 Read64(VAddr addr);
 
 u32 Read8_ZX(VAddr addr);
 u32 Read16_ZX(VAddr addr);
diff --git a/src/core/mem_map_funcs.cpp b/src/core/mem_map_funcs.cpp
index 48f61db4e9..a161a82049 100644
--- a/src/core/mem_map_funcs.cpp
+++ b/src/core/mem_map_funcs.cpp
@@ -245,6 +245,12 @@ u32 Read32(const VAddr addr) {
     return (u32)data;
 }
 
+u64 Read64(const VAddr addr) {
+    u64_le data = 0;
+    Read<u64_le>(data, addr);
+    return (u64)data;
+}
+
 u32 Read8_ZX(const VAddr addr) {
     return (u32)Read8(addr);
 }