From ae93adddd1f52b368c3faa8bc85bb85dd62ee83d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 26 Aug 2014 18:24:40 -0400
Subject: [PATCH 1/5] srv::Initialize: Return "success" status code.

---
 src/core/hle/service/srv.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/core/hle/service/srv.cpp b/src/core/hle/service/srv.cpp
index 8f8413d02b..23be3cf2cc 100644
--- a/src/core/hle/service/srv.cpp
+++ b/src/core/hle/service/srv.cpp
@@ -16,6 +16,10 @@ Handle g_event_handle = 0;
 
 void Initialize(Service::Interface* self) {
     DEBUG_LOG(OSHLE, "called");
+
+    u32* cmd_buff = Service::GetCommandBuffer();
+
+    cmd_buff[1] = 0; // No error
 }
 
 void GetProcSemaphore(Service::Interface* self) {

From 3ade84cb7b9e0170b0025aa6af0ec0ed4097899c Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 26 Aug 2014 23:58:03 -0400
Subject: [PATCH 2/5] Threading: Fix thread starting to execute first
 instruction correctly.

---
 src/core/arm/interpreter/arm_interpreter.cpp | 2 +-
 src/core/hle/kernel/thread.cpp               | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/core/arm/interpreter/arm_interpreter.cpp b/src/core/arm/interpreter/arm_interpreter.cpp
index d35a3ae171..0842d2f8ec 100644
--- a/src/core/arm/interpreter/arm_interpreter.cpp
+++ b/src/core/arm/interpreter/arm_interpreter.cpp
@@ -27,7 +27,7 @@ ARM_Interpreter::ARM_Interpreter()  {
     // Reset the core to initial state
     ARMul_CoProInit(state); 
     ARMul_Reset(state);
-    state->NextInstr = RESUME;
+    state->NextInstr = RESUME; // NOTE: This will be overwritten by LoadContext
     state->Emulate = 3;
 
     state->pc = state->Reg[15] = 0x00000000;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 554ec97565..8bd9ca1a18 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,6 +117,11 @@ void ResetThread(Thread* t, u32 arg, s32 lowest_priority) {
     t->context.sp = t->stack_top;
     t->context.cpsr = 0x1F; // Usermode
     
+    // TODO(bunnei): This instructs the CPU core to start the execution as if it is "resuming" a
+    // thread. This is somewhat Sky-Eye specific, and should be re-architected in the future to be
+    // agnostic of the CPU core.
+    t->context.mode = 8;
+
     if (t->current_priority < lowest_priority) {
         t->current_priority = t->initial_priority;
     }

From 738b88293ca82a7823d79d0406ac38019b7dec9c Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 27 Aug 2014 00:04:26 -0400
Subject: [PATCH 3/5] Loader: Added support for loading raw BIN executables.

- Useful for debugging homebrew

Qt: Updated GUI to support loading .bin files.
---
 src/citra_qt/main.cpp      |  2 +-
 src/core/loader/loader.cpp | 20 ++++++++++++++++++++
 src/core/loader/loader.h   |  1 +
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index a6b87f781c..1bf9bc53c0 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -150,7 +150,7 @@ void GMainWindow::BootGame(std::string filename)
 
 void GMainWindow::OnMenuLoadFile()
 {
-    QString filename = QFileDialog::getOpenFileName(this, tr("Load file"), QString(), tr("3DS executable (*.elf *.axf *.cci *.cxi)"));
+    QString filename = QFileDialog::getOpenFileName(this, tr("Load file"), QString(), tr("3DS executable (*.elf *.axf *.bin *.cci *.cxi)"));
     if (filename.size())
        BootGame(filename.toLatin1().data());
 }
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 2b42e3c647..365f5a2777 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -9,6 +9,7 @@
 #include "core/loader/elf.h"
 #include "core/loader/ncch.h"
 #include "core/hle/kernel/archive.h"
+#include "core/mem_map.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -39,6 +40,9 @@ FileType IdentifyFile(const std::string &filename) {
     else if (!strcasecmp(extension.c_str(), ".cci")) {
         return FileType::CCI; // TODO(bunnei): Do some filetype checking :p
     }
+    else if (!strcasecmp(extension.c_str(), ".bin")) {
+        return FileType::BIN; // TODO(bunnei): Do some filetype checking :p
+    }
     return FileType::Unknown;
 }
 
@@ -69,6 +73,22 @@ ResultStatus LoadFile(const std::string& filename) {
         break;
     }
 
+    // Raw BIN file format...
+    case FileType::BIN:
+    {
+        INFO_LOG(LOADER, "Loading BIN file %s...", filename.c_str());
+
+        File::IOFile file(filename, "rb");
+
+        if (file.IsOpen()) {
+            file.ReadBytes(Memory::GetPointer(Memory::EXEFS_CODE_VADDR), (size_t)file.GetSize());
+            Kernel::LoadExec(Memory::EXEFS_CODE_VADDR);
+        } else {
+            return ResultStatus::Error;
+        }
+        return ResultStatus::Success;
+    }
+
     // Error occurred durring IdentifyFile...
     case FileType::Error:
 
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index 4ba10de527..68f843005b 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -21,6 +21,7 @@ enum class FileType {
     CXI,
     CIA,
     ELF,
+    BIN,
 };
 
 /// Return type for functions in Loader namespace

From eb36d3fc903db8848f7493009c7b59c8ce038de9 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 29 Aug 2014 23:24:32 -0400
Subject: [PATCH 4/5] Core: Refactor core to use only one function for
 execution.

Core: Cleaned up comment to be more readable.

Citra: Changed loop to be more readable.
---
 src/citra/citra.cpp |  4 +++-
 src/core/core.cpp   | 26 ++++++++------------------
 src/core/core.h     | 11 +++++++++--
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 9399ff2964..7dc721dc3b 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -31,7 +31,9 @@ int __cdecl main(int argc, char **argv) {
         return -1;
     }
 
-    Core::RunLoop();
+    while(true) {
+        Core::RunLoop();
+    }
 
     delete emu_window;
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index fc9909377a..f21801e52b 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -6,6 +6,8 @@
 #include "common/log.h"
 #include "common/symbols.h"
 
+#include "video_core/video_core.h"
+
 #include "core/core.h"
 #include "core/mem_map.h"
 #include "core/hw/hw.h"
@@ -24,29 +26,17 @@ ARM_Interface*  g_app_core      = nullptr;  ///< ARM11 application core
 ARM_Interface*  g_sys_core      = nullptr;  ///< ARM11 system (OS) core
 
 /// Run the core CPU loop
-void RunLoop() {
-    for (;;){
-        // This function loops for 100 instructions in the CPU before trying to update hardware.
-        // This is a little bit faster than SingleStep, and should be pretty much equivalent. The 
-        // number of instructions chosen is fairly arbitrary, however a large number will more 
-        // drastically affect the frequency of GSP interrupts and likely break things. The point of
-        // this is to just loop in the CPU for more than 1 instruction to reduce overhead and make
-        // it a little bit faster...
-        g_app_core->Run(100);
-        HW::Update();
-        if (HLE::g_reschedule) {
-            Kernel::Reschedule();
-        }
+void RunLoop(int tight_loop) {
+    g_app_core->Run(tight_loop);
+    HW::Update();
+    if (HLE::g_reschedule) {
+        Kernel::Reschedule();
     }
 }
 
 /// Step the CPU one instruction
 void SingleStep() {
-    g_app_core->Step();
-    HW::Update();
-    if (HLE::g_reschedule) {
-        Kernel::Reschedule();
-    }
+    RunLoop(1);
 }
 
 /// Halt the core
diff --git a/src/core/core.h b/src/core/core.h
index 4b42dabcbd..9c72c8b3f9 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -19,8 +19,15 @@ extern ARM_Interface*   g_sys_core;     ///< ARM11 system (OS) core
 /// Start the core
 void Start();
 
-/// Run the core CPU loop
-void RunLoop();
+/**
+ * Run the core CPU loop
+ * Runs tight_loop instructions (100 by default) on the CPU before trying to update hardware. This
+ * is a little bit faster than SingleStep, and should be pretty much equivalent. The default count
+ * is fairly arbitrary; however, a large number will more drastically affect the frequency of GSP
+ * interrupts and likely break things. The point of this is to just loop in the CPU for more than
+ * 1 instruction to reduce overhead and make it a little bit faster...
+ */
+void RunLoop(int tight_loop=100);
 
 /// Step the CPU one instruction
 void SingleStep();

From aabfcfe6ad5b64016ceccfae4ac7d441dd2c2619 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 29 Aug 2014 23:50:38 -0400
Subject: [PATCH 5/5] GPU: Improve frame synchronization, increases
 compatibility with both homebrew and retail applications.

---
 src/core/hw/gpu.cpp | 44 +++++++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index f1f3e7ab3d..8709b8eb77 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -24,6 +24,7 @@ Regs g_regs;
 
 u32 g_cur_line = 0;         ///< Current vertical screen line
 u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
+u64 g_last_frame_ticks = 0; ///< CPU tick count from last frame
 
 template <typename T>
 inline void Read(T &var, const u32 raw_addr) {
@@ -179,27 +180,44 @@ void Update() {
     auto& framebuffer_top = g_regs.framebuffer_config[0];
     u64 current_ticks = Core::g_app_core->GetTicks();
 
-    // Synchronize line...
-    if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
-        GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
-        g_cur_line++;
-        g_last_line_ticks = current_ticks;
+    // Update the frame after a certain number of CPU ticks have elapsed. This assumes that the
+    // active frame in memory is always complete to render. There also may be issues with this
+    // becoming out-of-sync with GSP synchronization code (as follows). At this time, this seems to
+    // be the most effective solution for both homebrew and retail applications. With retail, this
+    // could be moved below (and probably would guarantee more accurate synchronization). However,
+    // primitive homebrew relies on a vertical blank interrupt to happen inevitably (regardless of a
+    // threading reschedule).
+
+    if ((current_ticks - g_last_frame_ticks) > GPU::kFrameTicks) {
+        VideoCore::g_renderer->SwapBuffers();
+        g_last_frame_ticks = current_ticks;
     }
 
-    // Synchronize frame...
-    if (g_cur_line >= framebuffer_top.height) {
-        g_cur_line = 0;
-        GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
-        VideoCore::g_renderer->SwapBuffers();
-        Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
-        HLE::Reschedule(__func__);
+    // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical
+    // blank, we need to simulate it. Based on testing, it seems that retail applications work more
+    // accurately when this is signalled between thread switches.
+
+    if (HLE::g_reschedule) {
+
+        // Synchronize line...
+        if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
+            GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
+            g_cur_line++;
+            g_last_line_ticks = current_ticks;
+        }
+
+        // Synchronize frame...
+        if (g_cur_line >= framebuffer_top.height) {
+            g_cur_line = 0;
+            GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
+        }
     }
 }
 
 /// Initialize hardware
 void Init() {
     g_cur_line = 0;
-    g_last_line_ticks = Core::g_app_core->GetTicks();
+    g_last_frame_ticks = g_last_line_ticks = Core::g_app_core->GetTicks();
 
     auto& framebuffer_top = g_regs.framebuffer_config[0];
     auto& framebuffer_sub = g_regs.framebuffer_config[1];