From 409dcf0e0aecfdb676fd3b64223a25e47c1b1c1a Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Sun, 18 Nov 2018 23:44:19 -0500
Subject: [PATCH] svc: Implement yield types 0 and -1

---
 src/common/thread_queue_list.h    | 16 +++++++++
 src/core/hle/kernel/scheduler.cpp | 18 ++++++++++
 src/core/hle/kernel/scheduler.h   |  6 ++++
 src/core/hle/kernel/svc.cpp       | 27 ++++++++++++--
 src/core/hle/kernel/thread.cpp    | 60 +++++++++++++++++++++++++++++++
 src/core/hle/kernel/thread.h      |  5 +++
 6 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index 133122c5f4..323eab97c1 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <deque>
+#include <functional>
 #include <boost/range/algorithm_ext/erase.hpp>
 
 namespace Common {
@@ -49,6 +50,21 @@ struct ThreadQueueList {
         return T();
     }
 
+    /// Returns the first element accepted by `filter`, or a default-constructed T if none is.
+    /// Queues are visited starting at `first` and following each queue's `next_nonempty` link.
+    T get_first_filter(const std::function<bool(T)>& filter) const {
+        for (const Queue* cur = first; cur != nullptr; cur = cur->next_nonempty) {
+            // Iterating an empty container is a no-op, so no explicit empty() check is needed.
+            for (const auto& item : cur->data) {
+                if (filter(item)) {
+                    return item;
+                }
+            }
+        }
+
+        return T();
+    }
+
     T pop_first() {
         Queue* cur = first;
         while (cur != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 5a5f4cef1a..fb5e149508 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -169,6 +169,16 @@ void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
     ready_queue.remove(priority, thread);
 }
 
+void Scheduler::RescheduleThread(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
+    // The thread must already be in the ready queue: it is moved, never newly inserted.
+    ASSERT(ready_queue.contains(thread) != -1);
+    // Remove and re-append so the thread ends up at the back of the queue for `priority`.
+    ready_queue.remove(priority, thread);
+    ready_queue.push_back(priority, thread);
+}
+
 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
     std::lock_guard<std::mutex> lock(scheduler_mutex);
 
@@ -179,4 +189,12 @@ void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
         ready_queue.prepare(priority);
 }
 
+Thread* Scheduler::GetNextSuggestedThread(u32 core) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    // Single-bit mask for `core`; unsigned so the shift cannot yield a negative value.
+    const auto mask = 1U << core;
+    return ready_queue.get_first_filter(
+        [mask](Thread* thread) { return (thread->GetAffinityMask() & mask) != 0; });
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index c63032b7d5..8444afdbc0 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -48,9 +48,15 @@ public:
     /// Unschedules a thread that was already scheduled
     void UnscheduleThread(Thread* thread, u32 priority);
 
+    /// Moves an already-queued thread to the back of the ready queue for the given priority
+    void RescheduleThread(Thread* thread, u32 priority);
+
     /// Sets the priority of a thread in the scheduler
     void SetThreadPriority(Thread* thread, u32 priority);
 
+    /// Gets the first ready thread whose affinity mask includes `core`, or nullptr if none
+    Thread* GetNextSuggestedThread(u32 core);
+
     /// Returns a list of all threads managed by the scheduler
     const std::vector<SharedPtr<Thread>>& GetThreadList() const {
         return thread_list;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 75dbfc31dc..467575c935 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -962,16 +962,39 @@ static void SleepThread(s64 nanoseconds) {
 
     // Don't attempt to yield execution if there are no available threads to run,
     // this way we avoid a useless reschedule to the idle thread.
-    if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
+    if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
         return;
 
+    if (nanoseconds <= 0) {
+        switch (nanoseconds) {
+        case 0:
+            GetCurrentThread()->YieldNormal();
+            break;
+        case -1:
+            GetCurrentThread()->YieldWithLoadBalancing();
+            break;
+        case -2:
+            GetCurrentThread()->YieldAndWaitForLoadBalancing();
+            break;
+        default:
+            UNREACHABLE_MSG(
+                "Unimplemented sleep yield type '{:016X}'! Falling back to forced reschedule...",
+                nanoseconds);
+        }
+        // Non-positive values select a yield type, not a duration, so the wakeup delay is zero.
+        nanoseconds = 0;
+    }
+
     // Sleep current thread and check for next thread to schedule
     WaitCurrentThread_Sleep();
 
     // Create an event to wake the thread up after the specified nanosecond delay has passed
     GetCurrentThread()->WakeAfterDelay(nanoseconds);
 
-    Core::System::GetInstance().PrepareReschedule();
+    Core::System::GetInstance().CpuCore(0).PrepareReschedule();
+    Core::System::GetInstance().CpuCore(1).PrepareReschedule();
+    Core::System::GetInstance().CpuCore(2).PrepareReschedule();
+    Core::System::GetInstance().CpuCore(3).PrepareReschedule();
 }
 
 /// Wait process wide key atomic
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 4ffb768183..ddc4da1c0d 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -388,6 +388,66 @@ bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thread> t
     return wakeup_callback(reason, std::move(thread), std::move(object), index);
 }
 
+void Thread::YieldNormal() {
+    // Nothing to do unless the thread is running and has a priority inside the valid range.
+    const bool is_running = status == ThreadStatus::Running;
+    const bool has_queue_priority = nominal_priority < THREADPRIO_COUNT;
+    if (!is_running || !has_queue_priority) {
+        return;
+    }
+    // Go to the back of this thread's priority queue, then request a reschedule.
+    scheduler->RescheduleThread(this, nominal_priority);
+    scheduler->Reschedule();
+}
+
+/**
+ * Yields the current thread and tries to pull a ready thread over from another core.
+ *
+ * The thread is moved to the back of the ready queue for its nominal priority. Each other
+ * core's scheduler is then asked for a ready thread whose affinity mask includes this thread's
+ * core, and ChangeCore() is called on the first one found.
+ *
+ * If the thread is not running, only PrepareReschedule() is called on its core.
+ */
+void Thread::YieldWithLoadBalancing() {
+    const auto priority = nominal_priority;
+    const auto core = processor_id;
+
+    // Avoid yielding if the thread isn't even running.
+    if (status != ThreadStatus::Running) {
+        Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
+        return;
+    }
+
+    if (priority < THREADPRIO_COUNT) {
+        // Reschedule thread to end of queue.
+        scheduler->RescheduleThread(this, priority);
+    }
+
+    Thread* suggested_thread = nullptr;
+
+    // NOTE(review): the core count of 4 is hard-coded; confirm it matches the emulated system.
+    for (int i = 0; i < 4; ++i) {
+        if (i == core)
+            continue;
+
+        const auto res =
+            Core::System::GetInstance().CpuCore(i).Scheduler().GetNextSuggestedThread(core);
+        if (res != nullptr) {
+            suggested_thread = res;
+            break;
+        }
+    }
+
+    // Hand the first suggestion to this core, leaving its affinity mask unchanged.
+    if (suggested_thread != nullptr)
+        suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
+}
+
+void Thread::YieldAndWaitForLoadBalancing() { // Stub: only reports that it is unimplemented.
+    UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index d384d50dbc..e97434dd8d 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -26,6 +26,7 @@ enum ThreadPriority : u32 {
     THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
     THREADPRIO_DEFAULT = 44,      ///< Default thread priority for userland apps
     THREADPRIO_LOWEST = 63,       ///< Lowest thread priority
+    THREADPRIO_COUNT = 64,        ///< Total number of possible thread priorities.
 };
 
 enum ThreadProcessorId : s32 {
@@ -370,6 +371,10 @@ public:
         return affinity_mask;
     }
 
+    void YieldNormal();                  ///< Requeues this thread at the back of its priority
+    void YieldWithLoadBalancing();       ///< Requeues, then asks other cores for a thread
+    void YieldAndWaitForLoadBalancing(); ///< Not implemented yet
+
 private:
     explicit Thread(KernelCore& kernel);
     ~Thread() override;