diff --git a/src/core/core.cpp b/src/core/core.cpp
index bb2ed7a92f..b5c2582304 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -61,10 +61,6 @@ int Init() {
     g_sys_core = new ARM_DynCom(USER32MODE);
     g_app_core = new ARM_DynCom(USER32MODE);
 
-    // TODO: Whenever TLS is implemented, this should contain
-    // the address of the 0x200-byte TLS
-    g_app_core->SetCP15Register(CP15_THREAD_URO, Memory::TLS_AREA_VADDR);
-
     LOG_DEBUG(Core, "Initialized OK");
     return 0;
 }
diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h
index 0fd18148aa..8c3886ffd0 100644
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
 #include "core/mem_map.h"
 
 namespace Kernel {
@@ -12,12 +13,15 @@ namespace Kernel {
 static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of header
 
 /**
- * Returns a pointer to the command buffer in kernel memory
+ * Returns a pointer to the command buffer in the current thread's TLS
+ * TODO(Subv): This is not entirely correct: the command buffer should be copied from
+ * the thread's TLS to an intermediate buffer in kernel memory, and then copied again to
+ * the service handler process's memory.
  * @param offset Optional offset into command buffer
  * @return Pointer to command buffer
  */
-inline static u32* GetCommandBuffer(const int offset=0) {
-    return (u32*)Memory::GetPointer(Memory::TLS_AREA_VADDR + kCommandHeaderOffset + offset);
+inline static u32* GetCommandBuffer(const int offset = 0) { // NOTE(review): GetCurrentThread() is dereferenced unchecked - confirm it is never null here
+    return (u32*)Memory::GetPointer(GetCurrentThread()->GetTLSAddress() + kCommandHeaderOffset + offset);
 }
 
 /**
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 0a3fd7cb19..5de8f9a73d 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -197,6 +197,7 @@ static void SwitchContext(Thread* new_thread) {
         new_thread->current_priority = new_thread->nominal_priority;
 
         Core::g_app_core->LoadContext(new_thread->context);
+        Core::g_app_core->SetCP15Register(CP15_THREAD_URO, new_thread->GetTLSAddress());
     } else {
         current_thread = nullptr;
     }
@@ -402,6 +403,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->name = std::move(name);
     thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
 
+    VAddr tls_address = Memory::TLS_AREA_VADDR + (thread->thread_id - 1) * 0x200;
+
+    ASSERT_MSG(tls_address < Memory::TLS_AREA_VADDR_END, "Too many threads");
+
+    thread->tls_address = tls_address;
+
     // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
     // to initialize the context
     Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg);
@@ -495,6 +502,10 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
     context.cpu_registers[1] = output;
 }
 
+VAddr Thread::GetTLSAddress() const {
+    return tls_address; // Slot inside the TLS area; assigned in Thread::Create
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 9958b16e66..6891c8c2f7 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -135,6 +135,12 @@ public:
      */
     void Stop();
 
+    /**
+     * Returns the Thread Local Storage address of this thread
+     * @return VAddr of the thread's TLS
+     */
+    VAddr GetTLSAddress() const;
+
     Core::ThreadContext context;
 
     u32 thread_id;
@@ -150,6 +156,8 @@ public:
 
     s32 processor_id;
 
+    VAddr tls_address; ///< Address of the Thread Local Storage of the thread
+
     /// Mutexes currently held by this thread, which will be released when it exits.
     boost::container::flat_set<SharedPtr<Mutex>> held_mutexes;