diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index becdb7d54a..d6d8e5f598 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -4,7 +4,6 @@ add_library(shader_recompiler STATIC
     backend/glasm/emit_context.h
     backend/glasm/emit_glasm.cpp
     backend/glasm/emit_glasm.h
-    backend/glasm/emit_glasm_atomic.cpp
     backend/glasm/emit_glasm_barriers.cpp
     backend/glasm/emit_glasm_bitwise_conversion.cpp
     backend/glasm/emit_glasm_composite.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
index b5b0e22045..e18526816b 100644
--- a/src/shader_recompiler/backend/glasm/emit_context.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -7,6 +7,7 @@
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/backend/glasm/emit_context.h"
 #include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::GLASM {
 namespace {
@@ -40,13 +41,21 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
         Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
         ++cbuf_index;
     }
+    u32 ssbo_index{};
     for (const auto& desc : info.storage_buffers_descriptors) {
         if (desc.count != 1) {
             throw NotImplementedException("Storage buffer descriptor array");
         }
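+        // With native storage buffer support, declare one STORAGE binding per
+        // descriptor so LDB/STB/ATOMB below can address the SSBO directly.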
+        if (runtime_info.glasm_use_storage_buffers) {
+            Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
+            ++bindings.storage_buffer;
+            ++ssbo_index;
+        }
     }
-    if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
-        Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
+    if (!runtime_info.glasm_use_storage_buffers) {
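+        // Fall back to bindless pointers: each SSBO descriptor is passed through a
+        // local parameter (address in .xy, length in .z) consumed by StorageOp.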
+        if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
+            Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
+        }
     }
     stage = program.stage;
     switch (program.stage) {
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
index 3d02d873e7..3df32a4a6f 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -15,9 +15,10 @@ namespace Shader::Backend::GLASM {
 [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
                                     IR::Program& program, Bindings& bindings);
 
-[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) {
+[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+                                           IR::Program& program) {
     Bindings binding;
-    return EmitGLASM(profile, {}, program, binding);
+    return EmitGLASM(profile, runtime_info, program, binding);
 }
 
 } // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
deleted file mode 100644
index e72b252a3f..0000000000
--- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
+++ /dev/null
@@ -1,351 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "shader_recompiler/backend/glasm/emit_context.h"
-#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
-#include "shader_recompiler/frontend/ir/value.h"
-
-namespace Shader::Backend::GLASM {
-namespace {
-void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
-               std::string_view then_expr, std::string_view else_expr = {}) {
-    // Operate on bindless SSBO, call the expression with bounds checking
-    // address = c[binding].xy
-    // length  = c[binding].z
-    const u32 sb_binding{binding.U32()};
-    ctx.Add("PK64.U DC,c[{}];"           // pointer = address
-            "CVT.U64.U32 DC.z,{};"       // offset = uint64_t(offset)
-            "ADD.U64 DC.x,DC.x,DC.z;"    // pointer += offset
-            "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
-            sb_binding, offset, offset, sb_binding);
-    if (else_expr.empty()) {
-        ctx.Add("IF NE.x;{}ENDIF;", then_expr);
-    } else {
-        ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
-    }
-}
-
-template <typename ValueType>
-void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
-          ValueType value, std::string_view operation, std::string_view size) {
-    const Register ret{ctx.reg_alloc.Define(inst)};
-    StorageOp(ctx, binding, offset,
-              fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
-}
-} // namespace
-
-void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarU32 value) {
-    ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarS32 value) {
-    ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarU32 value) {
-    ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarS32 value) {
-    ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarU32 value) {
-    ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                          ScalarU32 value) {
-    ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                                ScalarU32 value) {
-    ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                                Register value) {
-    ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "U32");
-}
-
-void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarS32 value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "S32");
-}
-
-void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "U32");
-}
-
-void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarS32 value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "S32");
-}
-
-void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "U32");
-}
-
-void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
-}
-
-void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
-}
-
-void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "AND", "U32");
-}
-
-void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                           ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "OR", "U32");
-}
-
-void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "XOR", "U32");
-}
-
-void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                                 ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
-}
-
-void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "U64");
-}
-
-void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "S64");
-}
-
-void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "U64");
-}
-
-void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "S64");
-}
-
-void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "U64");
-}
-
-void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "AND", "U64");
-}
-
-void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                           ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "OR", "U64");
-}
-
-void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "XOR", "U64");
-}
-
-void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                                 ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
-}
-
-void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarF32 value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "F32");
-}
-
-void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                               ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
-}
-
-void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                               [[maybe_unused]] const IR::Value& binding,
-                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                               ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
-}
-
-void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                               [[maybe_unused]] const IR::Value& binding,
-                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                               ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
-}
-
-void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                               [[maybe_unused]] const IR::Value& binding,
-                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicIAdd32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMin32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMin32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMax32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMax32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicInc32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicDec32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAnd32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicOr32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicXor32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicExchange32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicIAdd64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMin64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMin64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMax64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMax64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicInc64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicDec64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAnd64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicOr64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicXor64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicExchange64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAddF32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAddF16x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAddF32x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMinF16x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMinF32x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMaxF16x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMaxF32x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index 26b03587e7..90dbb80d29 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -8,6 +8,7 @@
 #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::GLASM {
 namespace {
@@ -29,7 +30,7 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
     }
 }
 
-void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr,
+void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
                      std::string_view else_expr = {}) {
     const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
     for (size_t index = 0; index < num_buffers; ++index) {
@@ -44,14 +45,22 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e
                 "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1
                 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end   ? -1 : 1
                 "AND.U.CC RC.x,RC.x,RC.y;"
-                "IF NE.x;"                // a && b
-                "SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr
-                "PK64.U DC.y,c[{}];"      // host_ssbo = cbuf
-                "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
-                "{}"
-                "ELSE;",
+                "IF NE.x;"                 // a && b
+                "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
                 ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
-                address, address, index, then_expr);
+                address, address);
+        if (pointer_based) {
+            ctx.Add("PK64.U DC.y,c[{}];"      // host_ssbo = cbuf
+                    "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
+                    "{}"
+                    "ELSE;",
+                    index, expr);
+        } else {
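+            // Storage buffer path: truncate the byte offset to 32 bits and index
+            // the SSBO binding directly instead of forming a host pointer.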
+            ctx.Add("CVT.U32.U64 RC.x,DC.x;"
+                    "{},ssbo{}[RC.x];"
+                    "ELSE;",
+                    expr, index);
+        }
     }
     if (!else_expr.empty()) {
         ctx.Add("{}", else_expr);
@@ -64,25 +73,54 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e
 template <typename ValueType>
 void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
            std::string_view size) {
-    StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
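+        // STB stores through the SSBO binding, skipping the bounds-checked
+        // pointer path below (see NV_shader_storage_buffer_object).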
+        ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
+    } else {
+        StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
+    }
 }
 
 void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
           std::string_view size) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
-              fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
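+        // LDB loads through the SSBO binding; no manual bounds check or zeroed
+        // fallback is emitted on this path.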
+        ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
+    } else {
+        StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
+                  fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+    }
 }
 
 template <typename ValueType>
 void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
-    GlobalStorageOp(ctx, address, fmt::format("STORE.{} {},DC.x;", size, value));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
+        GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
+    } else {
+        GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
+    }
 }
 
 void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret),
-                    fmt::format("MOV.S {},0;", ret));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
+        GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
+    } else {
+        GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
+                        fmt::format("MOV.S {},0;", ret));
+    }
+}
+
+template <typename ValueType>
+void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+          ValueType value, std::string_view operation, std::string_view size) {
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
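+        // ATOMB performs the atomic operation directly on the SSBO binding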
+        ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
+                offset);
+    } else {
+        StorageOp(ctx, binding, offset,
+                  fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
+    }
 }
 } // Anonymous namespace
 
@@ -212,4 +250,318 @@ void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 o
     Write(ctx, binding, offset, value, "U32X4");
 }
 
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarS32 value) {
+    ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarS32 value) {
+    ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                          ScalarU32 value) {
+    ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                                ScalarU32 value) {
+    ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                                Register value) {
+    ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "U32");
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarS32 value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "S32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "U32");
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarS32 value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "S32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "U32");
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "AND", "U32");
+}
+
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                           ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "OR", "U32");
+}
+
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "XOR", "U32");
+}
+
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                 ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
+}
+
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "U64");
+}
+
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "S64");
+}
+
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "U64");
+}
+
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "S64");
+}
+
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "U64");
+}
+
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "AND", "U64");
+}
+
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                           ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "OR", "U64");
+}
+
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "XOR", "U64");
+}
+
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                 ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
+}
+
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarF32 value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "F32");
+}
+
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
+}
+
+void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
+}
+
+void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
+}
+
+void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 } // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index c46452c3db..f8913bf146 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -111,7 +111,10 @@ struct RuntimeInfo {
     std::optional<CompareFunction> alpha_test_func;
     float alpha_test_reference{};
 
+    // Static y negate value
     bool y_negate{};
+    // Use storage buffers instead of global pointers on GLASM
+    bool glasm_use_storage_buffers{};
 
     std::vector<TransformFeedbackVarying> xfb_varyings;
 };
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2d0ef1307d..334ed470f2 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
 
 void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
                                            u32 offset, u32 size, bool is_written) {
-    if (use_assembly_shaders) {
+    if (use_storage_buffers) {
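+        // Native SSBO path: bind the range to the stage's storage binding point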
+        const GLuint base_binding = graphics_base_storage_bindings[stage];
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    } else {
         const BindlessSSBO ssbo{
             .address = buffer.HostGpuAddr() + offset,
             .length = static_cast<GLsizei>(size),
@@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
         glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
                                         reinterpret_cast<const GLuint*>(&ssbo));
-    } else {
-        const GLuint base_binding = graphics_base_storage_bindings[stage];
-        const GLuint binding = base_binding + binding_index;
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
-                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
     }
 }
 
 void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                   u32 size, bool is_written) {
-    if (use_assembly_shaders) {
+    if (use_storage_buffers) {
+        if (size != 0) {
+            glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+                              static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+        } else {
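+            // A zero size marks an empty descriptor; clear the binding slot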
+            glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+        }
+    } else {
         const BindlessSSBO ssbo{
             .address = buffer.HostGpuAddr() + offset,
             .length = static_cast<GLsizei>(size),
@@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
         glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
                                         reinterpret_cast<const GLuint*>(&ssbo));
-    } else if (size == 0) {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
-    } else {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
-                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 4986c65fd4..bc16abafb2 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -147,6 +147,10 @@ public:
         image_handles = image_handles_;
     }
 
+    void SetEnableStorageBuffers(bool use_storage_buffers_) {
+        use_storage_buffers = use_storage_buffers_;
+    }
+
 private:
     static constexpr std::array PABO_LUT{
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -160,6 +164,8 @@ private:
     bool use_assembly_shaders = false;
     bool has_unified_vertex_buffers = false;
 
+    bool use_storage_buffers = false;
+
     u32 max_attributes = 0;
 
     std::array<GLuint, 5> graphics_base_uniform_bindings{};
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 700ebd8b84..5cf5f97a9a 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -17,6 +17,15 @@ using VideoCommon::ImageId;
 constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 16;
 
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+    u32 num{};
+    for (const auto& desc : range) {
+        num += desc.count;
+    }
+    return num;
+}
+
 size_t ComputePipelineKey::Hash() const noexcept {
     return static_cast<size_t>(
         Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
@@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
     return std::memcmp(this, &rhs, sizeof *this) == 0;
 }
 
-ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                                 Tegra::MemoryManager& gpu_memory_,
+ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
+                                 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                                  Tegra::Engines::KeplerCompute& kepler_compute_,
                                  ProgramManager& program_manager_, const Shader::Info& info_,
                                  OGLProgram source_program_, OGLAssemblyProgram assembly_program_)
     : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
       kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_},
       source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} {
-    for (const auto& desc : info.texture_buffer_descriptors) {
-        num_texture_buffers += desc.count;
-    }
-    for (const auto& desc : info.image_buffer_descriptors) {
-        num_image_buffers += desc.count;
-    }
-    u32 num_textures = num_texture_buffers;
-    for (const auto& desc : info.texture_descriptors) {
-        num_textures += desc.count;
-    }
+
+    num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
+    num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+
+    const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
     ASSERT(num_textures <= MAX_TEXTURES);
 
-    u32 num_images = num_image_buffers;
-    for (const auto& desc : info.image_descriptors) {
-        num_images += desc.count;
-    }
+    const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
     ASSERT(num_images <= MAX_IMAGES);
+
+    const bool is_glasm{assembly_program.handle != 0};
+    const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
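+    // Prefer real storage buffers; fall back to bindless global memory pointers
+    // only when GLASM cannot fit every descriptor within the device limit.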
+    use_storage_buffers =
+        !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
+    writes_global_memory = !use_storage_buffers &&
+                           std::ranges::any_of(info.storage_buffers_descriptors,
+                                               [](const auto& desc) { return desc.is_written; });
 }
 
 void ComputePipeline::Configure() {
@@ -150,6 +159,7 @@ void ComputePipeline::Configure() {
 
     buffer_cache.UpdateComputeBuffers();
 
+    buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
     buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
     buffer_cache.BindHostComputeBuffers();
 
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index e3b94e2f30..dd6b62ef20 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -28,6 +28,7 @@ struct Info;
 
 namespace OpenGL {
 
+class Device;
 class ProgramManager;
 
 struct ComputePipelineKey {
@@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
 
 class ComputePipeline {
 public:
-    explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                             Tegra::MemoryManager& gpu_memory_,
+    explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
+                             BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              ProgramManager& program_manager_, const Shader::Info& info_,
                              OGLProgram source_program_, OGLAssemblyProgram assembly_program_);
 
     void Configure();
 
+    [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+        return writes_global_memory;
+    }
+
 private:
     TextureCache& texture_cache;
     BufferCache& buffer_cache;
@@ -70,6 +75,9 @@ private:
 
     u32 num_texture_buffers{};
     u32 num_image_buffers{};
+
+    bool use_storage_buffers{};
+    bool writes_global_memory{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 18bbc4c1f1..01da2bb573 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -135,13 +135,13 @@ Device::Device() {
             "Beta driver 443.24 is known to have issues. There might be performance issues.");
         disable_fast_buffer_sub_data = true;
     }
-
     max_uniform_buffers = BuildMaxUniformBuffers();
     uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
     shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
     max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
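+    // The vertex stage has the weakest minimum SSBO block guarantee, so its limit
+    // serves as a conservative cap for every GLASM stage.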
+    max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
     has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
                           GLAD_GL_NV_shader_thread_shuffle;
     has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -236,22 +236,6 @@ std::string Device::GetVendorName() const {
     return vendor_name;
 }
 
-Device::Device(std::nullptr_t) {
-    max_uniform_buffers.fill(std::numeric_limits<u32>::max());
-    uniform_buffer_alignment = 4;
-    shader_storage_alignment = 4;
-    max_vertex_attributes = 16;
-    max_varyings = 15;
-    max_compute_shared_memory_size = 0x10000;
-    has_warp_intrinsics = true;
-    has_shader_ballot = true;
-    has_vertex_viewport_layer = true;
-    has_image_load_formatted = true;
-    has_texture_shadow_lod = true;
-    has_variable_aoffi = true;
-    has_depth_buffer_float = true;
-}
-
 bool Device::TestVariableAoffi() {
     return TestProgram(R"(#version 430 core
 // This is a unit test, please ignore me on apitrace bug reports.
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 152a3acd31..d67f5693c5 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -13,7 +13,6 @@ namespace OpenGL {
 class Device {
 public:
     explicit Device();
-    explicit Device(std::nullptr_t);
 
     [[nodiscard]] std::string GetVendorName() const;
 
@@ -41,6 +40,10 @@ public:
         return max_compute_shared_memory_size;
     }
 
+    u32 GetMaxGLASMStorageBufferBlocks() const {
+        return max_glasm_storage_buffer_blocks;
+    }
+
     bool HasWarpIntrinsics() const {
         return has_warp_intrinsics;
     }
@@ -124,6 +127,7 @@ private:
     u32 max_vertex_attributes{};
     u32 max_varyings{};
     u32 max_compute_shared_memory_size{};
+    u32 max_glasm_storage_buffer_blocks{};
     bool has_warp_intrinsics{};
     bool has_shader_ballot{};
     bool has_vertex_viewport_layer{};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 32df35202d..19d85c482f 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 8;
 
 template <typename Range>
-u32 AccumulateCount(Range&& range) {
+u32 AccumulateCount(const Range& range) {
     u32 num{};
     for (const auto& desc : range) {
         num += desc.count;
@@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc
     return std::memcmp(this, &rhs, Size()) == 0;
 }
 
-GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                                   Tegra::MemoryManager& gpu_memory_,
+GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+                                   BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                                    Tegra::Engines::Maxwell3D& maxwell3d_,
                                    ProgramManager& program_manager_, StateTracker& state_tracker_,
                                    OGLProgram program_,
@@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
     }
     u32 num_textures{};
     u32 num_images{};
+    u32 num_storage_buffers{};
     for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
         const auto& info{stage_infos[stage]};
         if (stage < 4) {
@@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
 
         num_textures += AccumulateCount(info.texture_descriptors);
         num_images += AccumulateCount(info.image_descriptors);
+        num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+
+        writes_global_memory |= std::ranges::any_of(
+            info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
     }
     ASSERT(num_textures <= MAX_TEXTURES);
     ASSERT(num_images <= MAX_IMAGES);
 
-    if (assembly_programs[0].handle != 0 && xfb_state) {
+    const bool assembly_shaders{assembly_programs[0].handle != 0};
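+    // Same policy as compute pipelines: use real storage buffers when the combined
+    // SSBO count fits, and track global memory writes only on the pointer path.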
+    use_storage_buffers =
+        !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+    writes_global_memory &= !use_storage_buffers;
+
+    if (assembly_shaders && xfb_state) {
         GenerateTransformFeedbackState(*xfb_state);
     }
 }
@@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) {
 
     buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
     buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
+    buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
 
     const auto& regs{maxwell3d.regs};
     const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 62f700cf53..c1113e1805 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -20,6 +20,7 @@
 
 namespace OpenGL {
 
+class Device;
 class ProgramManager;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
 
 class GraphicsPipeline {
 public:
-    explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                              Tegra::MemoryManager& gpu_memory_,
+    explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+                              BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                               Tegra::Engines::Maxwell3D& maxwell3d_,
                               ProgramManager& program_manager_, StateTracker& state_tracker_,
                               OGLProgram program_,
@@ -77,6 +78,10 @@ public:
         }
     }
 
+    [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+        return writes_global_memory;
+    }
+
 private:
     void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state);
 
@@ -99,6 +104,9 @@ private:
     std::array<u32, 5> num_texture_buffers{};
     std::array<u32, 5> num_image_buffers{};
 
+    bool use_storage_buffers{};
+    bool writes_global_memory{};
+
     static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
     GLsizei num_xfb_attribs{};
     GLsizei num_xfb_strides{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eec01e8c25..5d4e80364a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     EndTransformFeedback();
 
     ++num_queued_commands;
+    has_written_global_memory |= pipeline->WritesGlobalMemory();
 
     gpu.TickWork();
 }
 
 void RasterizerOpenGL::DispatchCompute() {
-    ComputePipeline* const program{shader_cache.CurrentComputePipeline()};
-    if (!program) {
+    ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
+    if (!pipeline) {
         return;
     }
-    program->Configure();
+    pipeline->Configure();
     const auto& qmd{kepler_compute.launch_description};
     glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
     ++num_queued_commands;
+    has_written_global_memory |= pipeline->WritesGlobalMemory();
 }
 
 void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() {
 
     // Make sure memory stored from the previous GL command stream is visible
     // This is only needed on assembly shaders where we write to GPU memory with raw pointers
-    // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used
-    //       and prefer using NV_shader_storage_buffer_object when possible
-    if (Settings::values.use_assembly_shaders.GetValue()) {
+    if (has_written_global_memory) {
+        has_written_global_memory = false;
         glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
     }
     glFlush();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index afd43b2ee8..d0397b7454 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -225,7 +225,8 @@ private:
     std::array<GLuint, MAX_IMAGES> image_handles{};
 
     /// Number of commands queued to the OpenGL driver. Reset on flush.
-    std::size_t num_queued_commands = 0;
+    size_t num_queued_commands = 0;
+    bool has_written_global_memory = false;
 
     u32 last_clip_distance_mask = 0;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3aa5ac31d7..287f497b52 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) {
 }
 
 Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
-                                    const Shader::IR::Program& program) {
+                                    const Shader::IR::Program& program,
+                                    bool glasm_use_storage_buffers) {
     Shader::RuntimeInfo info;
     switch (program.stage) {
     case Shader::Stage::TessellationEval:
@@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
         info.input_topology = Shader::InputTopology::TrianglesAdjacency;
         break;
     }
+    info.glasm_use_storage_buffers = glasm_use_storage_buffers;
     return info;
 }
 
@@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
     ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs,
     bool build_in_parallel) {
     LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
-    size_t env_index{0};
+    size_t env_index{};
+    u32 total_storage_buffers{};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         if (key.unique_hashes[index] == 0) {
@@ -447,7 +450,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
         const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
         programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+
+        for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
+            total_storage_buffers += desc.count;
+        }
     }
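+    // GLASM shares one flat storage binding space across all stages, so the
+    // decision is made on the total descriptor count of the whole pipeline.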
+    const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
+    const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
+
     std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
 
     OGLProgram source_program;
@@ -466,7 +476,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
         const size_t stage_index{index - 1};
         infos[stage_index] = &program.info;
 
-        const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)};
+        const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)};
         if (device.UseAssemblyShaders()) {
             const std::string code{EmitGLASM(profile, runtime_info, program, binding)};
             assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index));
@@ -479,7 +489,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
         LinkProgram(source_program.handle);
     }
     return std::make_unique<GraphicsPipeline>(
-        texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
+        device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
         std::move(source_program), std::move(assembly_programs), infos,
         key.xfb_enabled != 0 ? &key.xfb_state : nullptr);
 }
@@ -508,10 +518,18 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
 
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
+
+    u32 num_storage_buffers{};
+    for (const auto& desc : program.info.storage_buffers_descriptors) {
+        num_storage_buffers += desc.count;
+    }
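+    // Compute only consumes the storage buffer flag, so a default RuntimeInfo
+    // with that single field set is sufficient.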
+    Shader::RuntimeInfo info;
+    info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+
     OGLAssemblyProgram asm_program;
     OGLProgram source_program;
     if (device.UseAssemblyShaders()) {
-        const std::string code{EmitGLASM(profile, program)};
+        const std::string code{EmitGLASM(profile, info, program)};
         asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
     } else {
         const std::vector<u32> code{EmitSPIRV(profile, program)};
@@ -519,7 +537,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
         AddShader(GL_COMPUTE_SHADER, source_program.handle, code);
         LinkProgram(source_program.handle);
     }
-    return std::make_unique<ComputePipeline>(texture_cache, buffer_cache, gpu_memory,
+    return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
                                              kepler_compute, program_manager, program.info,
                                              std::move(source_program), std::move(asm_program));
 }