diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 308d013d63..3e8bd00604 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -133,8 +133,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_shader_util.h
     renderer_opengl/gl_state_tracker.cpp
     renderer_opengl/gl_state_tracker.h
-    renderer_opengl/gl_stream_buffer.cpp
-    renderer_opengl/gl_stream_buffer.h
+    renderer_opengl/gl_staging_buffer_pool.cpp
+    renderer_opengl/gl_staging_buffer_pool.h
     renderer_opengl/gl_texture_cache.cpp
     renderer_opengl/gl_texture_cache.h
     renderer_opengl/gl_texture_cache_base.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 65494097b1..08bc66aaa6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -465,7 +465,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
 
     if (committed_ranges.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-
             async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
         return;
@@ -526,7 +525,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     committed_ranges.clear();
     if (downloads.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-
             async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
         return;
@@ -678,7 +676,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const u32 size = index_buffer.size;
     const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
     if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
-        if constexpr (USE_MEMORY_MAPS) {
+        if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
             auto upload_staging = runtime.UploadStagingBuffer(size);
             std::array<BufferCopy, 1> copies{
                 {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
@@ -1446,7 +1444,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
 template <class P>
 void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                                   std::span<BufferCopy> copies) {
-    if constexpr (USE_MEMORY_MAPS) {
+    if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
         MappedUploadMemory(buffer, total_size_bytes, copies);
     } else {
         ImmediateUploadMemory(buffer, largest_copy, copies);
@@ -1457,7 +1455,7 @@ template <class P>
 void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
                                            [[maybe_unused]] u64 largest_copy,
                                            [[maybe_unused]] std::span<const BufferCopy> copies) {
-    if constexpr (!USE_MEMORY_MAPS) {
+    if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
         std::span<u8> immediate_buffer;
         for (const BufferCopy& copy : copies) {
             std::span<const u8> upload_span;
@@ -1516,7 +1514,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
     auto& buffer = slot_buffers[buffer_id];
     SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
 
-    if constexpr (USE_MEMORY_MAPS) {
+    if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
         auto upload_staging = runtime.UploadStagingBuffer(copy_size);
         std::array copies{BufferCopy{
             .src_offset = upload_staging.offset,
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index ac00d4d9d7..7c6ef49d51 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -103,6 +103,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
     static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
     static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
+    static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
 
     static constexpr BufferId NULL_BUFFER_ID{0};
 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 6af4ae7931..724e53edbb 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
     return views.back().texture.handle;
 }
 
-BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
-    : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
+                                       StagingBufferPool& staging_buffer_pool_)
+    : device{device_}, staging_buffer_pool{staging_buffer_pool_},
+      has_fast_buffer_sub_data{device.HasFastBufferSubData()},
       use_assembly_shaders{device.UseAssemblyShaders()},
       has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
       stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
@@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
     }();
 }
 
+StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+    return staging_buffer_pool.RequestUploadBuffer(size);
+}
+
+StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+    return staging_buffer_pool.RequestDownloadBuffer(size);
+}
+
 u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
     if (device.CanReportMemoryUsage()) {
         return device_access_memory - device.GetCurrentDedicatedVideoMemory();
@@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
     return 2_GiB;
 }
 
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+    if (barrier) {
+        PreCopyBarrier();
+    }
+    for (const VideoCommon::BufferCopy& copy : copies) {
+        glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
+                                 static_cast<GLintptr>(copy.dst_offset),
+                                 static_cast<GLsizeiptr>(copy.size));
+    }
+    if (barrier) {
+        PostCopyBarrier();
+    }
+}
+
+void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+    CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
+}
+
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+    CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
+}
+
 void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
                                     std::span<const VideoCommon::BufferCopy> copies) {
-    for (const VideoCommon::BufferCopy& copy : copies) {
-        glCopyNamedBufferSubData(
-            src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
-            static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
-    }
+    CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
+}
+
+void BufferCacheRuntime::PreCopyBarrier() {
+    // TODO: Replace GL_ALL_BARRIER_BITS with only the barrier bits the copy actually needs
+    glMemoryBarrier(GL_ALL_BARRIER_BITS);
+}
+
+void BufferCacheRuntime::PostCopyBarrier() {
+    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+}
+
+void BufferCacheRuntime::Finish() {
+    glFinish();
 }
 
 void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 18d3c3ac09..a24991585f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -12,7 +12,7 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
 
 namespace OpenGL {
 
@@ -60,11 +60,28 @@ class BufferCacheRuntime {
 public:
     static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
 
-    explicit BufferCacheRuntime(const Device& device_);
+    explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
+
+    [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
+
+    [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
+
+    void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+    void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+
+    void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
 
     void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
                     std::span<const VideoCommon::BufferCopy> copies);
 
+    void PreCopyBarrier();
+    void PostCopyBarrier();
+    void Finish();
+
     void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
 
     void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@@ -169,6 +186,7 @@ private:
     };
 
     const Device& device;
+    StagingBufferPool& staging_buffer_pool;
 
     bool has_fast_buffer_sub_data = false;
     bool use_assembly_shaders = false;
@@ -201,7 +219,7 @@ private:
 struct BufferCacheParams {
     using Runtime = OpenGL::BufferCacheRuntime;
     using Buffer = OpenGL::Buffer;
-    using Async_Buffer = u32;
+    using Async_Buffer = OpenGL::StagingBufferMap;
     using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
 
     static constexpr bool IS_OPENGL = true;
@@ -209,9 +227,12 @@ struct BufferCacheParams {
     static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
     static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
-    static constexpr bool USE_MEMORY_MAPS = false;
+    static constexpr bool USE_MEMORY_MAPS = true;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
+
+    // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
+    static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
 };
 
 using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f5baa0f3cb..fc711c44ae 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -24,6 +24,7 @@
 #include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
@@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
                                    StateTracker& state_tracker_)
     : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
       program_manager(program_manager_), state_tracker(state_tracker_),
-      texture_cache_runtime(device, program_manager, state_tracker),
-      texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+      texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
+      texture_cache(texture_cache_runtime, *this),
+      buffer_cache_runtime(device, staging_buffer_pool),
       buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
       shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
                    state_tracker, gpu.ShaderNotify()),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 410d8ffc5e..a73ad15c16 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -230,6 +230,7 @@ private:
     ProgramManager& program_manager;
     StateTracker& state_tracker;
 
+    StagingBufferPool staging_buffer_pool;
     TextureCacheRuntime texture_cache_runtime;
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
new file mode 100644
index 0000000000..72b1dbb325
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp
@@ -0,0 +1,170 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <span>
+#include <utility>
+
+#include <glad/glad.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
+
+namespace OpenGL {
+
+/// Creates the sync object this map was handed, if any. StagingBuffers::FindBuffer skips a
+/// buffer whose sync exists but is not yet signaled, so it stays out of reuse until then.
+StagingBufferMap::~StagingBufferMap() {
+    if (sync) {
+        sync->Create();
+    }
+}
+
+/// Stores the flags RequestBuffer later passes to glNamedBufferStorage (storage_flags_) and
+/// glMapNamedBufferRange (map_flags_); GL_MAP_PERSISTENT_BIT is added to both at that point.
+StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+    : storage_flags{storage_flags_}, map_flags{map_flags_} {}
+
+StagingBuffers::~StagingBuffers() = default;
+
+/// Hands out a map over the first requested_size bytes of a pooled buffer.
+/// @param requested_size Number of bytes the caller needs
+/// @param insert_fence When true the map carries the buffer's sync object, which the map's
+///                     destructor creates; when false the map's sync is null
+/// @return Map over the buffer; its offset member keeps the default of 0
+/// @note NOTE(review): the sync pointer refers to an element of `syncs`, and RequestBuffer
+///       appends to that vector. A fenced map still alive when a later request grows the pool
+///       would be left with a dangling pointer - confirm callers never overlap maps that way.
+StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
+    const size_t index = RequestBuffer(requested_size);
+    OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+    return StagingBufferMap{
+        .mapped_span = std::span(maps[index], requested_size),
+        .sync = sync,
+        .buffer = buffers[index].handle,
+    };
+}
+
+/// Returns the index of a buffer holding at least requested_size bytes: a reusable one located
+/// by FindBuffer, or otherwise a new buffer of exactly requested_size bytes that is allocated
+/// with glNamedBufferStorage and persistently mapped, with the pointer stored in `maps`.
+size_t StagingBuffers::RequestBuffer(size_t requested_size) {
+    if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+        return *index;
+    }
+
+    OGLBuffer& buffer = buffers.emplace_back();
+    buffer.Create();
+    glNamedBufferStorage(buffer.handle, requested_size, nullptr,
+                         storage_flags | GL_MAP_PERSISTENT_BIT);
+    maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
+                                                          map_flags | GL_MAP_PERSISTENT_BIT)));
+
+    syncs.emplace_back();
+    sizes.push_back(requested_size);
+
+    // syncs, buffers, maps and sizes are parallel arrays indexed by the same slot
+    ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+           maps.size() == sizes.size());
+
+    return buffers.size() - 1;
+}
+
+/// Finds the smallest pooled buffer of at least requested_size bytes that can be reused.
+/// A buffer whose sync has a non-zero handle is only accepted once that sync reports signaled,
+/// and the sync is released when that happens.
+/// @return Index of the chosen buffer, or std::nullopt when no buffer qualifies
+std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
+    size_t smallest_buffer = std::numeric_limits<size_t>::max();
+    std::optional<size_t> found;
+    const size_t num_buffers = sizes.size();
+    for (size_t index = 0; index < num_buffers; ++index) {
+        const size_t buffer_size = sizes[index];
+        // Too small, or no better than the candidate already picked
+        if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+            continue;
+        }
+        if (syncs[index].handle != 0) {
+            if (!syncs[index].IsSignaled()) {
+                continue;
+            }
+            syncs[index].Release();
+        }
+        smallest_buffer = buffer_size;
+        found = index;
+    }
+    return found;
+}
+
+/// Allocates the STREAM_BUFFER_SIZE-byte stream buffer, maps it persistently and coherently
+/// for writing, and creates every fence in `fences`.
+StreamBuffer::StreamBuffer() {
+    static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+    buffer.Create();
+    glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+    glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+    mapped_pointer =
+        static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+    for (OGLSync& sync : fences) {
+        sync.Create();
+    }
+}
+
+/// Reserves size bytes of the stream buffer, wrapping back to offset 0 when the request would
+/// run past STREAM_BUFFER_SIZE.
+/// @param size Number of bytes to reserve; asserted to be smaller than REGION_SIZE
+/// @return Span over the reserved mapped bytes, and the byte offset of that span in the buffer
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+    ASSERT(size < REGION_SIZE);
+    // Create fences for the regions crossed by the previous request
+    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+         ++region) {
+        fences[region].Create();
+    }
+    used_iterator = iterator;
+
+    // Block on, then drop, the fences of the regions this request moves into
+    for (size_t region = Region(free_iterator) + 1,
+                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+         region < region_end; ++region) {
+        glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+        fences[region].Release();
+    }
+    if (iterator + size >= free_iterator) {
+        free_iterator = iterator + size;
+    }
+    if (iterator + size > STREAM_BUFFER_SIZE) {
+        // Out of room: fence off the tail regions and restart from the beginning
+        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+            fences[region].Create();
+        }
+        used_iterator = 0;
+        iterator = 0;
+        free_iterator = size;
+
+        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+            glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+            fences[region].Release();
+        }
+    }
+    const size_t offset = iterator;
+    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+    return {std::span(mapped_pointer + offset, size), offset};
+}
+
+/// Requests an upload map from the upload pool with a fence attached (insert_fence = true).
+StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
+    return upload_buffers.RequestMap(size, true);
+}
+
+/// Requests a download map from the download pool without a fence (insert_fence = false).
+StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
+    return download_buffers.RequestMap(size, false);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
similarity index 54%
rename from src/video_core/renderer_opengl/gl_stream_buffer.h
rename to src/video_core/renderer_opengl/gl_staging_buffer_pool.h
index 8fe927aaf5..2c467be3d9 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h
@@ -4,8 +4,10 @@
 #pragma once
 
 #include <array>
+#include <optional>
 #include <span>
 #include <utility>
+#include <vector>
 
 #include <glad/glad.h>
 
@@ -17,6 +19,33 @@ namespace OpenGL {
 
 using namespace Common::Literals;
 
+struct StagingBufferMap {
+    ~StagingBufferMap(); // Creates *sync when sync is non-null
+
+    std::span<u8> mapped_span; // CPU-visible bytes of the mapped staging buffer
+    size_t offset = 0;
+    OGLSync* sync = nullptr; // Not owned; null when the map carries no fence
+    GLuint buffer = 0;       // GL name of the backing buffer
+};
+
+struct StagingBuffers {
+    explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+    ~StagingBuffers();
+
+    StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
+
+    size_t RequestBuffer(size_t requested_size);
+
+    std::optional<size_t> FindBuffer(size_t requested_size);
+
+    std::vector<OGLSync> syncs;
+    std::vector<OGLBuffer> buffers;
+    std::vector<u8*> maps;
+    std::vector<size_t> sizes;
+    GLenum storage_flags;
+    GLenum map_flags;
+};
+
 class StreamBuffer {
     static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
     static constexpr size_t NUM_SYNCS = 16;
@@ -48,4 +77,17 @@ private:
     std::array<OGLSync, NUM_SYNCS> fences;
 };
 
+class StagingBufferPool {
+public:
+    StagingBufferPool() = default;
+    ~StagingBufferPool() = default;
+
+    [[nodiscard]] StagingBufferMap RequestUploadBuffer(size_t size);   // Map carries a fence
+    [[nodiscard]] StagingBufferMap RequestDownloadBuffer(size_t size); // Map carries no fence
+
+private:
+    StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+    StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
+};
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
deleted file mode 100644
index 2005c8993d..0000000000
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <array>
-#include <memory>
-#include <span>
-
-#include <glad/glad.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
-
-namespace OpenGL {
-
-StreamBuffer::StreamBuffer() {
-    static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
-    buffer.Create();
-    glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
-    glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
-    mapped_pointer =
-        static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
-    for (OGLSync& sync : fences) {
-        sync.Create();
-    }
-}
-
-std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
-    ASSERT(size < REGION_SIZE);
-    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
-         ++region) {
-        fences[region].Create();
-    }
-    used_iterator = iterator;
-
-    for (size_t region = Region(free_iterator) + 1,
-                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
-         region < region_end; ++region) {
-        glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
-        fences[region].Release();
-    }
-    if (iterator + size >= free_iterator) {
-        free_iterator = iterator + size;
-    }
-    if (iterator + size > STREAM_BUFFER_SIZE) {
-        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
-            fences[region].Create();
-        }
-        used_iterator = 0;
-        iterator = 0;
-        free_iterator = size;
-
-        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
-            glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
-            fences[region].Release();
-        }
-    }
-    const size_t offset = iterator;
-    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
-    return {std::span(mapped_pointer + offset, size), offset};
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 1e08238362..ad87f3e80c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -451,19 +451,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
         return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
     }
 }
-
 } // Anonymous namespace
 
-ImageBufferMap::~ImageBufferMap() {
-    if (sync) {
-        sync->Create();
-    }
-}
-
 TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
-                                         StateTracker& state_tracker_)
-    : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
-      format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
+                                         StateTracker& state_tracker_,
+                                         StagingBufferPool& staging_buffer_pool_)
+    : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
+      util_shaders(program_manager), format_conversion_pass{util_shaders},
+      resolution{Settings::values.resolution_info} {
     static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
     for (size_t i = 0; i < TARGETS.size(); ++i) {
         const GLenum target = TARGETS[i];
@@ -553,12 +548,12 @@ void TextureCacheRuntime::Finish() {
     glFinish();
 }
 
-ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
-    return upload_buffers.RequestMap(size, true);
+StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+    return staging_buffer_pool.RequestUploadBuffer(size);
 }
 
-ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
-    return download_buffers.RequestMap(size, false);
+StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+    return staging_buffer_pool.RequestDownloadBuffer(size);
 }
 
 u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
@@ -643,7 +638,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
                            is_linear ? GL_LINEAR : GL_NEAREST);
 }
 
-void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
                                                 std::span<const SwizzleParameters> swizzles) {
     switch (image.info.type) {
     case ImageType::e2D:
@@ -685,64 +680,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
     return device.HasASTC();
 }
 
-TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
-    : storage_flags{storage_flags_}, map_flags{map_flags_} {}
-
-TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
-
-ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
-                                                               bool insert_fence) {
-    const size_t index = RequestBuffer(requested_size);
-    OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
-    return ImageBufferMap{
-        .mapped_span = std::span(maps[index], requested_size),
-        .sync = sync,
-        .buffer = buffers[index].handle,
-    };
-}
-
-size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
-    if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
-        return *index;
-    }
-
-    OGLBuffer& buffer = buffers.emplace_back();
-    buffer.Create();
-    glNamedBufferStorage(buffer.handle, requested_size, nullptr,
-                         storage_flags | GL_MAP_PERSISTENT_BIT);
-    maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
-                                                          map_flags | GL_MAP_PERSISTENT_BIT)));
-
-    syncs.emplace_back();
-    sizes.push_back(requested_size);
-
-    ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
-           maps.size() == sizes.size());
-
-    return buffers.size() - 1;
-}
-
-std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
-    size_t smallest_buffer = std::numeric_limits<size_t>::max();
-    std::optional<size_t> found;
-    const size_t num_buffers = sizes.size();
-    for (size_t index = 0; index < num_buffers; ++index) {
-        const size_t buffer_size = sizes[index];
-        if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
-            continue;
-        }
-        if (syncs[index].handle != 0) {
-            if (!syncs[index].IsSignaled()) {
-                continue;
-            }
-            syncs[index].Release();
-        }
-        smallest_buffer = buffer_size;
-        found = index;
-    }
-    return found;
-}
-
 Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
              VAddr cpu_addr_)
     : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@@ -818,7 +755,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
     }
 }
 
-void Image::UploadMemory(const ImageBufferMap& map,
+void Image::UploadMemory(const StagingBufferMap& map,
                          std::span<const VideoCommon::BufferImageCopy> copies) {
     UploadMemory(map.buffer, map.offset, copies);
 }
@@ -865,7 +802,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b
     }
 }
 
-void Image::DownloadMemory(ImageBufferMap& map,
+void Image::DownloadMemory(StagingBufferMap& map,
                            std::span<const VideoCommon::BufferImageCopy> copies) {
     DownloadMemory(map.buffer, map.offset, copies);
 }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3e9b3302b6..1148b73d70 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -11,6 +11,7 @@
 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
 #include "video_core/renderer_opengl/util_shaders.h"
 #include "video_core/texture_cache/image_view_base.h"
 #include "video_core/texture_cache/texture_cache_base.h"
@@ -37,15 +38,6 @@ using VideoCommon::Region2D;
 using VideoCommon::RenderTargets;
 using VideoCommon::SlotVector;
 
-struct ImageBufferMap {
-    ~ImageBufferMap();
-
-    std::span<u8> mapped_span;
-    size_t offset = 0;
-    OGLSync* sync;
-    GLuint buffer;
-};
-
 struct FormatProperties {
     GLenum compatibility_class;
     bool compatibility_by_size;
@@ -74,14 +66,15 @@ class TextureCacheRuntime {
 
 public:
     explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
-                                 StateTracker& state_tracker);
+                                 StateTracker& state_tracker,
+                                 StagingBufferPool& staging_buffer_pool);
     ~TextureCacheRuntime();
 
     void Finish();
 
-    ImageBufferMap UploadStagingBuffer(size_t size);
+    StagingBufferMap UploadStagingBuffer(size_t size);
 
-    ImageBufferMap DownloadStagingBuffer(size_t size);
+    StagingBufferMap DownloadStagingBuffer(size_t size);
 
     u64 GetDeviceLocalMemory() const {
         return device_access_memory;
@@ -120,7 +113,7 @@ public:
                          const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
                          Tegra::Engines::Fermi2D::Operation operation);
 
-    void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+    void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
                                std::span<const VideoCommon::SwizzleParameters> swizzles);
 
     void InsertUploadMemoryBarrier();
@@ -149,35 +142,16 @@ public:
     }
 
 private:
-    struct StagingBuffers {
-        explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
-        ~StagingBuffers();
-
-        ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
-
-        size_t RequestBuffer(size_t requested_size);
-
-        std::optional<size_t> FindBuffer(size_t requested_size);
-
-        std::vector<OGLSync> syncs;
-        std::vector<OGLBuffer> buffers;
-        std::vector<u8*> maps;
-        std::vector<size_t> sizes;
-        GLenum storage_flags;
-        GLenum map_flags;
-    };
-
     const Device& device;
     StateTracker& state_tracker;
+    StagingBufferPool& staging_buffer_pool;
+
     UtilShaders util_shaders;
     FormatConversionPass format_conversion_pass;
 
     std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
     bool has_broken_texture_view_formats = false;
 
-    StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
-    StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
-
     OGLTexture null_image_1d_array;
     OGLTexture null_image_cube_array;
     OGLTexture null_image_3d;
@@ -213,7 +187,7 @@ public:
     void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
                       std::span<const VideoCommon::BufferImageCopy> copies);
 
-    void UploadMemory(const ImageBufferMap& map,
+    void UploadMemory(const StagingBufferMap& map,
                       std::span<const VideoCommon::BufferImageCopy> copies);
 
     void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
@@ -222,7 +196,8 @@ public:
     void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
                         std::span<const VideoCommon::BufferImageCopy> copies);
 
-    void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
+    void DownloadMemory(StagingBufferMap& map,
+                        std::span<const VideoCommon::BufferImageCopy> copies);
 
     GLuint StorageHandle() noexcept;
 
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 2c7ac210bc..544982d18b 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -19,6 +19,7 @@
 #include "video_core/host_shaders/pitch_unswizzle_comp.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/renderer_opengl/util_shaders.h"
 #include "video_core/texture_cache/accelerated_swizzle.h"
@@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
 
 UtilShaders::~UtilShaders() = default;
 
-void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
+void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
                              std::span<const VideoCommon::SwizzleParameters> swizzles) {
     static constexpr GLuint BINDING_INPUT_BUFFER = 0;
     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
@@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
     program_manager.RestoreGuestCompute();
 }
 
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
                                       std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
     static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
     program_manager.RestoreGuestCompute();
 }
 
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
                                       std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
 
@@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
     program_manager.RestoreGuestCompute();
 }
 
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
+void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
                               std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
     static constexpr GLuint BINDING_INPUT_BUFFER = 0;
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 9013808e71..feecd404ca 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -16,23 +16,23 @@ namespace OpenGL {
 class Image;
 class ProgramManager;
 
-struct ImageBufferMap;
+struct StagingBufferMap;
 
 class UtilShaders {
 public:
     explicit UtilShaders(ProgramManager& program_manager);
     ~UtilShaders();
 
-    void ASTCDecode(Image& image, const ImageBufferMap& map,
+    void ASTCDecode(Image& image, const StagingBufferMap& map,
                     std::span<const VideoCommon::SwizzleParameters> swizzles);
 
-    void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
+    void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
                              std::span<const VideoCommon::SwizzleParameters> swizzles);
 
-    void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
+    void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
                              std::span<const VideoCommon::SwizzleParameters> swizzles);
 
-    void PitchUpload(Image& image, const ImageBufferMap& map,
+    void PitchUpload(Image& image, const StagingBufferMap& map,
                      std::span<const VideoCommon::SwizzleParameters> swizzles);
 
     void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 5e96029051..b1f3c071f0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -157,6 +157,7 @@ struct BufferCacheParams {
     static constexpr bool USE_MEMORY_MAPS = true;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
     static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
+    static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
 };
 
 using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;