diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index c36ede8988..1ba544943c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -1,3 +1,146 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "core/core.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+namespace {
+
+const auto BufferUsage =
+    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
+    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
+
+const auto UploadPipelineStage =
+    vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
+    vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+    vk::PipelineStageFlagBits::eComputeShader;
+
+const auto UploadAccessBarriers =
+    vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
+    vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
+    vk::AccessFlagBits::eIndexRead;
+
+auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
+    return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
+}
+
+} // Anonymous namespace
+
+CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                                     CacheAddr cache_addr, std::size_t size)
+    : VideoCommon::BufferBlock{cache_addr, size} {
+    const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
+                                         BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
+                                             vk::BufferUsageFlagBits::eTransferDst,
+                                         vk::SharingMode::eExclusive, 0, nullptr);
+
+    const auto& dld{device.GetDispatchLoader()};
+    const auto dev{device.GetLogical()};
+    buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    buffer.commit = memory_manager.Commit(*buffer.handle, false);
+}
+
+CachedBufferBlock::~CachedBufferBlock() = default;
+
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                             const VKDevice& device, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+    : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
+                                                                   CreateStreamBuffer(device,
+                                                                                      scheduler)},
+      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+                                                                                staging_pool} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
+}
+
+const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
+    return buffer->GetHandle();
+}
+
+const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
+    size = std::max(size, std::size_t(4));
+    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
+        cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
+    });
+    return &*empty.handle;
+}
+
+void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                    const u8* data) {
+    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+    std::memcpy(staging.commit->Map(size), data, size);
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+                      size](auto cmdbuf, auto& dld) {
+        cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
+                                     offset, size)},
+            {}, dld);
+    });
+}
+
+void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                      u8* data) {
+    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+                      size](auto cmdbuf, auto& dld) {
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+                vk::PipelineStageFlagBits::eComputeShader,
+            vk::PipelineStageFlagBits::eTransfer, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
+                                     vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
+                                     VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
+            {}, dld);
+        cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
+    });
+    scheduler.Finish();
+
+    std::memcpy(data, staging.commit->Map(size), size);
+}
+
+void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                              std::size_t dst_offset, std::size_t size) {
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
+                      dst_offset, size](auto cmdbuf, auto& dld) {
+        cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
+                                     vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
+                                     VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
+             vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
+                                     dst_offset, size)},
+            {}, dld);
+    });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index bc6e584cf8..3f38eed0c9 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -3,3 +3,76 @@
 // Refer to the license.txt file included.
 
 #pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKMemoryManager;
+class VKScheduler;
+
+class CachedBufferBlock final : public VideoCommon::BufferBlock {
+public:
+    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                               CacheAddr cache_addr, std::size_t size);
+    ~CachedBufferBlock();
+
+    const vk::Buffer* GetHandle() const {
+        return &*buffer.handle;
+    }
+
+private:
+    VKBuffer buffer;
+};
+
+using Buffer = std::shared_ptr<CachedBufferBlock>;
+
+class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
+public:
+    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                           const VKDevice& device, VKMemoryManager& memory_manager,
+                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
+    ~VKBufferCache();
+
+    const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
+
+protected:
+    void WriteBarrier() override {}
+
+    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+
+    const vk::Buffer* ToHandle(const Buffer& buffer) override;
+
+    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                         const u8* data) override;
+
+    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                           u8* data) override;
+
+    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                   std::size_t dst_offset, std::size_t size) override;
+
+private:
+    const VKDevice& device;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_pool;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 62f1427f5a..d48d3b44ca 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,86 +3,144 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
-#include <memory>
 #include <optional>
+#include <tuple>
 #include <vector>
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 
 namespace Vulkan {
 
+namespace {
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
-                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
-                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
-    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
-                                                                                   pipeline_stage} {
-    CreateBuffers(memory_manager, usage);
-    ReserveWatches(WATCHES_INITIAL_RESERVE);
+constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
+
+std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
+                                  vk::MemoryPropertyFlags wanted) {
+    const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
+    for (u32 i = 0; i < properties.memoryTypeCount; i++) {
+        if (!(filter & (1 << i))) {
+            continue;
+        }
+        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
+            return i;
+        }
+    }
+    return {};
+}
+
+} // Anonymous namespace
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+                               vk::BufferUsageFlags usage)
+    : device{device}, scheduler{scheduler} {
+    CreateBuffers(usage);
+    ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
+    ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
 }
 
 VKStreamBuffer::~VKStreamBuffer() = default;
 
-std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
-    ASSERT(size <= buffer_size);
+std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
+    ASSERT(size <= STREAM_BUFFER_SIZE);
     mapped_size = size;
 
-    if (offset + size > buffer_size) {
-        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
-        // reset the state.
-        invalidation_mark = used_watches;
-        used_watches = 0;
+    if (alignment > 0) {
+        offset = Common::AlignUp(offset, alignment);
+    }
+
+    WaitPendingOperations(offset);
+
+    bool invalidated = false;
+    if (offset + size > STREAM_BUFFER_SIZE) {
+        // The buffer would overflow, save the amount of used watches and reset the state.
+        invalidation_mark = current_watch_cursor;
+        current_watch_cursor = 0;
         offset = 0;
-    }
 
-    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
-}
+        // Swap watches and reset waiting cursors.
+        std::swap(previous_watches, current_watches);
+        wait_cursor = 0;
+        wait_bound = 0;
 
-void VKStreamBuffer::Send(u64 size) {
-    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
-
-    if (invalidation_mark) {
-        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        // Ensure that we don't wait for uncommitted fences.
         scheduler.Flush();
-        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
-                      [&](auto& resource) { resource->Wait(); });
-        invalidation_mark = std::nullopt;
+
+        invalidated = true;
     }
 
-    if (used_watches + 1 >= watches.size()) {
-        // Ensure that there are enough watches.
-        ReserveWatches(WATCHES_RESERVE_CHUNK);
-    }
-    // Add a watch for this allocation.
-    watches[used_watches++]->Watch(scheduler.GetFence());
-
-    offset += size;
-}
-
-void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
-    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
-                                         nullptr);
-
     const auto dev = device.GetLogical();
     const auto& dld = device.GetDispatchLoader();
-    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
-    commit = memory_manager.Commit(*buffer, true);
-    mapped_pointer = commit->GetData();
+    const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
+    return {pointer, offset, invalidated};
 }
 
-void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
-    const std::size_t previous_size = watches.size();
-    watches.resize(previous_size + grow_size);
-    std::generate(watches.begin() + previous_size, watches.end(),
-                  []() { return std::make_unique<VKFenceWatch>(); });
+void VKStreamBuffer::Unmap(u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    const auto dev = device.GetLogical();
+    dev.unmapMemory(*memory, device.GetDispatchLoader());
+
+    offset += size;
+
+    if (current_watch_cursor + 1 >= current_watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
+    }
+    auto& watch = current_watches[current_watch_cursor++];
+    watch.upper_bound = offset;
+    watch.fence.Watch(scheduler.GetFence());
+}
+
+void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
+                                         0, nullptr);
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+
+    const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
+    // Prefer device local host visible allocations (this should hit AMD's pinned memory).
+    auto type = FindMemoryType(device, requirements.memoryTypeBits,
+                               vk::MemoryPropertyFlagBits::eHostVisible |
+                                   vk::MemoryPropertyFlagBits::eHostCoherent |
+                                   vk::MemoryPropertyFlagBits::eDeviceLocal);
+    if (!type) {
+        // Otherwise search for a host visible allocation.
+        type = FindMemoryType(device, requirements.memoryTypeBits,
+                              vk::MemoryPropertyFlagBits::eHostVisible |
+                                  vk::MemoryPropertyFlagBits::eHostCoherent);
+        ASSERT_MSG(type, "No host visible and coherent memory type found");
+    }
+    const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
+    memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
+
+    dev.bindBufferMemory(*buffer, *memory, 0, dld);
+}
+
+void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
+    watches.resize(watches.size() + grow_size);
+}
+
+void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
+    if (!invalidation_mark) {
+        return;
+    }
+    while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
+        auto& watch = previous_watches[wait_cursor];
+        wait_bound = watch.upper_bound;
+        watch.fence.Wait();
+        ++wait_cursor;
+    }
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 842e541625..187c0c612a 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -4,28 +4,24 @@
 
 #pragma once
 
-#include <memory>
 #include <optional>
 #include <tuple>
 #include <vector>
 
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 
 namespace Vulkan {
 
 class VKDevice;
 class VKFence;
 class VKFenceWatch;
-class VKResourceManager;
 class VKScheduler;
 
-class VKStreamBuffer {
+class VKStreamBuffer final {
 public:
-    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
-                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
-                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+                            vk::BufferUsageFlags usage);
     ~VKStreamBuffer();
 
     /**
@@ -34,39 +30,47 @@ public:
      * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
      * offset and a boolean that's true when buffer has been invalidated.
      */
-    std::tuple<u8*, u64, bool> Reserve(u64 size);
+    std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
 
     /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
-    void Send(u64 size);
+    void Unmap(u64 size);
 
-    vk::Buffer GetBuffer() const {
+    vk::Buffer GetHandle() const {
         return *buffer;
     }
 
 private:
+    struct Watch final {
+        VKFenceWatch fence;
+        u64 upper_bound{};
+    };
+
     /// Creates Vulkan buffer handles committing the required the required memory.
-    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+    void CreateBuffers(vk::BufferUsageFlags usage);
 
     /// Increases the amount of watches available.
-    void ReserveWatches(std::size_t grow_size);
+    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
+
+    void WaitPendingOperations(u64 requested_upper_bound);
 
     const VKDevice& device;                      ///< Vulkan device manager.
     VKScheduler& scheduler;                      ///< Command scheduler.
-    const u64 buffer_size;                       ///< Total size of the stream buffer.
     const vk::AccessFlags access;                ///< Access usage of this stream buffer.
     const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
 
-    UniqueBuffer buffer;   ///< Mapped buffer.
-    VKMemoryCommit commit; ///< Memory commit.
-    u8* mapped_pointer{};  ///< Pointer to the host visible commit
+    UniqueBuffer buffer;       ///< Mapped buffer.
+    UniqueDeviceMemory memory; ///< Memory allocation.
 
     u64 offset{};      ///< Buffer iterator.
     u64 mapped_size{}; ///< Size reserved for the current copy.
 
-    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
-    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
-    std::optional<std::size_t>
-        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+    std::vector<Watch> current_watches;           ///< Watches recorded in the current iteration.
+    std::size_t current_watch_cursor{};           ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
+
+    std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
+    std::size_t wait_cursor{};           ///< Last watch being waited for completion.
+    u64 wait_bound{};                    ///< Highest offset being watched for completion.
 };
 
 } // namespace Vulkan