From 72541af3bc8973fba0f0e3d1302fcd0fa7fb9f06 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 3 Jan 2021 18:38:15 -0300
Subject: [PATCH] vulkan_memory_allocator: Add "download" memory usage hint

Allow users of the allocator to hint memory usage for downloads. This
removes the non-descriptive boolean passed for "host visible" or not
host visible memory commits, and uses an enum to hint device local,
upload and download usages.
---
 .../renderer_vulkan/vk_blit_screen.cpp        |  4 +-
 .../renderer_vulkan/vk_buffer_cache.cpp       |  8 ++--
 .../renderer_vulkan/vk_compute_pass.cpp       |  6 +--
 .../renderer_vulkan/vk_rasterizer.cpp         |  2 +-
 .../vk_staging_buffer_pool.cpp                | 42 ++++++++++++-------
 .../renderer_vulkan/vk_staging_buffer_pool.h  | 15 +++----
 .../renderer_vulkan/vk_texture_cache.cpp      |  6 +--
 .../vulkan_common/vulkan_memory_allocator.cpp | 24 ++++++++---
 .../vulkan_common/vulkan_memory_allocator.h   | 24 ++++++++---
 9 files changed, 86 insertions(+), 45 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index f06af06c82..3e3b895e0c 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -657,7 +657,7 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
     };
 
     buffer = device.GetLogical().CreateBuffer(ci);
-    buffer_commit = memory_allocator.Commit(buffer, true);
+    buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
 }
 
 void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
@@ -688,7 +688,7 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
             .pQueueFamilyIndices = nullptr,
             .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
         });
-        raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], false);
+        raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
         raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
             .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 94d3a91345..d8ad40a0f6 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -50,13 +50,13 @@ Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKSched
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     });
-    commit = memory_allocator.Commit(buffer, false);
+    commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
 }
 
 Buffer::~Buffer() = default;
 
 void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
-    const auto& staging = staging_pool.Request(data_size, true);
+    const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
     std::memcpy(staging.mapped_span.data(), data, data_size);
 
     scheduler.RequestOutsideRenderPassOperationContext();
@@ -98,7 +98,7 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
 }
 
 void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
-    auto staging = staging_pool.Request(data_size, true);
+    auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
     scheduler.RequestOutsideRenderPassOperationContext();
 
     const VkBuffer handle = Handle();
@@ -179,7 +179,7 @@ std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t s
 
 VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
     size = std::max(size, std::size_t(4));
-    const auto& empty = staging_pool.Request(size, false);
+    const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
         cmdbuf.FillBuffer(buffer, 0, size, 0);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index d38087f41c..5eb6a54be9 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -177,7 +177,7 @@ QuadArrayPass::~QuadArrayPass() = default;
 std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
     const u32 num_triangle_vertices = (num_vertices / 4) * 6;
     const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
-    const auto staging_ref = staging_buffer_pool.Request(staging_size, false);
+    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
@@ -224,7 +224,7 @@ Uint8Pass::~Uint8Pass() = default;
 std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
                                              u64 src_offset) {
     const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
-    const auto staging_ref = staging_buffer_pool.Request(staging_size, false);
+    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
@@ -286,7 +286,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
     const u32 num_tri_vertices = (num_vertices / 4) * 6;
 
     const std::size_t staging_size = num_tri_vertices * sizeof(u32);
-    const auto staging_ref = staging_buffer_pool.Request(staging_size, false);
+    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f38ead9c2b..f0a1118291 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -1445,7 +1445,7 @@ VkBuffer RasterizerVulkan::DefaultBuffer() {
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     });
-    default_buffer_commit = memory_allocator.Commit(default_buffer, false);
+    default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
 
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 44d332ed22..97fd41cc17 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
 
 #include <fmt/format.h>
 
+#include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -23,23 +24,24 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
 
 StagingBufferPool::~StagingBufferPool() = default;
 
-StagingBufferRef StagingBufferPool::Request(size_t size, bool host_visible) {
-    if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, host_visible)) {
+StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
+    if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
         return *ref;
     }
-    return CreateStagingBuffer(size, host_visible);
+    return CreateStagingBuffer(size, usage);
 }
 
 void StagingBufferPool::TickFrame() {
     current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
 
-    ReleaseCache(true);
-    ReleaseCache(false);
+    ReleaseCache(MemoryUsage::DeviceLocal);
+    ReleaseCache(MemoryUsage::Upload);
+    ReleaseCache(MemoryUsage::Download);
 }
 
 std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
-                                                                        bool host_visible) {
-    StagingBuffers& cache_level = GetCache(host_visible)[Common::Log2Ceil64(size)];
+                                                                        MemoryUsage usage) {
+    StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
 
     const auto is_free = [this](const StagingBuffer& entry) {
         return scheduler.IsFree(entry.tick);
@@ -58,7 +60,7 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
     return it->Ref();
 }
 
-StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, bool host_visible) {
+StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
     const u32 log2 = Common::Log2Ceil64(size);
     vk::Buffer buffer = device.GetLogical().CreateBuffer({
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@@ -76,10 +78,10 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, bool host_v
         ++buffer_index;
         buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
     }
-    MemoryCommit commit = memory_allocator.Commit(buffer, host_visible);
-    const std::span<u8> mapped_span = host_visible ? commit.Map() : std::span<u8>{};
+    MemoryCommit commit = memory_allocator.Commit(buffer, usage);
+    const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
 
-    StagingBuffer& entry = GetCache(host_visible)[log2].entries.emplace_back(StagingBuffer{
+    StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
         .buffer = std::move(buffer),
         .commit = std::move(commit),
         .mapped_span = mapped_span,
@@ -88,12 +90,22 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, bool host_v
     return entry.Ref();
 }
 
-StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(bool host_visible) {
-    return host_visible ? host_staging_buffers : device_staging_buffers;
+StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
+    switch (usage) {
+    case MemoryUsage::DeviceLocal:
+        return device_local_cache;
+    case MemoryUsage::Upload:
+        return upload_cache;
+    case MemoryUsage::Download:
+        return download_cache;
+    default:
+        UNREACHABLE_MSG("Invalid memory usage={}", usage);
+        return upload_cache;
+    }
 }
 
-void StagingBufferPool::ReleaseCache(bool host_visible) {
-    ReleaseLevel(GetCache(host_visible), current_delete_level);
+void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
+    ReleaseLevel(GetCache(usage), current_delete_level);
 }
 
 void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 1a43386095..d42918a47b 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -28,7 +28,7 @@ public:
                                VKScheduler& scheduler);
     ~StagingBufferPool();
 
-    StagingBufferRef Request(size_t size, bool host_visible);
+    StagingBufferRef Request(size_t size, MemoryUsage usage);
 
     void TickFrame();
 
@@ -56,13 +56,13 @@ private:
     static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
     using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
 
-    std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, bool host_visible);
+    std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
 
-    StagingBufferRef CreateStagingBuffer(size_t size, bool host_visible);
+    StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
 
-    StagingBuffersCache& GetCache(bool host_visible);
+    StagingBuffersCache& GetCache(MemoryUsage usage);
 
-    void ReleaseCache(bool host_visible);
+    void ReleaseCache(MemoryUsage usage);
 
     void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
 
@@ -70,8 +70,9 @@ private:
     MemoryAllocator& memory_allocator;
     VKScheduler& scheduler;
 
-    StagingBuffersCache host_staging_buffers;
-    StagingBuffersCache device_staging_buffers;
+    StagingBuffersCache device_local_cache;
+    StagingBuffersCache upload_cache;
+    StagingBuffersCache download_cache;
 
     size_t current_delete_level = 0;
     u64 buffer_index = 0;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 15cd50c247..a55f84045a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -554,7 +554,7 @@ void TextureCacheRuntime::Finish() {
 }
 
 ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
-    const auto staging_ref = staging_buffer_pool.Request(size, true);
+    const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
     return ImageBufferMap{
         .handle = staging_ref.buffer,
         .span = staging_ref.mapped_span,
@@ -788,9 +788,9 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
       image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
       aspect_mask(ImageAspectMask(info.format)) {
     if (image) {
-        commit = runtime.memory_allocator.Commit(image, false);
+        commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
     } else {
-        commit = runtime.memory_allocator.Commit(buffer, false);
+        commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
     }
     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
         flags |= VideoCommon::ImageFlagBits::Converted;
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index c1cf292afc..8bb15794d7 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -156,11 +156,13 @@ MemoryAllocator::MemoryAllocator(const Device& device_)
 
 MemoryAllocator::~MemoryAllocator() = default;
 
-MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, bool host_visible) {
+MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
     const u64 chunk_size = GetAllocationChunkSize(requirements.size);
 
     // When a host visible commit is asked, search for host visible and coherent, otherwise search
     // for a fast device local type.
+    // TODO: Deduce memory types from usage in a better way
+    const bool host_visible = IsHostVisible(usage);
     const VkMemoryPropertyFlags wanted_properties =
         host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
                      : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
@@ -176,14 +178,14 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, b
     return TryAllocCommit(requirements, wanted_properties).value();
 }
 
-MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, bool host_visible) {
-    auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible);
+MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) {
+    auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage);
     buffer.BindMemory(commit.Memory(), commit.Offset());
     return commit;
 }
 
-MemoryCommit MemoryAllocator::Commit(const vk::Image& image, bool host_visible) {
-    auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible);
+MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
+    auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), usage);
     image.BindMemory(commit.Memory(), commit.Offset());
     return commit;
 }
@@ -224,4 +226,16 @@ std::optional<MemoryCommit> MemoryAllocator::TryAllocCommit(
     return std::nullopt;
 }
 
+bool IsHostVisible(MemoryUsage usage) noexcept {
+    switch (usage) {
+    case MemoryUsage::DeviceLocal:
+        return false;
+    case MemoryUsage::Upload:
+    case MemoryUsage::Download:
+        return true;
+    }
+    UNREACHABLE_MSG("Invalid memory usage={}", usage);
+    return false;
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 69a6341e1b..efb32167aa 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -17,7 +17,16 @@ class Device;
 class MemoryMap;
 class MemoryAllocation;
 
-class MemoryCommit final {
+/// Hints and requirements for the backing memory type of a commit
+enum class MemoryUsage {
+    DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU
+    Upload,      ///< Requires a host visible memory type optimized for CPU to GPU uploads
+    Download,    ///< Requires a host visible memory type optimized for GPU to CPU readbacks
+};
+
+/// Ownership handle of a memory commitment.
+/// Points to a subregion of a memory allocation.
+class MemoryCommit {
 public:
     explicit MemoryCommit() noexcept = default;
     explicit MemoryCommit(const Device& device_, MemoryAllocation* allocation_,
@@ -54,7 +63,9 @@ private:
     std::span<u8> span;             ///< Host visible memory span. Empty if not queried before.
 };
 
-class MemoryAllocator final {
+/// Memory allocator container.
+/// Allocates and releases memory allocations on demand.
+class MemoryAllocator {
 public:
     explicit MemoryAllocator(const Device& device_);
     ~MemoryAllocator();
@@ -71,13 +82,13 @@ public:
      *
      * @returns A memory commit.
      */
-    MemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible);
+    MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage);
 
     /// Commits memory required by the buffer and binds it.
-    MemoryCommit Commit(const vk::Buffer& buffer, bool host_visible);
+    MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
 
     /// Commits memory required by the image and binds it.
-    MemoryCommit Commit(const vk::Image& image, bool host_visible);
+    MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
 
 private:
     /// Allocates a chunk of memory.
@@ -92,4 +103,7 @@ private:
     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
 };
 
+/// Returns true when a memory usage is guaranteed to be host visible.
+bool IsHostVisible(MemoryUsage usage) noexcept;
+
 } // namespace Vulkan