mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2025-01-14 23:34:07 +00:00
vulkan: Defer descriptor set work to the Vulkan thread
Move descriptor lookup and update code to a separate thread. Deferring this work takes it off the main GPU thread and allows descriptor layouts to be created on another thread. This slightly reduces the main thread's workload when new pipelines are encountered.
This commit is contained in:
parent
2f3c3dfc10
commit
ac8835659e
8 changed files with 69 additions and 79 deletions
|
@ -172,11 +172,12 @@ struct AstcPushConstants {
|
|||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool,
|
||||
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
|
||||
const DescriptorBankInfo& bank_info,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) {
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
|
||||
: device{device_} {
|
||||
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
|
@ -237,15 +238,6 @@ ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool,
|
|||
|
||||
ComputePass::~ComputePass() = default;
|
||||
|
||||
VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) {
|
||||
if (!descriptor_template) {
|
||||
return nullptr;
|
||||
}
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
update_descriptor_queue.Send(descriptor_template.address(), set);
|
||||
return set;
|
||||
}
|
||||
|
||||
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
|
@ -265,10 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
|
|||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
const VkBuffer buffer{staging.buffer};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) {
|
||||
scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
|
@ -276,6 +269,8 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
|
|||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||
};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
|
||||
|
@ -321,10 +316,10 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
|||
update_descriptor_queue.Acquire();
|
||||
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex,
|
||||
scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
|
||||
index_shift](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr u32 DISPATCH_SIZE = 1024;
|
||||
static constexpr VkMemoryBarrier WRITE_BARRIER{
|
||||
|
@ -333,7 +328,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
|||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||
};
|
||||
const std::array push_constants = {base_vertex, index_shift};
|
||||
const std::array push_constants{base_vertex, index_shift};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
|
||||
|
@ -353,7 +350,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
|
|||
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
|
||||
device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
|
||||
|
||||
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
||||
|
@ -451,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
|||
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
|
||||
sizeof(SWIZZLE_TABLE));
|
||||
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||
|
||||
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
|
||||
const VkPipelineLayout vk_layout = *layout;
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
|
||||
// To unswizzle the ASTC data
|
||||
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
||||
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
|
||||
scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z,
|
||||
block_dims, params, set](vk::CommandBuffer cmdbuf) {
|
||||
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
|
||||
params, descriptor_data](vk::CommandBuffer cmdbuf) {
|
||||
const AstcPushConstants uniforms{
|
||||
.blocks_dims = block_dims,
|
||||
.bytes_per_block_log2 = params.bytes_per_block_log2,
|
||||
|
@ -470,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
|||
.block_height = params.block_height,
|
||||
.block_height_mask = params.block_height_mask,
|
||||
};
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {});
|
||||
cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
|
||||
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -36,15 +36,14 @@ public:
|
|||
~ComputePass();
|
||||
|
||||
protected:
|
||||
VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
|
||||
|
||||
const Device& device;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_template;
|
||||
vk::PipelineLayout layout;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
private:
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
|
||||
private:
|
||||
vk::ShaderModule module;
|
||||
};
|
||||
|
||||
|
@ -99,7 +98,6 @@ public:
|
|||
private:
|
||||
void MakeDataBuffer();
|
||||
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
|
|
@ -18,21 +18,22 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
|
||||
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
Common::ThreadWorker* thread_worker, const Shader::Info& info_,
|
||||
vk::ShaderModule spv_module_)
|
||||
: update_descriptor_queue{update_descriptor_queue_}, info{info_},
|
||||
: device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
|
||||
spv_module(std::move(spv_module_)) {
|
||||
DescriptorLayoutBuilder builder{device.GetLogical()};
|
||||
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
auto func{[this, &descriptor_pool] {
|
||||
DescriptorLayoutBuilder builder{device.GetLogical()};
|
||||
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout();
|
||||
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
|
||||
descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout();
|
||||
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
|
||||
descriptor_update_template =
|
||||
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
|
||||
|
||||
auto func{[this, &device] {
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
|
@ -166,15 +167,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
|
||||
});
|
||||
}
|
||||
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
});
|
||||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set);
|
||||
scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) {
|
||||
|
||||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
const vk::Device& dev{device.GetLogical()};
|
||||
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
|
||||
descriptor_set, nullptr);
|
||||
});
|
||||
|
|
|
@ -40,6 +40,7 @@ public:
|
|||
VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
Shader::Info info;
|
||||
|
||||
|
|
|
@ -205,31 +205,31 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
|
|||
GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_,
|
||||
BufferCache& buffer_cache_, TextureCache& texture_cache_,
|
||||
const Device& device, DescriptorPool& descriptor_pool,
|
||||
const Device& device_, DescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
Common::ThreadWorker* worker_thread,
|
||||
RenderPassCache& render_pass_cache,
|
||||
const GraphicsPipelineCacheKey& key_,
|
||||
std::array<vk::ShaderModule, NUM_STAGES> stages,
|
||||
const std::array<const Shader::Info*, NUM_STAGES>& infos)
|
||||
: key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
|
||||
buffer_cache{buffer_cache_}, scheduler{scheduler_},
|
||||
: key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
|
||||
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
|
||||
std::ranges::transform(infos, stage_infos.begin(),
|
||||
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
|
||||
|
||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout();
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
|
||||
auto func{[this, &render_pass_cache, &descriptor_pool] {
|
||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout();
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
|
||||
|
||||
auto func{[this, &device, &render_pass_cache, builder] {
|
||||
const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
|
||||
pipeline_layout = builder.CreatePipelineLayout(set_layout);
|
||||
descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout);
|
||||
|
||||
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
|
||||
Validate();
|
||||
MakePipeline(device, render_pass);
|
||||
MakePipeline(render_pass);
|
||||
|
||||
std::lock_guard lock{build_mutex};
|
||||
is_built = true;
|
||||
|
@ -440,24 +440,22 @@ void GraphicsPipeline::ConfigureDraw() {
|
|||
build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
|
||||
});
|
||||
}
|
||||
if (scheduler.UpdateGraphicsPipeline(this)) {
|
||||
scheduler.Record([this](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||
});
|
||||
}
|
||||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set);
|
||||
|
||||
scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) {
|
||||
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
const vk::Device& dev{device.GetLogical()};
|
||||
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
|
||||
descriptor_set, nullptr);
|
||||
});
|
||||
}
|
||||
|
||||
void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) {
|
||||
void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
|
||||
FixedPipelineState::DynamicState dynamic{};
|
||||
if (!device.IsExtExtendedDynamicStateSupported()) {
|
||||
dynamic = key.state.dynamic_state;
|
||||
|
|
|
@ -109,19 +109,20 @@ private:
|
|||
|
||||
void ConfigureDraw();
|
||||
|
||||
void MakePipeline(const Device& device, VkRenderPass render_pass);
|
||||
void MakePipeline(VkRenderPass render_pass);
|
||||
|
||||
void Validate();
|
||||
|
||||
const GraphicsPipelineCacheKey key;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
const Device& device;
|
||||
TextureCache& texture_cache;
|
||||
BufferCache& buffer_cache;
|
||||
VKScheduler& scheduler;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
|
||||
void (*configure_func)(GraphicsPipeline*, bool);
|
||||
void (*configure_func)(GraphicsPipeline*, bool){};
|
||||
|
||||
std::vector<GraphicsPipelineCacheKey> transition_keys;
|
||||
std::vector<GraphicsPipeline*> transitions;
|
||||
|
|
|
@ -36,13 +36,4 @@ void VKUpdateDescriptorQueue::Acquire() {
|
|||
upload_start = payload_cursor;
|
||||
}
|
||||
|
||||
void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template,
|
||||
VkDescriptorSet set) {
|
||||
const void* const data = upload_start;
|
||||
const vk::Device* const logical = &device.GetLogical();
|
||||
scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
|
||||
logical->UpdateDescriptorSet(set, *update_template, data);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -39,7 +39,9 @@ public:
|
|||
|
||||
void Acquire();
|
||||
|
||||
void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set);
|
||||
const DescriptorUpdateEntry* UpdateData() const noexcept {
|
||||
return upload_start;
|
||||
}
|
||||
|
||||
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
|
||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||
|
|
Loading…
Reference in a new issue