/**
 * @brief Searches the physical device's memory types for one that is permitted
 *        by the type filter and carries every requested property flag.
 *
 * @param phys  Physical device whose memory properties are queried.
 * @param props Property flags that the chosen memory type must all have.
 *
 * NOTE(review): the `type_filter` parameter used below is declared on a line
 * that is not part of this excerpt, and the statement executed on a match is
 * not visible either — presumably it returns the index `i`; confirm.
 */
35 uint32_t find_memory_type(vk::PhysicalDevice phys,
37 vk::MemoryPropertyFlags props)
39 auto mem_props = phys.getMemoryProperties();
// Bit i of type_filter marks memory type i as acceptable; the second test
// requires propertyFlags to contain all bits of `props`, not merely some.
40 for (uint32_t i = 0; i < mem_props.memoryTypeCount; ++i) {
41 if ((type_filter & (1U << i))
42 && (mem_props.memoryTypes[i].propertyFlags & props) == props) {
// Reached when no memory type matched: reports the failure through the
// project's error<std::runtime_error> helper (presumably throws — confirm).
46 error<std::runtime_error>(
49 std::source_location::current(),
50 "GpuResourceManager: no suitable memory type found");
/**
 * @brief Releases the Vulkan objects held by a buffer slot and resets its fields.
 *
 * @param device Logical device used to unmap, destroy and free.
 * @param slot   Slot whose mapping, buffer and memory are released.
 *
 * NOTE(review): only the `mapped_ptr` guard is visible in this excerpt; whether
 * the destroy/free calls below are similarly guarded cannot be seen here.
 */
53 void free_slot(vk::Device device, VulkanBufferSlot& slot)
// Unmap only when the slot currently holds a host mapping.
55 if (slot.mapped_ptr) {
56 device.unmapMemory(slot.memory);
57 slot.mapped_ptr =
nullptr;
// Destroy the buffer and reset the handle to a default-constructed one.
60 device.destroyBuffer(slot.buffer);
61 slot.buffer = vk::Buffer {};
// Free the backing memory and reset the handle likewise.
64 device.freeMemory(slot.memory);
65 slot.memory = vk::DeviceMemory {};
// Mark the slot as holding no storage.
67 slot.allocated_bytes = 0;
/**
 * @brief Replaces a slot's storage with a freshly created, mapped storage buffer.
 *
 * Frees whatever the slot held, creates an exclusive storage buffer, allocates
 * memory of a host-visible and host-coherent type for it, binds the two, maps
 * the whole allocation and records the requested size.
 *
 * @param device    Logical device used for all Vulkan calls.
 * @param phys      Physical device consulted when choosing the memory type.
 * @param slot      Slot to (re)populate; its previous contents are released.
 * @param byte_size Size recorded in `slot.allocated_bytes`.
 *
 * NOTE(review): the assignment of `bi.size` is not visible in this excerpt —
 * presumably it is set from `byte_size`; confirm.
 */
70 void allocate_slot(vk::Device device, vk::PhysicalDevice phys,
71 VulkanBufferSlot& slot,
size_t byte_size)
// Always release the previous buffer/memory first; existing data is not copied over.
73 free_slot(device, slot);
75 vk::BufferCreateInfo bi;
77 bi.usage = vk::BufferUsageFlagBits::eStorageBuffer;
78 bi.sharingMode = vk::SharingMode::eExclusive;
79 slot.buffer = device.createBuffer(bi);
// The allocation size and the candidate memory types come from the driver's
// requirements for this buffer, not directly from byte_size.
81 auto req = device.getBufferMemoryRequirements(slot.buffer);
83 vk::MemoryAllocateInfo ai;
84 ai.allocationSize = req.size;
85 ai.memoryTypeIndex = find_memory_type(phys, req.memoryTypeBits,
86 vk::MemoryPropertyFlagBits::eHostVisible
87 | vk::MemoryPropertyFlagBits::eHostCoherent);
89 slot.memory = device.allocateMemory(ai);
90 device.bindBufferMemory(slot.buffer, slot.memory, 0);
// The mapping is stored on the slot and kept until free_slot unmaps it.
91 slot.mapped_ptr = device.mapMemory(slot.memory, 0, VK_WHOLE_SIZE);
// Records the requested size (byte_size), not req.size.
92 slot.allocated_bytes = byte_size;
// Fragment: three returns yielding the storage-image, combined-image-sampler and
// storage-buffer descriptor types. The function header and the conditions that
// select each return are not visible in this excerpt.
// NOTE(review): presumably the body of element_type_to_vk — confirm.
99 return vk::DescriptorType::eStorageImage;
101 return vk::DescriptorType::eCombinedImageSampler;
103 return vk::DescriptorType::eStorageBuffer;
// Fragment of a setup routine (presumably GpuResourceManager::initialise — the
// start of the signature is not visible in this excerpt).
121 const std::vector<GpuBufferBinding>& bindings)
// Message used when the shader at config.shader_path could not be loaded.
132 "GpuResourceManager: failed to load shader '{}'", config.
shader_path);
// Descriptor binding infos grouped by descriptor-set index. std::map keeps the
// keys ordered, so later iteration visits sets in ascending index order.
136 std::map<uint32_t, std::vector<Portal::Graphics::DescriptorBindingInfo>> by_set;
// First pass over the bindings: entries added here take their descriptor type
// from element_type_to_vk(et). The test that selects which bindings are handled
// in this pass is not visible in this excerpt.
137 for (
const auto&
b : bindings) {
138 const auto et =
b.element_type;
142 by_set[
b.set].push_back({
144 .binding =
b.binding,
145 .type = element_type_to_vk(et),
// Second pass: entries added here are always storage buffers. The selecting
// test is likewise not visible.
150 for (
const auto&
b : bindings) {
151 const auto et =
b.element_type;
155 by_set[
b.set].push_back({
157 .binding =
b.binding,
158 .type = vk::DescriptorType::eStorageBuffer,
// Flatten the map into a dense vector in ascending set order.
// NOTE(review): set_idx itself is discarded, so with non-contiguous set indices
// a vector position no longer equals the shader's set number — confirm the
// consumer expects dense sets.
163 std::vector<std::vector<Portal::Graphics::DescriptorBindingInfo>> descriptor_sets;
164 descriptor_sets.reserve(by_set.size());
165 for (
auto& [set_idx, set_bindings] : by_set)
166 descriptor_sets.push_back(std::move(set_bindings));
// Messages used when pipeline creation or descriptor-set allocation fails.
173 "GpuResourceManager: failed to create pipeline for '{}'",
183 "GpuResourceManager: failed to allocate descriptor sets");
// Create the private implementation object with one (initially empty) buffer
// slot per declared binding.
191 m_impl = std::make_unique<GpuResourceManagerImpl>();
192 m_impl->buffers.resize(bindings.size());
// Fragment of a teardown routine (header not visible in this excerpt): releases
// every buffer slot through free_slot, using the device obtained from `foundry`.
208 auto device = foundry.get_device();
211 for (
auto& slot :
m_impl->buffers) {
212 free_slot(device, slot);
// Fragment (presumably ensure_buffer — header not visible): makes sure the slot
// at `index` can hold `required_bytes`.
// NOTE(review): no bounds check on `index` is visible in this excerpt.
239 auto& vk_slot =
m_impl->buffers[index];
// The slot is already large enough; the body of this branch is not visible
// (presumably an early return — confirm).
240 if (vk_slot.allocated_bytes >= required_bytes) {
// Otherwise replace the slot's storage. allocate_slot frees the old buffer
// before creating the new one, so previous contents are not carried over.
245 allocate_slot(foundry.get_device(), foundry.get_physical_device(),
246 vk_slot, required_bytes);
// Fragment (one of upload / upload_raw — header not visible): copies byte_size
// bytes from `data` into the slot's host mapping.
// NOTE(review): no check is visible here that mapped_ptr is non-null or that
// byte_size <= vk_slot.allocated_bytes — confirm callers size the slot first.
253 auto& vk_slot =
m_impl->buffers[index];
254 std::memcpy(vk_slot.mapped_ptr, data, byte_size);
// Fragment (the other of upload / upload_raw — header not visible): copies
// byte_size bytes from `data` into the slot's host mapping.
// NOTE(review): no check is visible here that mapped_ptr is non-null or that
// byte_size <= vk_slot.allocated_bytes — confirm callers size the slot first.
259 auto& vk_slot =
m_impl->buffers[index];
260 std::memcpy(vk_slot.mapped_ptr, data, byte_size);
// Fragment (presumably download — header not visible): copies byte_size bytes
// from the slot's host mapping into `dest`.
// NOTE(review): no check is visible here that mapped_ptr is non-null or that
// byte_size <= vk_slot.allocated_bytes — confirm.
265 auto& vk_slot =
m_impl->buffers[index];
266 std::memcpy(dest, vk_slot.mapped_ptr, byte_size);
// Fragment (presumably bind_descriptor — header not visible): points a
// storage-buffer descriptor at the slot's buffer, from offset 0 for
// allocated_bytes bytes. The leading arguments of the call are not visible.
272 auto& vk_slot =
m_impl->buffers[index];
274 foundry.update_descriptor_buffer(
277 vk::DescriptorType::eStorageBuffer,
278 vk_slot.buffer, 0, vk_slot.allocated_bytes);
// Fragment (presumably bind_image_storage — rest of the signature not visible):
// writes a storage-image descriptor using the image's view in the General layout.
288 const std::shared_ptr<Core::VKImage>&
image,
297 foundry.update_descriptor_storage_image(
300 image->get_image_view(),
301 vk::ImageLayout::eGeneral);
// Fragment (presumably bind_image_sampled — rest of the signature not visible):
// writes an image descriptor using the image's view in the
// ShaderReadOnlyOptimal layout. Other call arguments are not visible here.
306 const std::shared_ptr<Core::VKImage>&
image,
316 foundry.update_descriptor_image(
319 image->get_image_view(),
321 vk::ImageLayout::eShaderReadOnlyOptimal);
// Fragment (presumably transition_image — function name line not visible):
// asks the backend to move `image` from old_layout to new_layout for the
// colour aspect.
325 const std::shared_ptr<Core::VKImage>&
image,
326 vk::ImageLayout old_layout,
327 vk::ImageLayout new_layout)
// NOTE(review): the two literal 1s are presumably mip-level and array-layer
// counts — confirm against transition_layout's signature.
332 backend.transition_layout(
336 1, 1, vk::ImageAspectFlagBits::eColor);
// Fragment (presumably the blocking dispatch — function name line not visible):
// records a single compute dispatch, adds host-read barriers on buffers, then
// submits and waits for completion.
344 const std::array<uint32_t, 3>& groups,
345 const std::vector<GpuBufferBinding>& bindings,
346 const uint8_t* push_constant_data,
347 size_t push_constant_size)
352 auto cmd_id = foundry.begin_commands(
// Bind pipeline state together with the caller-supplied push constants.
355 compute_press.bind_all(
357 push_constant_data, push_constant_size);
// groups holds the workgroup counts for x, y and z.
359 compute_press.dispatch(cmd_id, groups[0], groups[1], groups[2]);
// Per binding: make shader writes to buffers[i] visible to host reads. The test
// on `et` that decides which bindings get a barrier is not visible here.
// NOTE(review): buffers is indexed by position in `bindings`; assumes this list
// is no longer than the one the buffers vector was sized from — confirm.
361 for (
size_t i = 0; i < bindings.size(); ++i) {
362 const auto et = bindings[i].element_type;
368 foundry.buffer_barrier(
370 m_impl->buffers[i].buffer,
371 vk::AccessFlagBits::eShaderWrite,
372 vk::AccessFlagBits::eHostRead,
373 vk::PipelineStageFlagBits::eComputeShader,
374 vk::PipelineStageFlagBits::eHost);
// Submit the recorded commands and block until they finish.
378 foundry.submit_and_wait(cmd_id);
// Fragment (presumably dispatch_batched — function name line and the pass_count
// parameter are not visible): runs pass_count dispatches, grouping several
// passes into each submitted command buffer.
383 const std::array<uint32_t, 3>& groups,
384 const std::vector<GpuBufferBinding>& bindings,
385 const std::function<
void(uint32_t pass, std::vector<uint8_t>&)>& push_constant_updater,
386 size_t push_constant_size,
387 const std::unordered_map<std::string, std::any>& execution_metadata)
// Total workgroups launched by one pass.
392 const uint32_t workgroups_per_pass = groups[0] * groups[1] * groups[2];
// Default batch size: as many passes as fit in 65536 workgroups, at least one.
// The inner std::max also prevents division by zero.
394 const uint32_t default_passes = std::max(1U, 65536U / std::max(1U, workgroups_per_pass));
// The "passes_per_batch" metadata entry, when present, overrides the default.
// NOTE(review): no clamp is visible here; if the metadata yields 0, the batch
// loop below advances `base` by 0 and would not terminate — confirm.
395 const uint32_t passes_per_batch = [&] {
396 auto it = execution_metadata.find(
"passes_per_batch");
397 if (it != execution_metadata.end())
398 return safe_any_cast_or_default<uint32_t>(it->second, default_passes);
399 return default_passes;
// One command buffer per batch covering passes [base, batch_end).
// NOTE(review): base + passes_per_batch is uint32_t arithmetic and can wrap
// for very large values.
402 for (uint32_t base = 0; base < pass_count; base += passes_per_batch) {
403 const uint32_t batch_end = std::min(base + passes_per_batch, pass_count);
405 auto cmd_id = foundry.begin_commands(
408 for (uint32_t pass = base; pass < batch_end; ++pass) {
// A fresh push-constant buffer is created for every pass and filled in by
// the caller's callback.
409 std::vector<uint8_t> pc_data(push_constant_size);
410 push_constant_updater(pass, pc_data);
412 compute_press.bind_all(
414 pc_data.data(), push_constant_size);
416 compute_press.dispatch(cmd_id, groups[0], groups[1], groups[2]);
// Compute-to-compute barriers so a later dispatch observes this one's buffer
// reads/writes. Enclosing conditions, if any, are not visible in this excerpt.
418 for (
size_t i = 0; i < bindings.size(); ++i) {
420 foundry.buffer_barrier(
422 m_impl->buffers[i].buffer,
423 vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eShaderRead,
424 vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eShaderRead,
425 vk::PipelineStageFlagBits::eComputeShader,
426 vk::PipelineStageFlagBits::eComputeShader);
// Compute-to-host barriers making shader writes visible to host reads.
431 for (
size_t i = 0; i < bindings.size(); ++i) {
434 foundry.buffer_barrier(
436 m_impl->buffers[i].buffer,
437 vk::AccessFlagBits::eShaderWrite,
438 vk::AccessFlagBits::eHostRead,
439 vk::PipelineStageFlagBits::eComputeShader,
440 vk::PipelineStageFlagBits::eHost);
// Submit this batch and block until it completes.
444 foundry.submit_and_wait(cmd_id);
// Fragment (presumably dispatch_async — function name line not visible):
// records the same dispatch-plus-barrier sequence as the blocking variant but
// returns the result of submit_async instead of waiting.
449 const std::array<uint32_t, 3>& groups,
450 const std::vector<GpuBufferBinding>& bindings,
451 const uint8_t* push_constant_data,
452 size_t push_constant_size)
457 auto cmd_id = foundry.begin_commands(
// Bind pipeline state together with the caller-supplied push constants.
460 compute_press.bind_all(
462 push_constant_data, push_constant_size);
464 compute_press.dispatch(cmd_id, groups[0], groups[1], groups[2]);
// Per binding: make shader writes to buffers[i] visible to host reads. The test
// on `et` that decides which bindings get a barrier is not visible here.
466 for (
size_t i = 0; i < bindings.size(); ++i) {
467 const auto et = bindings[i].element_type;
473 foundry.buffer_barrier(
475 m_impl->buffers[i].buffer,
476 vk::AccessFlagBits::eShaderWrite,
477 vk::AccessFlagBits::eHostRead,
478 vk::PipelineStageFlagBits::eComputeShader,
479 vk::PipelineStageFlagBits::eHost);
// Hand the commands to the foundry without blocking; the value returned by
// submit_async is passed straight back to the caller.
483 return foundry.submit_async(cmd_id);
#define MF_ERROR(comp, ctx,...)
size_t buffer_allocated_bytes(size_t index) const
void upload_raw(size_t index, const uint8_t *data, size_t byte_size)
void upload(size_t index, const float *data, size_t byte_size)
std::vector< BufferSlot > m_buffer_slots
Portal::Graphics::ComputePipelineID m_pipeline_id
Portal::Graphics::ShaderID m_shader_id
std::vector< std::shared_ptr< Core::VKImage > > m_image_slots
Portal::Graphics::FenceID dispatch_async(const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const uint8_t *push_constant_data, size_t push_constant_size)
Submit a compute dispatch without blocking.
std::vector< Portal::Graphics::DescriptorSetID > m_descriptor_set_ids
void download(size_t index, float *dest, size_t byte_size)
void bind_image_storage(size_t index, const std::shared_ptr< Core::VKImage > &image, const GpuBufferBinding &spec)
Bind a storage image descriptor at the given slot index.
void dispatch_batched(uint32_t pass_count, const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const std::function< void(uint32_t pass, std::vector< uint8_t > &)> &push_constant_updater, size_t push_constant_size, const std::unordered_map< std::string, std::any > &execution_metadata={})
bool initialise(const GpuShaderConfig &config, const std::vector< GpuBufferBinding > &bindings)
void bind_image_sampled(size_t index, const std::shared_ptr< Core::VKImage > &image, vk::Sampler sampler, const GpuBufferBinding &spec)
Bind a combined image+sampler descriptor at the given slot index.
void transition_image(const std::shared_ptr< Core::VKImage > &image, vk::ImageLayout old_layout, vk::ImageLayout new_layout)
Transition a VKImage layout via an immediate command submission.
void ensure_buffer(size_t index, size_t required_bytes)
void dispatch(const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const uint8_t *push_constant_data, size_t push_constant_size)
void bind_descriptor(size_t index, const GpuBufferBinding &spec)
std::unique_ptr< GpuResourceManagerImpl > m_impl
@ BufferProcessing
Buffer processing (Buffers::BufferManager, processing chains)
@ Yantra
DSP algorithms, computational units, matrix operations, Grammar.
MAYAFLUX_API TextureLoom & get_texture_manager()
Get the global texture manager instance.
constexpr ShaderID INVALID_SHADER
MAYAFLUX_API ShaderFoundry & get_shader_foundry()
Get the global shader compiler instance.
constexpr ComputePipelineID INVALID_COMPUTE_PIPELINE
MAYAFLUX_API ComputePress & get_compute_press()
bool is_image(const fs::path &filepath)
ElementType
Element type the shader expects in this buffer.
Declares a single storage buffer the shader expects.
std::vector< VulkanBufferSlot > buffers
size_t push_constant_size
Plain-data description of the compute shader to dispatch.