34 uint32_t find_memory_type(vk::PhysicalDevice phys,
36 vk::MemoryPropertyFlags props)
38 auto mem_props = phys.getMemoryProperties();
39 for (uint32_t i = 0; i < mem_props.memoryTypeCount; ++i) {
40 if ((type_filter & (1U << i))
41 && (mem_props.memoryTypes[i].propertyFlags & props) == props) {
45 error<std::runtime_error>(
48 std::source_location::current(),
49 "GpuResourceManager: no suitable memory type found");
52 void free_slot(vk::Device device, VulkanBufferSlot& slot)
55 device.destroyBuffer(slot.buffer);
56 slot.buffer = vk::Buffer {};
59 device.freeMemory(slot.memory);
60 slot.memory = vk::DeviceMemory {};
62 slot.allocated_bytes = 0;
65 void allocate_slot(vk::Device device, vk::PhysicalDevice phys,
66 VulkanBufferSlot& slot,
size_t byte_size)
68 free_slot(device, slot);
70 vk::BufferCreateInfo bi;
72 bi.usage = vk::BufferUsageFlagBits::eStorageBuffer;
73 bi.sharingMode = vk::SharingMode::eExclusive;
74 slot.buffer = device.createBuffer(bi);
76 auto req = device.getBufferMemoryRequirements(slot.buffer);
78 vk::MemoryAllocateInfo ai;
79 ai.allocationSize = req.size;
80 ai.memoryTypeIndex = find_memory_type(phys, req.memoryTypeBits,
81 vk::MemoryPropertyFlagBits::eHostVisible
82 | vk::MemoryPropertyFlagBits::eHostCoherent);
84 slot.memory = device.allocateMemory(ai);
85 device.bindBufferMemory(slot.buffer, slot.memory, 0);
86 slot.allocated_bytes = byte_size;
89 void map_copy_unmap(vk::Device device, vk::DeviceMemory memory,
90 const void* src,
size_t byte_size)
92 void* mapped = device.mapMemory(memory, 0, byte_size);
93 std::memcpy(mapped, src, byte_size);
94 device.unmapMemory(memory);
// -- Fragment of GpuResourceManager::initialise(config, bindings) --
// NOTE(review): damaged extraction; the signature's first line, all
// braces, and most statements were lost. Visible remnants: failure
// messages for shader load / pipeline creation / descriptor-set
// allocation, then construction of the pimpl with one buffer slot per
// declared binding. Recover the full body from the repository.
111 const std::vector<GpuBufferBinding>& bindings)
123 "GpuResourceManager: failed to load shader '{}'", config.
shader_path);
132 "GpuResourceManager: failed to create pipeline for '{}'",
142 "GpuResourceManager: failed to allocate descriptor sets");
// Pimpl owns one VulkanBufferSlot per declared binding.
150 m_impl = std::make_unique<GpuResourceManagerImpl>();
151 m_impl->buffers.resize(bindings.size());
// -- Fragment of a teardown routine (presumably the destructor or a
//    shutdown method — TODO confirm) --
// NOTE(review): damaged extraction; surrounding lines lost. Visible
// behaviour: fetch the device, then free every buffer slot.
167 auto device = foundry.get_device();
170 for (
auto& slot :
m_impl->buffers) {
171 free_slot(device, slot);
// -- Fragment of ensure_buffer(index, required_bytes) --
// NOTE(review): damaged extraction. Visible logic: if the slot already
// holds enough bytes, presumably return early (the branch body is lost);
// otherwise reallocate to the required size.
198 auto& vk_slot =
m_impl->buffers[index];
199 if (vk_slot.allocated_bytes >= required_bytes) {
204 allocate_slot(foundry.get_device(), foundry.get_physical_device(),
205 vk_slot, required_bytes);
// -- Fragment of upload_raw(index, data, byte_size) --
// Copies `byte_size` host bytes into the slot's device memory via
// map/memcpy/unmap. No visible check that byte_size fits the
// allocation — may exist on lost lines; confirm.
213 auto& vk_slot =
m_impl->buffers[index];
214 map_copy_unmap(foundry.get_device(), vk_slot.memory, data, byte_size);
// -- Fragment of upload(index, data, byte_size) --
// Same copy path as upload_raw, for float input (per the member index:
// upload(size_t, const float*, size_t)).
220 auto& vk_slot =
m_impl->buffers[index];
221 map_copy_unmap(foundry.get_device(), vk_slot.memory, data, byte_size);
// -- Fragment of download(index, dest, byte_size) --
// Maps the whole allocation and copies `byte_size` bytes out to `dest`.
// NOTE(review): no bounds check (byte_size <= allocated_bytes) is
// visible here — it may sit on lines lost in extraction; confirm.
227 auto device = foundry.get_device();
228 auto& vk_slot =
m_impl->buffers[index];
230 void* mapped = device.mapMemory(vk_slot.memory, 0, VK_WHOLE_SIZE);
231 std::memcpy(dest, mapped, byte_size);
232 device.unmapMemory(vk_slot.memory);
// -- Fragment of bind_descriptor(index, spec) --
// Publishes the slot's buffer as a storage-buffer descriptor covering
// the full allocation (offset 0, allocated_bytes range). The descriptor
// set/binding arguments were lost in extraction.
238 auto& vk_slot =
m_impl->buffers[index];
240 foundry.update_descriptor_buffer(
243 vk::DescriptorType::eStorageBuffer,
244 vk_slot.buffer, 0, vk_slot.allocated_bytes);
// -- Fragment of bind_image_storage(index, image, spec) --
// Binds the image's view as a storage-image descriptor in eGeneral
// layout (required for storage-image access). Leading statements and
// some arguments were lost in extraction.
254 const std::shared_ptr<Core::VKImage>&
image,
263 foundry.update_descriptor_storage_image(
266 image->get_image_view(),
267 vk::ImageLayout::eGeneral);
// -- Fragment of bind_image_sampled(index, image, sampler, spec) --
// Binds the image's view as a sampled/combined descriptor in
// eShaderReadOnlyOptimal layout. The sampler argument (present in the
// member signature) sits on a line lost in extraction.
272 const std::shared_ptr<Core::VKImage>&
image,
282 foundry.update_descriptor_image(
285 image->get_image_view(),
287 vk::ImageLayout::eShaderReadOnlyOptimal);
// -- Fragment of transition_image(image, old_layout, new_layout) --
// Delegates to the backend for a colour-aspect layout transition with
// 1 mip level and 1 array layer; the image/layout arguments between
// lines 298 and 302 were lost in extraction.
291 const std::shared_ptr<Core::VKImage>&
image,
292 vk::ImageLayout old_layout,
293 vk::ImageLayout new_layout)
298 backend.transition_layout(
302 1, 1, vk::ImageAspectFlagBits::eColor);
// -- Fragment of dispatch(groups, bindings, push_constant_data,
//    push_constant_size) --
// NOTE(review): damaged extraction; braces and several statements lost.
// Visible flow: begin a command buffer, bind pipeline/descriptors plus
// push constants, record one compute dispatch, then emit a
// compute->host buffer barrier per binding before submit_and_wait.
310 const std::array<uint32_t, 3>& groups,
311 const std::vector<GpuBufferBinding>& bindings,
312 const uint8_t* push_constant_data,
313 size_t push_constant_size)
318 auto cmd_id = foundry.begin_commands(
321 compute_press.bind_all(
323 push_constant_data, push_constant_size);
325 compute_press.dispatch(cmd_id, groups[0], groups[1], groups[2]);
// Per-binding barrier: make shader writes visible to host reads.
// `et` (element_type) is read here; its use is on lines lost in
// extraction — presumably to skip non-buffer bindings; confirm.
327 for (
size_t i = 0; i < bindings.size(); ++i) {
328 const auto et = bindings[i].element_type;
334 foundry.buffer_barrier(
336 m_impl->buffers[i].buffer,
337 vk::AccessFlagBits::eShaderWrite,
338 vk::AccessFlagBits::eHostRead,
339 vk::PipelineStageFlagBits::eComputeShader,
340 vk::PipelineStageFlagBits::eHost);
344 foundry.submit_and_wait(cmd_id);
// -- Fragment of dispatch_batched(pass_count, groups, bindings,
//    push_constant_updater, push_constant_size, execution_metadata) --
// NOTE(review): damaged extraction; braces and several statements lost
// (including the lambda's closing `}();` after line 365 and the loop
// bodies' closing braces). Visible flow: pick a default batch size so a
// single submission stays near 65536 workgroups, allow an override via
// execution_metadata["passes_per_batch"], then for each batch record
// all passes with compute->compute barriers between passes and a final
// compute->host barrier per binding before submit_and_wait.
349 const std::array<uint32_t, 3>& groups,
350 const std::vector<GpuBufferBinding>& bindings,
351 const std::function<
void(uint32_t pass, std::vector<uint8_t>&)>& push_constant_updater,
352 size_t push_constant_size,
353 const std::unordered_map<std::string, std::any>& execution_metadata)
358 const uint32_t workgroups_per_pass = groups[0] * groups[1] * groups[2];
// Inner max() guards divide-by-zero; outer max() runs >=1 pass/batch.
360 const uint32_t default_passes = std::max(1U, 65536U / std::max(1U, workgroups_per_pass));
361 const uint32_t passes_per_batch = [&] {
362 auto it = execution_metadata.find(
"passes_per_batch");
363 if (it != execution_metadata.end())
364 return safe_any_cast_or_default<uint32_t>(it->second, default_passes);
365 return default_passes;
368 for (uint32_t base = 0; base < pass_count; base += passes_per_batch) {
369 const uint32_t batch_end = std::min(base + passes_per_batch, pass_count);
371 auto cmd_id = foundry.begin_commands(
374 for (uint32_t pass = base; pass < batch_end; ++pass) {
// Fresh push-constant payload per pass, filled by the caller's updater.
375 std::vector<uint8_t> pc_data(push_constant_size);
376 push_constant_updater(pass, pc_data);
378 compute_press.bind_all(
380 pc_data.data(), push_constant_size);
382 compute_press.dispatch(cmd_id, groups[0], groups[1], groups[2]);
// Inter-pass barrier: orders shader read/write hazards between
// consecutive passes within the same command buffer.
384 for (
size_t i = 0; i < bindings.size(); ++i) {
386 foundry.buffer_barrier(
388 m_impl->buffers[i].buffer,
389 vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eShaderRead,
390 vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eShaderRead,
391 vk::PipelineStageFlagBits::eComputeShader,
392 vk::PipelineStageFlagBits::eComputeShader);
// End-of-batch barrier: make shader writes visible to host reads.
397 for (
size_t i = 0; i < bindings.size(); ++i) {
400 foundry.buffer_barrier(
402 m_impl->buffers[i].buffer,
403 vk::AccessFlagBits::eShaderWrite,
404 vk::AccessFlagBits::eHostRead,
405 vk::PipelineStageFlagBits::eComputeShader,
406 vk::PipelineStageFlagBits::eHost);
410 foundry.submit_and_wait(cmd_id);
#define MF_ERROR(comp, ctx,...)
size_t buffer_allocated_bytes(size_t index) const
void upload_raw(size_t index, const uint8_t *data, size_t byte_size)
void upload(size_t index, const float *data, size_t byte_size)
std::vector< BufferSlot > m_buffer_slots
Portal::Graphics::ComputePipelineID m_pipeline_id
Portal::Graphics::ShaderID m_shader_id
std::vector< std::shared_ptr< Core::VKImage > > m_image_slots
std::vector< Portal::Graphics::DescriptorSetID > m_descriptor_set_ids
void download(size_t index, float *dest, size_t byte_size)
void bind_image_storage(size_t index, const std::shared_ptr< Core::VKImage > &image, const GpuBufferBinding &spec)
Bind a storage image descriptor at the given slot index.
void dispatch_batched(uint32_t pass_count, const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const std::function< void(uint32_t pass, std::vector< uint8_t > &)> &push_constant_updater, size_t push_constant_size, const std::unordered_map< std::string, std::any > &execution_metadata={})
bool initialise(const GpuShaderConfig &config, const std::vector< GpuBufferBinding > &bindings)
void bind_image_sampled(size_t index, const std::shared_ptr< Core::VKImage > &image, vk::Sampler sampler, const GpuBufferBinding &spec)
Bind a combined image+sampler descriptor at the given slot index.
void transition_image(const std::shared_ptr< Core::VKImage > &image, vk::ImageLayout old_layout, vk::ImageLayout new_layout)
Transition a VKImage layout via an immediate command submission.
void ensure_buffer(size_t index, size_t required_bytes)
void dispatch(const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const uint8_t *push_constant_data, size_t push_constant_size)
void bind_descriptor(size_t index, const GpuBufferBinding &spec)
std::unique_ptr< GpuResourceManagerImpl > m_impl
@ BufferProcessing
Buffer processing (Buffers::BufferManager, processing chains)
@ Yantra
DSP algorithms, computational units, matrix operations, Grammar.
MAYAFLUX_API TextureLoom & get_texture_manager()
Get the global texture manager instance.
constexpr ShaderID INVALID_SHADER
MAYAFLUX_API ShaderFoundry & get_shader_foundry()
Get the global shader compiler instance.
constexpr ComputePipelineID INVALID_COMPUTE_PIPELINE
MAYAFLUX_API ComputePress & get_compute_press()
bool is_image(const fs::path &filepath)
Declares a single storage buffer the shader expects.
std::vector< VulkanBufferSlot > buffers
size_t push_constant_size
Plain-data description of the compute shader to dispatch.