8 : m_gpu_config(
std::move(config))
63 slot.resize(byte_size);
64 std::memcpy(slot.data(), data, byte_size);
75 std::shared_ptr<Core::VKImage>
image,
96 const std::vector<std::vector<double>>&,
102 const std::vector<std::vector<double>>& channels,
108 const size_t fallback_bytes = float_byte_size > 0
112 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
122 const auto et =
b.element_type;
131 std::vector<uint8_t> zeros(sz, 0);
138 switch (
b.element_type) {
151 if (img->get_current_layout() != vk::ImageLayout::eGeneral) {
153 vk::ImageLayout::eGeneral);
163 if (img->get_current_layout() != vk::ImageLayout::eShaderReadOnlyOptimal) {
165 vk::ImageLayout::eShaderReadOnlyOptimal);
172 if (!channels.empty()) {
173 const size_t raw_bytes = channels[0].size()
179 reinterpret_cast<const uint8_t*
>(channels[0].data()),
196 uint64_t sz_x = 0, sz_y = 0, sz_z = 0;
197 for (
const auto& dim : structure_info.
dimensions) {
216 static_cast<uint32_t
>((sz_x + ws[0] - 1) / ws[0]),
217 sz_y > 0 ?
static_cast<uint32_t
>((sz_y + ws[1] - 1) / ws[1]) : 1U,
218 sz_z > 0 ?
static_cast<uint32_t
>((sz_z + ws[2] - 1) / ws[2]) : 1U,
222 return {
static_cast<uint32_t
>((total_elements + ws[0] - 1) / ws[0]), 1U, 1U };
230 const std::vector<std::vector<double>>& channels,
236 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
260 const std::vector<std::vector<double>>& channels,
267 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
282 std::source_location::current(),
283 "GpuDispatchCore: dispatch_core_chained requires 'pass_count' and 'pc_updater' in execution_metadata");
286 const auto pass_count = safe_any_cast_or_throw<uint32_t>(ctx.
execution_metadata.at(
"pass_count"));
287 const auto& pc_updater = safe_any_cast_or_throw<std::function<void(uint32_t,
void*)>>(ctx.
execution_metadata.at(
"pc_updater"));
291 [&](uint32_t pass, std::vector<uint8_t>& pc_data) { pc_updater(pass, pc_data.data()); },
317 const size_t byte_size = std::min(float_count *
sizeof(
float), allocated);
318 std::vector<float> out(byte_size /
sizeof(
float));
325 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
336 std::vector<uint8_t> raw(sz);
338 result.
aux[i] = std::move(raw);
348 const std::vector<std::vector<double>>& channels,
357 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
361 all_inputs_staged =
false;
365 if (all_inputs_staged)
370 for (
const auto& ch : channels)
374 for (
const auto& ch : channels) {
382 size_t first_inout = SIZE_MAX;
383 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
387 && first_inout == SIZE_MAX)
390 if (first_inout != SIZE_MAX)
395 std::source_location::current(),
396 "GpuDispatchCore: no output buffer declared");
401 size_t max_bytes = 0;
403 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
410 return max_bytes /
sizeof(float);
GpuDispatchCore(GpuShaderConfig config)
void readback_aux(GpuChannelResult &result)
Read back all OUTPUT bindings that have explicit size overrides into the aux map of a GpuChannelResult.
std::vector< ImageBinding > m_image_bindings
size_t largest_binding_data_element_count() const
GpuResourceManager m_resources
virtual std::array< uint32_t, 3 > calculate_dispatch_size(size_t total_elements, const DataStructureInfo &structure_info) const
Calculate workgroup dispatch counts from structure dimensions.
bool is_gpu_ready() const
Query GPU readiness without attempting initialisation.
std::shared_ptr< Core::VKImage > get_output_image(size_t binding_index) const
Return the image registered at an IMAGE_STORAGE output binding.
GpuShaderConfig m_gpu_config
std::vector< uint8_t > m_push_constants
std::vector< std::vector< uint8_t > > m_binding_data
GpuChannelResult dispatch_core(const std::vector< std::vector< double > > &channels, const DataStructureInfo &structure_info)
Full single-pass dispatch.
void stage_passthrough(size_t binding_index, const void *data, size_t byte_size)
Stage raw bytes for a PASSTHROUGH binding before dispatch.
std::vector< GpuBufferBinding > m_bindings
bool ensure_gpu_ready()
Ensure GPU resources are initialised.
std::vector< size_t > m_output_size_overrides
void stage_image_storage(size_t binding_index, std::shared_ptr< Core::VKImage > image)
Register a VKImage for an IMAGE_STORAGE binding.
void stage_image_sampled(size_t binding_index, std::shared_ptr< Core::VKImage > image, vk::Sampler sampler)
Register a VKImage + sampler for an IMAGE_SAMPLED binding.
const GpuShaderConfig & gpu_config() const
GpuChannelResult dispatch_core_chained(const std::vector< std::vector< double > > &channels, const DataStructureInfo &structure_info, const ExecutionContext &ctx)
Multi-pass (chained) dispatch.
virtual void prepare_gpu_inputs(const std::vector< std::vector< double > > &channels, const DataStructureInfo &structure_info)
Marshal channel data into GPU input buffers.
std::vector< std::vector< uint8_t > > m_passthrough_bytes
virtual void on_before_gpu_dispatch(const std::vector< std::vector< double > > &channels, const DataStructureInfo &structure_info)
Called immediately before dispatch.
void set_output_size(size_t index, size_t byte_size)
Declare the byte capacity of an output binding independently of input data.
std::vector< float > m_staging_floats
std::vector< float > readback_primary(size_t float_count)
Read back the primary output buffer into a float vector.
void flatten_channels_to_staging(const std::vector< std::vector< double > > &channels, const DataStructureInfo &structure_info)
Flatten planar double channels into m_staging_floats.
virtual std::vector< GpuBufferBinding > declare_buffer_bindings() const
Declare the storage buffers the shader expects.
size_t find_first_output_index() const
void set_push_constants(const void *data, size_t bytes)
Set push constant data from a raw byte pointer.
size_t buffer_allocated_bytes(size_t index) const
void upload_raw(size_t index, const uint8_t *data, size_t byte_size)
void upload(size_t index, const float *data, size_t byte_size)
void download(size_t index, float *dest, size_t byte_size)
void bind_image_storage(size_t index, const std::shared_ptr< Core::VKImage > &image, const GpuBufferBinding &spec)
Bind a storage image descriptor at the given slot index.
void dispatch_batched(uint32_t pass_count, const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const std::function< void(uint32_t pass, std::vector< uint8_t > &)> &push_constant_updater, size_t push_constant_size, const std::unordered_map< std::string, std::any > &execution_metadata={})
bool initialise(const GpuShaderConfig &config, const std::vector< GpuBufferBinding > &bindings)
void bind_image_sampled(size_t index, const std::shared_ptr< Core::VKImage > &image, vk::Sampler sampler, const GpuBufferBinding &spec)
Bind a combined image+sampler descriptor at the given slot index.
void transition_image(const std::shared_ptr< Core::VKImage > &image, vk::ImageLayout old_layout, vk::ImageLayout new_layout)
Transition a VKImage layout via an immediate command submission.
void ensure_buffer(size_t index, size_t required_bytes)
void dispatch(const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const uint8_t *push_constant_data, size_t push_constant_size)
void bind_descriptor(size_t index, const GpuBufferBinding &spec)
@ BufferProcessing
Buffer processing (Buffers::BufferManager, processing chains)
@ Runtime
General runtime operations (default fallback)
@ Yantra
DSP algorithms, computational units, matrix operations, Grammar.
bool is_structured_modality(DataModality modality)
Check if a modality represents structured data (vectors, matrices).
bool is_image(const fs::path &filepath)
uint64_t get_total_elements() const
@ SPATIAL_Y
Spatial Y axis.
@ SPATIAL_Z
Spatial Z axis.
@ SPATIAL_X
Spatial X axis (images, tensors)
Kakshya::DataModality modality
std::vector< Kakshya::DataDimension > dimensions
Metadata about data structure for reconstruction.
std::unordered_map< std::string, std::any > execution_metadata
Arbitrary metadata parameters used by operations.
Context information controlling how a compute operation executes.
std::unordered_map< size_t, std::vector< uint8_t > > aux
std::vector< float > primary
Erased output of a GPU dispatch: reconstructed float data plus any raw auxiliary outputs keyed by binding index.
size_t push_constant_size
std::array< uint32_t, 3 > workgroup_size
Plain-data description of the compute shader to dispatch.