Multi-pass (chained) dispatch.
263{
266
267 for (
size_t i = 0; i <
m_bindings.size(); ++i) {
272 }
273
278
279 if (!ctx.execution_metadata.contains("pass_count") || !ctx.execution_metadata.contains("pc_updater")) {
282 std::source_location::current(),
283 "GpuDispatchCore: dispatch_core_chained requires 'pass_count' and 'pc_updater' in execution_metadata");
284 }
285
286 const auto pass_count = safe_any_cast_or_throw<uint32_t>(ctx.execution_metadata.at("pass_count"));
287 const auto& pc_updater = safe_any_cast_or_throw<std::function<void(uint32_t, void*)>>(ctx.execution_metadata.at("pc_updater"));
288
291 [&](uint32_t pass, std::vector<uint8_t>& pc_data) { pc_updater(pass, pc_data.data()); },
293 ctx.execution_metadata);
294
295 GpuChannelResult result;
298 return result;
299}
void readback_aux(GpuChannelResult &result)
Read back all OUTPUT bindings that have explicit size overrides into the aux map of a GpuChannelResult...
size_t largest_binding_data_element_count() const
GpuResourceManager m_resources
virtual std::array< uint32_t, 3 > calculate_dispatch_size(size_t total_elements, const DataStructureInfo &structure_info) const
Calculate workgroup dispatch counts from structure dimensions.
GpuShaderConfig m_gpu_config
std::vector< GpuBufferBinding > m_bindings
virtual void prepare_gpu_inputs(const std::vector< std::vector< double > > &channels, const DataStructureInfo &structure_info)
Marshal channel data into GPU input buffers.
virtual void on_before_gpu_dispatch(const std::vector< std::vector< double > > &channels, const DataStructureInfo &structure_info)
Called immediately before dispatch.
std::vector< float > m_staging_floats
std::vector< float > readback_primary(size_t float_count)
Read back the primary output buffer into a float vector.
void dispatch_batched(uint32_t pass_count, const std::array< uint32_t, 3 > &groups, const std::vector< GpuBufferBinding > &bindings, const std::function< void(uint32_t pass, std::vector< uint8_t > &)> &push_constant_updater, size_t push_constant_size, const std::unordered_map< std::string, std::any > &execution_metadata={})
void bind_descriptor(size_t index, const GpuBufferBinding &spec)
@ Runtime
General runtime operations (default fallback)
@ Yantra
DSP algorithms, computational units, matrix operations, Grammar.
size_t push_constant_size