MayaFlux 0.4.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
ShaderExecutionContext.hpp
Go to the documentation of this file.
1#pragma once
2
4
5namespace MayaFlux::Yantra {
6
7/**
8 * @class ShaderExecutionContext
9 * @brief Concrete GpuExecutionContext for a single fixed shader with fixed bindings.
10 *
11 * The standard path for attaching GPU dispatch to any ComputeOperation via
12 * ComputeOperation::set_gpu_backend(). Bindings are declared at construction
13 * or built incrementally through the fluent API. The owning ComputeOperation
14 * provides category identity, parameter system, and CPU fallback.
15 *
16 * Construction followed by fluent configuration:
17 * @code
18 * auto executor = std::make_shared<ShaderExecutionContext<>>(
19 * GpuShaderConfig { "graph_build.comp", { 256, 1, 1 }, sizeof(GraphBuildPC) });
20 * executor->input(positions)
21 * .input(attributes)
22 * .output(k_max_edges * 2 * sizeof(float))
23 * .output(sizeof(uint32_t), GpuBufferBinding::ElementType::UINT32)
24 * .push(pc);
25 *
26 * my_operation->set_gpu_backend(executor);
27 * @endcode
28 *
29 * Or with explicit binding indices when order cannot be inferred:
30 * @code
31 * executor->input(0, positions)
32 * .in_out(1, data)
33 * .output(2, output_bytes);
34 * @endcode
35 *
36 * Output readback after pipeline execution:
37 * @code
38 * auto edges = ShaderExecutionContext<>::read_output<float>(result, 2);
39 * auto count = ShaderExecutionContext<>::read_output<uint32_t>(result, 3)[0];
40 * @endcode
41 *
42 * @tparam InputType ComputeData type accepted.
43 * @tparam OutputType ComputeData type produced.
44 */
45template <ComputeData InputType = std::vector<Kakshya::DataVariant>,
46 ComputeData OutputType = InputType>
47class MAYAFLUX_API ShaderExecutionContext : public GpuExecutionContext<InputType, OutputType> {
48public:
51
52 /**
53 * @brief Construct with shader config and optional pre-built binding list.
54 *
55 * Bindings may be supplied here or added incrementally via the fluent API.
56 * Mixing both is valid — fluent calls append after any pre-built bindings.
57 *
58 * @param config Shader path, workgroup size, push constant size.
59 * @param bindings Pre-built descriptor layout. Empty by default.
60 * @param name Executor name for logging and error messages.
61 */
63 GpuShaderConfig config,
64 std::vector<GpuBufferBinding> bindings = {},
65 std::string name = "ShaderExecutionContext")
66 : GpuExecutionContext<InputType, OutputType>(std::move(config))
67 , m_bindings(std::move(bindings))
68 , m_name(std::move(name))
69 {
70 }
71
72 //==========================================================================
73 // Fluent binding API
74 //==========================================================================
75
76 /**
77 * @brief Add an INPUT binding, inferring the next available binding index.
78 * @tparam T Element type of the data vector.
79 * @param data Data to upload for this binding.
80 * @param type Element type hint for the shader (default: FLOAT32).
81 * @return Reference to this executor for chaining.
82 */
83 template <typename T>
84 ShaderExecutionContext& input(const std::vector<T>& data,
85 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
86 {
87 const uint32_t idx = next_binding_index();
88 m_bindings.push_back({ .set = 0,
89 .binding = idx,
90 .direction = GpuBufferBinding::Direction::INPUT,
91 .element_type = type });
92 this->set_binding_data(idx, data);
93 return *this;
94 }
95
96 /**
97 * @brief Add an INPUT binding at an explicit index.
98 * @tparam T Element type of the data vector.
99 * @param binding Binding index.
100 * @param data Data to upload for this binding.
101 * @param type Element type hint for the shader (default: FLOAT32).
102 * @return Reference to this executor for chaining.
103 */
104 template <typename T>
105 ShaderExecutionContext& input(uint32_t binding, const std::vector<T>& data,
106 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
107 {
108 m_bindings.push_back({ .set = 0,
109 .binding = binding,
110 .direction = GpuBufferBinding::Direction::INPUT,
111 .element_type = type });
112
113 this->set_binding_data(binding, data);
114 return *this;
115 }
116
117 /**
118 * @brief Add an INPUT_OUTPUT binding, inferring the next available binding index.
119 * @tparam T Element type of the data vector.
120 * @param data Data to upload for this binding.
121 * @param type Element type hint for the shader (default: FLOAT32).
122 * @return Reference to this executor for chaining.
123 */
124 template <typename T>
125 ShaderExecutionContext& in_out(const std::vector<T>& data,
126 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
127 {
128 const uint32_t idx = next_binding_index();
129 m_bindings.push_back({ .set = 0,
130 .binding = idx,
131 .direction = GpuBufferBinding::Direction::INPUT_OUTPUT,
132 .element_type = type });
133
134 this->set_binding_data(idx, data);
135 return *this;
136 }
137
138 /**
139 * @brief Add an INPUT_OUTPUT binding at an explicit index.
140 * @tparam T Element type of the data vector.
141 * @param binding Binding index.
142 * @param data Data to upload for this binding.
143 * @param type Element type hint for the shader (default: FLOAT32).
144 * @return Reference to this executor for chaining.
145 */
146 template <typename T>
147 ShaderExecutionContext& in_out(uint32_t binding, const std::vector<T>& data,
148 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
149 {
150 m_bindings.push_back({ .set = 0,
151 .binding = binding,
152 .direction = GpuBufferBinding::Direction::INPUT_OUTPUT,
153 .element_type = type });
154
155 this->set_binding_data(binding, data);
156 return *this;
157 }
158
159 /**
160 * @brief Declare an INPUT_OUTPUT binding at an explicit index, without pre-staging data.
161 *
162 * The binding direction is registered but no data is uploaded at
163 * configuration time. Dispatch stages data from the input Datum
164 * automatically. Use when the shader reads and writes the same buffer
165 * and the input arrives via apply_operation rather than set_binding_data.
166 *
167 * @param binding Binding index.
168 * @param type Element type hint for the shader (default: FLOAT32).
169 * @return Reference to this executor for chaining.
170 */
171 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
172 {
173 m_bindings.push_back({ .set = 0,
174 .binding = binding,
175 .direction = GpuBufferBinding::Direction::INPUT_OUTPUT,
176 .element_type = type });
177
178 return *this;
179 }
180
181 /**
182 * @brief Declare an INPUT_OUTPUT binding without pre-staging data,
183 * inferring the next available binding index.
184 *
185 * @param type Element type hint for the shader (default: FLOAT32).
186 * @return Reference to this executor for chaining.
187 */
190 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
191 {
192 const uint32_t idx = next_binding_index();
193 m_bindings.push_back({ .set = 0,
194 .binding = idx,
195 .direction = GpuBufferBinding::Direction::INPUT_OUTPUT,
196 .element_type = type });
197
198 return *this;
199 }
200
201 /**
202 * @brief Add an OUTPUT binding, inferring the next available binding index.
203 * @param byte_size Allocation size in bytes.
204 * @param type Element type hint for the shader (default: FLOAT32).
205 * @return Reference to this executor for chaining.
206 */
208 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
209 {
210 const uint32_t idx = next_binding_index();
211 m_bindings.push_back({ .set = 0,
212 .binding = idx,
213 .direction = GpuBufferBinding::Direction::OUTPUT,
214 .element_type = type });
215
216 this->set_output_size(idx, byte_size);
217 return *this;
218 }
219
220 /**
221 * @brief Add an OUTPUT binding at an explicit index.
222 * @param binding Binding index.
223 * @param byte_size Allocation size in bytes.
224 * @param type Element type hint for the shader (default: FLOAT32).
225 * @return Reference to this executor for chaining.
226 */
227 ShaderExecutionContext& output(uint32_t binding, size_t byte_size,
228 GpuBufferBinding::ElementType type = GpuBufferBinding::ElementType::FLOAT32)
229 {
230 m_bindings.push_back({ .set = 0,
231 .binding = binding,
232 .direction = GpuBufferBinding::Direction::OUTPUT,
233 .element_type = type });
234
235 this->set_output_size(binding, byte_size);
236 return *this;
237 }
238
239 /**
240 * @brief Set push constants from a trivially copyable struct or value.
241 *
242 * Fluent alias for GpuExecutionContext::set_push_constants<T>.
243 *
244 * @tparam T Push constant type. Must match shader layout exactly.
245 * @param data Push constant data.
246 * @return Reference to this executor for chaining.
247 */
248 template <typename T>
250 {
251 this->set_push_constants(data);
252 return *this;
253 }
254
255 /**
256 * @brief Configure multi-pass (CHAINED) dispatch.
257 *
258 * Stores pass count and push constant updater so the caller never
259 * touches ExecutionContext::execution_metadata by string key.
260 * The CHAINED mode is activated automatically in execute() when
261 * a multipass configuration is present.
262 *
263 * @param pass_count Total number of passes to dispatch.
264 * @param pc_updater Called before each pass with (pass_index, push_constant_ptr).
265 * @return Reference to this executor for chaining.
266 */
268 uint32_t pass_count,
269 std::function<void(uint32_t, void*)> pc_updater)
270 {
271 m_multipass_count = pass_count;
272 m_multipass_updater = std::move(pc_updater);
273 return *this;
274 }
275
276 //==========================================================================
277 // Output readback
278 //==========================================================================
279
280 /**
281 * @brief Read a typed output buffer from a pipeline result Datum.
282 *
283 * Replaces the manual any_cast + reinterpret_cast pattern at call sites.
284 * Copies the raw bytes from metadata into a typed vector.
285 *
286 * @tparam T Element type to interpret the buffer as.
287 * @param result Datum returned by ComputationPipeline::process or
288 * ComputeOperation::apply_operation.
289 * @param binding_index Binding index matching the OUTPUT or INPUT_OUTPUT
290 * declaration.
291 * @return Vector of T with element count derived from raw byte size.
292 *
293 * @code
294 * auto edges = ShaderExecutionContext<>::read_output<float>(result, 2);
295 * auto count = ShaderExecutionContext<>::read_output<uint32_t>(result, 3)[0];
296 * @endcode
297 */
298 template <typename T>
299 static std::vector<T> read_output(
300 const Datum<std::vector<Kakshya::DataVariant>>& result,
301 size_t binding_index)
302 {
303 const auto key = "gpu_output_" + std::to_string(binding_index);
304
305 if (!result.metadata.contains(key)) {
306 error<std::runtime_error>(Journal::Component::Yantra, Journal::Context::Runtime,
307 std::source_location::current(),
308 "read_output: metadata key '{}' not found", key);
309 }
310
311 const auto& raw = safe_any_cast_or_throw<std::vector<uint8_t>>(result.metadata.at(key));
312 const size_t count = raw.size() / sizeof(T);
313 std::vector<T> out(count);
314 std::memcpy(out.data(), raw.data(), count * sizeof(T));
315 return out;
316 }
317
318 /**
319 * @brief Set or clear skip_auto_readback for a binding by index.
320 *
321 * When skip_auto_readback is true, the output buffer for this binding will
322 * not be read back to the CPU after dispatch. Use this to avoid unnecessary
323 * GPU-CPU synchronization when the output is consumed by another GPU stage
324 * rather than the CPU.
325 *
326 * @param binding Binding index to modify.
327 * @param skip Whether to skip automatic readback for this binding.
328 */
329 void set_skip_readback(uint32_t binding, bool skip)
330 {
331 for (auto& b : m_bindings) {
332 if (b.binding == binding)
333 b.skip_auto_readback = skip;
334 }
335 }
336
337 /**
338 * @brief Dispatch asynchronously and return a fence for polling.
339 *
340 * Equivalent to calling extract_inputs then dispatch_core_async.
341 * The fence becomes signaled when the GPU work completes. Call
342 * collect_result() once signaled to obtain the GpuChannelResult.
343 *
344 * @param input Input Datum. Channels are extracted before submission.
345 * @return FenceID to poll with ShaderFoundry::is_fence_signaled.
346 * Returns INVALID_FENCE if GPU initialisation fails.
347 */
349 {
350 if (!this->ensure_gpu_ready())
351 return Portal::Graphics::INVALID_FENCE;
352
353 auto [channels, structure_info] = this->extract_inputs(input);
354 return this->dispatch_core_async(channels, structure_info);
355 }
356
357 /**
358 * @brief Collect the result of the last async dispatch.
359 *
360 * Must be called only after ShaderFoundry::is_fence_signaled returns
361 * true for the FenceID returned by dispatch_async. Calls readback_primary
362 * and readback_aux using the element count cached by dispatch_core_async.
363 *
364 * @return GpuChannelResult with primary float data and aux buffers.
365 */
367 {
368 GpuChannelResult result;
369 result.primary = this->readback_primary(this->last_effective_element_count());
370 this->readback_aux(result);
371 return result;
372 }
373
374protected:
375 /**
376 * @brief Returns the binding list declared via constructor or fluent API.
377 */
378 [[nodiscard]] std::vector<GpuBufferBinding> declare_buffer_bindings() const override
379 {
380 return m_bindings;
381 }
382
383 /**
384 * @brief Injects multipass configuration into the context before dispatch
385 * when set_multipass() has been called.
386 */
388 {
389 if (m_multipass_count > 0 && m_multipass_updater) {
390 ExecutionContext chained = ctx;
391 chained.mode = ExecutionMode::CHAINED;
392 chained.execution_metadata["pass_count"] = m_multipass_count;
393 chained.execution_metadata["pc_updater"] = m_multipass_updater;
395 }
397 }
398
399private:
400 std::vector<GpuBufferBinding> m_bindings;
401 std::string m_name;
402 uint32_t m_multipass_count { 0 };
403 std::function<void(uint32_t, void*)> m_multipass_updater;
404
405 /**
406 * @brief Returns one past the highest binding index currently registered.
407 *
408 * Used by the no-index fluent overloads to append sequentially after
409 * any existing bindings, including those set by explicit-index calls.
410 */
411 [[nodiscard]] uint32_t next_binding_index() const
412 {
413 if (m_bindings.empty())
414 return 0;
415 return std::ranges::max(
416 m_bindings | std::views::transform([](const GpuBufferBinding& b) { return b.binding; }))
417 + 1;
418 }
419};
420
421// =============================================================================
422// Factory helpers
423// =============================================================================
424
425/**
426 * @brief Convenience factory for ShaderExecutionContext.
427 *
428 * @code
429 * auto executor = make_shader_executor(
430 * { "shaders/spectral_blur.comp", { 256, 1, 1 }, sizeof(SpectralBlurPC) },
431 * { GpuBufferBinding::input(0, 0),
432 * GpuBufferBinding::output(0, 1) },
433 * "spectral_blur"
434 * );
435 * my_operation->set_gpu_backend(executor);
436 * @endcode
437 */
438template <ComputeData InputType = std::vector<Kakshya::DataVariant>,
439 ComputeData OutputType = InputType>
440std::shared_ptr<ShaderExecutionContext<InputType, OutputType>>
442 GpuShaderConfig config,
443 std::vector<GpuBufferBinding> bindings,
444 std::string name = "ShaderExecutionContext")
445{
446 return std::make_shared<ShaderExecutionContext<InputType, OutputType>>(
447 std::move(config),
448 std::move(bindings),
449 std::move(name));
450}
451
452} // namespace MayaFlux::Yantra
Core::GlobalInputConfig input
Definition Config.cpp:36
size_t b
size_t count
Type-parameterised shell over GpuDispatchCore.
static std::vector< T > read_output(const Datum< std::vector< Kakshya::DataVariant > > &result, size_t binding_index)
Read a typed output buffer from a pipeline result Datum.
std::vector< GpuBufferBinding > declare_buffer_bindings() const override
Returns the binding list declared via constructor or fluent API.
ShaderExecutionContext & in_out(uint32_t binding, const std::vector< T > &data, GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Add an INPUT_OUTPUT binding at an explicit index.
ShaderExecutionContext & push(const T &data)
Set push constants from a trivially copyable struct or value.
output_type execute(const input_type &input, const ExecutionContext &ctx) override
Injects multipass configuration into the context before dispatch when set_multipass() has been called.
ShaderExecutionContext & output(uint32_t binding, size_t byte_size, GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Add an OUTPUT binding at an explicit index.
uint32_t next_binding_index() const
Returns one past the highest binding index currently registered.
ShaderExecutionContext & input(const std::vector< T > &data, GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Add an INPUT binding, inferring the next available binding index.
ShaderExecutionContext & set_multipass(uint32_t pass_count, std::function< void(uint32_t, void *)> pc_updater)
Configure multi-pass (CHAINED) dispatch.
ShaderExecutionContext & in_out(uint32_t binding, GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Declare an INPUT_OUTPUT binding at an explicit index without pre-staging data.
Portal::Graphics::FenceID dispatch_async(const input_type &input)
Dispatch asynchronously and return a fence for polling.
ShaderExecutionContext(GpuShaderConfig config, std::vector< GpuBufferBinding > bindings={}, std::string name="ShaderExecutionContext")
Construct with shader config and optional pre-built binding list.
ShaderExecutionContext & in_out(GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Declare an INPUT_OUTPUT binding without pre-staging data, inferring the next available binding index.
typename GpuExecutionContext< InputType, OutputType >::output_type output_type
ShaderExecutionContext & input(uint32_t binding, const std::vector< T > &data, GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Add an INPUT binding at an explicit index.
typename GpuExecutionContext< InputType, OutputType >::input_type input_type
GpuChannelResult collect_result()
Collect the result of the last async dispatch.
void set_skip_readback(uint32_t binding, bool skip)
Set or clear skip_auto_readback for a binding by index.
std::function< void(uint32_t, void *)> m_multipass_updater
ShaderExecutionContext & in_out(const std::vector< T > &data, GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Add an INPUT_OUTPUT binding, inferring the next available binding index.
ShaderExecutionContext & output(size_t byte_size, GpuBufferBinding::ElementType type=GpuBufferBinding::ElementType::FLOAT32)
Add an OUTPUT binding, inferring the next available binding index.
Concrete GpuExecutionContext for a single fixed shader with fixed bindings.
std::shared_ptr< ShaderExecutionContext< InputType, OutputType > > make_shader_executor(GpuShaderConfig config, std::vector< GpuBufferBinding > bindings, std::string name="ShaderExecutionContext")
Convenience factory for ShaderExecutionContext.
Input/Output container for computation pipeline data flow with structure preservation.
Definition DataIO.hpp:24
ExecutionMode mode
Execution mode controlling scheduling behavior.
std::unordered_map< std::string, std::any > execution_metadata
Arbitrary metadata parameters used by operations.
Context information controlling how a compute operation executes.
ElementType
Element type the shader expects in this buffer.
Declares a single storage buffer the shader expects.
Erased output of a GPU dispatch: reconstructed float data plus any raw auxiliary outputs keyed by bin...
Plain-data description of the compute shader to dispatch.