MayaFlux 0.4.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
GpuDispatchCore.hpp
Go to the documentation of this file.
1#pragma once
2
5
7
8namespace MayaFlux::Core {
9class VKImage;
10}
11
12namespace MayaFlux::Yantra {
13
/**
 * @struct GpuChannelResult
 * @brief Erased output of a GPU dispatch: reconstructed float data plus
 *        any raw auxiliary outputs keyed by binding index.
 */
struct GpuChannelResult {
    /// Reconstructed float data of the dispatch.
    std::vector<float> primary;
    /// Raw auxiliary output bytes, keyed by binding index.
    std::unordered_map<size_t, std::vector<uint8_t>> aux;
};
23
24/**
25 * @class GpuDispatchCore
26 * @brief Non-template base that owns all type-independent GPU dispatch logic.
27 *
28 * Separates resource management, buffer staging, and dispatch orchestration
29 * from the type-parameterised boundary in GpuExecutionContext. All virtual
30 * override points that do not reference InputType/OutputType live here so
31 * that implementations can be placed in a .cpp file.
32 *
33 * Subclasses (including GpuExecutionContext) implement the two remaining
34 * type-dependent steps -- channel extraction and output reconstruction --
35 * without duplicating anything that is type-independent.
36 */
37class MAYAFLUX_API GpuDispatchCore {
38public:
39 explicit GpuDispatchCore(GpuShaderConfig config);
40 virtual ~GpuDispatchCore() = default;
41
46
47 /**
48 * @brief Set push constant data from a raw byte pointer.
49 * @param data Pointer to trivially-copyable push constant struct.
50 * @param bytes Size in bytes.
51 */
52 void set_push_constants(const void* data, size_t bytes);
53
54 /**
55 * @brief Typed convenience wrapper for set_push_constants(const void*, size_t).
56 * @tparam T Trivially copyable type matching shader push constant layout.
57 */
58 template <typename T>
59 void set_push_constants(const T& data)
60 {
61 set_push_constants(&data, sizeof(T));
62 }
63
64 /**
65 * @brief Pre-stage typed data for a specific binding slot, bypassing
66 * the default channel-flattening path in prepare_gpu_inputs.
67 * @tparam T Trivially copyable element type.
68 * @param index Binding index matching declare_buffer_bindings order.
69 * @param data Elements to upload.
70 */
71 template <typename T>
72 void set_binding_data(size_t index, std::span<const T> data)
73 {
74 if (index >= m_binding_data.size())
75 m_binding_data.resize(index + 1);
76 auto& slot = m_binding_data[index];
77 slot.resize(data.size_bytes());
78 std::memcpy(slot.data(), data.data(), data.size_bytes());
79 }
80
81 template <typename T>
82 void set_binding_data(size_t index, const std::vector<T>& data)
83 {
84 set_binding_data(index, std::span<const T>(data));
85 }
86
87 /**
88 * @brief Declare the byte capacity of an output binding independently
89 * of input data. Required for edge lists, histograms, count
90 * buffers, and any output whose size cannot be derived from input.
91 * @param index Binding index.
92 * @param byte_size Required allocation in bytes.
93 */
94 void set_output_size(size_t index, size_t byte_size);
95
96 /**
97 * @brief Ensure GPU resources are initialised. Safe to call repeatedly.
98 * @return True if GPU is ready after this call.
99 */
100 bool ensure_gpu_ready();
101
102 /**
103 * @brief Query GPU readiness without attempting initialisation.
104 */
105 [[nodiscard]] bool is_gpu_ready() const;
106
107 /**
108 * @brief Return the image registered at an IMAGE_STORAGE output binding.
109 *
110 * Valid after dispatch_core completes (dispatch is synchronous via
111 * submit_and_wait). Callers may then bind it directly to a render pass
112 * or read it back via TextureLoom.
113 *
114 * @param binding_index Index of the IMAGE_STORAGE binding.
115 * @return Shared pointer to the VKImage, or nullptr if not registered.
116 */
117 [[nodiscard]] std::shared_ptr<Core::VKImage> get_output_image(size_t binding_index) const;
118
119 /**
120 * @brief Read back a specific binding into a caller-provided destination.
121 *
122 * @param index Binding index to read back.
123 * @param dest Pointer to caller-allocated memory for the data.
124 * @param byte_size Size in bytes to read back (must not exceed allocated size).
125 */
126 void download_binding(size_t index, void* dest, size_t byte_size);
127
128protected:
129 /**
130 * @brief Declare the storage buffers the shader expects.
131 *
132 * Default: INPUT at (0,0) FLOAT32, OUTPUT at (0,1) FLOAT32.
133 */
134 [[nodiscard]] virtual std::vector<GpuBufferBinding> declare_buffer_bindings() const;
135
136 /**
137 * @brief Called immediately before dispatch. Override to write push
138 * constants or perform any per-dispatch reconfiguration.
139 */
140 virtual void on_before_gpu_dispatch(
141 const std::vector<std::vector<double>>& channels,
142 const DataStructureInfo& structure_info);
143
144 /**
145 * @brief Marshal channel data into GPU input buffers.
146 *
147 * Handles FLOAT32, UINT32, INT32, PASSTHROUGH, IMAGE_STORAGE, and
148 * IMAGE_SAMPLED binding kinds. Called after flatten_channels_to_staging.
149 */
150 virtual void prepare_gpu_inputs(
151 const std::vector<std::vector<double>>& channels,
152 const DataStructureInfo& structure_info);
153
154 /**
155 * @brief Calculate workgroup dispatch counts from structure dimensions.
156 *
157 * Reads SPATIAL_X/Y/Z roles for 2D/3D shaders; falls back to 1D
158 * element-count dispatch when no spatial dimensions exist.
159 *
160 * @param total_elements Flat element count for the 1D fallback.
161 * @param structure_info Dimension metadata.
162 */
163 [[nodiscard]] virtual std::array<uint32_t, 3> calculate_dispatch_size(
164 size_t total_elements,
165 const DataStructureInfo& structure_info) const;
166
167 /**
168 * @brief Stage raw bytes for a PASSTHROUGH binding before dispatch.
169 * @param binding_index Index matching declare_buffer_bindings order.
170 * @param data Raw byte pointer.
171 * @param byte_size Size in bytes.
172 */
173 void stage_passthrough(size_t binding_index, const void* data, size_t byte_size);
174
175 /**
176 * @brief Register a VKImage for an IMAGE_STORAGE binding.
177 *
178 * The image will be transitioned to eGeneral layout if not already there.
179 *
180 * @param binding_index Index matching the IMAGE_STORAGE declaration.
181 * @param image Initialised VKImage.
182 */
183 void stage_image_storage(size_t binding_index, std::shared_ptr<Core::VKImage> image);
184
185 /**
186 * @brief Register a VKImage + sampler for an IMAGE_SAMPLED binding.
187 *
188 * The image will be transitioned to eShaderReadOnlyOptimal if needed.
189 *
190 * @param binding_index Index matching the IMAGE_SAMPLED declaration.
191 * @param image Initialised VKImage.
192 * @param sampler Vulkan sampler handle.
193 */
194 void stage_image_sampled(size_t binding_index,
195 std::shared_ptr<Core::VKImage> image,
196 vk::Sampler sampler);
197
198 [[nodiscard]] const GpuShaderConfig& gpu_config() const;
199
200 /**
201 * @brief Full single-pass dispatch. Drives prepare_gpu_inputs,
202 * on_before_gpu_dispatch, bind_descriptor, and GpuResourceManager::dispatch.
203 *
204 * @param channels Extracted double channels from the input Datum.
205 * @param structure_info Dimension/modality metadata from OperationHelper.
206 * @return GpuChannelResult containing primary float readback and aux buffers.
207 */
208 GpuChannelResult dispatch_core(
209 const std::vector<std::vector<double>>& channels,
210 const DataStructureInfo& structure_info);
211
212 /**
213 * @brief Multi-pass (chained) dispatch. Calls dispatch_batched on
214 * GpuResourceManager and reads back once after all passes.
215 *
216 * @param channels Extracted double channels.
217 * @param structure_info Dimension/modality metadata.
218 * @param ctx ExecutionContext carrying pass_count and pc_updater.
219 * @return GpuChannelResult containing primary float readback and aux buffers.
220 */
221 GpuChannelResult dispatch_core_chained(
222 const std::vector<std::vector<double>>& channels,
223 const DataStructureInfo& structure_info,
224 const ExecutionContext& ctx);
225
226 /**
227 * @brief Non-blocking variant of dispatch_core.
228 *
229 * Performs the full setup (on_before_gpu_dispatch, prepare_gpu_inputs,
230 * bind_descriptor) then calls GpuResourceManager::dispatch_async.
231 * Returns immediately with a FenceID. The caller must poll
232 * ShaderFoundry::is_fence_signaled on the returned ID, and once
233 * signaled call readback_primary / readback_aux to collect results.
234 *
235 * @param channels Extracted double channels from the input Datum.
236 * @param structure_info Dimension/modality metadata from OperationHelper.
237 * @return FenceID to poll. INVALID_FENCE if dispatch fails.
238 */
239 [[nodiscard]] Portal::Graphics::FenceID dispatch_core_async(
240 const std::vector<std::vector<double>>& channels,
241 const DataStructureInfo& structure_info);
242
243 /**
244 * @brief Effective element count used by the last dispatch_core or
245 * dispatch_core_async call.
246 *
247 * Cached after each dispatch so callers can pass the correct count to
248 * readback_primary without re-deriving it.
 *
 * @return The cached count. The backing member is value-initialised at its
 * declaration, so this reads 0 until something assigns it.
249 */
250 [[nodiscard]] size_t last_effective_element_count() const
251 {
252 return m_last_effective_element_count;
253 }
254
255 /**
256 * @brief Read back the primary output buffer into a float vector.
257 *
258 * Selects the first OUTPUT or INPUT_OUTPUT binding. Caps readback to
259 * the lesser of the requested float count and the allocated buffer size.
260 *
261 * @param float_count Number of float elements to attempt to read.
262 * @return Float vector of length min(float_count, allocated / sizeof(float)).
263 */
264 [[nodiscard]] std::vector<float> readback_primary(size_t float_count);
265
266 /**
267 * @brief Read back all OUTPUT bindings that have explicit size overrides
268 * into the aux map of a GpuChannelResult.
269 *
270 * @param result GpuChannelResult to write aux entries into.
271 */
272 void readback_aux(GpuChannelResult& result);
273
274 /**
275 * @brief Flatten planar double channels into m_staging_floats.
276 *
277 * Skipped for structured modalities (glm::vec3 etc.) since those are
278 * handled per-binding via PASSTHROUGH or integer paths.
279 */
280 void flatten_channels_to_staging(
281 const std::vector<std::vector<double>>& channels,
282 const DataStructureInfo& structure_info);
283
284 [[nodiscard]] size_t find_first_output_index() const;
285 [[nodiscard]] size_t largest_binding_data_element_count() const;
286
288 std::vector<GpuBufferBinding> m_bindings;
289 std::vector<float> m_staging_floats;
290 std::vector<uint8_t> m_push_constants;
291 std::vector<size_t> m_output_size_overrides;
292 std::vector<std::vector<uint8_t>> m_passthrough_bytes;
293 std::vector<std::vector<uint8_t>> m_binding_data;
294
296 std::shared_ptr<Core::VKImage> image;
297 vk::Sampler sampler;
299 };
300 std::vector<ImageBinding> m_image_bindings;
301
302private:
304
305 size_t m_last_effective_element_count {};
306};
307
308} // namespace MayaFlux::Yantra
IO::ImageData image
Definition Decoder.cpp:57
std::vector< ImageBinding > m_image_bindings
GpuDispatchCore(GpuDispatchCore &&)=delete
std::vector< uint8_t > m_push_constants
void set_binding_data(size_t index, const std::vector< T > &data)
std::vector< std::vector< uint8_t > > m_binding_data
std::vector< GpuBufferBinding > m_bindings
std::vector< size_t > m_output_size_overrides
virtual ~GpuDispatchCore()=default
void set_push_constants(const T &data)
Typed convenience wrapper for set_push_constants(const void*, size_t).
size_t last_effective_element_count() const
Effective element count used by the last dispatch_core or dispatch_core_async call.
GpuDispatchCore & operator=(GpuDispatchCore &&)=delete
std::vector< std::vector< uint8_t > > m_passthrough_bytes
GpuDispatchCore(const GpuDispatchCore &)=delete
GpuDispatchCore & operator=(const GpuDispatchCore &)=delete
void set_binding_data(size_t index, std::span< const T > data)
Pre-stage typed data for a specific binding slot, bypassing the default channel-flattening path in prepare_gpu_inputs.
Non-template base that owns all type-independent GPU dispatch logic.
Encapsulates all Vulkan resource lifecycle behind Portal facades.
Metadata about data structure for reconstruction.
Context information controlling how a compute operation executes.
ElementType
Element type the shader expects in this buffer.
std::unordered_map< size_t, std::vector< uint8_t > > aux
Erased output of a GPU dispatch: reconstructed float data plus any raw auxiliary outputs keyed by binding index.
Plain-data description of the compute shader to dispatch.