MayaFlux 0.1.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
DataUtils.cpp
Go to the documentation of this file.
1#include "DataUtils.hpp"
2
3namespace MayaFlux::Kakshya {
4
5uint64_t calculate_total_elements(const std::vector<DataDimension>& dimensions)
6{
7 if (dimensions.empty())
8 return 0;
9
10 return std::transform_reduce(dimensions.begin(), dimensions.end(),
11 uint64_t(1), std::multiplies<>(),
12 [](const DataDimension& dim) { return dim.size; });
13}
14
15uint64_t calculate_frame_size(const std::vector<DataDimension>& dimensions)
16{
17 if (dimensions.empty())
18 return 0;
19
20 return std::transform_reduce(
21 dimensions.begin() + 1, dimensions.end(),
22 uint64_t(1), std::multiplies<>(),
23 [](const DataDimension& dim) constexpr { return dim.size; });
24}
25
26std::type_index get_variant_type_index(const DataVariant& data)
27{
28 return std::visit([](const auto& vec) -> std::type_index {
29 return std::type_index(typeid(decltype(vec)));
30 },
31 data);
32}
33
// Body of safe_copy_data_variant. The signature line (listing line 34) is absent from this
// listing; the cross-reference index at the end of the page gives it as
// void safe_copy_data_variant(const DataVariant &input, DataVariant &output).
// Both variants are visited together, so the lambda is instantiated for every
// (input element type, output element type) pair.
35{
36 std::visit([&](const auto& input_vec, auto& output_vec) {
37 using InputType = typename std::decay_t<decltype(input_vec)>::value_type;
38 using OutputType = typename std::decay_t<decltype(output_vec)>::value_type;
39
40 if constexpr (ProcessableData<InputType> && ProcessableData<OutputType>) {
// Request the contents of input as OutputType elements. temp_storage is handed to the helper
// (presumably as backing store when a conversion is needed - confirm in extract_from_variant).
41 std::vector<OutputType> temp_storage;
42 auto input_span = extract_from_variant<OutputType>(input, temp_storage);
43
// The output vector is resized to the extracted length and overwritten element by element.
44 output_vec.resize(input_span.size());
45 std::copy(input_span.begin(), input_span.end(), output_vec.begin());
46 } else {
// Element types that do not satisfy ProcessableData are reported through error<>.
// NOTE(review): listing lines 48-49 (the first arguments of this call) are missing here.
47 error<std::invalid_argument>(
50 std::source_location::current(),
51 "Unsupported type conversion from {} to {}",
52 typeid(InputType).name(),
53 typeid(OutputType).name());
54 }
55 },
56 input, output);
57}
58
/**
 * @brief Set a value in a metadata map (key-value).
 *
 * Inserts @p value under @p key, replacing any value already stored there.
 *
 * @param metadata Map to modify.
 * @param key      Entry name.
 * @param value    Value to store; taken by value and moved into the map.
 */
void set_metadata_value(std::unordered_map<std::string, std::any>& metadata, const std::string& key, std::any value)
{
    metadata.insert_or_assign(key, std::move(value));
}
63
64int find_dimension_by_role(const std::vector<DataDimension>& dimensions, DataDimension::Role role)
65{
66 auto it = std::ranges::find_if(dimensions,
67 [role](const DataDimension& dim) { return dim.role == role; });
68
69 return (it != dimensions.end()) ? static_cast<int>(std::distance(dimensions.begin(), it)) : -1;
70}
71
// Detects data modality from dimension information.
// Works in three stages: an empty-input check, a pass over dimensions that carry a
// component grouping, and a pass that tallies dimensions per role followed by a chain
// of checks on those tallies.
// NOTE(review): this listing has lost most return statements and most case labels
// (gaps in the embedded line numbers mark the spots); the notes below only describe
// what is still visible.
72DataModality detect_data_modality(const std::vector<DataDimension>& dimensions)
73{
// NOTE(review): listing line 75 is missing; presumably the early return for an empty
// dimension list - confirm against the real file.
74 if (dimensions.empty()) {
76 }
77
// Per-role dimension counters plus two aggregates filled in by the second loop.
78 size_t time_dims = 0, spatial_dims = 0, channel_dims = 0, frequency_dims = 0, custom_dims = 0;
79 size_t total_spatial_elements = 1;
80 size_t total_channels = 0;
81
// First pass: only dimensions with a grouping are examined; the decision depends on the
// role and on the grouping component count (3, 4 or 16 in the visible checks).
// NOTE(review): the case labels and the statements guarded by these ifs are missing here.
82 for (const auto& dim : dimensions) {
83 if (dim.grouping) {
84 switch (dim.role) {
95 if (dim.grouping->count == 3)
97 if (dim.grouping->count == 4)
99 break;
100 default:
101 if (dim.grouping->count == 16)
103 break;
104 }
105 }
106 }
107
// Second pass: count dimensions per role. Spatial sizes are multiplied into
// total_spatial_elements; channel sizes are summed into total_channels.
// NOTE(review): the case labels are missing from this listing; the counter names
// suggest which role each arm handles.
108 for (const auto& dim : dimensions) {
109 switch (dim.role) {
111 time_dims++;
112 break;
116 spatial_dims++;
117 total_spatial_elements *= dim.size;
118 break;
120 channel_dims++;
121 total_channels += dim.size;
122 break;
124 frequency_dims++;
125 break;
127 default:
// NOTE(review): custom_dims is incremented here but is not read in any line visible below.
128 custom_dims++;
129 break;
130 }
131 }
132
// Exactly one time dimension and no spatial or frequency dimension: audio-style data.
// With a single channel dimension the result is AUDIO_1D when the summed channel size
// is at most 1 and AUDIO_MULTICHANNEL otherwise; the other two outcomes are missing.
133 if (time_dims == 1 && spatial_dims == 0 && frequency_dims == 0) {
134 if (channel_dims == 0) {
136 } else if (channel_dims == 1) {
137 return (total_channels <= 1) ? DataModality::AUDIO_1D : DataModality::AUDIO_MULTICHANNEL;
138 } else {
140 }
141 }
142
// Time combined with frequency (results missing from this listing).
143 if (time_dims >= 1 && frequency_dims >= 1) {
144 if (spatial_dims == 0 && channel_dims <= 1) {
146 }
148 }
149
// Two or more spatial dimensions without time (results missing from this listing).
150 if (spatial_dims >= 2 && time_dims == 0) {
151 if (spatial_dims == 2) {
152 if (channel_dims == 0) {
154 } else if (channel_dims == 1 && total_channels >= 3) {
156 } else {
158 }
159 } else if (spatial_dims == 3) {
161 }
162 }
163
// Time combined with two or more spatial dimensions (results missing from this listing).
164 if (time_dims >= 1 && spatial_dims >= 2) {
165 if (spatial_dims == 2) {
166 if (channel_dims == 0 || (channel_dims == 1 && total_channels <= 1)) {
168 } else {
170 }
171 }
173 }
174
// NOTE(review): if every arm of the spatial_dims == 2 chain above returns, this check
// can never be reached - verify against the real file.
175 if (spatial_dims == 2 && time_dims == 0 && channel_dims >= 1) {
176 if (total_spatial_elements >= 64 && total_channels >= 1) {
178 }
179 }
180
// NOTE(review): listing line 181 (presumably the final fallback return) is missing.
182}
183
// Detect data dimensions from a single DataVariant.
// The layout is inferred only from the element type of the held vector and from its
// length. A multi-line warning is written to std::cerr on every call before inference.
// Returns the inferred list of DataDimension descriptors.
184std::vector<DataDimension> detect_data_dimensions(const DataVariant& data)
185{
// NOTE(review): the message below mentions infer_from_data_variant_vector, while the
// vector overload visible in this file is named detect_data_dimensions - confirm the name.
186 std::cerr << "Inferring structure from single DataVariant...\n"
187 << "This is not advisable as the method makes naive assumptions that can lead to massive computational errors\n"
188 << "If the variant is part of a container, region, or segment, please use the appropriate method instead.\n"
189 << "If the variant is part of a vector, please use infer_from_data_variant_vector instead.\n"
190 << "If you are sure you want to proceed, please ignore this warning.\n";
191
192 return std::visit([](const auto& vec) -> std::vector<DataDimension> {
193 using ValueType = typename std::decay_t<decltype(vec)>::value_type;
194
195 std::vector<DataDimension> dims;
196
// Decimal element types: one time dimension spanning the whole vector.
197 if constexpr (DecimalData<ValueType>) {
198 dims.emplace_back(DataDimension::time(vec.size()));
199
// Complex element types: one frequency dimension spanning the whole vector.
200 } else if constexpr (ComplexData<ValueType>) {
201 dims.emplace_back(DataDimension::frequency(vec.size()));
202
203 } else if constexpr (IntegerData<ValueType>) {
204 // uint8_t, uint16_t, uint32_t -> flattened 2D (images typically)
205 // Need to guess reasonable 2D dimensions from 1D size
206 uint64_t total_size = vec.size();
207
// Empty input still yields two spatial dimensions, both of size 0.
208 if (total_size == 0) {
209 dims.emplace_back(DataDimension::spatial(0, 'x'));
210 dims.emplace_back(DataDimension::spatial(0, 'y'));
211 } else {
// Perfect squares become a square x/y layout.
212 auto sqrt_size = static_cast<uint64_t>(std::sqrt(total_size));
213 if (sqrt_size * sqrt_size == total_size) {
214 dims.emplace_back(DataDimension::spatial(sqrt_size, 'x'));
215 dims.emplace_back(DataDimension::spatial(sqrt_size, 'y'));
216 } else {
// Otherwise walk width down from the truncated square root until it divides
// total_size exactly. width == 1 always divides, so the loop ends with
// width * height == total_size (a prime size gives width 1).
217 uint64_t width = sqrt_size;
218 uint64_t height = total_size / width;
219 while (width * height != total_size && width > 1) {
220 width--;
221 height = total_size / width;
222 }
// NOTE(review): this branch appends y before x, while the two branches above
// append x before y - confirm the ordering difference is intended.
223 dims.emplace_back(DataDimension::spatial(height, 'y'));
224 dims.emplace_back(DataDimension::spatial(width, 'x'));
225 }
226 }
227 } else if constexpr (GlmData<ValueType>) {
// GLM element types: one grouped dimension with vec.size() elements and
// components components per element.
// NOTE(review): the lines that declare role and assign it in each arm below
// (listing lines 229, 232, 234, 236, 238) are missing from this listing.
228 constexpr size_t components = glm_component_count<ValueType>();
230
231 if constexpr (GlmVec2Type<ValueType>) {
233 } else if constexpr (GlmVec3Type<ValueType>) {
235 } else if constexpr (GlmVec4Type<ValueType>) {
237 } else if constexpr (GlmMatrixType<ValueType>) {
239 }
240
241 dims.push_back(DataDimension::grouped(
242 "glm_structured_data",
243 static_cast<uint64_t>(vec.size()),
244 static_cast<uint8_t>(components),
245 role));
246 } else {
// Any other element type falls back to a single time dimension.
247 dims.emplace_back(DataDimension::time(vec.size()));
248 }
249
250 return dims;
251 },
252 data);
253}
254
// Detect data dimensions from a vector of DataVariant.
// The layout is inferred from the number of variants, the element category shared by
// all variants (GLM, decimal, complex, integer or mixed) and the length of the first
// variant. A multi-line warning is written to std::cerr on every call.
// Returns the inferred list of DataDimension descriptors.
// NOTE(review): only the length of variants[0] is read; nothing visible here checks
// that the remaining variants have the same length.
255std::vector<DataDimension> detect_data_dimensions(
256 const std::vector<DataVariant>& variants)
257{
258 std::cerr << "Inferring structure from DataVariant vector...\n"
259 << "This is not advisable as the method makes naive assumptions that can lead to massive computational errors\n"
260 << "If the variant is part of a container, region, or segment, please use the appropriate method instead.\n"
261 << "If you are sure you want to proceed, please ignore this warning.\n";
262
// Empty input: a single CUSTOM-role placeholder dimension named empty_variants
// (the 0 and 1 arguments are presumably size and stride - confirm against the
// DataDimension constructor).
263 if (variants.empty()) {
264 std::vector<DataDimension> dims;
265 dims.emplace_back("empty_variants", 0, 1, DataDimension::Role::CUSTOM);
266 return dims;
267 }
268
269 std::vector<DataDimension> dimensions;
270 size_t variant_count = variants.size();
271
272 size_t first_variant_size = std::visit([](const auto& vec) -> size_t {
273 return vec.size();
274 },
275 variants[0]);
276
// Each consistent_* flag is true only when every variant holds a vector whose element
// type satisfies the corresponding concept.
277 bool consistent_glm = std::ranges::all_of(variants, [](const auto& variant) {
278 return std::visit([](const auto& vec) -> bool {
279 using ValueType = typename std::decay_t<decltype(vec)>::value_type;
280 return GlmData<ValueType>;
281 },
282 variant);
283 });
284
285 bool consistent_decimal = std::ranges::all_of(variants, [](const auto& variant) {
286 return std::visit([](const auto& vec) -> bool {
287 using ValueType = typename std::decay_t<decltype(vec)>::value_type;
288 return MayaFlux::DecimalData<ValueType>;
289 },
290 variant);
291 });
292
293 bool consistent_complex = std::ranges::all_of(variants, [](const auto& variant) {
294 return std::visit([](const auto& vec) -> bool {
295 using ValueType = typename std::decay_t<decltype(vec)>::value_type;
296 return MayaFlux::ComplexData<ValueType>;
297 },
298 variant);
299 });
300
301 bool consistent_integer = std::ranges::all_of(variants, [](const auto& variant) {
302 return std::visit([](const auto& vec) -> bool {
303 using ValueType = typename std::decay_t<decltype(vec)>::value_type;
304 return MayaFlux::IntegerData<ValueType>;
305 },
306 variant);
307 });
308
// All-GLM input: one channel dimension per variant plus a grouped dimension whose
// component count comes from the element type of the first variant.
// NOTE(review): the lines that declare and assign role (listing lines 316, 318,
// 320, 322) are missing from this listing.
309 if (consistent_glm) {
310 dimensions.emplace_back(DataDimension::channel(variant_count));
311
312 std::visit([&](const auto& first_vec) {
313 using ValueType = typename std::decay_t<decltype(first_vec)>::value_type;
314 constexpr size_t components = glm_component_count<ValueType>();
315
317 if constexpr (GlmVec2Type<ValueType>) {
319 } else if constexpr (GlmVec3Type<ValueType>) {
321 } else if constexpr (GlmVec4Type<ValueType>) {
323 }
324
325 dimensions.emplace_back(DataDimension::grouped(
326 "glm_elements",
327 first_variant_size,
328 static_cast<uint8_t>(components),
329 role));
330 },
331 variants[0]);
332
333 return dimensions;
334 }
335
// A single variant: one dimension chosen by element category, with no channel dimension.
336 if (variant_count == 1) {
337 if (consistent_decimal) {
338 dimensions.emplace_back(DataDimension::time(first_variant_size, "samples"));
339 } else if (consistent_complex) {
340 dimensions.emplace_back(DataDimension::frequency(first_variant_size, "frequency_data"));
341 } else if (consistent_integer) {
342 dimensions.emplace_back(DataDimension::spatial(first_variant_size, 'x', 1, "data_points"));
343 } else {
// NOTE(review): the tail of this call (listing line 345) is missing from this listing.
344 dimensions.emplace_back("unknown_data", first_variant_size, 1,
346 }
347
// Exactly two variants of one category: a channel dimension of 2 followed by the data dimension.
348 } else if (variant_count == 2 && (consistent_decimal || consistent_complex || consistent_integer)) {
349 dimensions.emplace_back(DataDimension::channel(2));
350 if (consistent_decimal) {
351 dimensions.emplace_back(DataDimension::time(first_variant_size, "samples"));
352 } else if (consistent_complex) {
353 dimensions.emplace_back(DataDimension::frequency(first_variant_size, "bins"));
354 } else {
355 dimensions.emplace_back(DataDimension::spatial(first_variant_size, 'x', 1, "elements"));
356 }
357
// Three to sixteen variants of one category: same shape as the two-variant case; the
// only visible difference is the name given to the integer data dimension.
358 } else if (variant_count <= 16 && (consistent_decimal || consistent_complex || consistent_integer)) {
359 dimensions.emplace_back(DataDimension::channel(variant_count));
360 if (consistent_decimal) {
361 dimensions.emplace_back(DataDimension::time(first_variant_size, "samples"));
362 } else if (consistent_complex) {
363 dimensions.emplace_back(DataDimension::frequency(first_variant_size, "bins"));
364 } else {
365 dimensions.emplace_back(DataDimension::spatial(first_variant_size, 'x', 1, "pixels"));
366 }
367
// More than sixteen variants of one category: the variant index becomes a time
// dimension and the contents of each variant form the second dimension.
368 } else if (consistent_decimal || consistent_complex || consistent_integer) {
369 if (consistent_decimal) {
370 dimensions.emplace_back(DataDimension::time(variant_count, "time_blocks"));
// NOTE(review): the tail of this call (listing line 372) is missing from this listing.
371 dimensions.emplace_back("block_samples", first_variant_size, 1,
373 } else if (consistent_complex) {
374 dimensions.emplace_back(DataDimension::time(variant_count, "time_windows"));
375 dimensions.emplace_back(DataDimension::frequency(first_variant_size, "frequency_bins"));
376 } else {
377 dimensions.emplace_back(DataDimension::time(variant_count, "frames"));
378 dimensions.emplace_back(DataDimension::spatial(first_variant_size, 'x', 1, "frame_data"));
379 }
380
// Variants of differing categories: two generically named dimensions.
// NOTE(review): the tails of both calls (listing lines 383 and 385) are missing.
381 } else {
382 dimensions.emplace_back("mixed_variants", variant_count, 1,
384 dimensions.emplace_back("variant_data", first_variant_size, 1,
386 }
387
388 return dimensions;
389}
390
391}
@ Runtime
General runtime operations (default fallback)
@ Kakshya
Containers[Signalsource, Stream, File], Regions, DataProcessors.
std::vector< DataDimension > detect_data_dimensions(const DataVariant &data)
Detect data dimensions from a DataVariant.
uint64_t calculate_frame_size(const std::vector< DataDimension > &dimensions)
Calculate the frame size (number of elements per frame) for a set of dimensions.
Definition DataUtils.cpp:15
std::variant< std::vector< double >, std::vector< float >, std::vector< uint8_t >, std::vector< uint16_t >, std::vector< uint32_t >, std::vector< std::complex< float > >, std::vector< std::complex< double > >, std::vector< glm::vec2 >, std::vector< glm::vec3 >, std::vector< glm::vec4 >, std::vector< glm::mat4 > > DataVariant
Multi-type data storage for different precision needs.
Definition NDData.hpp:73
DataModality
Data modality types for cross-modal analysis.
Definition NDData.hpp:78
@ AUDIO_MULTICHANNEL
Multi-channel audio.
@ SPECTRAL_2D
2D spectral data (time + frequency)
@ UNKNOWN
Unknown or undefined modality.
@ VOLUMETRIC_3D
3D volumetric data
@ VIDEO_GRAYSCALE
3D video (time + 2D grayscale)
@ VIDEO_COLOR
4D video (time + 2D + color)
@ TENSOR_ND
N-dimensional tensor.
@ IMAGE_COLOR
2D RGB/RGBA image
@ IMAGE_2D
2D image (grayscale or single channel)
std::type_index get_variant_type_index(const DataVariant &data)
Get type index from DataVariant.
Definition DataUtils.cpp:26
int find_dimension_by_role(const std::vector< DataDimension > &dimensions, DataDimension::Role role)
Find the index of a dimension by its semantic role.
Definition DataUtils.cpp:64
void set_metadata_value(std::unordered_map< std::string, std::any > &metadata, const std::string &key, std::any value)
Set a value in a metadata map (key-value).
Definition DataUtils.cpp:59
DataModality detect_data_modality(const std::vector< DataDimension > &dimensions)
Detects data modality from dimension information.
Definition DataUtils.cpp:72
void safe_copy_data_variant(const DataVariant &input, DataVariant &output)
Safely copy data from a DataVariant to another DataVariant, handling type conversion.
Definition DataUtils.cpp:34
uint64_t calculate_total_elements(const std::vector< DataDimension > &dimensions)
Calculate the total number of elements in an N-dimensional container.
Definition DataUtils.cpp:5
Role
Semantic role of the dimension.
Definition NDData.hpp:145
@ FREQUENCY
Spectral/frequency axis.
@ TIME
Temporal progression (samples, frames, steps)
@ CUSTOM
User-defined or application-specific.
@ POSITION
Vertex positions (3D space)
@ CHANNEL
Parallel streams (audio channels, color channels)
@ SPATIAL_X
Spatial X axis (images, tensors)
uint64_t size
Number of elements in this dimension.
Definition NDData.hpp:188
Role role
Semantic hint for common operations.
Definition NDData.hpp:190
static DataDimension spatial(uint64_t size, char axis, uint64_t stride=1, std::string name="spatial")
Convenience constructor for a spatial dimension.
Definition NDData.cpp:29
static DataDimension grouped(std::string name, uint64_t element_count, uint8_t components_per_element, Role role=Role::CUSTOM)
Create dimension with component grouping.
Definition NDData.cpp:69
static DataDimension frequency(uint64_t bins, std::string name="frequency")
Convenience constructor for a frequency dimension.
Definition NDData.cpp:24
static DataDimension time(uint64_t samples, std::string name="time")
Convenience constructor for a temporal (time) dimension.
Definition NDData.cpp:14
static DataDimension channel(uint64_t count, uint64_t stride=1)
Convenience constructor for a channel dimension.
Definition NDData.cpp:19
Minimal dimension descriptor focusing on structure only.
Definition NDData.hpp:138