MayaFlux 0.1.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
StatisticalAnalyzer.hpp
Go to the documentation of this file.
1#pragma once
2
5
7#include <Eigen/Dense>
8
9#include "AnalysisHelper.hpp"
10
11/**
12 * @file StatisticalAnalyzer.hpp
13 * @brief Span-based statistical analysis for digital signals in Maya Flux
14 *
15 * Defines the StatisticalAnalyzer using the new UniversalAnalyzer framework with
16 * zero-copy span processing and automatic structure handling via OperationHelper.
17 * This analyzer extracts statistical features from digital data streams with multiple
18 * computation methods and flexible output configurations.
19 *
20 * Key Features:
21 * - **Zero-copy processing:** Uses spans for maximum efficiency
22 * - **Template-flexible I/O:** Instance defines input/output types at construction
23 * - **Multiple statistical methods:** Mean, variance, std dev, skewness, kurtosis, percentiles, etc.
24 * - **Parallel processing:** Utilizes std::execution for performance
25 * - **Cross-modal support:** Works on any numeric data stream - truly digital-first
26 * - **Statistical classification:** Maps values to qualitative levels (outliers, normal, etc.)
27 * - **Automatic data handling:** OperationHelper manages all extraction/conversion
28 */
29
30namespace MayaFlux::Yantra {
31
32/**
33 * @enum StatisticalMethod
34 * @brief Supported statistical computation methods
 *
 * StatisticalAnalyzer::compute_statistical_values dispatches each enumerator to
 * a windowed helper that receives the analyzer's window size and hop size.
 * Enumerators that depend on an extra analyzer parameter name it below.
35 */
36enum class StatisticalMethod : uint8_t {
37 MEAN, ///< Arithmetic mean (also used as the fallback for unhandled values)
38 VARIANCE, ///< Variance; sample vs. population form follows the "sample_variance" parameter
39 STD_DEV, ///< Standard deviation; follows the "sample_variance" parameter
40 SKEWNESS, ///< Third moment - asymmetry measure
41 KURTOSIS, ///< Fourth moment - tail heaviness
42 MIN, ///< Minimum value
43 MAX, ///< Maximum value
44 MEDIAN, ///< 50th percentile
45 RANGE, ///< Max - min
46 PERCENTILE, ///< Arbitrary percentile taken from the "percentile" parameter (0-100, default 50)
47 MODE, ///< Most frequent value
48 MAD, ///< Median Absolute Deviation
49 CV, ///< Coefficient of Variation (std_dev/mean); follows the "sample_variance" parameter
50 SUM, ///< Sum of all values
51 COUNT, ///< Number of values
52 RMS, ///< Root Mean Square (computed by compute_rms_energy)
53 ENTROPY, ///< Shannon entropy for discrete data
54 ZSCORE ///< Z-score normalization; follows the "sample_variance" parameter
55};
56
/**
 * @enum StatisticalLevel
 * @brief Qualitative classification of statistical values
 *
 * The underlying values (0-5) are used as indices into
 * ChannelStatistics::level_counts, so the enumerator count and order must stay
 * in sync with that six-element array.
 */
enum class StatisticalLevel : uint8_t {
    EXTREME_LOW, ///< At or below the extreme-low threshold
    LOW, ///< At or below the low threshold
    NORMAL, ///< Between the low and high thresholds
    HIGH, ///< Above the high threshold, up to the extreme-high threshold
    EXTREME_HIGH, ///< Above the extreme-high threshold
    OUTLIER ///< Magnitude exceeds the outlier threshold
};
69
70/**
71 * @struct ChannelStatistics
72 * @brief Statistical results for a single data channel
 *
 * Filled by StatisticalAnalyzer::create_analysis_result. The summary fields
 * describe the distribution of @c statistical_values (the per-channel output of
 * the selected method), not the raw input samples.
73 */
74struct MAYAFLUX_API ChannelStatistics {
75 std::vector<double> statistical_values; ///< Values returned by the selected method for this channel
76
77 double mean_stat {}; ///< Mean of statistical_values
78 double max_stat {}; ///< Largest entry of statistical_values
79 double min_stat {}; ///< Smallest entry of statistical_values
80 double stat_variance {}; ///< Variance of statistical_values (sample or population per the analyzer setting)
81 double stat_std_dev {}; ///< Square root of stat_variance
82
83 double skewness {}; ///< Skewness of statistical_values
84 double kurtosis {}; ///< Kurtosis of statistical_values
85 double median {}; ///< Median of statistical_values
86 std::vector<double> percentiles; ///< Quartiles in order: 25th percentile, median, 75th percentile
87
88 std::vector<StatisticalLevel> stat_classifications; ///< One level per statistical value; left empty when classification is disabled
89 std::array<int, 6> level_counts {}; ///< Occurrences of each StatisticalLevel, indexed by the enum's underlying value
90
91 std::vector<std::pair<size_t, size_t>> window_positions; ///< (start, end) sample indices per value; end is clamped to the channel length
92 std::map<std::string, std::any> method_specific_data; ///< Extra per-method data; not populated by the code in this header
93};
94
95/**
96 * @struct StatisticalAnalysis
97 * @brief Analysis result structure for statistical analysis
 *
 * Snapshot of the analyzer configuration used for a run plus one
 * ChannelStatistics entry per analysed channel.
98 */
99struct MAYAFLUX_API StatisticalAnalysis {
100 StatisticalMethod method_used { StatisticalMethod::MEAN }; ///< Method that produced the values
101 uint32_t window_size {}; ///< Analyzer window size at analysis time
102 uint32_t hop_size {}; ///< Analyzer hop size at analysis time
103
104 std::vector<ChannelStatistics> channel_statistics; ///< Results in channel order; empty when no channel data was produced
105};
106
107/**
108 * @class StatisticalAnalyzer
109 * @brief High-performance statistical analyzer with zero-copy processing
110 *
111 * The StatisticalAnalyzer provides comprehensive statistical analysis capabilities for
112 * digital data streams using span-based processing for maximum efficiency.
113 * All data extraction and conversion is handled automatically by OperationHelper.
114 *
115 * Example usage:
116 * ```cpp
117 * // DataVariant -> VectorXd analyzer
118 * auto stat_analyzer = std::make_shared<StatisticalAnalyzer<Kakshya::DataVariant, Eigen::VectorXd>>();
119 *
120 * // User-facing analysis
121 * auto analysis = stat_analyzer->analyze_data(numeric_data);
122 * auto stat_result = safe_any_cast<StatisticalAnalysis>(analysis);
123 *
124 * // Pipeline usage
125 * auto pipeline_output = stat_analyzer->apply_operation(IO{numeric_data});
126 * ```
127 */
128template <ComputeData InputType = std::vector<Kakshya::DataVariant>, ComputeData OutputType = Eigen::VectorXd>
129class MAYAFLUX_API StatisticalAnalyzer : public UniversalAnalyzer<InputType, OutputType> {
130public:
134
135 /**
136 * @brief Construct StatisticalAnalyzer with configurable window parameters
137 * @param window_size Size of analysis window in samples (default: 512)
138 * @param hop_size Step size between windows in samples (default: 256)
 * @throws std::invalid_argument if either value is zero or if hop_size
 *         exceeds window_size (raised by validate_window_parameters)
139 */
140 explicit StatisticalAnalyzer(uint32_t window_size = 512, uint32_t hop_size = 256)
141 : m_window_size(window_size)
142 , m_hop_size(hop_size)
143 {
144 validate_window_parameters();
145 }
146
147 /**
148 * @brief Type-safe statistical analysis method
149 * @param data Input data
150 * @return StatisticalAnalysisResult directly
151 */
153 {
154 auto result = this->analyze_data(data);
155 return safe_any_cast_or_throw<StatisticalAnalysis>(result);
156 }
157
158 /**
159 * @brief Get last statistical analysis result (type-safe)
160 * @return StatisticalAnalysisResult from last operation
161 */
163 {
164 return safe_any_cast_or_throw<StatisticalAnalysis>(this->get_current_analysis());
165 }
166
167 /**
168 * @brief Get analysis type category
169 * @return Always AnalysisType::STATISTICAL for this analyzer
170 */
171 [[nodiscard]] AnalysisType get_analysis_type() const override
172 {
173 return AnalysisType::STATISTICAL;
174 }
175
176 /**
177 * @brief Get available analysis methods
178 * @return Names of the StatisticalMethod enumerators as produced by
 *         Utils::get_enum_names_lowercase (presumably lowercase, e.g. "mean" -
 *         confirm against Utils)
179 */
180 [[nodiscard]] std::vector<std::string> get_available_methods() const override
181 {
182 return Utils::get_enum_names_lowercase<StatisticalMethod>();
183 }
184
185 /**
186 * @brief Get supported methods for specific input type
187 * @tparam T Input type to check
188 * @return Vector of method names supported for this type
 *
 * Forwards typeid(T) to get_methods_for_type_impl(), which currently ignores
 * the type and returns every available method, so the result is the same for
 * any T.
189 */
190 template <typename T>
191 [[nodiscard]] std::vector<std::string> get_methods_for_type() const
192 {
193 return get_methods_for_type_impl(std::type_index(typeid(T)));
194 }
195
196 /**
197 * @brief Check if analyzer supports given input type
198 * @tparam T Input type to check
199 * @return True if get_methods_for_type<T>() reports at least one method
 *
 * Because the method list does not currently depend on T, this is true for
 * every T whenever any method is available.
200 */
201 template <typename T>
202 [[nodiscard]] bool supports_input_type() const
203 {
204 return !get_methods_for_type<T>().empty();
205 }
206
207 /**
208 * @brief Set statistical analysis method
209 * @param method StatisticalMethod enum value
210 */
212 {
213 m_method = method;
214 this->set_parameter("method", method_to_string(method));
215 }
216
217 /**
218 * @brief Set method by string name
219 * @param method_name String representation of method
220 */
221 void set_method(const std::string& method_name)
222 {
223 m_method = string_to_method(method_name);
224 this->set_parameter("method", method_name);
225 }
226
227 /**
228 * @brief Get current statistical method
229 * @return Currently selected StatisticalMethod (MEAN until changed)
230 */
231 [[nodiscard]] StatisticalMethod get_method() const
232 {
233 return m_method;
234 }
235
236 /**
237 * @brief Set window size for windowed analysis
238 * @param size Window size in samples
239 */
240 void set_window_size(uint32_t size)
241 {
242 m_window_size = size;
243 validate_window_parameters();
244 }
245
246 /**
247 * @brief Set hop size for windowed analysis
248 * @param size Hop size in samples
249 */
250 void set_hop_size(uint32_t size)
251 {
252 m_hop_size = size;
253 validate_window_parameters();
254 }
255
256 /**
257 * @brief Get window size
258 * @return Current window size in samples
259 */
260 [[nodiscard]] uint32_t get_window_size() const { return m_window_size; }
261
262 /**
263 * @brief Get hop size
264 * @return Current hop size (step between window starts) in samples
265 */
266 [[nodiscard]] uint32_t get_hop_size() const { return m_hop_size; }
267
268 /**
269 * @brief Enable/disable qualitative level classification
270 * @param enabled When true, create_analysis_result() assigns a StatisticalLevel
 *        to every statistical value and fills level_counts; when false both
 *        are left at their defaults. This covers all levels, not only outliers.
271 */
272 void set_classification_enabled(bool enabled)
273 {
274 m_classification_enabled = enabled;
275 }
276
277 /**
278 * @brief Check if classification is enabled
279 * @return True if classification enabled (the default)
280 */
281 [[nodiscard]] bool is_classification_enabled() const { return m_classification_enabled; }
282
283 /**
284 * @brief Classify statistical value qualitatively
285 * @param value Statistical value to classify
286 * @return StatisticalLevel classification
287 */
288 [[nodiscard]] StatisticalLevel classify_statistical_level(double value) const
289 {
290 if (std::abs(value) > m_outlier_threshold)
291 return StatisticalLevel::OUTLIER;
292 if (value <= m_extreme_low_threshold)
293 return StatisticalLevel::EXTREME_LOW;
294 if (value <= m_low_threshold)
295 return StatisticalLevel::LOW;
296 if (value <= m_high_threshold)
297 return StatisticalLevel::NORMAL;
298 if (value <= m_extreme_high_threshold)
299 return StatisticalLevel::HIGH;
300 return StatisticalLevel::EXTREME_HIGH;
301 }
302
303 /**
304 * @brief Convert statistical method enum to string
305 * @param method StatisticalMethod value
306 * @return Name produced by Utils::enum_to_lowercase_string for @p method
307 */
308 static std::string method_to_string(StatisticalMethod method)
309 {
310 return Utils::enum_to_lowercase_string(method);
311 }
312
313 /**
314 * @brief Convert string to statistical method enum
315 * @param str String representation
316 * @return StatisticalMethod value
317 */
318 static StatisticalMethod string_to_method(const std::string& str)
319 {
320 if (str == "default")
321 return StatisticalMethod::MEAN;
322 return Utils::string_to_enum_or_throw_case_insensitive<StatisticalMethod>(str, "StatisticalMethod");
323 }
324
325 /**
326 * @brief Convert statistical level enum to string
327 * @param level StatisticalLevel value
328 * @return String representation
329 */
331 {
332 return Utils::enum_to_lowercase_string(level);
333 }
334
335protected:
336 /**
337 * @brief Get analyzer name
338 * @return The fixed identifier "StatisticalAnalyzer"
339 */
340 [[nodiscard]] std::string get_analyzer_name() const override
341 {
342 return "StatisticalAnalyzer";
343 }
344
345 /**
346 * @brief Core analysis implementation - creates analysis result AND pipeline output
347 * @param input Input data wrapped in IO container
348 * @return Pipeline output (data flow for chaining operations)
349 */
351 {
352 if (input.data.empty()) {
353 throw std::runtime_error("Input is empty");
354 }
355 try {
356 auto [data_span, structure_info] = OperationHelper::extract_structured_double(
357 const_cast<input_type&>(input));
358
359 std::vector<std::span<const double>> channel_spans;
360 for (const auto& span : data_span)
361 channel_spans.emplace_back(span.data(), span.size());
362
363 std::vector<std::vector<double>> stat_values;
364 stat_values.reserve(channel_spans.size());
365 for (const auto& ch_span : channel_spans) {
366 stat_values.push_back(compute_statistical_values(ch_span, m_method));
367 }
368
369 StatisticalAnalysis analysis_result = create_analysis_result(
370 stat_values, channel_spans, structure_info);
371
372 this->store_current_analysis(analysis_result);
373 return create_pipeline_output(input, analysis_result, structure_info);
374 } catch (const std::exception& e) {
375 std::cerr << "Energy analysis failed: " << e.what() << '\n';
376 output_type error_result;
377 error_result.metadata = input.metadata;
378 error_result.metadata["error"] = std::string("Analysis failed: ") + e.what();
379 return error_result;
380 }
381 }
382
383 /**
384 * @brief Handle analysis-specific parameters
385 */
386 void set_analysis_parameter(const std::string& name, std::any value) override
387 {
388 try {
389 if (name == "method") {
390 try {
391 auto method_str = safe_any_cast_or_throw<std::string>(value);
392 m_method = string_to_method(method_str);
393 } catch (const std::runtime_error&) {
394 auto method_enum = safe_any_cast_or_throw<StatisticalMethod>(value);
395 m_method = method_enum;
396 }
397 } else if (name == "window_size") {
398 auto size = safe_any_cast_or_throw<uint32_t>(value);
399 m_window_size = size;
400 validate_window_parameters();
401 } else if (name == "hop_size") {
402 auto size = safe_any_cast_or_throw<uint32_t>(value);
403 m_hop_size = size;
404 validate_window_parameters();
405 } else if (name == "classification_enabled") {
406 auto enabled = safe_any_cast_or_throw<bool>(value);
407 m_classification_enabled = enabled;
408 } else if (name == "percentile") {
409 auto percentile = safe_any_cast_or_throw<double>(value);
410 if (percentile < 0.0 || percentile > 100.0) {
411 throw std::invalid_argument("Percentile must be between 0.0 and 100.0, got: " + std::to_string(percentile));
412 }
413 m_percentile_value = percentile;
414 } else if (name == "sample_variance") {
415 auto sample = safe_any_cast_or_throw<bool>(value);
416 m_sample_variance = sample;
417 } else {
418 base_type::set_analysis_parameter(name, std::move(value));
419 }
420 } catch (const std::runtime_error& e) {
421 throw std::invalid_argument("Failed to set parameter '" + name + "': " + e.what());
422 }
423 }
424
425 /**
426 * @brief Get analysis-specific parameter
427 */
428 [[nodiscard]] std::any get_analysis_parameter(const std::string& name) const override
429 {
430 if (name == "method")
431 return std::any(method_to_string(m_method));
432 if (name == "window_size")
433 return std::any(m_window_size);
434 if (name == "hop_size")
435 return std::any(m_hop_size);
436 if (name == "classification_enabled")
437 return std::any(m_classification_enabled);
438 if (name == "percentile")
439 return std::any(m_percentile_value);
440 if (name == "sample_variance")
441 return std::any(m_sample_variance);
442
443 return base_type::get_analysis_parameter(name);
444 }
445
446 /**
447 * @brief Get supported methods for specific type index
448 * @param type_info Type index to check (currently unused)
449 * @return The full list from get_available_methods(), regardless of the type
450 */
451 [[nodiscard]] std::vector<std::string> get_methods_for_type_impl(std::type_index /*type_info*/) const
452 {
453 return get_available_methods();
454 }
455
456private:
457 StatisticalMethod m_method { StatisticalMethod::MEAN };
459 uint32_t m_hop_size;
460 bool m_classification_enabled { true };
461 double m_percentile_value { 50.0 };
462 bool m_sample_variance { true };
463
464 double m_outlier_threshold { 3.0 };
465 double m_extreme_low_threshold { -2.0 };
466 double m_low_threshold { -1.0 };
467 double m_high_threshold { 1.0 };
468 double m_extreme_high_threshold { 2.0 };
469
470 /**
471 * @brief Validate window parameters
472 */
474 {
475 if (m_window_size == 0 || m_hop_size == 0) {
476 throw std::invalid_argument("Window size and hop size must be greater than 0");
477 }
478 if (m_hop_size > m_window_size) {
479 throw std::invalid_argument("Hop size should not exceed window size");
480 }
481 }
482
483 /**
484 * @brief Compute statistical values using span (zero-copy processing)
485 */
486 [[nodiscard]] std::vector<double> compute_statistical_values(std::span<const double> data, StatisticalMethod method) const
487 {
488 const size_t num_windows = calculate_num_windows(data.size());
489
490 switch (method) {
491 case StatisticalMethod::MEAN:
492 return compute_mean_statistic(data, num_windows, m_hop_size, m_window_size);
493 case StatisticalMethod::VARIANCE:
494 return compute_variance_statistic(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
495 case StatisticalMethod::STD_DEV:
496 return compute_std_dev_statistic(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
497 case StatisticalMethod::SKEWNESS:
498 return compute_skewness_statistic(data, num_windows, m_hop_size, m_window_size);
499 case StatisticalMethod::KURTOSIS:
500 return compute_kurtosis_statistic(data, num_windows, m_hop_size, m_window_size);
501 case StatisticalMethod::MEDIAN:
502 return compute_median_statistic(data, num_windows, m_hop_size, m_window_size);
503 case StatisticalMethod::PERCENTILE:
504 return compute_percentile_statistic(data, num_windows, m_hop_size, m_window_size, m_percentile_value);
505 case StatisticalMethod::ENTROPY:
506 return compute_entropy_statistic(data, num_windows, m_hop_size, m_window_size);
507 case StatisticalMethod::MIN:
508 return compute_min_statistic(data, num_windows, m_hop_size, m_window_size);
509 case StatisticalMethod::MAX:
510 return compute_max_statistic(data, num_windows, m_hop_size, m_window_size);
511 case StatisticalMethod::RANGE:
512 return compute_range_statistic(data, num_windows, m_hop_size, m_window_size);
513 case StatisticalMethod::SUM:
514 return compute_sum_statistic(data, num_windows, m_hop_size, m_window_size);
515 case StatisticalMethod::COUNT:
516 return compute_count_statistic(data, num_windows, m_hop_size, m_window_size);
517 case StatisticalMethod::RMS:
518 return compute_rms_energy(data, num_windows, m_hop_size, m_window_size);
519 case StatisticalMethod::MAD:
520 return compute_mad_statistic(data, num_windows, m_hop_size, m_window_size);
521 case StatisticalMethod::CV:
522 return compute_cv_statistic(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
523 case StatisticalMethod::MODE:
524 return compute_mode_statistic(data, num_windows, m_hop_size, m_window_size);
525 case StatisticalMethod::ZSCORE:
526 return compute_zscore_statistic(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
527 default:
528 return compute_mean_statistic(data, num_windows, m_hop_size, m_window_size);
529 }
530 }
531
532 /**
533 * @brief Calculate number of windows for given data size
534 */
535 [[nodiscard]] size_t calculate_num_windows(size_t data_size) const
536 {
537 if (data_size < m_window_size)
538 return 0;
539 return (data_size - m_window_size) / m_hop_size + 1;
540 }
541
542 /**
543 * @brief Create comprehensive analysis result
544 */
545 StatisticalAnalysis create_analysis_result(const std::vector<std::vector<double>>& stat_values,
546 std::vector<std::span<const double>> original_data, const auto& /*structure_info*/) const
547 {
548 StatisticalAnalysis result;
549 result.method_used = m_method;
550 result.window_size = m_window_size;
551 result.hop_size = m_hop_size;
552
553 if (stat_values.empty()) {
554 return result;
555 }
556
557 result.channel_statistics.resize(stat_values.size());
558
559 for (size_t ch = 0; ch < stat_values.size(); ++ch) {
560 auto& channel_result = result.channel_statistics[ch];
561 const auto& ch_stats = stat_values[ch];
562
563 channel_result.statistical_values = ch_stats;
564
565 if (ch_stats.empty())
566 continue;
567
568 const auto [min_it, max_it] = std::ranges::minmax_element(ch_stats);
569 channel_result.min_stat = *min_it;
570 channel_result.max_stat = *max_it;
571
572 auto mean_result = compute_mean_statistic(std::span<const double>(ch_stats), 1, 0, ch_stats.size());
573 channel_result.mean_stat = mean_result.empty() ? 0.0 : mean_result[0];
574
575 auto variance_result = compute_variance_statistic(std::span<const double>(ch_stats), 1, 0, ch_stats.size(), m_sample_variance);
576 channel_result.stat_variance = variance_result.empty() ? 0.0 : variance_result[0];
577 channel_result.stat_std_dev = std::sqrt(channel_result.stat_variance);
578
579 auto skew_result = compute_skewness_statistic(std::span<const double>(ch_stats), 1, 0, ch_stats.size());
580 channel_result.skewness = skew_result.empty() ? 0.0 : skew_result[0];
581
582 auto kurt_result = compute_kurtosis_statistic(std::span<const double>(ch_stats), 1, 0, ch_stats.size());
583 channel_result.kurtosis = kurt_result.empty() ? 0.0 : kurt_result[0];
584
585 auto median_result = compute_median_statistic(std::span<const double>(ch_stats), 1, 0, ch_stats.size());
586 channel_result.median = median_result.empty() ? 0.0 : median_result[0];
587
588 auto q25_result = compute_percentile_statistic(std::span<const double>(ch_stats), 1, 0, ch_stats.size(), 25.0);
589 auto q75_result = compute_percentile_statistic(std::span<const double>(ch_stats), 1, 0, ch_stats.size(), 75.0);
590 channel_result.percentiles = {
591 q25_result.empty() ? 0.0 : q25_result[0], // Q1
592 channel_result.median, // Q2
593 q75_result.empty() ? 0.0 : q75_result[0] // Q3
594 };
595
596 const size_t data_size = (ch < original_data.size()) ? original_data[ch].size() : 0;
597 channel_result.window_positions.reserve(ch_stats.size());
598 for (size_t i = 0; i < ch_stats.size(); ++i) {
599 const size_t start = i * m_hop_size;
600 const size_t end = std::min(start + m_window_size, data_size);
601 channel_result.window_positions.emplace_back(start, end);
602 }
603
604 if (m_classification_enabled) {
605 channel_result.stat_classifications.reserve(ch_stats.size());
606 channel_result.level_counts.fill(0);
607
608 for (double value : ch_stats) {
609 const StatisticalLevel level = classify_statistical_level(value);
610 channel_result.stat_classifications.push_back(level);
611 channel_result.level_counts[static_cast<size_t>(level)]++;
612 }
613 }
614 }
615
616 return result;
617 }
618
619 /**
620 * @brief Create pipeline output for operation chaining
621 */
623 {
624 std::vector<std::vector<double>> channel_stats;
625 channel_stats.reserve(analysis_result.channel_statistics.size());
626 for (const auto& ch : analysis_result.channel_statistics) {
627 channel_stats.push_back(ch.statistical_values);
628 }
629
630 output_type output = this->convert_result(channel_stats, info);
631
632 output.metadata = input.metadata;
633
634 output.metadata["source_analyzer"] = "StatisticalAnalyzer";
635 output.metadata["statistical_method"] = method_to_string(analysis_result.method_used);
636 output.metadata["window_size"] = analysis_result.window_size;
637 output.metadata["hop_size"] = analysis_result.hop_size;
638 output.metadata["num_channels"] = analysis_result.channel_statistics.size();
639
640 if (!analysis_result.channel_statistics.empty()) {
641 std::vector<double> channel_means, channel_maxs, channel_mins, channel_variances, channel_stddevs, channel_skewness, channel_kurtosis, channel_medians;
642 std::vector<size_t> channel_window_counts;
643
644 for (const auto& ch : analysis_result.channel_statistics) {
645 channel_means.push_back(ch.mean_stat);
646 channel_maxs.push_back(ch.max_stat);
647 channel_mins.push_back(ch.min_stat);
648 channel_variances.push_back(ch.stat_variance);
649 channel_stddevs.push_back(ch.stat_std_dev);
650 channel_skewness.push_back(ch.skewness);
651 channel_kurtosis.push_back(ch.kurtosis);
652 channel_medians.push_back(ch.median);
653 channel_window_counts.push_back(ch.statistical_values.size());
654 }
655
656 output.metadata["mean_per_channel"] = channel_means;
657 output.metadata["max_per_channel"] = channel_maxs;
658 output.metadata["min_per_channel"] = channel_mins;
659 output.metadata["variance_per_channel"] = channel_variances;
660 output.metadata["stddev_per_channel"] = channel_stddevs;
661 output.metadata["skewness_per_channel"] = channel_skewness;
662 output.metadata["kurtosis_per_channel"] = channel_kurtosis;
663 output.metadata["median_per_channel"] = channel_medians;
664 output.metadata["window_count_per_channel"] = channel_window_counts;
665 }
666
667 return output;
668 }
669};
670
671/// Standard statistical analyzer: DataVariant -> MatrixXd
673
674/// Container statistical analyzer: SignalContainer -> MatrixXd
676
677/// Region statistical analyzer: Region -> MatrixXd
679
680/// Raw statistical analyzer: produces double vectors
681template <ComputeData InputType = std::vector<Kakshya::DataVariant>>
683
684/// Variant statistical analyzer: produces DataVariant output
685template <ComputeData InputType = std::vector<Kakshya::DataVariant>>
687
688} // namespace MayaFlux::Yantra
Modern, digital-first universal analyzer framework for Maya Flux.
std::any get_analysis_parameter(const std::string &name) const override
Get analysis-specific parameter.
void set_classification_enabled(bool enabled)
Enable/disable outlier classification.
bool is_classification_enabled() const
Check if classification is enabled.
StatisticalMethod get_method() const
Get current statistical method.
StatisticalAnalysis get_statistical_analysis() const
Get last statistical analysis result (type-safe)
void validate_window_parameters() const
Validate window parameters.
std::vector< std::string > get_methods_for_type_impl(std::type_index) const
Get supported methods for specific type index.
std::string get_analyzer_name() const override
Get analyzer name.
static StatisticalMethod string_to_method(const std::string &str)
Convert string to statistical method enum.
bool supports_input_type() const
Check if analyzer supports given input type.
std::vector< std::string > get_methods_for_type() const
Get supported methods for specific input type.
StatisticalAnalysis create_analysis_result(const std::vector< std::vector< double > > &stat_values, std::vector< std::span< const double > > original_data, const auto &) const
Create comprehensive analysis result.
std::vector< std::string > get_available_methods() const override
Get available analysis methods.
StatisticalAnalyzer(uint32_t window_size=512, uint32_t hop_size=256)
Construct StatisticalAnalyzer with configurable window parameters.
StatisticalLevel classify_statistical_level(double value) const
Classify statistical value qualitatively.
static std::string method_to_string(StatisticalMethod method)
Convert statistical method enum to string.
std::vector< double > compute_statistical_values(std::span< const double > data, StatisticalMethod method) const
Compute statistical values using span (zero-copy processing)
static std::string statistical_level_to_string(StatisticalLevel level)
Convert statistical level enum to string.
AnalysisType get_analysis_type() const override
Get analysis type category.
uint32_t get_window_size() const
Get window size.
void set_window_size(uint32_t size)
Set window size for windowed analysis.
void set_method(StatisticalMethod method)
Set statistical analysis method.
output_type create_pipeline_output(const input_type &input, const StatisticalAnalysis &analysis_result, DataStructureInfo &info)
Create pipeline output for operation chaining.
uint32_t get_hop_size() const
Get hop size.
output_type analyze_implementation(const input_type &input) override
Core analysis implementation - creates analysis result AND pipeline output.
size_t calculate_num_windows(size_t data_size) const
Calculate number of windows for given data size.
void set_method(const std::string &method_name)
Set method by string name.
void set_analysis_parameter(const std::string &name, std::any value) override
Handle analysis-specific parameters.
StatisticalAnalysis analyze_statistics(const InputType &data)
Type-safe statistical analysis method.
void set_hop_size(uint32_t size)
Set hop size for windowed analysis.
High-performance statistical analyzer with zero-copy processing.
Template-flexible analyzer base with instance-defined I/O types.
std::vector< double > compute_zscore_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size, bool sample_variance)
Compute z-score statistic using zero-copy processing.
std::vector< double > compute_entropy_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size, size_t num_bins)
Compute entropy statistic using zero-copy processing.
AnalysisType
Categories of analysis operations for discovery and organization.
@ RMS
Root Mean Square energy.
std::vector< double > compute_skewness_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute skewness statistic using zero-copy processing.
std::vector< double > compute_variance_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size, bool sample_variance)
Compute variance statistic using zero-copy processing.
std::vector< double > compute_mad_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute MAD (Median Absolute Deviation) statistic using zero-copy processing.
std::vector< double > compute_std_dev_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size, bool sample_variance)
Compute standard deviation statistic using zero-copy processing.
std::vector< double > compute_sum_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute sum statistic using zero-copy processing.
std::vector< double > compute_percentile_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size, double percentile)
Compute percentile statistic using zero-copy processing.
std::vector< double > compute_mode_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute mode statistic using zero-copy processing.
std::vector< double > compute_rms_energy(std::span< const double > data, const uint32_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute RMS energy using zero-copy processing.
std::vector< double > compute_min_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute min statistic using zero-copy processing.
std::vector< double > compute_mean_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute mean statistic using zero-copy processing.
StatisticalLevel
Qualitative classification of statistical values.
std::vector< double > compute_range_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute range statistic using zero-copy processing.
std::vector< double > compute_cv_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size, bool sample_variance)
Compute CV (Coefficient of Variation) statistic using zero-copy processing.
StatisticalMethod
Supported statistical computation methods.
@ PERCENTILE
Arbitrary percentile (requires parameter)
@ KURTOSIS
Fourth moment - tail heaviness.
@ ZSCORE
Z-score normalization.
@ ENTROPY
Shannon entropy for discrete data.
@ MAD
Median Absolute Deviation.
@ CV
Coefficient of Variation (std_dev/mean)
@ VARIANCE
Population or sample variance.
@ SKEWNESS
Third moment - asymmetry measure.
std::vector< double > compute_median_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute median statistic using zero-copy processing.
std::vector< double > compute_kurtosis_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute kurtosis statistic using zero-copy processing.
std::vector< double > compute_max_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute max statistic using zero-copy processing.
std::vector< double > compute_count_statistic(std::span< const double > data, const size_t num_windows, const uint32_t hop_size, const uint32_t window_size)
Compute count statistic using zero-copy processing.
std::map< std::string, std::any > method_specific_data
std::vector< std::pair< size_t, size_t > > window_positions
std::vector< StatisticalLevel > stat_classifications
Statistical results for a single data channel.
Metadata about data structure for reconstruction.
T data
The actual computation data.
Definition DataIO.hpp:25
std::unordered_map< std::string, std::any > metadata
Associated metadata.
Definition DataIO.hpp:28
Input/Output container for computation pipeline data flow with structure preservation.
Definition DataIO.hpp:24
std::vector< ChannelStatistics > channel_statistics
Analysis result structure for statistical analysis.