80 double stat_variance {};
81 double stat_std_dev {};
89 std::array<int, 6> level_counts {};
101 uint32_t window_size {};
102 uint32_t hop_size {};
128template <ComputeData InputType = std::vector<Kakshya::DataVariant>, ComputeData OutputType = Eigen::VectorXd>
141 : m_window_size(window_size)
142 , m_hop_size(hop_size)
144 validate_window_parameters();
154 auto result = this->analyze_data(data);
155 return safe_any_cast_or_throw<StatisticalAnalysis>(result);
160 return this->analyze_statistics(
input_type { data });
169 return safe_any_cast_or_throw<StatisticalAnalysis>(this->get_current_analysis());
178 return AnalysisType::STATISTICAL;
187 return Reflect::get_enum_names_lowercase<StatisticalMethod>();
195 template <
typename T>
198 return get_methods_for_type_impl(std::type_index(
typeid(T)));
206 template <
typename T>
209 return !get_methods_for_type<T>().empty();
219 this->set_parameter(
"method", method_to_string(method));
228 m_method = string_to_method(method_name);
229 this->set_parameter(
"method", method_name);
247 m_window_size =
size;
248 validate_window_parameters();
258 validate_window_parameters();
279 m_classification_enabled = enabled;
295 if (std::abs(value) > m_outlier_threshold)
296 return StatisticalLevel::OUTLIER;
297 if (value <= m_extreme_low_threshold)
298 return StatisticalLevel::EXTREME_LOW;
299 if (value <= m_low_threshold)
300 return StatisticalLevel::LOW;
301 if (value <= m_high_threshold)
302 return StatisticalLevel::NORMAL;
303 if (value <= m_extreme_high_threshold)
304 return StatisticalLevel::HIGH;
305 return StatisticalLevel::EXTREME_HIGH;
315 return Reflect::enum_to_lowercase_string(method);
325 if (str ==
"default")
326 return StatisticalMethod::MEAN;
327 return Reflect::string_to_enum_or_throw_case_insensitive<StatisticalMethod>(str,
"StatisticalMethod");
337 return Reflect::enum_to_lowercase_string(level);
347 return "StatisticalAnalyzer";
357 if constexpr (
requires { input.
data.empty(); }) {
358 if (input.
data.empty()) {
359 error<std::runtime_error>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(),
"Input is empty");
361 }
else if constexpr (std::is_same_v<InputType, Kakshya::RegionGroup>) {
362 if (input.
data.regions.empty()) {
363 error<std::runtime_error>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(),
"Input is empty");
367 auto [data_span, structure_info] = OperationHelper::extract_structured_double(
370 std::vector<std::span<const double>> channel_spans;
371 for (
const auto& span : data_span)
372 channel_spans.emplace_back(span.data(), span.size());
374 std::vector<std::vector<double>> stat_values;
375 stat_values.reserve(channel_spans.size());
376 for (
const auto& ch_span : channel_spans) {
377 stat_values.push_back(compute_statistical_values(ch_span, m_method));
381 stat_values, channel_spans, structure_info);
383 this->store_current_analysis(analysis_result);
384 return create_pipeline_output(input, analysis_result, structure_info);
385 }
catch (
const std::exception& e) {
386 MF_ERROR(Journal::Component::Yantra, Journal::Context::ComputeMatrix,
"Statistical analysis failed: {}", e.what());
389 error_result.
metadata[
"error"] = std::string(
"Analysis failed: ") + e.what();
400 if (name ==
"method") {
402 auto method_str = safe_any_cast_or_throw<std::string>(value);
403 m_method = string_to_method(method_str);
404 }
catch (
const std::runtime_error&) {
405 auto method_enum = safe_any_cast_or_throw<StatisticalMethod>(value);
406 m_method = method_enum;
408 }
else if (name ==
"window_size") {
409 auto size = safe_any_cast_or_throw<uint32_t>(value);
410 m_window_size =
size;
411 validate_window_parameters();
412 }
else if (name ==
"hop_size") {
413 auto size = safe_any_cast_or_throw<uint32_t>(value);
415 validate_window_parameters();
416 }
else if (name ==
"classification_enabled") {
417 auto enabled = safe_any_cast_or_throw<bool>(value);
418 m_classification_enabled = enabled;
419 }
else if (name ==
"percentile") {
420 auto percentile = safe_any_cast_or_throw<double>(value);
421 if (percentile < 0.0 || percentile > 100.0) {
422 throw std::invalid_argument(
"Percentile must be between 0.0 and 100.0, got: " + std::to_string(percentile));
424 m_percentile_value = percentile;
425 }
else if (name ==
"sample_variance") {
426 auto sample = safe_any_cast_or_throw<bool>(value);
427 m_sample_variance = sample;
429 base_type::set_analysis_parameter(name, std::move(value));
431 }
catch (
const std::runtime_error& e) {
432 error_rethrow(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(),
"Failed to set parameter '{}': {}", name, e.what());
441 if (name ==
"method")
442 return std::any(method_to_string(m_method));
443 if (name ==
"window_size")
444 return std::any(m_window_size);
445 if (name ==
"hop_size")
446 return std::any(m_hop_size);
447 if (name ==
"classification_enabled")
448 return std::any(m_classification_enabled);
449 if (name ==
"percentile")
450 return std::any(m_percentile_value);
451 if (name ==
"sample_variance")
452 return std::any(m_sample_variance);
454 return base_type::get_analysis_parameter(name);
464 return get_available_methods();
471 bool m_classification_enabled {
true };
472 double m_percentile_value { 50.0 };
473 bool m_sample_variance {
true };
475 double m_outlier_threshold { 3.0 };
476 double m_extreme_low_threshold { -2.0 };
477 double m_low_threshold { -1.0 };
478 double m_high_threshold { 1.0 };
479 double m_extreme_high_threshold { 2.0 };
486 if (m_window_size == 0 || m_hop_size == 0) {
487 error<std::invalid_argument>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(),
"Window size and hop size must be greater than 0");
489 if (m_hop_size > m_window_size) {
490 error<std::invalid_argument>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(),
"Hop size should not exceed window size");
499 const size_t num_windows = calculate_num_windows(data.size());
504 case StatisticalMethod::MEAN:
505 return D::mean(data, num_windows, m_hop_size, m_window_size);
506 case StatisticalMethod::VARIANCE:
507 return D::variance(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
508 case StatisticalMethod::STD_DEV:
509 return D::std_dev(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
510 case StatisticalMethod::SKEWNESS:
511 return D::skewness(data, num_windows, m_hop_size, m_window_size);
512 case StatisticalMethod::KURTOSIS:
513 return D::kurtosis(data, num_windows, m_hop_size, m_window_size);
514 case StatisticalMethod::MEDIAN:
515 return D::median(data, num_windows, m_hop_size, m_window_size);
516 case StatisticalMethod::PERCENTILE:
517 return D::percentile(data, num_windows, m_hop_size, m_window_size, m_percentile_value);
518 case StatisticalMethod::ENTROPY:
519 return D::entropy(data, num_windows, m_hop_size, m_window_size);
520 case StatisticalMethod::MIN:
521 return D::min(data, num_windows, m_hop_size, m_window_size);
522 case StatisticalMethod::MAX:
523 return D::max(data, num_windows, m_hop_size, m_window_size);
524 case StatisticalMethod::RANGE:
525 return D::range(data, num_windows, m_hop_size, m_window_size);
526 case StatisticalMethod::SUM:
527 return D::sum(data, num_windows, m_hop_size, m_window_size);
528 case StatisticalMethod::COUNT:
529 return D::count(data, num_windows, m_hop_size, m_window_size);
530 case StatisticalMethod::RMS:
531 return D::rms(data, num_windows, m_hop_size, m_window_size);
532 case StatisticalMethod::MAD:
533 return D::mad(data, num_windows, m_hop_size, m_window_size);
534 case StatisticalMethod::CV:
535 return D::coefficient_of_variation(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
536 case StatisticalMethod::MODE:
537 return D::mode(data, num_windows, m_hop_size, m_window_size);
538 case StatisticalMethod::ZSCORE:
539 return D::mean_zscore(data, num_windows, m_hop_size, m_window_size, m_sample_variance);
541 return D::mean(data, num_windows, m_hop_size, m_window_size);
550 if (data_size < m_window_size)
552 return (data_size - m_window_size) / m_hop_size + 1;
559 std::vector<std::span<const double>> original_data,
const auto& )
const
568 if (stat_values.empty()) {
574 for (
size_t ch = 0; ch < stat_values.size(); ++ch) {
576 const auto& ch_stats = stat_values[ch];
578 channel_result.statistical_values = ch_stats;
580 if (ch_stats.empty())
583 const auto [min_it, max_it] = std::ranges::minmax_element(ch_stats);
584 channel_result.min_stat = *min_it;
585 channel_result.max_stat = *max_it;
587 const std::span<const double> sp(ch_stats);
588 const auto sz =
static_cast<uint32_t
>(ch_stats.size());
590 const auto single = [&](
auto fn) {
return fn(sp, 1, 0, sz)[0]; };
592 channel_result.mean_stat = single([](
auto&&...
a) {
return D::mean(
a...); });
593 channel_result.stat_variance = single([&](
auto&&...
a) {
return D::variance(
a..., m_sample_variance); });
594 channel_result.stat_std_dev = std::sqrt(channel_result.stat_variance);
595 channel_result.skewness = single([](
auto&&...
a) {
return D::skewness(
a...); });
596 channel_result.kurtosis = single([](
auto&&...
a) {
return D::kurtosis(
a...); });
597 channel_result.median = single([](
auto&&...
a) {
return D::median(
a...); });
599 channel_result.percentiles = {
600 D::percentile(sp, 1, 0, sz, 25.0)[0],
601 channel_result.median,
602 D::percentile(sp, 1, 0, sz, 75.0)[0]
605 const size_t data_size = (ch < original_data.size()) ? original_data[ch].
size() : 0;
606 channel_result.window_positions.reserve(ch_stats.size());
607 for (
size_t i = 0; i < ch_stats.size(); ++i) {
608 const size_t start = i * m_hop_size;
609 const size_t end = std::min(start + m_window_size, data_size);
610 channel_result.window_positions.emplace_back(start, end);
613 if (m_classification_enabled) {
614 channel_result.stat_classifications.reserve(ch_stats.size());
615 channel_result.level_counts.fill(0);
617 for (
double value : ch_stats) {
619 channel_result.stat_classifications.push_back(level);
620 channel_result.level_counts[
static_cast<size_t>(level)]++;
633 std::vector<std::vector<double>> channel_stats;
636 channel_stats.push_back(ch.statistical_values);
639 output_type output = this->convert_result(channel_stats, info);
643 output.
metadata[
"source_analyzer"] =
"StatisticalAnalyzer";
650 std::vector<double> channel_means, channel_maxs, channel_mins, channel_variances, channel_stddevs, channel_skewness, channel_kurtosis, channel_medians;
651 std::vector<size_t> channel_window_counts;
654 channel_means.push_back(ch.mean_stat);
655 channel_maxs.push_back(ch.max_stat);
656 channel_mins.push_back(ch.min_stat);
657 channel_variances.push_back(ch.stat_variance);
658 channel_stddevs.push_back(ch.stat_std_dev);
659 channel_skewness.push_back(ch.skewness);
660 channel_kurtosis.push_back(ch.kurtosis);
661 channel_medians.push_back(ch.median);
662 channel_window_counts.push_back(ch.statistical_values.size());
665 output.
metadata[
"mean_per_channel"] = channel_means;
666 output.
metadata[
"max_per_channel"] = channel_maxs;
667 output.
metadata[
"min_per_channel"] = channel_mins;
668 output.
metadata[
"variance_per_channel"] = channel_variances;
669 output.
metadata[
"stddev_per_channel"] = channel_stddevs;
670 output.
metadata[
"skewness_per_channel"] = channel_skewness;
671 output.
metadata[
"kurtosis_per_channel"] = channel_kurtosis;
672 output.
metadata[
"median_per_channel"] = channel_medians;
673 output.
metadata[
"window_count_per_channel"] = channel_window_counts;
690template <ComputeData InputType = std::vector<Kakshya::DataVariant>>
694template <ComputeData InputType = std::vector<Kakshya::DataVariant>>
728 const std::string
q = qualifier.empty() ?
"mean_stat" : qualifier;
730 if (
q ==
"mean_stat")
737 return ch.stat_variance;
739 return ch.stat_std_dev;
746 if (
q ==
"window_count")
747 return static_cast<double>(ch.statistical_values.size());
Discrete sequence analysis primitives for MayaFlux::Kinesis.
#define MF_ERROR(comp, ctx,...)
Modern, digital-first universal analyzer framework for MayaFlux.
std::any get_analysis_parameter(const std::string &name) const override
Get analysis-specific parameter.
void set_classification_enabled(bool enabled)
Enable/disable qualitative level classification of statistical values (including outlier detection).
bool is_classification_enabled() const
Check if classification is enabled.
StatisticalMethod get_method() const
Get current statistical method.
StatisticalAnalysis get_statistical_analysis() const
Get last statistical analysis result (type-safe)
void validate_window_parameters() const
Validate window parameters.
std::vector< std::string > get_methods_for_type_impl(std::type_index) const
Get supported methods for specific type index.
std::string get_analyzer_name() const override
Get analyzer name.
static StatisticalMethod string_to_method(const std::string &str)
Convert string to statistical method enum.
bool supports_input_type() const
Check if analyzer supports given input type.
std::vector< std::string > get_methods_for_type() const
Get supported methods for specific input type.
StatisticalAnalysis create_analysis_result(const std::vector< std::vector< double > > &stat_values, std::vector< std::span< const double > > original_data, const auto &) const
Create comprehensive analysis result.
std::vector< std::string > get_available_methods() const override
Get available analysis methods.
StatisticalAnalyzer(uint32_t window_size=512, uint32_t hop_size=256)
Construct StatisticalAnalyzer with configurable window parameters.
StatisticalLevel classify_statistical_level(double value) const
Classify statistical value qualitatively.
static std::string method_to_string(StatisticalMethod method)
Convert statistical method enum to string.
std::vector< double > compute_statistical_values(std::span< const double > data, StatisticalMethod method) const
Compute statistical values using span (zero-copy processing)
static std::string statistical_level_to_string(StatisticalLevel level)
Convert statistical level enum to string.
AnalysisType get_analysis_type() const override
Get analysis type category.
uint32_t get_window_size() const
Get window size.
void set_window_size(uint32_t size)
Set window size for windowed analysis.
void set_method(StatisticalMethod method)
Set statistical analysis method.
output_type create_pipeline_output(const input_type &input, const StatisticalAnalysis &analysis_result, DataStructureInfo &info)
Create pipeline output for operation chaining.
uint32_t get_hop_size() const
Get hop size.
output_type analyze_implementation(const input_type &input) override
Core analysis implementation - creates analysis result AND pipeline output.
size_t calculate_num_windows(size_t data_size) const
Calculate number of windows for given data size.
void set_method(const std::string &method_name)
Set method by string name.
void set_analysis_parameter(const std::string &name, std::any value) override
Handle analysis-specific parameters.
StatisticalAnalysis analyze_statistics(const InputType &data)
StatisticalAnalysis analyze_statistics(const input_type &data)
Type-safe statistical analysis method.
void set_hop_size(uint32_t size)
Set hop size for windowed analysis.
High-performance statistical analyzer with zero-copy processing.
Template-flexible analyzer base with instance-defined I/O types.
AnalysisType
Categories of analysis operations for discovery and organization.
@ RMS
Root Mean Square energy.
StatisticalLevel
Qualitative classification of statistical values.
StatisticalMethod
Supported statistical computation methods.
@ PERCENTILE
Arbitrary percentile (requires parameter)
@ KURTOSIS
Fourth moment - tail heaviness.
@ ZSCORE
Z-score normalization.
@ ENTROPY
Shannon entropy for discrete data.
@ MAD
Median Absolute Deviation.
@ CV
Coefficient of Variation (std_dev/mean)
@ STD_DEV
Standard deviation.
@ MODE
Most frequent value.
@ VARIANCE
Population or sample variance.
@ SKEWNESS
Third moment - asymmetry measure.
MAYAFLUX_API double extract_scalar_statistics(const StatisticalAnalysis &analysis, const std::string &qualifier)
Extract a named scalar from a StatisticalAnalysis result.
std::map< std::string, std::any > method_specific_data
std::vector< std::pair< size_t, size_t > > window_positions
std::vector< StatisticalLevel > stat_classifications
std::vector< double > percentiles
std::vector< double > statistical_values
Statistical results for a single data channel.
Metadata about data structure for reconstruction.
T data
The actual computation data.
std::unordered_map< std::string, std::any > metadata
Associated metadata.
Input/Output container for computation pipeline data flow with structure preservation.
StatisticalMethod method_used
std::vector< ChannelStatistics > channel_statistics
Analysis result structure for statistical analysis.