MayaFlux 0.1.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches

◆ extract_outlier_data()

std::vector< std::vector< double > > MayaFlux::Yantra::extract_outlier_data ( const std::vector< std::span< const double > > &  data,
double  std_dev_threshold = 2.0,
uint32_t  window_size = 512,
uint32_t  hop_size = 256 
)

Extract data from statistical outlier regions.

Parameters
data: Input data spans, one per channel
std_dev_threshold: Number of standard deviations for outlier detection
window_size: Analysis window size
hop_size: Hop size between windows
Returns
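Vector with one entry per input channel, each containing the actual data from that channel's outlier regions (empty when the channel yields no outlier data)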

Definition at line 182 of file ExtractionHelper.cpp.

187{
188 std::vector<std::vector<double>> result;
189 result.reserve(data.size());
190
191 for (const auto& channel : data) {
192 if (channel.empty()) {
193 result.emplace_back();
194 continue;
195 }
196
197 uint32_t effective_window_size = std::min(window_size, static_cast<uint32_t>(channel.size()));
198 uint32_t effective_hop_size = std::min(hop_size, effective_window_size / 2);
199 if (effective_hop_size == 0)
200 effective_hop_size = 1;
201
202 if (!validate_extraction_parameters(effective_window_size, effective_hop_size, channel.size())) {
203 result.emplace_back();
204 continue;
205 }
206
207 try {
208 auto stat_analyzer = std::make_shared<StatisticalAnalyzer<std::vector<Kakshya::DataVariant>, Eigen::VectorXd>>(
209 effective_window_size, effective_hop_size);
210 stat_analyzer->set_parameter("method", "mean");
211
212 std::vector<Kakshya::DataVariant> data_variant { Kakshya::DataVariant { std::vector<double>(channel.begin(), channel.end()) } };
213 ChannelStatistics stat_result = stat_analyzer->analyze_statistics(data_variant).channel_statistics[0];
214
215 if (stat_result.statistical_values.empty() || stat_result.window_positions.empty() || stat_result.stat_std_dev <= 0.0) {
216 result.emplace_back();
217 continue;
218 }
219
220 const double global_mean = stat_result.mean_stat;
221 const double global_std_dev = stat_result.stat_std_dev;
222 const double outlier_threshold = std_dev_threshold * global_std_dev;
223
224 std::vector<std::pair<size_t, size_t>> qualifying_windows;
225 for (size_t i = 0; i < stat_result.statistical_values.size(); ++i) {
226 if (std::abs(stat_result.statistical_values[i] - global_mean) > outlier_threshold) {
227 auto [start_idx, end_idx] = stat_result.window_positions[i];
228 if (start_idx < channel.size() && end_idx <= channel.size() && start_idx < end_idx) {
229 qualifying_windows.emplace_back(start_idx, end_idx);
230 }
231 }
232 }
233
234 auto merged_windows = merge_overlapping_windows(qualifying_windows);
235
236 std::vector<double> extracted_data;
237 for (const auto& [start_idx, end_idx] : merged_windows) {
238 std::ranges::copy(channel.subspan(start_idx, end_idx - start_idx),
239 std::back_inserter(extracted_data));
240 }
241
242 result.push_back(std::move(extracted_data));
243 } catch (const std::exception&) {
244 result.emplace_back();
245 }
246 }
247
248 return result;
249}

References MayaFlux::Yantra::ChannelStatistics::mean_stat, MayaFlux::Yantra::ChannelStatistics::stat_std_dev, MayaFlux::Yantra::ChannelStatistics::statistical_values, validate_extraction_parameters(), and MayaFlux::Yantra::ChannelStatistics::window_positions.

Referenced by MayaFlux::Yantra::FeatureExtractor< InputType, OutputType >::extract_implementation().

+ Here is the call graph for this function:
+ Here is the caller graph for this function: