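Extract data from statistical outlier regions: for each channel, collect the samples of every analysis window whose statistic differs from the mean statistic by more than std_dev_threshold times the statistic's standard deviation, merging overlapping windows; a channel that is empty, fails validation, or has no usable statistics yields an empty result.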
187{
188 std::vector<std::vector<double>> result;
189 result.reserve(data.size());
190
191 for (const auto& channel : data) {
192 if (channel.empty()) {
193 result.emplace_back();
194 continue;
195 }
196
197 uint32_t effective_window_size = std::min(window_size, static_cast<uint32_t>(channel.size()));
198 uint32_t effective_hop_size = std::min(hop_size, effective_window_size / 2);
199 if (effective_hop_size == 0)
200 effective_hop_size = 1;
201
202 if (!validate_extraction_parameters(effective_window_size, effective_hop_size, channel.size())) {
203 result.emplace_back();
204 continue;
205 }
206
207 try {
208 auto stat_analyzer = std::make_shared<StatisticalAnalyzer<std::vector<Kakshya::DataVariant>, Eigen::VectorXd>>(
209 effective_window_size, effective_hop_size);
210 stat_analyzer->set_parameter("method", "mean");
211
212 std::vector<Kakshya::DataVariant> data_variant { Kakshya::DataVariant { std::vector<double>(channel.begin(), channel.end()) } };
213 ChannelStatistics stat_result = stat_analyzer->analyze_statistics(data_variant).channel_statistics[0];
214
215 if (stat_result.statistical_values.empty() || stat_result.window_positions.empty() || stat_result.stat_std_dev <= 0.0) {
216 result.emplace_back();
217 continue;
218 }
219
220 const double global_mean = stat_result.mean_stat;
221 const double global_std_dev = stat_result.stat_std_dev;
222 const double outlier_threshold = std_dev_threshold * global_std_dev;
223
224 std::vector<std::pair<size_t, size_t>> qualifying_windows;
225 for (size_t i = 0; i < stat_result.statistical_values.size(); ++i) {
226 if (std::abs(stat_result.statistical_values[i] - global_mean) > outlier_threshold) {
227 auto [start_idx, end_idx] = stat_result.window_positions[i];
228 if (start_idx < channel.size() && end_idx <= channel.size() && start_idx < end_idx) {
229 qualifying_windows.emplace_back(start_idx, end_idx);
230 }
231 }
232 }
233
234 auto merged_windows = merge_overlapping_windows(qualifying_windows);
235
236 std::vector<double> extracted_data;
237 for (const auto& [start_idx, end_idx] : merged_windows) {
238 std::ranges::copy(channel.subspan(start_idx, end_idx - start_idx),
239 std::back_inserter(extracted_data));
240 }
241
242 result.push_back(std::move(extracted_data));
243 } catch (const std::exception&) {
244 result.emplace_back();
245 }
246 }
247
248 return result;
249}