MayaFlux 0.1.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
FeatureExtractor.hpp
Go to the documentation of this file.
1#pragma once
2
6
8
9/**
10 * @file FeatureExtractor.hpp
11 * @brief Concrete feature extractor using analyzer-guided extraction
12 *
13 * FeatureExtractor demonstrates the modern extraction paradigm by using
14 * analyzers (EnergyAnalyzer, StatisticalAnalyzer) to identify regions of interest,
15 * then extracting the actual data from those regions. Uses enum-based configuration
16 * instead of string parsing for type safety and performance.
17 *
18 * Example Usage:
19 * ```cpp
20 * // Extract high-energy audio data
21 * auto extractor = std::make_shared<StandardFeatureExtractor>();
22 * extractor->set_extraction_method(ExtractionMethod::HIGH_ENERGY_DATA);
23 * extractor->set_parameter("energy_threshold", 0.2);
24 *
25 * auto high_energy_audio = extractor->extract_data(audio_data);
26 *
27 * // Extract data from statistical outlier regions
28 * extractor->set_extraction_method(ExtractionMethod::OUTLIER_DATA);
29 * extractor->set_parameter("std_dev_threshold", 2.5);
30 *
31 * auto outlier_audio = extractor->extract_data(audio_data);
32 * ```
33 */
34
35namespace MayaFlux::Yantra {
36
/**
 * @enum ExtractionMethod
 * @brief Extraction strategies that FeatureExtractor can be configured with
 *
 * The underlying type is uint8_t. Enumerators carry no explicit values, so
 * they are numbered in declaration order starting from zero.
 */
enum class ExtractionMethod : uint8_t {
    /// Extract data from high-energy regions
    HIGH_ENERGY_DATA,
    /// Extract data around detected peaks
    PEAK_DATA,
    /// Extract data from statistical outlier regions
    OUTLIER_DATA,
    /// Extract data from high spectral energy regions
    HIGH_SPECTRAL_DATA,
    /// Extract data above statistical mean
    ABOVE_MEAN_DATA,
    /// Extract overlapping windowed data
    OVERLAPPING_WINDOWS,
    /// Extract actual data at zero crossing points
    ZERO_CROSSING_DATA,
    /// Extract actual silent regions
    SILENCE_DATA,
    /// Extract actual onset/transient regions
    ONSET_DATA
};
52
53/**
54 * @class FeatureExtractor
55 * @brief Analyzer-guided feature extractor with enum-based configuration
56 *
57 * Uses analyzers to identify regions of interest based on energy, statistical properties,
58 * or spectral characteristics, then extracts the actual data from those regions.
59 * All extraction logic is delegated to ExtractionHelper functions.
60 */
61template <ComputeData InputType = std::vector<Kakshya::DataVariant>, ComputeData OutputType = std::vector<std::vector<double>>>
62class MAYAFLUX_API FeatureExtractor : public UniversalExtractor<InputType, OutputType> {
63public:
67
68 /**
69 * @brief Construct FeatureExtractor with default parameters
70 * @param window_size Analysis window size (default: 512)
71 * @param hop_size Hop size between windows (default: 256)
72 * @param method Initial extraction method (default: HIGH_ENERGY_DATA)
73 */
74 explicit FeatureExtractor(uint32_t window_size = 512,
75 uint32_t hop_size = 256,
76 ExtractionMethod method = ExtractionMethod::HIGH_ENERGY_DATA)
77 : m_window_size(window_size)
78 , m_hop_size(hop_size)
79 , m_method(method)
80 {
81 validate_parameters();
82 }
83
84 /**
85 * @brief Get extraction type category
86 * @return ExtractionType::FEATURE_GUIDED
87 */
88 [[nodiscard]] ExtractionType get_extraction_type() const override
89 {
90 return ExtractionType::FEATURE_GUIDED;
91 }
92
93 /**
94 * @brief Get available extraction methods
95 * @return Vector of supported method names
96 */
97 [[nodiscard]] std::vector<std::string> get_available_methods() const override
98 {
99 return Utils::get_enum_names_lowercase<ExtractionMethod>();
100 }
101
102 /**
103 * @brief Set extraction method using enum
104 * @param method ExtractionMethod enum value
 *
 * Stores the value in m_method and also forwards it to set_parameter()
 * under the key "method", converted with method_to_string().
 *
 * NOTE(review): the declaration line for this body is absent from this
 * listing; the member index at the end of the page gives it as
 * `void set_extraction_method(ExtractionMethod method)`.
105 */
107 {
108 m_method = method;
109 this->set_parameter("method", method_to_string(method));
110 }
111
112 /**
113 * @brief Set extraction method using string (case-insensitive)
114 * @param method_name String representation of method
115 */
116 void set_extraction_method(const std::string& method_name)
117 {
118 m_method = string_to_method(method_name);
119 this->set_parameter("method", method_name);
120 }
121
122 /**
123 * @brief Get current extraction method
124 * @return ExtractionMethod enum value
 *
 * Returns the m_method member as-is.
 *
 * NOTE(review): the declaration line for this body is absent from this
 * listing; the member index at the end of the page gives it as
 * `ExtractionMethod get_extraction_method() const`.
125 */
127 {
128 return m_method;
129 }
130
131 /**
132 * @brief Set window size for analysis
133 * @param size Window size in samples
134 */
135 void set_window_size(uint32_t size)
136 {
137 m_window_size = size;
138 validate_parameters();
139 }
140
141 /**
142 * @brief Set hop size for analysis
143 * @param size Hop size in samples
144 */
145 void set_hop_size(uint32_t size)
146 {
147 m_hop_size = size;
148 validate_parameters();
149 }
150
151 /**
152 * @brief Get current window size
153 * @return Window size in samples
154 */
155 [[nodiscard]] uint32_t get_window_size() const { return m_window_size; }
156
157 /**
158 * @brief Get current hop size
159 * @return Hop size in samples
160 */
161 [[nodiscard]] uint32_t get_hop_size() const { return m_hop_size; }
162
163 /**
164 * @brief Convert extraction method enum to string
165 * @param method ExtractionMethod value
166 * @return Lowercase string representation
167 */
168 static std::string method_to_string(ExtractionMethod method)
169 {
170 return Utils::enum_to_lowercase_string(method);
171 }
172
173 /**
174 * @brief Convert string to extraction method enum
175 * @param str String representation (case-insensitive)
176 * @return ExtractionMethod value
177 */
178 static ExtractionMethod string_to_method(const std::string& str)
179 {
180 return Utils::string_to_enum_or_throw_case_insensitive<ExtractionMethod>(str, "ExtractionMethod");
181 }
182
183 /**
184 * @brief Input validation
185 */
186 bool validate_extraction_input(const input_type& input) const override
187 {
188 try {
189 if constexpr (RequiresContainer<input_type>) {
190 if (!input.has_container())
191 return false;
192 }
193 auto numeric_data = OperationHelper::extract_numeric_data(input.data);
194 if (numeric_data.empty())
195 return false;
196 for (const auto& span : numeric_data) {
197 return validate_extraction_parameters(m_window_size, m_hop_size, span.size());
198 }
199 return true;
200 } catch (const std::exception& e) {
201 std::cerr << e.what() << "\n";
202 return false;
203 }
204 }
205
206 /**
207 * @brief Get extractor name
208 * @return "FeatureExtractor"
209 */
210 [[nodiscard]] std::string get_extractor_name() const override
211 {
212 return "FeatureExtractor";
213 }
214
215protected:
216 /**
217 * @brief Core extraction implementation - delegates to ExtractionHelper
218 * @param input Input data with metadata
219 * @return Extracted data with metadata
 *
 * Steps performed by the body:
 *  1. Obtain numeric data and structure info via
 *     OperationHelper::extract_structured_double().
 *  2. Wrap each returned span as std::span<const double>.
 *  3. Dispatch on m_method, reading each method's tunables with
 *     get_parameter_or_default() and calling the matching extract_* helper.
 *  4. Convert the result with convert_result() and attach metadata keys
 *     "extractor_type", "extraction_method", "window_size", "hop_size",
 *     "extracted_samples" and "original_samples".
 *
 * Any std::exception raised inside (including the std::invalid_argument for
 * an unknown method) is rethrown as std::runtime_error with the prefix
 * "FeatureExtractor failed: ".
 *
 * NOTE(review): the declaration line for this body is absent from this
 * listing; the member index at the end of the page gives it as
 * `output_type extract_implementation(const input_type &input) override`.
220 */
222 {
223 try {
// NOTE(review): constness of the input is cast away for this call - confirm
// that extract_structured_double() does not modify the caller's data.
224 auto [numeric_data, info] = OperationHelper::extract_structured_double(const_cast<input_type&>(input));
225 DataStructureInfo structure_info = info;
226
// Read-only views over the extracted spans, in the form the extract_* helpers take.
227 std::vector<std::span<const double>> data_span;
228 data_span.reserve(numeric_data.size());
229
230 for (auto& span : numeric_data) {
231 data_span.emplace_back(span.data(), span.size());
232 }
233
234 std::vector<std::vector<double>> extracted_data;
235
// Each case reads its own parameters, falling back to the literal defaults shown.
236 switch (m_method) {
237 case ExtractionMethod::HIGH_ENERGY_DATA: {
238 double energy_threshold = this->template get_parameter_or_default<double>("energy_threshold", 0.1);
239 extracted_data = extract_high_energy_data(data_span, energy_threshold, m_window_size, m_hop_size);
240 break;
241 }
242 case ExtractionMethod::PEAK_DATA: {
243 double threshold = this->template get_parameter_or_default<double>("threshold", 0.1);
244 double min_distance = this->template get_parameter_or_default<double>("min_distance", 10.0);
245 uint32_t region_size = this->template get_parameter_or_default<uint32_t>("region_size", 256);
246 extracted_data = extract_peak_data(data_span, threshold, min_distance, region_size);
247 break;
248 }
249 case ExtractionMethod::OUTLIER_DATA: {
250 double std_dev_threshold = this->template get_parameter_or_default<double>("std_dev_threshold", 2.0);
251 extracted_data = extract_outlier_data(data_span, std_dev_threshold, m_window_size, m_hop_size);
252 break;
253 }
254 case ExtractionMethod::HIGH_SPECTRAL_DATA: {
255 double spectral_threshold = this->template get_parameter_or_default<double>("spectral_threshold", 0.1);
256 extracted_data = extract_high_spectral_data(data_span, spectral_threshold, m_window_size, m_hop_size);
257 break;
258 }
259 case ExtractionMethod::ABOVE_MEAN_DATA: {
260 double mean_multiplier = this->template get_parameter_or_default<double>("mean_multiplier", 1.5);
261 extracted_data = extract_above_mean_data(data_span, mean_multiplier, m_window_size, m_hop_size);
262 break;
263 }
264 case ExtractionMethod::OVERLAPPING_WINDOWS: {
// This method uses the "overlap" parameter; m_hop_size is not passed.
265 double overlap = this->template get_parameter_or_default<double>("overlap", 0.5);
266 extracted_data = extract_overlapping_windows(data_span, m_window_size, overlap);
267 break;
268 }
269 case ExtractionMethod::ZERO_CROSSING_DATA: {
// Shares the "threshold", "min_distance" and "region_size" keys with PEAK_DATA
// but with different fallback values.
270 double threshold = this->template get_parameter_or_default<double>("threshold", 0.0);
271 double min_distance = this->template get_parameter_or_default<double>("min_distance", 1.0);
272 uint32_t region_size = this->template get_parameter_or_default<uint32_t>("region_size", 1);
273 extracted_data = extract_zero_crossing_data(data_span, threshold, min_distance, region_size);
274 break;
275 }
276 case ExtractionMethod::SILENCE_DATA: {
277 double silence_threshold = this->template get_parameter_or_default<double>("silence_threshold", 0.01);
278 uint32_t min_duration = this->template get_parameter_or_default<uint32_t>("min_duration", 1024);
279 extracted_data = extract_silence_data(data_span, silence_threshold, min_duration, m_window_size, m_hop_size);
280 break;
281 }
282 case ExtractionMethod::ONSET_DATA: {
// The window passed here comes from "fft_window_size", not from m_window_size.
283 double threshold = this->template get_parameter_or_default<double>("threshold", 0.3);
284 uint32_t region_size = this->template get_parameter_or_default<uint32_t>("region_size", 512);
285 uint32_t fft_window = this->template get_parameter_or_default<uint32_t>("fft_window_size", 1024);
286 extracted_data = extract_onset_data(data_span, threshold, region_size, fft_window, m_hop_size);
287 break;
288 }
289 default:
290 throw std::invalid_argument("Unknown extraction method");
291 }
292
293 output_type output = this->convert_result(extracted_data, structure_info);
294
295 output.template set_metadata<std::string>("extractor_type", "FeatureExtractor");
296 output.template set_metadata<std::string>("extraction_method", method_to_string(m_method));
297 output.template set_metadata<uint32_t>("window_size", static_cast<uint32_t>(m_window_size));
298 output.template set_metadata<uint32_t>("hop_size", static_cast<uint32_t>(m_hop_size));
// NOTE(review): "extracted_samples" records the number of outer vectors in
// extracted_data and "original_samples" the number of input spans, not
// per-sample counts - confirm these are the intended quantities.
299 output.template set_metadata<size_t>("extracted_samples", extracted_data.size());
300 output.template set_metadata<size_t>("original_samples", data_span.size());
301
302 return output;
303
304 } catch (const std::exception& e) {
// Re-wrap so callers see a single exception type with an identifying prefix.
305 throw std::runtime_error(std::string("FeatureExtractor failed: ") + e.what());
306 }
307 }
308
309 /**
310 * @brief Handle extractor-specific parameters
311 */
312 void set_extraction_parameter(const std::string& name, std::any value) override
313 {
314 if (name == "method") {
315 if (auto* method_str = std::any_cast<std::string>(&value)) {
316 m_method = string_to_method(*method_str);
317 return;
318 }
319 if (auto* method_enum = std::any_cast<ExtractionMethod>(&value)) {
320 m_method = *method_enum;
321 return;
322 }
323 throw std::invalid_argument("Method parameter must be string or ExtractionMethod enum");
324 }
325 if (name == "window_size") {
326 if (auto* size = std::any_cast<uint32_t>(&value)) {
327 m_window_size = *size;
328 validate_parameters();
329 return;
330 }
331 }
332 if (name == "hop_size") {
333 if (auto* size = std::any_cast<uint32_t>(&value)) {
334 m_hop_size = *size;
335 validate_parameters();
336 return;
337 }
338 }
339
340 base_type::set_extraction_parameter(name, std::move(value));
341 }
342
343 [[nodiscard]] std::any get_extraction_parameter(const std::string& name) const override
344 {
345 if (name == "method") {
346 return method_to_string(m_method);
347 }
348 if (name == "window_size") {
349 return m_window_size;
350 }
351 if (name == "hop_size") {
352 return m_hop_size;
353 }
354
355 return base_type::get_extraction_parameter(name);
356 }
357
358private:
360 uint32_t m_hop_size;
362
363 /**
364 * @brief Validate extraction parameters
 *
 * Checks, in order: window size is non-zero, hop size is non-zero, and hop
 * size does not exceed window size.
 *
 * @throws std::invalid_argument for the first check that fails
 *
 * NOTE(review): the declaration line for this body is absent from this
 * listing; the member index at the end of the page gives it as
 * `void validate_parameters() const`.
365 */
367 {
368 if (m_window_size == 0) {
369 throw std::invalid_argument("Window size must be greater than 0");
370 }
371 if (m_hop_size == 0) {
372 throw std::invalid_argument("Hop size must be greater than 0");
373 }
// A hop larger than the window is rejected outright, not merely warned about.
374 if (m_hop_size > m_window_size) {
375 throw std::invalid_argument("Hop size should not exceed window size for optimal coverage");
376 }
377 }
378};
379
380/// Standard feature extractor: vector of [DataVariant -> vector<double>]
382
383/// Eigen Matrix feature extractor: DataVariant -> MatrixXd
385
386/// Container feature extractor: SignalContainer -> multi vector<double>
388
389/// Region feature extractor: Region -> multi vector<double>
391
392/// Variant feature extractor: DataVariant -> DataVariant
394
395} // namespace MayaFlux::Yantra
Central helper for extraction operations - uses analyzers to find data.
Modern, digital-first universal extractor framework for MayaFlux.
std::vector< std::string > get_available_methods() const override
Get available extraction methods.
void set_extraction_method(ExtractionMethod method)
Set extraction method using enum.
void set_extraction_parameter(const std::string &name, std::any value) override
Handle extractor-specific parameters.
output_type extract_implementation(const input_type &input) override
Core extraction implementation - delegates to ExtractionHelper.
bool validate_extraction_input(const input_type &input) const override
Input validation.
void set_window_size(uint32_t size)
Set window size for analysis.
uint32_t get_window_size() const
Get current window size.
void set_hop_size(uint32_t size)
Set hop size for analysis.
FeatureExtractor(uint32_t window_size=512, uint32_t hop_size=256, ExtractionMethod method=ExtractionMethod::HIGH_ENERGY_DATA)
Construct FeatureExtractor with default parameters.
std::any get_extraction_parameter(const std::string &name) const override
uint32_t get_hop_size() const
Get current hop size.
std::string get_extractor_name() const override
Get extractor name.
ExtractionMethod get_extraction_method() const
Get current extraction method.
void validate_parameters() const
Validate extraction parameters.
static std::string method_to_string(ExtractionMethod method)
Convert extraction method enum to string.
static ExtractionMethod string_to_method(const std::string &str)
Convert string to extraction method enum.
void set_extraction_method(const std::string &method_name)
Set extraction method using string (case-insensitive)
ExtractionType get_extraction_type() const override
Get extraction type category.
Analyzer-guided feature extractor with enum-based configuration.
Template-flexible extractor base with instance-defined I/O types.
std::vector< std::vector< double > > extract_overlapping_windows(const std::vector< std::span< const double > > &data, uint32_t window_size, double overlap)
Extract overlapping windows of actual data.
std::vector< std::vector< double > > extract_outlier_data(const std::vector< std::span< const double > > &data, double std_dev_threshold, uint32_t window_size, uint32_t hop_size)
Extract data from statistical outlier regions.
ExtractionType
Categories of extraction operations for discovery and organization.
std::vector< std::vector< double > > extract_silence_data(const std::vector< std::span< const double > > &data, double silence_threshold, uint32_t min_duration, uint32_t window_size, uint32_t hop_size)
Extract data from silent regions using existing EnergyAnalyzer.
bool validate_extraction_parameters(uint32_t window_size, uint32_t hop_size, size_t data_size)
Validate extraction parameters.
std::vector< std::vector< double > > extract_high_spectral_data(const std::vector< std::span< const double > > &data, double spectral_threshold, uint32_t window_size, uint32_t hop_size)
Extract data from regions with high spectral energy.
std::vector< std::vector< double > > extract_onset_data(const std::vector< std::span< const double > > &data, double threshold, uint32_t region_size, uint32_t window_size, uint32_t hop_size)
Extract data at onset/transient positions using spectral flux.
std::vector< std::vector< double > > extract_above_mean_data(const std::vector< std::span< const double > > &data, double mean_multiplier, uint32_t window_size, uint32_t hop_size)
Extract data from regions with values above statistical mean.
ExtractionMethod
Supported extraction methods for FeatureExtractor.
@ ABOVE_MEAN_DATA
Extract data above statistical mean.
@ SILENCE_DATA
Extract actual silent regions.
@ PEAK_DATA
Extract data around detected peaks.
@ OUTLIER_DATA
Extract data from statistical outlier regions.
@ ZERO_CROSSING_DATA
Extract actual data at zero crossing points.
@ HIGH_ENERGY_DATA
Extract data from high-energy regions.
@ HIGH_SPECTRAL_DATA
Extract data from high spectral energy regions.
@ OVERLAPPING_WINDOWS
Extract overlapping windowed data.
@ ONSET_DATA
Extract actual onset/transient regions.
std::vector< std::vector< double > > extract_peak_data(const std::vector< std::span< const double > > &data, double threshold, double min_distance, uint32_t region_size)
Extract data from peak regions using peak detection.
std::vector< std::vector< double > > extract_high_energy_data(const std::vector< std::span< const double > > &data, double energy_threshold, uint32_t window_size, uint32_t hop_size)
Extract data from high-energy regions using EnergyAnalyzer.
std::vector< std::vector< double > > extract_zero_crossing_data(const std::vector< std::span< const double > > &data, double threshold, double min_distance, uint32_t region_size)
Extract data at zero crossing points using existing EnergyAnalyzer.
Metadata about data structure for reconstruction.
bool has_container() const
Check if a container reference is associated.
Definition DataIO.hpp:155
T data
The actual computation data.
Definition DataIO.hpp:25
Input/Output container for computation pipeline data flow with structure preservation.
Definition DataIO.hpp:24