MayaFlux 0.3.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
FeatureExtractor.hpp
Go to the documentation of this file.
1#pragma once
2
7
9
10/**
11 * @file FeatureExtractor.hpp
12 * @brief Concrete feature extractor using analyzer-guided extraction
13 *
14 * FeatureExtractor demonstrates the modern extraction paradigm by using
15 * analyzers (EnergyAnalyzer, StatisticalAnalyzer) to identify regions of interest,
16 * then extracting the actual data from those regions. Uses enum-based configuration
17 * instead of string parsing for type safety and performance.
18 *
19 * Example Usage:
20 * ```cpp
21 * // Extract high-energy audio data
22 * auto extractor = std::make_shared<StandardFeatureExtractor>();
23 * extractor->set_extraction_method(ExtractionMethod::HIGH_ENERGY_DATA);
24 * extractor->set_parameter("energy_threshold", 0.2);
25 *
26 * auto high_energy_audio = extractor->extract_data(audio_data);
27 *
28 * // Extract data from statistical outlier regions
29 * extractor->set_extraction_method(ExtractionMethod::OUTLIER_DATA);
30 * extractor->set_parameter("std_dev_threshold", 2.5);
31 *
32 * auto outlier_audio = extractor->extract_data(audio_data);
33 * ```
34 */
35
36namespace MayaFlux::Yantra {
37
/**
 * @enum ExtractionMethod
 * @brief Strategies FeatureExtractor can use to decide which data to extract
 *
 * Stored in a single byte (underlying type uint8_t); enumerators keep their
 * implicit, declaration-order values.
 */
enum class ExtractionMethod : uint8_t {
    /// Extract data from high-energy regions
    HIGH_ENERGY_DATA,
    /// Extract data around detected peaks
    PEAK_DATA,
    /// Extract data from statistical outlier regions
    OUTLIER_DATA,
    /// Extract data from high spectral energy regions
    HIGH_SPECTRAL_DATA,
    /// Extract data above statistical mean
    ABOVE_MEAN_DATA,
    /// Extract overlapping windowed data
    OVERLAPPING_WINDOWS,
    /// Extract actual data at zero crossing points
    ZERO_CROSSING_DATA,
    /// Extract actual silent regions
    SILENCE_DATA,
    /// Extract actual onset/transient regions
    ONSET_DATA
};
53
54/**
55 * @class FeatureExtractor
56 * @brief Analyzer-guided feature extractor with enum-based configuration
57 *
58 * Uses analyzers to identify regions of interest based on energy, statistical properties,
59 * or spectral characteristics, then extracts the actual data from those regions.
60 * All extraction logic is delegated to ExtractionHelper functions.
61 */
62template <ComputeData InputType = std::vector<Kakshya::DataVariant>, ComputeData OutputType = std::vector<std::vector<double>>>
63class MAYAFLUX_API FeatureExtractor : public UniversalExtractor<InputType, OutputType> {
64public:
68
69 /**
70 * @brief Construct FeatureExtractor with default parameters
71 * @param window_size Analysis window size (default: 512)
72 * @param hop_size Hop size between windows (default: 256)
73 * @param method Initial extraction method (default: HIGH_ENERGY_DATA)
74 */
75 explicit FeatureExtractor(uint32_t window_size = 512,
76 uint32_t hop_size = 256,
77 ExtractionMethod method = ExtractionMethod::HIGH_ENERGY_DATA)
78 : m_window_size(window_size)
79 , m_hop_size(hop_size)
80 , m_method(method)
81 {
82 validate_parameters();
83 }
84
85 /**
86 * @brief Get extraction type category
87 * @return ExtractionType::FEATURE_GUIDED
88 */
89 [[nodiscard]] ExtractionType get_extraction_type() const override
90 {
91 return ExtractionType::FEATURE_GUIDED;
92 }
93
94 /**
95 * @brief Get available extraction methods
96 * @return Vector of supported method names
97 */
98 [[nodiscard]] std::vector<std::string> get_available_methods() const override
99 {
100 return Reflect::get_enum_names_lowercase<ExtractionMethod>();
101 }
102
103 /**
104 * @brief Set extraction method using enum
105 * @param method ExtractionMethod enum value
106 */
108 {
109 m_method = method;
110 this->set_parameter("method", method_to_string(method));
111 }
112
113 /**
114 * @brief Set extraction method using string (case-insensitive)
115 * @param method_name String representation of method
116 */
117 void set_extraction_method(const std::string& method_name)
118 {
119 m_method = string_to_method(method_name);
120 this->set_parameter("method", method_name);
121 }
122
123 /**
124 * @brief Get current extraction method
125 * @return ExtractionMethod enum value
126 */
128 {
129 return m_method;
130 }
131
132 /**
133 * @brief Set window size for analysis
134 * @param size Window size in samples
135 */
136 void set_window_size(uint32_t size)
137 {
138 m_window_size = size;
139 validate_parameters();
140 }
141
142 /**
143 * @brief Set hop size for analysis
144 * @param size Hop size in samples
145 */
146 void set_hop_size(uint32_t size)
147 {
148 m_hop_size = size;
149 validate_parameters();
150 }
151
152 /**
153 * @brief Get current window size
154 * @return Window size in samples
155 */
156 [[nodiscard]] uint32_t get_window_size() const { return m_window_size; }
157
158 /**
159 * @brief Get current hop size
160 * @return Hop size in samples
161 */
162 [[nodiscard]] uint32_t get_hop_size() const { return m_hop_size; }
163
164 /**
165 * @brief Convert extraction method enum to string
166 * @param method ExtractionMethod value
167 * @return Lowercase string representation
168 */
169 static std::string method_to_string(ExtractionMethod method)
170 {
171 return Reflect::enum_to_lowercase_string(method);
172 }
173
174 /**
175 * @brief Convert string to extraction method enum
176 * @param str String representation (case-insensitive)
177 * @return ExtractionMethod value
178 */
179 static ExtractionMethod string_to_method(const std::string& str)
180 {
181 return Reflect::string_to_enum_or_throw_case_insensitive<ExtractionMethod>(str, "ExtractionMethod");
182 }
183
184 /**
185 * @brief Input validation
186 */
187 bool validate_extraction_input(const input_type& input) const override
188 {
189 try {
190 if constexpr (RequiresContainer<input_type>) {
191 if (!input.has_container())
192 return false;
193 }
194 auto numeric_data = OperationHelper::extract_numeric_data(input.data);
195 if (numeric_data.empty())
196 return false;
197 for (const auto& span : numeric_data) {
198 return Kinesis::Discrete::validate_window_parameters(m_window_size, m_hop_size, span.size());
199 }
200 return true;
201 } catch (const std::exception& e) {
202 MF_ERROR(Journal::Component::Yantra, Journal::Context::ComputeMatrix, "Input validation failed: {}", e.what());
203 return false;
204 }
205 }
206
207 /**
208 * @brief Get extractor name
209 * @return "FeatureExtractor"
210 */
211 [[nodiscard]] std::string get_extractor_name() const override
212 {
213 return "FeatureExtractor";
214 }
215
216protected:
217 /**
218 * @brief Core extraction implementation - delegates to ExtractionHelper
219 * @param input Input data with metadata
220 * @return Extracted data with metadata
221 */
223 {
224 try {
225 auto [numeric_data, info] = OperationHelper::extract_structured_double(const_cast<input_type&>(input));
226 DataStructureInfo structure_info = info;
227
228 std::vector<std::span<const double>> channels;
229 channels.reserve(numeric_data.size());
230 for (auto& s : numeric_data)
231 channels.emplace_back(s.data(), s.size());
232
233 std::vector<std::vector<double>> extracted_data;
234
235 switch (m_method) {
236
237 case ExtractionMethod::HIGH_ENERGY_DATA:
238 extracted_data = extract_high_energy(channels,
239 this->template get_parameter_or_default<double>("energy_threshold", 0.1),
240 m_window_size, m_hop_size);
241 break;
242
243 case ExtractionMethod::PEAK_DATA:
244 extracted_data = extract_peaks(channels,
245 this->template get_parameter_or_default<double>("threshold", 0.1),
246 this->template get_parameter_or_default<double>("min_distance", 10.0),
247 this->template get_parameter_or_default<uint32_t>("region_size", 256));
248 break;
249
250 case ExtractionMethod::OUTLIER_DATA:
251 extracted_data = extract_outliers(channels,
252 this->template get_parameter_or_default<double>("std_dev_threshold", 2.0),
253 m_window_size, m_hop_size);
254 break;
255
256 case ExtractionMethod::HIGH_SPECTRAL_DATA:
257 extracted_data = extract_high_spectral(channels,
258 this->template get_parameter_or_default<double>("spectral_threshold", 0.1),
259 m_window_size, m_hop_size);
260 break;
261
262 case ExtractionMethod::ABOVE_MEAN_DATA:
263 extracted_data = extract_above_mean(channels,
264 this->template get_parameter_or_default<double>("mean_multiplier", 1.5),
265 m_window_size, m_hop_size);
266 break;
267
268 case ExtractionMethod::OVERLAPPING_WINDOWS:
269 extracted_data = extract_overlapping_windows(channels,
270 m_window_size,
271 this->template get_parameter_or_default<double>("overlap", 0.5));
272 break;
273
274 case ExtractionMethod::ZERO_CROSSING_DATA:
275 extracted_data = extract_zero_crossings(channels,
276 this->template get_parameter_or_default<double>("threshold", 0.0),
277 this->template get_parameter_or_default<double>("min_distance", 1.0),
278 this->template get_parameter_or_default<uint32_t>("region_size", 1));
279 break;
280
281 case ExtractionMethod::SILENCE_DATA:
282 extracted_data = extract_silence(channels,
283 this->template get_parameter_or_default<double>("silence_threshold", 0.01),
284 this->template get_parameter_or_default<uint32_t>("min_duration", 1024),
285 m_window_size, m_hop_size);
286 break;
287
288 case ExtractionMethod::ONSET_DATA:
289 extracted_data = extract_onsets(channels,
290 this->template get_parameter_or_default<double>("threshold", 0.3),
291 this->template get_parameter_or_default<uint32_t>("region_size", 512),
292 this->template get_parameter_or_default<uint32_t>("fft_window_size", 1024),
293 m_hop_size);
294 break;
295
296 default:
297 error<std::invalid_argument>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(), "Unknown extraction method");
298 }
299
300 output_type output = this->convert_result(extracted_data, structure_info);
301
302 output.template set_metadata<std::string>("extractor_type", "FeatureExtractor");
303 output.template set_metadata<std::string>("extraction_method", method_to_string(m_method));
304 output.template set_metadata<uint32_t>("window_size", static_cast<uint32_t>(m_window_size));
305 output.template set_metadata<uint32_t>("hop_size", static_cast<uint32_t>(m_hop_size));
306 output.template set_metadata<size_t>("extracted_samples", extracted_data.size());
307
308 return output;
309
310 } catch (const std::exception& e) {
311 MF_ERROR(Journal::Component::Yantra, Journal::Context::ComputeMatrix, "Feature extraction failed: {}", e.what());
312 return output_type {};
313 }
314 }
315
316 /**
317 * @brief Handle extractor-specific parameters
318 */
319 void set_extraction_parameter(const std::string& name, std::any value) override
320 {
321 if (name == "method") {
322 if (auto* method_str = std::any_cast<std::string>(&value)) {
323 m_method = string_to_method(*method_str);
324 return;
325 }
326 if (auto* method_enum = std::any_cast<ExtractionMethod>(&value)) {
327 m_method = *method_enum;
328 return;
329 }
330 error<std::invalid_argument>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(), "Method parameter must be string or ExtractionMethod enum");
331 }
332
333 if (name == "window_size") {
334 if (auto* size = std::any_cast<uint32_t>(&value)) {
335 m_window_size = *size;
336 validate_parameters();
337 return;
338 }
339 }
340 if (name == "hop_size") {
341 if (auto* size = std::any_cast<uint32_t>(&value)) {
342 m_hop_size = *size;
343 validate_parameters();
344 return;
345 }
346 }
347
348 base_type::set_extraction_parameter(name, std::move(value));
349 }
350
351 [[nodiscard]] std::any get_extraction_parameter(const std::string& name) const override
352 {
353 if (name == "method") {
354 return method_to_string(m_method);
355 }
356 if (name == "window_size") {
357 return m_window_size;
358 }
359 if (name == "hop_size") {
360 return m_hop_size;
361 }
362
363 return base_type::get_extraction_parameter(name);
364 }
365
366private:
368 uint32_t m_hop_size;
370
371 /**
372 * @brief Validate extraction parameters
373 */
375 {
376 if (m_window_size == 0) {
377 error<std::invalid_argument>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(), "Window size must be greater than 0");
378 }
379 if (m_hop_size == 0) {
380 error<std::invalid_argument>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(), "Hop size must be greater than 0");
381 }
382 if (m_hop_size > m_window_size) {
383 error<std::invalid_argument>(Journal::Component::Yantra, Journal::Context::ComputeMatrix, std::source_location::current(), "Hop size should not exceed window size for optimal coverage");
384 }
385 }
386};
387
388/// Standard feature extractor: vector of [DataVariant -> vector<double>]
390
/// Eigen Matrix feature extractor: DataVariant -> MatrixXd
393
394/// Container feature extractor: SignalContainer -> multi vector<double>
396
397/// Region feature extractor: Region -> multi vector<double>
399
400/// Variant feature extractor: DataVariant -> DataVariant
402
403} // namespace MayaFlux::Yantra
#define MF_ERROR(comp, ctx,...)
Discrete sequence extraction primitives for MayaFlux::Kinesis.
Named multichannel extraction functions for FeatureExtractor.
Modern, digital-first universal extractor framework for Maya Flux.
std::vector< std::string > get_available_methods() const override
Get available extraction methods.
void set_extraction_method(ExtractionMethod method)
Set extraction method using enum.
void set_extraction_parameter(const std::string &name, std::any value) override
Handle extractor-specific parameters.
output_type extract_implementation(const input_type &input) override
Core extraction implementation - delegates to ExtractionHelper.
bool validate_extraction_input(const input_type &input) const override
Input validation.
void set_window_size(uint32_t size)
Set window size for analysis.
uint32_t get_window_size() const
Get current window size.
void set_hop_size(uint32_t size)
Set hop size for analysis.
FeatureExtractor(uint32_t window_size=512, uint32_t hop_size=256, ExtractionMethod method=ExtractionMethod::HIGH_ENERGY_DATA)
Construct FeatureExtractor with default parameters.
std::any get_extraction_parameter(const std::string &name) const override
uint32_t get_hop_size() const
Get current hop size.
std::string get_extractor_name() const override
Get extractor name.
ExtractionMethod get_extraction_method() const
Get current extraction method.
void validate_parameters() const
Validate extraction parameters.
static std::string method_to_string(ExtractionMethod method)
Convert extraction method enum to string.
static ExtractionMethod string_to_method(const std::string &str)
Convert string to extraction method enum.
void set_extraction_method(const std::string &method_name)
Set extraction method using string (case-insensitive)
ExtractionType get_extraction_type() const override
Get extraction type category.
Analyzer-guided feature extractor with enum-based configuration.
Template-flexible extractor base with instance-defined I/O types.
ExtractionType
Categories of extraction operations for discovery and organization.
std::vector< std::vector< double > > extract_outliers(const std::vector< std::span< const double > > &channels, double std_dev_threshold, uint32_t window_size, uint32_t hop_size)
std::vector< std::vector< double > > extract_onsets(const std::vector< std::span< const double > > &channels, double threshold, uint32_t region_size, uint32_t fft_window_size, uint32_t hop_size)
std::vector< std::vector< double > > extract_above_mean(const std::vector< std::span< const double > > &channels, double mean_multiplier, uint32_t window_size, uint32_t hop_size)
std::vector< std::vector< double > > extract_high_energy(const std::vector< std::span< const double > > &channels, double energy_threshold, uint32_t window_size, uint32_t hop_size)
ExtractionMethod
Supported extraction methods for FeatureExtractor.
@ ABOVE_MEAN_DATA
Extract data above statistical mean.
@ SILENCE_DATA
Extract actual silent regions.
@ PEAK_DATA
Extract data around detected peaks.
@ OUTLIER_DATA
Extract data from statistical outlier regions.
@ ZERO_CROSSING_DATA
Extract actual data at zero crossing points.
@ HIGH_ENERGY_DATA
Extract data from high-energy regions.
@ HIGH_SPECTRAL_DATA
Extract data from high spectral energy regions.
@ OVERLAPPING_WINDOWS
Extract overlapping windowed data.
@ ONSET_DATA
Extract actual onset/transient regions.
std::vector< std::vector< double > > extract_zero_crossings(const std::vector< std::span< const double > > &channels, double threshold, double min_distance, uint32_t region_size)
std::vector< std::vector< double > > extract_peaks(const std::vector< std::span< const double > > &channels, double threshold, double min_distance, uint32_t region_size)
std::vector< std::vector< double > > extract_silence(const std::vector< std::span< const double > > &channels, double silence_threshold, uint32_t min_duration, uint32_t window_size, uint32_t hop_size)
std::vector< std::vector< double > > extract_overlapping_windows(const std::vector< std::span< const double > > &channels, uint32_t window_size, double overlap)
std::vector< std::vector< double > > extract_high_spectral(const std::vector< std::span< const double > > &channels, double spectral_threshold, uint32_t window_size, uint32_t hop_size)
Metadata about data structure for reconstruction.
T data
The actual computation data.
Definition DataIO.hpp:25
bool has_container() const
Check if a container reference is associated.
Definition DataIO.hpp:155
Input/Output container for computation pipeline data flow with structure preservation.
Definition DataIO.hpp:24