MayaFlux 0.1.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
SoundFileReader.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "FileReader.hpp"
5
6// Forward declarations for FFmpeg types
7extern "C" {
8struct AVFormatContext;
9struct AVCodecContext;
10struct AVFrame;
11struct AVPacket;
12struct SwrContext;
13}
14
15namespace MayaFlux::IO {
16
17/**
18 * @enum AudioReadOptions
19 * @brief Audio-specific reading options
 *
 * Flag enumeration backed by uint32_t. NORMALIZE, CONVERT_TO_MONO and
 * DEINTERLEAVE each occupy a single bit.
 *
 * NOTE(review): bit 1 (1 << 1) has no named option - confirm whether the gap
 * between NORMALIZE and CONVERT_TO_MONO is intentional.
20 */
21enum class AudioReadOptions : uint32_t {
22 NONE = 0, // No option bits set
23 NORMALIZE = 1 << 0, // Not implemented - would use FFmpeg's volume filter
24 CONVERT_TO_MONO = 1 << 2, // Not implemented - would use FFmpeg's channel mixer
25 DEINTERLEAVE = 1 << 3, // Convert from interleaved to planar layout
26 ALL = 0xFFFFFFFF // Every bit set, including bits that have no named option
27};
28
30{
31 return static_cast<AudioReadOptions>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
32}
33
35{
36 return static_cast<AudioReadOptions>(static_cast<uint32_t>(a) & static_cast<uint32_t>(b));
37}
38
39/**
40 * @brief RAII wrapper for FFmpeg contexts with proper cleanup
41 *
42 * This struct holds all FFmpeg-related state and ensures proper cleanup order.
43 * Shared ownership allows safe concurrent access with reader-writer semantics.
44 */
46 AVFormatContext* format_context = nullptr;
47 AVCodecContext* codec_context = nullptr;
48 SwrContext* swr_context = nullptr;
50 uint64_t total_frames = 0;
51 uint32_t sample_rate = 0;
52 uint32_t channels = 0;
53
55
56 // Non-copyable, non-movable (managed by shared_ptr)
57 FFmpegContext() = default;
58 FFmpegContext(const FFmpegContext&) = delete;
62
63 bool is_valid() const
64 {
66 }
67};
68
69/**
70 * @class SoundFileReader
71 * @brief FFmpeg-based audio file reader for MayaFlux
72 *
73 * SoundFileReader provides a high-level interface for reading and decoding audio files using FFmpeg.
74 * It supports a wide range of audio formats, automatic sample format conversion to double precision,
75 * resampling, metadata extraction, region/marker extraction, and streaming/seekable access.
76 *
77 * Key Features:
78 * - Format detection and demuxing via libavformat
79 * - Audio decoding via libavcodec
80 * - Sample format conversion and resampling via libswresample (always outputs double)
81 * - Metadata and region extraction from FFmpeg's parsed structures
82 * - Seeking and timestamp handling via FFmpeg's APIs
83 * - Automatic creation and population of Kakshya::SoundFileContainer for downstream processing
84 * - Thread-safe access for reading and metadata queries
85 *
86 * Usage:
87 *     SoundFileReader reader;
88 *     if (reader.open("file.wav")) {
89 *         auto metadata = reader.get_metadata();
90 *         auto all_data = reader.read_all();
91 *         auto container = reader.create_container();
92 *         // ...
93 *         reader.close();
94 *     }
95 *
96 * All audio data is converted to double precision for internal processing.
97 * The reader can output data in either interleaved or deinterleaved (planar) layout.
98 */
100public:
101 /**
102 * @brief Construct a new SoundFileReader object.
103 * Initializes internal state and prepares for file operations.
104 */
106
107 /**
108 * @brief Destroy the SoundFileReader object.
109 * Cleans up FFmpeg resources and internal state.
110 */
111 ~SoundFileReader() override;
112
113 /**
114 * @brief Check if this reader can open the given file.
115 * @param filepath Path to the file.
116 * @return True if the file can be read, false otherwise.
117 */
118 bool can_read(const std::string& filepath) const override;
119
120 /**
121 * @brief Open an audio file for reading.
122 * @param filepath Path to the file.
123 * @param options File read options.
124 * @return True if the file was opened successfully.
125 */
126 bool open(const std::string& filepath, FileReadOptions options = FileReadOptions::ALL) override;
127
128 /**
129 * @brief Close the currently open file and release resources.
130 */
131 void close() override;
132
133 /**
134 * @brief Check if a file is currently open.
135 * @return True if a file is open, false otherwise.
136 */
137 bool is_open() const override;
138
139 /**
140 * @brief Get metadata for the currently open file.
141 * @return Optional FileMetadata structure.
142 */
143 std::optional<FileMetadata> get_metadata() const override;
144
145 /**
146 * @brief Get all regions (markers, loops, etc.) from the file.
147 * @return Vector of FileRegion structures.
148 */
149 std::vector<FileRegion> get_regions() const override;
150
151 /**
152 * @brief Read the entire audio file into memory.
153 * @return Vector of DataVariant objects holding the audio data as std::vector<double>.
 *         NOTE(review): presumably a single interleaved entry, or one entry per
 *         channel when DEINTERLEAVE is set - confirm against the implementation.
154 */
155 std::vector<Kakshya::DataVariant> read_all() override;
156
157 /**
158 * @brief Read a specific region from the file.
159 * @param region Region to read.
160 * @return Vector of DataVariant objects containing the region's data.
161 */
162 std::vector<Kakshya::DataVariant> read_region(const FileRegion& region) override;
163
164 /**
165 * @brief Create a SignalSourceContainer for this file.
166 * @return Shared pointer to a new container, declared as Kakshya::SignalSourceContainer.
 *         get_container_type() reports Kakshya::SoundFileContainer for this reader.
 *         NOTE(review): presumably the returned object is a SoundFileContainer - confirm.
167 */
168 std::shared_ptr<Kakshya::SignalSourceContainer> create_container() override;
169
170 /**
171 * @brief Load file data into an existing SignalSourceContainer.
172 * @param container Target container.
173 * @return True if loading succeeded.
174 */
175 bool load_into_container(std::shared_ptr<Kakshya::SignalSourceContainer> container) override;
176
177 /**
178 * @brief Get the current read position in the file.
179 * @return Vector of dimension indices (e.g., frame index).
180 */
181 std::vector<uint64_t> get_read_position() const override;
182
183 /**
184 * @brief Seek to a specific position in the file.
185 * @param position Vector of dimension indices.
186 * @return True if seek succeeded.
187 */
188 bool seek(const std::vector<uint64_t>& position) override;
189
190 /**
191 * @brief Get supported file extensions for this reader.
192 * @return Vector of supported extensions (e.g., "wav", "flac").
193 */
194 std::vector<std::string> get_supported_extensions() const override;
195
196 /**
197 * @brief Get the C++ type of the data returned by this reader.
198 * @return Type index for std::vector<double>.
199 */
200 std::type_index get_data_type() const override { return typeid(std::vector<double>); }
201
202 /**
203 * @brief Get the C++ type of the container returned by this reader.
204 * @return Type index for Kakshya::SoundFileContainer.
205 */
206 std::type_index get_container_type() const override { return typeid(Kakshya::SoundFileContainer); }
207
208 /**
209 * @brief Get the last error message encountered by the reader.
210 * @return Error string.
211 */
212 std::string get_last_error() const override;
213
214 /**
215 * @brief Check if the reader supports streaming access.
216 * @return True if streaming is supported.
217 */
218 bool supports_streaming() const override;
219
220 /**
221 * @brief Get the preferred chunk size for streaming reads.
222 * @return Preferred chunk size in frames.
223 */
224 uint64_t get_preferred_chunk_size() const override;
225
226 /**
227 * @brief Get the number of dimensions in the audio data (typically 2: time, channel).
228 * @return Number of dimensions.
229 */
230 size_t get_num_dimensions() const override;
231
232 /**
233 * @brief Get the size of each dimension (e.g., frames, channels).
234 * @return Vector of dimension sizes.
235 */
236 std::vector<uint64_t> get_dimension_sizes() const override;
237
238 /**
239 * @brief Read a specific number of frames from the file.
240 * @param num_frames Number of frames to read.
241 * @param offset Frame offset from beginning (defaults to 0).
242 * @return Vector of DataVariant objects holding the frames as std::vector<double>.
243 */
244 std::vector<Kakshya::DataVariant> read_frames(uint64_t num_frames, uint64_t offset = 0);
245
246 /**
247 * @brief Set audio-specific read options.
248 * @param options Audio read options (e.g., DEINTERLEAVE).
249 */
251
252 /**
253 * @brief Set the target sample rate for resampling.
254 * @param sample_rate Target sample rate (0 = no resampling).
255 */
256 void set_target_sample_rate(uint32_t sample_rate) { m_target_sample_rate = sample_rate; }
257
258 /**
259 * @brief Set the target bit depth (ignored, always outputs double).
 *
 * The value is only stored in m_target_bit_depth.
 *
260 * @param bit_depth Target bit depth.
261 * @deprecated Always outputs double precision.
 *
 * NOTE(review): documented as deprecated but not marked [[deprecated]], so
 * callers receive no compiler diagnostic - consider adding the attribute.
262 */
263 void set_target_bit_depth(uint32_t bit_depth) { m_target_bit_depth = bit_depth; }
264
265 /**
266 * @brief Initialize FFmpeg libraries (thread-safe, called automatically).
267 */
268 static void initialize_ffmpeg();
269
270private:
271 // FFmpeg contexts - let FFmpeg manage these
272
273 // Shared FFmpeg context - enables safe concurrent access
274 std::shared_ptr<FFmpegContext> m_context;
275
276 // Reader-writer lock: multiple readers OR single writer
277 mutable std::shared_mutex m_context_mutex;
278
279 /**
280 * @brief Path to the currently open file.
281 */
282 std::string m_filepath;
283
284 /**
285 * @brief File read options used for this session.
286 */
288
289 /**
290 * @brief Audio-specific read options.
291 */
293
294 /**
295 * @brief Last error message encountered.
296 */
297 mutable std::string m_last_error;
298
299 /**
300 * @brief Cached file metadata.
301 */
302 mutable std::optional<FileMetadata> m_cached_metadata;
303
304 /**
305 * @brief Cached file regions (markers, loops, etc.).
306 */
307 mutable std::vector<FileRegion> m_cached_regions;
308
309 /**
310 * @brief Current frame position for reading.
311 */
312 std::atomic<uint64_t> m_current_frame_position { 0 };
313
314 /**
315 * @brief Target sample rate for resampling (0 = use source rate).
316 */
318
319 /**
320 * @brief Target bit depth (ignored, always outputs double).
321 */
322 uint32_t m_target_bit_depth = 0;
323
324 /**
325 * @brief Mutex for thread-safe metadata access.
326 */
327 mutable std::mutex m_metadata_mutex;
328
329 // Simplified internal methods
330
331 /**
332 * @brief Set up the FFmpeg resampler if needed.
333 * @return True if setup succeeded.
334 */
335 bool setup_resampler(const std::shared_ptr<FFmpegContext>& ctx);
336
337 /**
338 * @brief Extract metadata from the file.
339 */
340 void extract_metadata(const std::shared_ptr<FFmpegContext>& ctx);
341
342 /**
343 * @brief Extract region information from the file.
344 */
345 void extract_regions(const std::shared_ptr<FFmpegContext>& ctx);
346
347 /**
348 * @brief Decode a specific number of frames from the file.
349 * @param ctx FFmpeg context (shared_ptr passed by value).
350 * @param num_frames Number of frames to decode.
351 * @param offset Frame offset from beginning.
352 * @return Vector of DataVariant objects containing the decoded data.
353 */
354 std::vector<Kakshya::DataVariant> decode_frames(
355 std::shared_ptr<FFmpegContext> ctx,
356 uint64_t num_frames,
357 uint64_t offset);
358
359 /**
360 * @brief Internal seek implementation.
361 * @param ctx FFmpeg context.
362 * @param frame_position Target frame position.
363 * @return True if seek succeeded.
 *
 * NOTE(review): ctx is a non-const shared_ptr reference here, whereas
 * setup_resampler/extract_metadata/extract_regions take a const reference and
 * decode_frames takes the pointer by value - confirm whether this function
 * really needs to be able to reseat the caller's pointer.
364 */
365 bool seek_internal(std::shared_ptr<FFmpegContext>& ctx, uint64_t frame_position);
366
367 /**
368 * @brief Convert interleaved audio data to deinterleaved (planar) format.
369 * @param interleaved Input interleaved data.
370 * @param channels Number of channels.
371 * @return Deinterleaved data as std::vector<std::vector<double>>.
 *         NOTE(review): presumably one inner vector per channel - confirm.
372 */
373 std::vector<std::vector<double>> deinterleave_data(const std::vector<double>& interleaved, uint32_t channels);
374
375 /**
376 * @brief Set the last error message.
377 * @param error Error string.
378 */
379 void set_error(const std::string& error) const;
380
381 /**
382 * @brief Clear the last error message.
383 */
384 void clear_error() const;
385
386 /**
387 * @brief True if FFmpeg has been initialized.
388 */
389 static std::atomic<bool> s_ffmpeg_initialized;
390
391 /**
392 * @brief Mutex for FFmpeg initialization.
393 */
394 static std::mutex s_ffmpeg_init_mutex;
395};
396
397} // namespace MayaFlux::IO
Abstract interface for reading various file formats into containers.
std::vector< Kakshya::DataVariant > read_all() override
Read the entire audio file into memory.
void close() override
Close the currently open file and release resources.
std::string get_last_error() const override
Get the last error message encountered by the reader.
uint64_t get_preferred_chunk_size() const override
Get the preferred chunk size for streaming reads.
uint32_t m_target_sample_rate
Target sample rate for resampling (0 = use source rate).
bool supports_streaming() const override
Check if the reader supports streaming access.
bool open(const std::string &filepath, FileReadOptions options=FileReadOptions::ALL) override
Open an audio file for reading.
bool load_into_container(std::shared_ptr< Kakshya::SignalSourceContainer > container) override
Load file data into an existing SignalSourceContainer.
bool can_read(const std::string &filepath) const override
Check if this reader can open the given file.
uint32_t m_target_bit_depth
Target bit depth (ignored, always outputs double).
std::mutex m_metadata_mutex
Mutex for thread-safe metadata access.
static std::atomic< bool > s_ffmpeg_initialized
True if FFmpeg has been initialized.
std::vector< Kakshya::DataVariant > decode_frames(std::shared_ptr< FFmpegContext > ctx, uint64_t num_frames, uint64_t offset)
Decode a specific number of frames from the file.
bool seek_internal(std::shared_ptr< FFmpegContext > &ctx, uint64_t frame_position)
Internal seek implementation.
std::string m_last_error
Last error message encountered.
void extract_regions(const std::shared_ptr< FFmpegContext > &ctx)
Extract region information from the file.
std::vector< uint64_t > get_read_position() const override
Get the current read position in the file.
void set_error(const std::string &error) const
Set the last error message.
std::atomic< uint64_t > m_current_frame_position
Current frame position for reading.
void extract_metadata(const std::shared_ptr< FFmpegContext > &ctx)
Extract metadata from the file.
std::type_index get_container_type() const override
Get the C++ type of the container returned by this reader.
bool setup_resampler(const std::shared_ptr< FFmpegContext > &ctx)
Set up the FFmpeg resampler if needed.
std::vector< Kakshya::DataVariant > read_region(const FileRegion &region) override
Read a specific region from the file.
std::vector< Kakshya::DataVariant > read_frames(uint64_t num_frames, uint64_t offset=0)
Read a specific number of frames from the file.
AudioReadOptions m_audio_options
Audio-specific read options.
std::shared_ptr< FFmpegContext > m_context
std::optional< FileMetadata > m_cached_metadata
Cached file metadata.
std::vector< FileRegion > m_cached_regions
Cached file regions (markers, loops, etc.).
void clear_error() const
Clear the last error message.
~SoundFileReader() override
Destroy the SoundFileReader object.
std::type_index get_data_type() const override
Get the C++ type of the data returned by this reader.
std::string m_filepath
Path to the currently open file.
std::vector< uint64_t > get_dimension_sizes() const override
Get the size of each dimension (e.g., frames, channels).
void set_target_bit_depth(uint32_t bit_depth)
Set the target bit depth (ignored, always outputs double).
void set_audio_options(AudioReadOptions options)
Set audio-specific read options.
std::shared_ptr< Kakshya::SignalSourceContainer > create_container() override
Create a SignalSourceContainer for this file.
bool seek(const std::vector< uint64_t > &position) override
Seek to a specific position in the file.
SoundFileReader()
Construct a new SoundFileReader object.
std::vector< std::vector< double > > deinterleave_data(const std::vector< double > &interleaved, uint32_t channels)
Convert interleaved audio data to deinterleaved (planar) format.
size_t get_num_dimensions() const override
Get the number of dimensions in the audio data (typically 2: time, channel).
std::optional< FileMetadata > get_metadata() const override
Get metadata for the currently open file.
std::vector< FileRegion > get_regions() const override
Get all regions (markers, loops, etc.) from the file.
bool is_open() const override
Check if a file is currently open.
FileReadOptions m_options
File read options used for this session.
std::vector< std::string > get_supported_extensions() const override
Get supported file extensions for this reader.
static void initialize_ffmpeg()
Initialize FFmpeg libraries (thread-safe, called automatically).
void set_target_sample_rate(uint32_t sample_rate)
Set the target sample rate for resampling.
static std::mutex s_ffmpeg_init_mutex
Mutex for FFmpeg initialization.
FFmpeg-based audio file reader for MayaFlux.
File-backed audio container with complete streaming functionality.
AudioReadOptions
Audio-specific reading options.
FileReadOptions
Generic options for file reading behavior.
@ ALL
All options enabled.
@ NONE
No special options.
FileReadOptions operator&(FileReadOptions a, FileReadOptions b)
FileReadOptions operator|(FileReadOptions a, FileReadOptions b)
FFmpegContext & operator=(const FFmpegContext &)=delete
FFmpegContext & operator=(FFmpegContext &&)=delete
FFmpegContext(const FFmpegContext &)=delete
FFmpegContext(FFmpegContext &&)=delete
RAII wrapper for FFmpeg contexts with proper cleanup.
Generic region descriptor for any file type.