MayaFlux 0.3.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
SoundFileReader.cpp
Go to the documentation of this file.
1#include "SoundFileReader.hpp"
2
3extern "C" {
4#include <libavcodec/avcodec.h>
5#include <libavformat/avformat.h>
6#include <libavutil/opt.h>
7#include <libavutil/samplefmt.h>
8#include <libswresample/swresample.h>
9}
10
11namespace MayaFlux::IO {
12
13// ============================================================================
14// FileRegion Implementation
15// ============================================================================
16
// Presumably the body of FileRegion::to_region() (the member index of this page
// lists `Kakshya::Region to_region() const`); the signature line and the lines
// that construct `region` are missing from this listing.
18{
// Taken when both coordinate vectors hold exactly one entry; what happens inside
// the branch is not visible in this listing.
19 if (start_coordinates.size() == 1 && end_coordinates.size() == 1) {
22 }
24 }
25
// Attach the region's name and type to the result as the "label" and "type" attributes.
27 region.set_attribute("label", name);
28 region.set_attribute("type", type);
29
// Forward every entry of `attributes` to the result unchanged.
30 for (const auto& [key, value] : attributes) {
31 region.set_attribute(key, value);
32 }
33 return region;
34}
35
36std::unordered_map<std::string, Kakshya::RegionGroup>
37FileReader::regions_to_groups(const std::vector<FileRegion>& regions)
38{
39 std::unordered_map<std::string, Kakshya::RegionGroup> groups;
40
41 for (const auto& region : regions) {
42 auto& group = groups[region.type];
43 group.name = region.type;
44 group.add_region(region.to_region());
45 }
46
47 return groups;
48}
49
50// ============================================================================
51// Constructor/Destructor
52// ============================================================================
53
58
63
64// ============================================================================
65// File Operations
66// ============================================================================
67
// Reports whether `filepath` can be opened and exposes an audio stream for which
// a codec was found.
// NOTE(review): the declaration of `probe` (listing line 70) is missing from this extract.
68bool SoundFileReader::can_read(const std::string& filepath) const
69{
// A file that cannot be opened is reported as unreadable; set_error() is not called here.
71 if (!probe.open(filepath))
72 return false;
73
74 const AVCodec* codec = nullptr;
// find_best_stream() takes the codec out-parameter as `const void**`, hence the cast.
75 int idx = probe.find_best_stream(AVMEDIA_TYPE_AUDIO,
76 reinterpret_cast<const void**>(&codec));
// Readable only when a stream index was returned and a codec was written back.
77 return idx >= 0 && codec != nullptr;
78}
79
80// =========================================================================
81// FileReader — open / close / is_open
82// =========================================================================
83
// Opens `filepath`, replacing any previously opened contexts and cached data.
// Returns false (after set_error) when either the demux context or the audio
// stream context fails to open.
// NOTE(review): several lines of this function (listing lines 90, 96, 107,
// 117-121) are missing from this extract, including the definition of `planar`.
84bool SoundFileReader::open(const std::string& filepath, FileReadOptions options)
85{
// Exclusive lock: the context pointers are replaced below.
86 std::unique_lock<std::shared_mutex> lock(m_context_mutex);
87
// Drop the previous session before attempting the new one.
88 m_demux.reset();
89 m_audio.reset();
91 {
92 std::lock_guard<std::mutex> ml(m_metadata_mutex);
93 m_cached_metadata.reset();
94 m_cached_regions.clear();
95 }
97
// NOTE(review): path and options are stored before the open attempts, so they
// keep the new values even when this function returns false.
98 m_filepath = filepath;
99 m_options = options;
100
// Build both contexts in locals; the members are assigned only once both succeed.
101 auto demux = std::make_shared<FFmpegDemuxContext>();
102 if (!demux->open(filepath)) {
103 set_error(demux->last_error());
104 return false;
105 }
106
108 auto audio = std::make_shared<AudioStreamContext>();
109 if (!audio->open(*demux, planar, m_target_sample_rate)) {
110 set_error(audio->last_error());
111 return false;
112 }
113
114 m_demux = std::move(demux);
115 m_audio = std::move(audio);
116
119
122
123 return true;
124}
125
// Parameter list and body of open_from_demux(): adopts an already-constructed
// demux context and audio stream context instead of opening `filepath` itself.
// Returns false (after set_error) when either context is null or not usable.
// NOTE(review): the first signature line and listing lines 136, 158-162 are
// missing from this extract; `options` is not used in the lines that are visible.
127 std::shared_ptr<FFmpegDemuxContext> demux,
128 std::shared_ptr<AudioStreamContext> audio,
129 const std::string& filepath,
130 FileReadOptions options)
131{
// Exclusive lock: the context pointers are replaced below.
132 std::unique_lock<std::shared_mutex> lock(m_context_mutex);
133
// The previous session is dropped before the new contexts are validated, so a
// failed call leaves the reader closed.
134 m_demux.reset();
135 m_audio.reset();
137 {
138 std::lock_guard<std::mutex> ml(m_metadata_mutex);
139 m_cached_metadata.reset();
140 m_cached_regions.clear();
141 }
142 clear_error();
143
144 if (!demux || !demux->is_open()) {
145 set_error("open_from_demux: demux context is null or not open");
146 return false;
147 }
148
149 if (!audio || !audio->is_valid()) {
150 set_error("open_from_demux: audio stream context is null or not valid");
151 return false;
152 }
153
// Path and contexts are stored only after both checks passed.
154 m_filepath = filepath;
155 m_demux = std::move(demux);
156 m_audio = std::move(audio);
157
160
163
164 return true;
165}
166
// Body of close() (signature line missing from this listing): releases both
// contexts, clears the stored path and drops the cached metadata and regions.
// NOTE(review): listing line 172 is missing from this extract.
168{
// Exclusive lock: the context pointers are reset below.
169 std::unique_lock<std::shared_mutex> lock(m_context_mutex);
170 m_audio.reset();
171 m_demux.reset();
173 m_filepath.clear();
174 {
// The caches are guarded by their own mutex, taken while the context lock is held.
175 std::lock_guard<std::mutex> ml(m_metadata_mutex);
176 m_cached_metadata.reset();
177 m_cached_regions.clear();
178 }
179}
180
// Body of is_open() (signature line missing from this listing): true only when
// both contexts exist, the demuxer reports open and the audio context reports valid.
182{
183 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
184 return m_demux && m_demux->is_open() && m_audio && m_audio->is_valid();
185}
186
187// ============================================================================
188// Metadata and Regions
189// ============================================================================
190
// Returns the cached FileMetadata, or std::nullopt when no file is open.
// NOTE(review): listing lines 199 and 203 are missing from this extract;
// presumably they test the cache and call build_metadata() on a miss — confirm
// against the real source.
191std::optional<FileMetadata> SoundFileReader::get_metadata() const
192{
193 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
194 if (!m_demux || !m_audio)
195 return std::nullopt;
196
// The metadata mutex is held only for this scope and released before the code below.
197 {
198 std::lock_guard<std::mutex> ml(m_metadata_mutex);
200 return m_cached_metadata;
201 }
202
204
// Re-acquire the metadata mutex to read the cached value.
205 std::lock_guard<std::mutex> ml(m_metadata_mutex);
206 return m_cached_metadata;
207}
208
// Returns the cached region list, or an empty vector when no file is open.
// NOTE(review): listing line 221 is missing from this extract; presumably it
// calls build_regions() to fill the cache — confirm against the real source.
209std::vector<FileRegion> SoundFileReader::get_regions() const
210{
211 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
212 if (!m_demux || !m_audio)
213 return {};
214
215 {
216 std::lock_guard<std::mutex> ml(m_metadata_mutex);
// An empty cache is treated as "not cached", so the early return is skipped for it.
217 if (!m_cached_regions.empty())
218 return m_cached_regions;
219 }
220
222
// Re-acquire the metadata mutex to read the cached value.
223 std::lock_guard<std::mutex> ml(m_metadata_mutex);
224 return m_cached_regions;
225}
226
// Parameter list and body of build_metadata() (first signature line missing
// from this listing): fills a FileMetadata from both contexts plus filesystem
// information for m_filepath, then stores it in m_cached_metadata.
228 const std::shared_ptr<FFmpegDemuxContext>& demux,
229 const std::shared_ptr<AudioStreamContext>& audio) const
230{
231 FileMetadata meta;
232 demux->extract_container_metadata(meta);
233 audio->extract_stream_metadata(*demux, meta);
234
// These are the throwing std::filesystem overloads; a failure raises
// std::filesystem::filesystem_error.
235 meta.file_size = std::filesystem::file_size(m_filepath);
236 auto ftime = std::filesystem::last_write_time(m_filepath);
// NOTE(review): the file clock's duration since epoch is reused as a
// system_clock duration, which assumes both clocks share an epoch — confirm on
// the target platforms. The value is truncated to whole seconds.
237 meta.modification_time = std::chrono::system_clock::time_point(
238 std::chrono::seconds(
239 std::chrono::duration_cast<std::chrono::seconds>(ftime.time_since_epoch())));
240
// The metadata mutex is taken only for the final assignment.
241 std::lock_guard<std::mutex> ml(m_metadata_mutex);
242 m_cached_metadata = std::move(meta);
243}
244
// Parameter list and body of build_regions() (first signature line missing from
// this listing): concatenates the demuxer's chapter regions and the audio
// stream's cue regions, chapters first, and stores the result in m_cached_regions.
246 const std::shared_ptr<FFmpegDemuxContext>& demux,
247 const std::shared_ptr<AudioStreamContext>& audio) const
248{
249 auto chapters = demux->extract_chapter_regions();
250 auto cues = audio->extract_cue_regions(*demux);
251
252 std::vector<FileRegion> all;
// One allocation sized for both lists.
253 all.reserve(chapters.size() + cues.size());
254 all.insert(all.end(), chapters.begin(), chapters.end());
255 all.insert(all.end(), cues.begin(), cues.end());
256
// The metadata mutex is taken only for the final assignment.
257 std::lock_guard<std::mutex> ml(m_metadata_mutex);
258 m_cached_regions = std::move(all);
259}
260
261// ============================================================================
262// Reading Operations
263// ============================================================================
264
265std::vector<Kakshya::DataVariant> SoundFileReader::read_all()
266{
267 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
268 if (!m_demux || !m_audio) {
269 set_error("File not open");
270 return {};
271 }
272 return decode_frames(m_demux, m_audio, m_audio->total_frames, 0);
273}
274
275std::vector<Kakshya::DataVariant> SoundFileReader::read_region(const FileRegion& region)
276{
277 if (region.start_coordinates.empty() || region.end_coordinates.empty()) {
278 set_error("Invalid region coordinates");
279 return {};
280 }
281 uint64_t start = region.start_coordinates[0];
282 uint64_t end = region.end_coordinates[0];
283 uint64_t n = (end > start) ? (end - start) : 1;
284 return read_frames(n, start);
285}
286
287std::vector<Kakshya::DataVariant> SoundFileReader::read_frames(uint64_t num_frames,
288 uint64_t offset)
289{
290 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
291 if (!m_demux || !m_audio) {
292 set_error("File not open");
293 return {};
294 }
295
296 if (offset != m_current_frame_position.load()) {
297 lock.unlock();
298 std::unique_lock<std::shared_mutex> wlock(m_context_mutex);
299 if (!m_demux || !m_audio) {
300 set_error("File closed during operation");
301 return {};
302 }
303 if (!seek_internal(m_demux, m_audio, offset))
304 return {};
305 wlock.unlock();
306 lock.lock();
307 if (!m_demux || !m_audio) {
308 set_error("File closed during operation");
309 return {};
310 }
311 }
312
313 return decode_frames(m_demux, m_audio, num_frames, offset);
314}
315
316// ============================================================================
317// Seeking
318// ============================================================================
319
320std::vector<uint64_t> SoundFileReader::get_read_position() const
321{
322 return { m_current_frame_position.load() };
323}
324
325bool SoundFileReader::seek(const std::vector<uint64_t>& position)
326{
327 if (position.empty()) {
328 set_error("Empty position vector");
329 return false;
330 }
331 std::unique_lock<std::shared_mutex> lock(m_context_mutex);
332 if (!m_demux || !m_audio) {
333 set_error("File not open");
334 return false;
335 }
336 return seek_internal(m_demux, m_audio, position[0]);
337}
338
// Parameter list and body of seek_internal() (first signature line missing from
// this listing): seeks the demuxer to `frame_position`, resets decoder and
// resampler state, and records the new position. Returns false after set_error()
// on failure. Takes no lock itself; the visible callers hold m_context_mutex exclusively.
340 const std::shared_ptr<FFmpegDemuxContext>& demux,
341 const std::shared_ptr<AudioStreamContext>& audio,
342 uint64_t frame_position)
343{
// Positions past the end are clamped to the last frame count rather than rejected.
344 if (frame_position > audio->total_frames)
345 frame_position = audio->total_frames;
346
// The sample rate is the denominator of the rescale below, so zero must be rejected.
347 if (audio->sample_rate == 0) {
348 set_error("Invalid sample rate");
349 return false;
350 }
351
352 AVStream* stream = demux->get_stream(audio->stream_index);
353 if (!stream) {
354 set_error("Invalid stream index");
355 return false;
356 }
357
// Convert the frame index (units of 1/sample_rate seconds) into the stream's time base.
358 int64_t ts = av_rescale_q(
359 static_cast<int64_t>(frame_position),
360 AVRational { 1, static_cast<int>(audio->sample_rate) },
361 stream->time_base);
362
363 if (!demux->seek(audio->stream_index, ts)) {
364 set_error(demux->last_error());
365 return false;
366 }
367
// Reset codec and resampler state after the jump, then publish the new position.
368 audio->flush_codec();
369 audio->drain_resampler_init();
370 m_current_frame_position = frame_position;
371 return true;
372}
373
374// =========================================================================
375// Decode loop
376// =========================================================================
377
// Decodes up to `num_frames` frames from the demuxer's current position,
// converts them to double through the resampler, and appends them to the result.
// The offset parameter is unnamed and unused, so callers must have positioned
// the demuxer beforehand.
// Result layout: one DataVariant per channel when `use_planar` is set, otherwise
// a single DataVariant holding interleaved samples.
// Returns an empty vector (after set_error) when the audio context is invalid or
// an allocation fails. Advances m_current_frame_position by the decoded count.
// NOTE(review): the definition of `use_planar` (listing line 389) is missing from this extract.
378std::vector<Kakshya::DataVariant> SoundFileReader::decode_frames(
379 const std::shared_ptr<FFmpegDemuxContext>& demux,
380 const std::shared_ptr<AudioStreamContext>& audio,
381 uint64_t num_frames,
382 uint64_t /*offset*/)
383{
384 if (!audio->is_valid()) {
385 set_error("Invalid audio context for decoding");
386 return {};
387 }
388
390 int ch = static_cast<int>(audio->channels);
391
// Pre-size the output containers so appends in the decode loop do not reallocate.
392 std::vector<Kakshya::DataVariant> output;
393 if (use_planar) {
394 output.resize(ch);
395 for (auto& v : output) {
396 v = std::vector<double>();
397 std::get<std::vector<double>>(v).reserve(num_frames);
398 }
399 } else {
400 output.resize(1);
401 output[0] = std::vector<double>();
402 std::get<std::vector<double>>(output[0]).reserve(num_frames * static_cast<size_t>(ch));
403 }
404
405 uint64_t decoded = 0;
406 bool eof_reached = false;
407
408 AVPacket* pkt = av_packet_alloc();
409 AVFrame* frame = av_frame_alloc();
// Free whichever of the two was allocated before bailing out.
410 if (!pkt || !frame) {
411 av_packet_free(&pkt);
412 av_frame_free(&frame);
413 set_error("Failed to allocate packet/frame");
414 return {};
415 }
416
// Scratch buffer capacity: num_frames rescaled by out_rate / source rate, rounded up.
// NOTE(review): the result is narrowed to int, which would truncate for very
// large num_frames — confirm the expected upper bound.
417 uint32_t out_rate = m_target_sample_rate > 0 ? m_target_sample_rate : audio->sample_rate;
418 int max_resampled = static_cast<int>(av_rescale_rnd(
419 static_cast<int64_t>(num_frames), out_rate, audio->sample_rate, AV_ROUND_UP));
420
421 AVSampleFormat tgt_fmt = use_planar ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
422 uint8_t** resample_buf = nullptr;
423 int linesize = 0;
424
425 if (av_samples_alloc_array_and_samples(
426 &resample_buf, &linesize, ch, max_resampled, tgt_fmt, 0)
427 < 0) {
428 av_packet_free(&pkt);
429 av_frame_free(&frame);
430 set_error("Failed to allocate resample buffer");
431 return {};
432 }
433
// Outer loop: feed one packet per iteration until enough frames were produced.
434 while (decoded < num_frames) {
435 if (!eof_reached) {
436 int ret = av_read_frame(demux->format_context, pkt);
437 if (ret == AVERROR_EOF) {
// End of input: a null packet puts the decoder into draining mode.
438 eof_reached = true;
439 avcodec_send_packet(audio->codec_context, nullptr);
440 } else if (ret < 0) {
// NOTE(review): on a non-EOF read error no drain packet is sent, so the decoder
// may keep answering EAGAIN and the exit test at the bottom of this loop
// (which requires AVERROR_EOF) would never fire — looks like it can spin forever.
441 eof_reached = true;
442 } else if (pkt->stream_index == audio->stream_index) {
// NOTE(review): the return value of avcodec_send_packet() is ignored here.
443 avcodec_send_packet(audio->codec_context, pkt);
444 av_packet_unref(pkt);
445 } else {
// Packet belongs to another stream: discard it.
446 av_packet_unref(pkt);
447 }
448 }
449
// Inner loop: pull every frame the decoder currently has available.
450 int receive_ret = 0;
451 while (decoded < num_frames) {
452 receive_ret = avcodec_receive_frame(audio->codec_context, frame);
453
// EAGAIN: the decoder needs more input before it can output another frame.
454 if (receive_ret == AVERROR(EAGAIN))
455 break;
456 if (receive_ret == AVERROR_EOF) {
457 // decoded = num_frames;
458 break;
459 }
460 if (receive_ret < 0)
461 break;
462
463 int out_samples = swr_convert(
464 audio->swr_context,
465 resample_buf, max_resampled,
466 const_cast<const uint8_t**>(frame->data),
467 frame->nb_samples);
468
469 if (out_samples > 0) {
// Never append more than the caller asked for.
470 uint64_t to_copy = std::min(static_cast<uint64_t>(out_samples),
471 num_frames - decoded);
472 if (use_planar) {
// Planar: plane c of the scratch buffer goes to output[c].
473 for (int c = 0; c < ch; ++c) {
474 auto* src = reinterpret_cast<double*>(resample_buf[c]);
475 auto& dst = std::get<std::vector<double>>(output[c]);
476 dst.insert(dst.end(), src, src + to_copy);
477 }
478 } else {
// Interleaved: a single plane holding `ch` samples per frame.
479 auto* src = reinterpret_cast<double*>(resample_buf[0]);
480 auto& dst = std::get<std::vector<double>>(output[0]);
481 dst.insert(dst.end(), src, src + to_copy * static_cast<uint64_t>(ch));
482 }
483 decoded += to_copy;
484 }
485 av_frame_unref(frame);
486 }
487
// Stop once input is exhausted and the decoder reported end of stream.
488 if (eof_reached && receive_ret == AVERROR_EOF)
489 break;
490 }
491
// Flush samples still buffered inside the resampler (null input), still capped
// by the remaining frame budget.
492 while (true) {
493 int n = swr_convert(audio->swr_context, resample_buf, max_resampled, nullptr, 0);
494 if (n <= 0)
495 break;
496
497 uint64_t to_copy = std::min(static_cast<uint64_t>(n),
498 (num_frames > decoded) ? (num_frames - decoded) : 0);
499
500 if (to_copy > 0) {
501 if (use_planar) {
502 for (int c = 0; c < ch; ++c) {
503 auto* src = reinterpret_cast<double*>(resample_buf[c]);
504 auto& dst = std::get<std::vector<double>>(output[c]);
505 dst.insert(dst.end(), src, src + to_copy);
506 }
507 } else {
508 auto* src = reinterpret_cast<double*>(resample_buf[0]);
509 auto& dst = std::get<std::vector<double>>(output[0]);
510 dst.insert(dst.end(), src, src + to_copy * static_cast<uint64_t>(ch));
511 }
512 decoded += to_copy;
513 } else {
// Budget already met: stop draining.
514 break;
515 }
516 }
517
// Release the sample storage first, then the pointer array that referenced it.
518 av_freep(&resample_buf[0]);
519 av_freep(&resample_buf);
520 av_packet_free(&pkt);
521 av_frame_free(&frame);
522
523 m_current_frame_position += decoded;
524 return output;
525}
526
527// =========================================================================
528// Container Operations
529// =========================================================================
530
531std::shared_ptr<Kakshya::SignalSourceContainer> SoundFileReader::create_container()
532{
533 std::shared_lock lock(m_context_mutex);
534 if (!m_demux || !m_audio) {
535 set_error("File not open");
536 return nullptr;
537 }
538
539 return std::make_shared<Kakshya::SoundFileContainer>();
540}
541
// Parameter and body of load_into_container() (first signature line missing from
// this listing): sets up a SoundFileContainer, fills it with the whole file,
// attaches the file's region groups and marks it ready for processing.
// Returns false after set_error() on any failure.
// NOTE(review): listing lines 568 and 570-571 are missing from this extract,
// including the definition of `planar` and the rest of the `organization` assignment.
543 std::shared_ptr<Kakshya::SignalSourceContainer> container)
544{
545 if (!container) {
546 set_error("Invalid container");
547 return false;
548 }
549
// Only SoundFileContainer targets are supported.
550 auto sc = std::dynamic_pointer_cast<Kakshya::SoundFileContainer>(container);
551 if (!sc) {
552 set_error("Container is not a SoundFileContainer");
553 return false;
554 }
555
// Copy the audio context pointer under the shared lock so it stays alive after
// the lock is released at the end of this scope.
556 std::shared_ptr<AudioStreamContext> audio;
557 {
558 std::shared_lock lock(m_context_mutex);
559 if (!m_demux || !m_audio) {
560 set_error("File not open");
561 return false;
562 }
563 audio = m_audio;
564 }
565
566 sc->setup(audio->total_frames, audio->sample_rate, audio->channels);
567
569 sc->get_structure().organization = planar
572
// read_all() takes the context lock itself, so it is called with no lock held here.
573 auto data = read_all();
574 if (data.empty()) {
575 set_error("Failed to read audio data");
576 return false;
577 }
578
579 sc->set_raw_data(data);
580
// Regions are grouped by type and each group is added to the container.
581 auto regions = get_regions();
582 auto region_groups = regions_to_groups(regions);
583 for (const auto& [name, group] : region_groups)
584 sc->add_region_group(group);
585
586 sc->create_default_processor();
587 sc->mark_ready_for_processing(true);
588 return true;
589}
590
591// ============================================================================
592// Utility Methods
593// ============================================================================
594
// Body of get_num_dimensions() (signature line missing from this listing):
// audio data is always reported as two-dimensional.
596{
597 return 2; // time × channels
598}
599
600std::vector<uint64_t> SoundFileReader::get_dimension_sizes() const
601{
602 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
603 if (!m_audio)
604 return { 0, 0 };
605 return { m_audio->total_frames, m_audio->channels };
606}
607
608std::vector<std::string> SoundFileReader::get_supported_extensions() const
609{
610 return {
611 "wav", "flac", "mp3", "m4a", "aac", "ogg", "opus", "wma",
612 "aiff", "aif", "ape", "wv", "tta", "mka", "ac3", "dts",
613 "mp2", "mp4", "webm", "caf", "amr", "au", "voc", "w64",
614 "mpc", "mp+", "m4b", "m4r", "3gp", "3g2", "asf", "rm",
615 "ra", "avi", "mov", "mkv", "ogv", "ogx", "oga", "spx",
616 "f4a", "f4b", "f4v", "m4v", "asx", "wvx", "wax"
617 };
618}
619
// Body of get_last_error() (signature line missing from this listing): returns
// the stored error message, read while holding the error mutex.
621{
622 std::lock_guard<std::mutex> lock(m_error_mutex);
623 return m_last_error;
624}
625
626void SoundFileReader::set_error(const std::string& err) const
627{
628 std::lock_guard<std::mutex> lock(m_error_mutex);
629 m_last_error = err;
630 MF_ERROR(Journal::Component::IO, Journal::Context::FileIO, "SoundFileReader: {}", err);
631}
632
// Body of clear_error() (signature line missing from this listing): empties the
// stored error message while holding the error mutex.
634{
635 std::lock_guard<std::mutex> lock(m_error_mutex);
636 m_last_error.clear();
637}
638
639} // namespace MayaFlux::IO
#define MF_ERROR(comp, ctx,...)
bool open(const std::string &filepath)
Open a media file and probe stream information.
static void init_ffmpeg()
Initialise FFmpeg logging level once per process.
int find_best_stream(int media_type, const void **out_codec=nullptr) const
Find the best stream of the requested media type.
RAII owner of a single AVFormatContext and associated demux state.
static std::unordered_map< std::string, Kakshya::RegionGroup > regions_to_groups(const std::vector< FileRegion > &regions)
Convert file regions to region groups.
std::vector< Kakshya::DataVariant > read_all() override
Read the entire audio file into memory.
void close() override
Close the currently open file and release resources.
void build_regions(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio) const
Build and cache FileRegion list from both contexts.
std::string get_last_error() const override
Get the last error message encountered by the reader.
uint32_t m_target_sample_rate
Target sample rate for resampling (0 = use source rate).
bool open(const std::string &filepath, FileReadOptions options=FileReadOptions::ALL) override
Open an audio file for reading.
std::mutex m_error_mutex
Mutex for thread-safe error message access.
void build_metadata(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio) const
Build and cache FileMetadata from both contexts.
std::shared_ptr< AudioStreamContext > m_audio
Codec + resampler state.
bool load_into_container(std::shared_ptr< Kakshya::SignalSourceContainer > container) override
Load file data into an existing SignalSourceContainer.
std::shared_mutex m_context_mutex
Guards both context pointers.
bool seek_internal(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio, uint64_t frame_position)
Seek the demuxer and flush the codec to the given frame position.
bool can_read(const std::string &filepath) const override
Check if this reader can open the given file.
std::mutex m_metadata_mutex
Mutex for thread-safe metadata access.
std::vector< Kakshya::DataVariant > decode_frames(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio, uint64_t num_frames, uint64_t offset)
Decode num_frames PCM frames starting at offset.
std::string m_last_error
Last error message encountered.
std::vector< uint64_t > get_read_position() const override
Get the current read position in the file.
void set_error(const std::string &error) const
Set the last error message.
std::atomic< uint64_t > m_current_frame_position
Current frame position for reading.
std::vector< Kakshya::DataVariant > read_region(const FileRegion &region) override
Read a specific region from the file.
std::vector< Kakshya::DataVariant > read_frames(uint64_t num_frames, uint64_t offset=0)
Read a specific number of frames from the file.
AudioReadOptions m_audio_options
Audio-specific read options.
std::optional< FileMetadata > m_cached_metadata
Cached file metadata.
bool open_from_demux(std::shared_ptr< FFmpegDemuxContext > demux, std::shared_ptr< AudioStreamContext > audio, const std::string &filepath, FileReadOptions options=FileReadOptions::ALL)
Open an audio stream from an already-constructed demux and stream context.
std::vector< FileRegion > m_cached_regions
Cached file regions (markers, loops, etc.).
void clear_error() const
Clear the last error message.
~SoundFileReader() override
Destroy the SoundFileReader object.
std::string m_filepath
Path to the currently open file.
std::vector< uint64_t > get_dimension_sizes() const override
Get the size of each dimension (e.g., frames, channels).
std::shared_ptr< Kakshya::SignalSourceContainer > create_container() override
Create a SignalSourceContainer for this file.
bool seek(const std::vector< uint64_t > &position) override
Seek to a specific position in the file.
SoundFileReader()
Construct a new SoundFileReader object.
size_t get_num_dimensions() const override
Get the number of dimensions in the audio data (typically 2: time, channel).
std::shared_ptr< FFmpegDemuxContext > m_demux
Container / format state.
std::optional< FileMetadata > get_metadata() const override
Get metadata for the currently open file.
std::vector< FileRegion > get_regions() const override
Get all regions (markers, loops, etc.) from the file.
bool is_open() const override
Check if a file is currently open.
FileReadOptions m_options
File read options used for this session.
std::vector< std::string > get_supported_extensions() const override
Get supported file extensions for this reader.
@ DEINTERLEAVE
Output planar (per-channel) doubles instead of interleaved.
FileReadOptions
Generic options for file reading behavior.
@ EXTRACT_METADATA
Extract file metadata.
@ EXTRACT_REGIONS
Extract semantic regions (format-specific)
@ NONE
No special options.
@ FileIO
Filesystem I/O operations.
@ IO
Networking, file handling, streaming.
@ PLANAR
Separate DataVariant per logical unit (LLL...RRR for stereo)
@ INTERLEAVED
Single DataVariant with interleaved data (LRLRLR for stereo)
uint64_t file_size
Size in bytes.
std::chrono::system_clock::time_point modification_time
Last modification time.
Generic metadata structure for any file type.
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y)
Kakshya::Region to_region() const
Convert this FileRegion to a Region for use in processing.
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block")
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive)
Generic region descriptor for any file type.
static Region time_span(uint64_t start_frame, uint64_t end_frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a time span (e.g., a segment of frames).
Definition Region.hpp:135
void set_attribute(const std::string &key, std::any value)
Set an attribute value by key.
Definition Region.hpp:339
static Region time_point(uint64_t frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a single time point (e.g., a frame or sample).
Definition Region.hpp:114
Represents a point or span in N-dimensional space.
Definition Region.hpp:67