MayaFlux 0.1.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
SoundFileReader.cpp
Go to the documentation of this file.
1#include "SoundFileReader.hpp"
2
3#include <chrono>
4
5extern "C" {
6#include <libavcodec/avcodec.h>
7#include <libavformat/avformat.h>
8#include <libavutil/channel_layout.h>
9#include <libavutil/opt.h>
10#include <libavutil/samplefmt.h>
11#include <libswresample/swresample.h>
12}
13
14namespace MayaFlux::IO {
15
16// ============================================================================
17// FFmpegContext Implementation
18// ============================================================================
19
21{
22 // Cleanup order matters: resampler -> codec -> format
23 if (swr_context) {
24 swr_free(&swr_context);
25 swr_context = nullptr;
26 }
27
28 if (codec_context) {
29 avcodec_free_context(&codec_context);
30 codec_context = nullptr;
31 }
32
33 if (format_context) {
34 avformat_close_input(&format_context);
35 format_context = nullptr;
36 }
37
39}
40
41// ============================================================================
42// FileRegion Implementation
43// ============================================================================
44
46{
47 if (start_coordinates.size() == 1 && end_coordinates.size() == 1) {
50 }
52 }
53
55 region.set_attribute("label", name);
56 region.set_attribute("type", type);
57
58 for (const auto& [key, value] : attributes) {
59 region.set_attribute(key, value);
60 }
61 return region;
62}
63
64std::unordered_map<std::string, Kakshya::RegionGroup>
65FileReader::regions_to_groups(const std::vector<FileRegion>& regions)
66{
67 std::unordered_map<std::string, Kakshya::RegionGroup> groups;
68
69 for (const auto& region : regions) {
70 auto& group = groups[region.type];
71 group.name = region.type;
72 group.add_region(region.to_region());
73 }
74
75 return groups;
76}
77
78// ============================================================================
79// Static Members
80// ============================================================================
81
82std::atomic<bool> SoundFileReader::s_ffmpeg_initialized { false };
84
85// ============================================================================
86// Constructor/Destructor
87// ============================================================================
88
93
98
100{
101 std::lock_guard<std::mutex> lock(s_ffmpeg_init_mutex);
102 if (!s_ffmpeg_initialized.exchange(true)) {
103 av_log_set_level(AV_LOG_WARNING);
104 }
105}
106
107// ============================================================================
108// File Operations
109// ============================================================================
110
111bool SoundFileReader::can_read(const std::string& filepath) const
112{
113 AVFormatContext* format_ctx = nullptr;
114 int ret = avformat_open_input(&format_ctx, filepath.c_str(), nullptr, nullptr);
115
116 if (ret < 0) {
117 return false;
118 }
119
120 bool has_audio = false;
121 if (avformat_find_stream_info(format_ctx, nullptr) >= 0) {
122 int audio_stream = av_find_best_stream(format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
123 has_audio = (audio_stream >= 0);
124 }
125
126 avformat_close_input(&format_ctx);
127 return has_audio;
128}
129
130bool SoundFileReader::open(const std::string& filepath, FileReadOptions options)
131{
132 std::unique_lock<std::shared_mutex> lock(m_context_mutex);
133
134 if (m_context) {
135 m_context.reset();
137 m_cached_metadata.reset();
138 m_cached_regions.clear();
139 }
140
141 m_filepath = filepath;
142 m_options = options;
143 clear_error();
144
145 auto ctx = std::make_shared<FFmpegContext>();
146 if (avformat_open_input(&ctx->format_context, filepath.c_str(), nullptr, nullptr) < 0) {
147 set_error("Failed to open file: " + filepath);
148 return false;
149 }
150
151 if (avformat_find_stream_info(ctx->format_context, nullptr) < 0) {
152 set_error("Failed to find stream info");
153 return false;
154 }
155
156 const AVCodec* codec = nullptr;
157 ctx->audio_stream_index = av_find_best_stream(
158 ctx->format_context, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
159
160 if (ctx->audio_stream_index < 0 || !codec) {
161 set_error("No audio stream found");
162 return false;
163 }
164
165 ctx->codec_context = avcodec_alloc_context3(codec);
166 if (!ctx->codec_context) {
167 set_error("Failed to allocate codec context");
168 return false;
169 }
170
171 AVStream* stream = ctx->format_context->streams[ctx->audio_stream_index];
172 if (avcodec_parameters_to_context(ctx->codec_context, stream->codecpar) < 0) {
173 set_error("Failed to copy codec parameters");
174 return false;
175 }
176
177 if (avcodec_open2(ctx->codec_context, codec, nullptr) < 0) {
178 set_error("Failed to open codec");
179 return false;
180 }
181
182 if (stream->duration != AV_NOPTS_VALUE && stream->time_base.num && stream->time_base.den) {
183 double duration_seconds = stream->duration * av_q2d(stream->time_base);
184 ctx->total_frames = static_cast<uint64_t>(duration_seconds * ctx->codec_context->sample_rate);
185 } else if (ctx->format_context->duration != AV_NOPTS_VALUE) {
186 double duration_seconds = ctx->format_context->duration / static_cast<double>(AV_TIME_BASE);
187 ctx->total_frames = static_cast<uint64_t>(duration_seconds * ctx->codec_context->sample_rate);
188 } else {
189 ctx->total_frames = 0;
190 }
191
192 ctx->sample_rate = ctx->codec_context->sample_rate;
193 ctx->channels = ctx->codec_context->ch_layout.nb_channels;
194 if (!setup_resampler(ctx)) {
195 set_error("Failed to setup resampler");
196 return false;
197 }
198
199 if (!ctx->is_valid()) {
200 set_error("Invalid context after initialization");
201 return false;
202 }
203
205 extract_metadata(ctx);
206 }
207
209 extract_regions(ctx);
210 }
211
212 m_context = std::move(ctx);
214 return true;
215}
216
218{
219 std::unique_lock<std::shared_mutex> lock(m_context_mutex);
220
221 if (m_context) {
222 m_context.reset();
224 m_filepath.clear();
225 m_cached_metadata.reset();
226 m_cached_regions.clear();
227 }
228}
229
231{
232 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
233 return m_context && m_context->is_valid();
234}
235
236// ============================================================================
237// Resampler Setup
238// ============================================================================
239
240bool SoundFileReader::setup_resampler(const std::shared_ptr<FFmpegContext>& ctx)
241{
242 if (!ctx || !ctx->codec_context) {
243 return false;
244 }
245
246 AVChannelLayout out_ch_layout;
247 av_channel_layout_copy(&out_ch_layout, &ctx->codec_context->ch_layout);
248
249 uint32_t out_sample_rate = m_target_sample_rate > 0 ? m_target_sample_rate : ctx->codec_context->sample_rate;
250
251 AVSampleFormat out_sample_fmt = (m_audio_options & AudioReadOptions::DEINTERLEAVE) != AudioReadOptions::NONE
252 ? AV_SAMPLE_FMT_DBLP
253 : AV_SAMPLE_FMT_DBL;
254
255 int ret = swr_alloc_set_opts2(&ctx->swr_context,
256 &out_ch_layout, out_sample_fmt, out_sample_rate,
257 &ctx->codec_context->ch_layout, ctx->codec_context->sample_fmt,
258 ctx->codec_context->sample_rate,
259 0, nullptr);
260
261 av_channel_layout_uninit(&out_ch_layout);
262
263 if (ret < 0 || !ctx->swr_context) {
264 set_error("Failed to allocate resampler");
265 return false;
266 }
267
268 if (swr_init(ctx->swr_context) < 0) {
269 set_error("Failed to initialize resampler");
270 return false;
271 }
272
273 return true;
274}
275
276// ============================================================================
277// Metadata and Regions
278// ============================================================================
279
280std::optional<FileMetadata> SoundFileReader::get_metadata() const
281{
282 std::shared_lock<std::shared_mutex> ctx_lock(m_context_mutex);
283
284 if (!m_context || !m_context->is_valid()) {
285 return std::nullopt;
286 }
287
288 {
289 std::lock_guard<std::mutex> meta_lock(m_metadata_mutex);
290 if (m_cached_metadata) {
291 return m_cached_metadata;
292 }
293 }
294
295 FileMetadata metadata;
296 auto ctx = m_context;
297
298 metadata.format = ctx->format_context->iformat->name;
299 metadata.mime_type = ctx->format_context->iformat->mime_type
300 ? ctx->format_context->iformat->mime_type
301 : "audio/" + std::string(ctx->format_context->iformat->name);
302
303 metadata.file_size = std::filesystem::file_size(m_filepath);
304 auto ftime = std::filesystem::last_write_time(m_filepath);
305 metadata.modification_time = std::chrono::system_clock::time_point(
306 std::chrono::seconds(std::chrono::duration_cast<std::chrono::seconds>(
307 ftime.time_since_epoch())));
308
309 metadata.attributes["codec"] = avcodec_get_name(ctx->codec_context->codec_id);
310 metadata.attributes["codec_long_name"] = ctx->codec_context->codec->long_name;
311 metadata.attributes["total_frames"] = ctx->total_frames;
312 metadata.attributes["sample_rate"] = ctx->sample_rate;
313 metadata.attributes["channels"] = ctx->channels;
314
315 char layout_desc[256];
316 av_channel_layout_describe(&ctx->codec_context->ch_layout, layout_desc, sizeof(layout_desc));
317 metadata.attributes["channel_layout"] = std::string(layout_desc);
318 metadata.attributes["bit_rate"] = ctx->codec_context->bit_rate;
319
320 if (ctx->format_context->duration != AV_NOPTS_VALUE) {
321 metadata.attributes["duration_seconds"] = ctx->format_context->duration / static_cast<double>(AV_TIME_BASE);
322 } else if (ctx->total_frames > 0) {
323 metadata.attributes["duration_seconds"] = ctx->total_frames / static_cast<double>(ctx->sample_rate);
324 }
325
326 AVDictionaryEntry* tag = nullptr;
327 while ((tag = av_dict_get(ctx->format_context->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
328 metadata.attributes[std::string("tag_") + tag->key] = tag->value;
329 }
330
331 AVStream* stream = ctx->format_context->streams[ctx->audio_stream_index];
332 tag = nullptr;
333 while ((tag = av_dict_get(stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
334 metadata.attributes[std::string("stream_") + tag->key] = tag->value;
335 }
336
337 {
338 std::lock_guard<std::mutex> meta_lock(m_metadata_mutex);
339 m_cached_metadata = metadata;
340 }
341
342 return metadata;
343}
344
345void SoundFileReader::extract_metadata(const std::shared_ptr<FFmpegContext>& ctx)
346{
347 if (!ctx || !ctx->is_valid()) {
348 return;
349 }
350
351 std::lock_guard<std::mutex> meta_lock(m_metadata_mutex);
352
353 FileMetadata metadata;
354
355 metadata.format = ctx->format_context->iformat->name;
356 metadata.mime_type = ctx->format_context->iformat->mime_type
357 ? ctx->format_context->iformat->mime_type
358 : "audio/" + std::string(ctx->format_context->iformat->name);
359
360 metadata.file_size = std::filesystem::file_size(m_filepath);
361 auto ftime = std::filesystem::last_write_time(m_filepath);
362 metadata.modification_time = std::chrono::system_clock::time_point(
363 std::chrono::seconds(std::chrono::duration_cast<std::chrono::seconds>(
364 ftime.time_since_epoch())));
365
366 metadata.attributes["codec"] = avcodec_get_name(ctx->codec_context->codec_id);
367 metadata.attributes["codec_long_name"] = ctx->codec_context->codec->long_name;
368 metadata.attributes["total_frames"] = ctx->total_frames;
369 metadata.attributes["sample_rate"] = ctx->sample_rate;
370 metadata.attributes["channels"] = ctx->channels;
371
372 char layout_desc[256];
373 av_channel_layout_describe(&ctx->codec_context->ch_layout, layout_desc, sizeof(layout_desc));
374 metadata.attributes["channel_layout"] = std::string(layout_desc);
375 metadata.attributes["bit_rate"] = ctx->codec_context->bit_rate;
376
377 if (ctx->format_context->duration != AV_NOPTS_VALUE) {
378 metadata.attributes["duration_seconds"] = ctx->format_context->duration / static_cast<double>(AV_TIME_BASE);
379 } else if (ctx->total_frames > 0) {
380 metadata.attributes["duration_seconds"] = ctx->total_frames / static_cast<double>(ctx->sample_rate);
381 }
382
383 AVDictionaryEntry* tag = nullptr;
384 while ((tag = av_dict_get(ctx->format_context->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
385 metadata.attributes[std::string("tag_") + tag->key] = tag->value;
386 }
387
388 AVStream* stream = ctx->format_context->streams[ctx->audio_stream_index];
389 tag = nullptr;
390 while ((tag = av_dict_get(stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
391 metadata.attributes[std::string("stream_") + tag->key] = tag->value;
392 }
393
394 m_cached_metadata = metadata;
395}
396
397void SoundFileReader::extract_regions(const std::shared_ptr<FFmpegContext>& ctx)
398{
399 if (!ctx || !ctx->is_valid()) {
400 return;
401 }
402
403 std::lock_guard<std::mutex> lock(m_metadata_mutex);
404 m_cached_regions.clear();
405
406 for (unsigned int i = 0; i < ctx->format_context->nb_chapters; i++) {
407 AVChapter* chapter = ctx->format_context->chapters[i];
408
409 FileRegion region;
410 region.type = "chapter";
411
412 uint64_t start = av_rescale_q(chapter->start, chapter->time_base,
413 AVRational { 1, static_cast<int>(ctx->sample_rate) });
414 uint64_t end = av_rescale_q(chapter->end, chapter->time_base,
415 AVRational { 1, static_cast<int>(ctx->sample_rate) });
416
417 region.start_coordinates = { start };
418 region.end_coordinates = { end };
419
420 AVDictionaryEntry* entry = nullptr;
421 while ((entry = av_dict_get(chapter->metadata, "", entry, AV_DICT_IGNORE_SUFFIX))) {
422 if (strcmp(entry->key, "title") == 0) {
423 region.name = entry->value;
424 } else {
425 region.attributes[entry->key] = entry->value;
426 }
427 }
428
429 if (region.name.empty()) {
430 region.name = "Chapter " + std::to_string(i + 1);
431 }
432
433 m_cached_regions.push_back(region);
434 }
435
436 AVDictionaryEntry* tag = nullptr;
437 while ((tag = av_dict_get(ctx->format_context->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
438 std::string key = tag->key;
439 if (key.find("cue") != std::string::npos || key.find("CUE") != std::string::npos) {
440 FileRegion region;
441 region.type = "cue";
442 region.name = key;
443 region.attributes["description"] = tag->value;
444
445 try {
446 uint64_t position = std::stoull(tag->value);
447 region.start_coordinates = { position };
448 region.end_coordinates = { position };
449 } catch (...) {
450 region.start_coordinates = { 0 };
451 region.end_coordinates = { 0 };
452 region.attributes["value"] = tag->value;
453 }
454 m_cached_regions.push_back(region);
455 }
456
457 if (key.find("loop") != std::string::npos || key.find("LOOP") != std::string::npos) {
458 FileRegion region;
459 region.type = "loop";
460 region.name = key;
461 region.attributes["value"] = tag->value;
462 region.start_coordinates = { 0 };
463 region.end_coordinates = { 0 };
464 m_cached_regions.push_back(region);
465 }
466 }
467
468 /* tag = nullptr;
469 while ((tag = av_dict_get(ctx->format_context->streams[m_audio_stream_index]->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
470 std::string key = tag->key;
471
472 if (key.find("marker") != std::string::npos || key.find("MARKER") != std::string::npos) {
473 FileRegion region;
474 region.type = "marker";
475 region.name = key;
476 region.attributes["value"] = tag->value;
477 region.start_coordinates = { 0 };
478 region.end_coordinates = { 0 };
479 m_cached_regions.push_back(region);
480 }
481 } */
482}
483
484std::vector<FileRegion> SoundFileReader::get_regions() const
485{
486 std::shared_lock<std::shared_mutex> ctx_lock(m_context_mutex);
487
488 if (!m_context || !m_context->is_valid()) {
489 return {};
490 }
491
492 std::lock_guard<std::mutex> meta_lock(m_metadata_mutex);
493 return m_cached_regions;
494}
495
496// ============================================================================
497// Reading Operations
498// ============================================================================
499
500std::vector<Kakshya::DataVariant> SoundFileReader::read_all()
501{
502 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
503
504 if (!m_context || !m_context->is_valid()) {
505 set_error("File not open");
506 return {};
507 }
508
509 auto ctx = m_context;
510 lock.unlock();
511
512 return read_frames(ctx->total_frames, 0);
513}
514
515std::vector<Kakshya::DataVariant> SoundFileReader::read_frames(uint64_t num_frames, uint64_t offset)
516{
517 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
518
519 if (!m_context || !m_context->is_valid()) {
520 set_error("File not open");
521 return {};
522 }
523
524 auto ctx = m_context;
525
526 if (offset != m_current_frame_position.load()) {
527 lock.unlock();
528 std::unique_lock<std::shared_mutex> write_lock(m_context_mutex);
529
530 if (!m_context || !m_context->is_valid()) {
531 set_error("File closed during operation");
532 return {};
533 }
534
535 ctx = m_context;
536 if (!seek_internal(ctx, offset)) {
537 return {};
538 }
539
540 write_lock.unlock();
541 lock.lock();
542
543 if (!m_context || !m_context->is_valid()) {
544 set_error("File closed during operation");
545 return {};
546 }
547
548 ctx = m_context;
549 }
550
551 return decode_frames(ctx, num_frames, offset);
552}
553
554std::vector<Kakshya::DataVariant> SoundFileReader::decode_frames(
555 std::shared_ptr<FFmpegContext> ctx,
556 uint64_t num_frames,
557 uint64_t offset)
558{
559 if (!ctx || !ctx->is_valid() || !ctx->swr_context) {
560 set_error("Invalid context for decoding");
561 return {};
562 }
563
564 std::vector<Kakshya::DataVariant> output_data;
565 uint64_t frames_decoded = 0;
566
567 AVPacket* packet = av_packet_alloc();
568 AVFrame* frame = av_frame_alloc();
569
570 if (!packet || !frame) {
571 av_packet_free(&packet);
572 av_frame_free(&frame);
573 set_error("Failed to allocate packet/frame");
574 return {};
575 }
576
577 int channels = ctx->channels;
579
580 if (use_planar) {
581 output_data.resize(channels);
582 for (auto& channel_vector : output_data) {
583 channel_vector = std::vector<double>();
584 std::get<std::vector<double>>(channel_vector).reserve(num_frames);
585 }
586 } else {
587 output_data.resize(1);
588 output_data[0] = std::vector<double>();
589 std::get<std::vector<double>>(output_data[0]).reserve(num_frames * channels);
590 }
591
592 uint8_t** resample_buffer = nullptr;
593 int resample_linesize = 0;
594
595 int max_resample_samples = av_rescale_rnd(
596 num_frames,
597 m_target_sample_rate > 0 ? m_target_sample_rate : ctx->sample_rate,
598 ctx->sample_rate,
599 AV_ROUND_UP);
600
601 AVSampleFormat target_format = use_planar ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
602
603 int alloc_ret = av_samples_alloc_array_and_samples(
604 &resample_buffer, &resample_linesize,
605 channels, max_resample_samples, target_format, 0);
606
607 if (alloc_ret < 0 || !resample_buffer) {
608 av_packet_free(&packet);
609 av_frame_free(&frame);
610 set_error("Failed to allocate resample buffer");
611 return {};
612 }
613
614 while (frames_decoded < num_frames) {
615 int ret = av_read_frame(ctx->format_context, packet);
616
617 if (ret < 0) {
618 if (ret == AVERROR_EOF) {
619 avcodec_send_packet(ctx->codec_context, nullptr);
620 } else {
621 break;
622 }
623 } else if (packet->stream_index != ctx->audio_stream_index) {
624 av_packet_unref(packet);
625 continue;
626 } else {
627 ret = avcodec_send_packet(ctx->codec_context, packet);
628 av_packet_unref(packet);
629
630 if (ret < 0 && ret != AVERROR(EAGAIN)) {
631 continue;
632 }
633 }
634
635 while (ret >= 0 && frames_decoded < num_frames) {
636 ret = avcodec_receive_frame(ctx->codec_context, frame);
637
638 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
639 break;
640 } else if (ret < 0) {
641 break;
642 }
643
644 int out_samples = swr_convert(ctx->swr_context,
645 resample_buffer, max_resample_samples,
646 (const uint8_t**)frame->data, frame->nb_samples);
647
648 if (out_samples > 0) {
649 uint64_t samples_to_copy = std::min(
650 static_cast<uint64_t>(out_samples),
651 num_frames - frames_decoded);
652
653 if (use_planar) {
654 for (int ch = 0; ch < channels; ++ch) {
655 double* channel_data = reinterpret_cast<double*>(resample_buffer[ch]);
656 auto& channel_vector = std::get<std::vector<double>>(output_data[ch]);
657 channel_vector.insert(channel_vector.end(),
658 channel_data, channel_data + samples_to_copy);
659 }
660 } else {
661 double* interleaved_data = reinterpret_cast<double*>(resample_buffer[0]);
662 auto& interleaved_vector = std::get<std::vector<double>>(output_data[0]);
663 interleaved_vector.insert(interleaved_vector.end(),
664 interleaved_data, interleaved_data + samples_to_copy * channels);
665 }
666
667 frames_decoded += samples_to_copy;
668 }
669
670 av_frame_unref(frame);
671 }
672
673 if (ret == AVERROR_EOF) {
674 break;
675 }
676 }
677
678 av_frame_free(&frame);
679 av_packet_free(&packet);
680
681 if (resample_buffer) {
682 av_freep(&resample_buffer[0]);
683 av_freep(&resample_buffer);
684 }
685
686 m_current_frame_position = offset + frames_decoded;
687 return output_data;
688}
689
690std::vector<Kakshya::DataVariant> SoundFileReader::read_region(const FileRegion& region)
691{
692 if (region.start_coordinates.empty()) {
693 set_error("Invalid region");
694 return {};
695 }
696
697 uint64_t start = region.start_coordinates[0];
698 uint64_t end = region.end_coordinates.empty() ? start : region.end_coordinates[0];
699 uint64_t num_frames = (end > start) ? (end - start) : 1;
700
701 return read_frames(num_frames, start);
702}
703
704// ============================================================================
705// Seeking
706// ============================================================================
707
708std::vector<uint64_t> SoundFileReader::get_read_position() const
709{
710 return { m_current_frame_position.load() };
711}
712
713bool SoundFileReader::seek(const std::vector<uint64_t>& position)
714{
715 if (position.empty()) {
716 set_error("Empty position vector");
717 return false;
718 }
719
720 std::unique_lock<std::shared_mutex> lock(m_context_mutex);
721
722 if (!m_context || !m_context->is_valid()) {
723 set_error("File not open");
724 return false;
725 }
726
727 return seek_internal(m_context, position[0]);
728}
729
730bool SoundFileReader::seek_internal(std::shared_ptr<FFmpegContext>& ctx, uint64_t frame_position)
731{
732 if (!ctx || !ctx->is_valid()) {
733 set_error("Invalid context for seeking");
734 return false;
735 }
736
737 if (frame_position > ctx->total_frames) {
738 frame_position = ctx->total_frames;
739 }
740
741 if (ctx->sample_rate == 0) {
742 set_error("Invalid sample rate");
743 return false;
744 }
745
746 if (ctx->audio_stream_index < 0 || ctx->audio_stream_index >= static_cast<int>(ctx->format_context->nb_streams)) {
747 set_error("Invalid audio stream index");
748 return false;
749 }
750
751 AVStream* stream = ctx->format_context->streams[ctx->audio_stream_index];
752
753 int64_t timestamp = av_rescale_q(
754 frame_position,
755 AVRational { 1, static_cast<int>(ctx->sample_rate) },
756 stream->time_base);
757
758 int ret = av_seek_frame(
759 ctx->format_context,
760 ctx->audio_stream_index,
761 timestamp,
762 AVSEEK_FLAG_BACKWARD);
763
764 if (ret < 0) {
765 set_error("Seek operation failed");
766 return false;
767 }
768
769 avcodec_flush_buffers(ctx->codec_context);
770
771 if (ctx->swr_context) {
772 uint8_t** dummy = nullptr;
773 int linesize = 0;
774
775 int alloc_ret = av_samples_alloc_array_and_samples(
776 &dummy, &linesize,
777 ctx->channels, 2048, AV_SAMPLE_FMT_DBL, 0);
778
779 if (alloc_ret >= 0 && dummy) {
780 while (swr_convert(ctx->swr_context, dummy, 2048, nullptr, 0) > 0) {
781 }
782
783 av_freep(&dummy[0]);
784 av_freep(&dummy);
785 }
786 }
787
788 m_current_frame_position = frame_position;
789 return true;
790}
791
792// ============================================================================
793// Container Operations
794// ============================================================================
795
796std::shared_ptr<Kakshya::SignalSourceContainer> SoundFileReader::create_container()
797{
798 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
799
800 if (!m_context || !m_context->is_valid()) {
801 set_error("File not open");
802 return nullptr;
803 }
804
805 auto container = std::make_shared<Kakshya::SoundFileContainer>();
806 lock.unlock();
807
808 if (!load_into_container(container)) {
809 return nullptr;
810 }
811
812 return container;
813}
814
815bool SoundFileReader::load_into_container(std::shared_ptr<Kakshya::SignalSourceContainer> container)
816{
817 if (!container) {
818 set_error("Invalid container");
819 return false;
820 }
821
822 auto sound_container = std::dynamic_pointer_cast<Kakshya::SoundFileContainer>(container);
823 if (!sound_container) {
824 set_error("Container is not a SoundFileContainer");
825 return false;
826 }
827
828 auto metadata = get_metadata();
829 if (!metadata) {
830 set_error("Failed to get metadata");
831 return false;
832 }
833
834 auto total_frames = metadata->get_attribute<uint64_t>("total_frames").value_or(0);
835 auto sample_rate = metadata->get_attribute<uint32_t>("sample_rate").value_or(48000);
836 auto channels = metadata->get_attribute<uint32_t>("channels").value_or(2);
837
838 sound_container->setup(total_frames, sample_rate, channels);
839
841 sound_container->get_structure().organization = Kakshya::OrganizationStrategy::PLANAR;
842 } else {
843 sound_container->get_structure().organization = Kakshya::OrganizationStrategy::INTERLEAVED;
844 }
845
846 std::vector<Kakshya::DataVariant> audio_data = read_all();
847
848 if (audio_data.empty()) {
849 set_error("Failed to read audio data");
850 return false;
851 }
852
853 sound_container->set_raw_data(audio_data);
854
855 auto regions = get_regions();
856 auto region_groups = regions_to_groups(regions);
857 for (const auto& [name, group] : region_groups) {
858 sound_container->add_region_group(group);
859 }
860
861 sound_container->create_default_processor();
862 sound_container->mark_ready_for_processing(true);
863
864 return true;
865}
866
867// ============================================================================
868// Utility Methods
869// ============================================================================
870
872{
873 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
874
875 if (m_context && m_context->codec_context && m_context->codec_context->codec) {
876 if (m_context->codec_context->frame_size > 0) {
877 return m_context->codec_context->frame_size * 4;
878 }
879 }
880 return 4096;
881}
882
884{
885 return 2; // time × channels
886}
887
888std::vector<uint64_t> SoundFileReader::get_dimension_sizes() const
889{
890 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
891
892 if (!m_context || !m_context->is_valid()) {
893 return {};
894 }
895
896 return { m_context->total_frames, static_cast<uint64_t>(m_context->channels) };
897}
898
899std::vector<std::string> SoundFileReader::get_supported_extensions() const
900{
901 return {
902 "wav", "flac", "mp3", "m4a", "aac", "ogg", "opus", "wma",
903 "aiff", "aif", "ape", "wv", "tta", "mka", "ac3", "dts",
904 "mp2", "mp4", "webm", "caf", "amr", "au", "voc", "w64",
905 "mpc", "mp+", "m4b", "m4r", "3gp", "3g2", "asf", "rm",
906 "ra", "avi", "mov", "mkv", "ogv", "ogx", "oga", "spx",
907 "f4a", "f4b", "f4v", "m4v", "asx", "wvx", "wax"
908 };
909}
910
912{
913 std::lock_guard<std::mutex> lock(m_metadata_mutex);
914 return m_last_error;
915}
916
918{
919 std::shared_lock<std::shared_mutex> lock(m_context_mutex);
920
921 if (!m_context || !m_context->format_context) {
922 return false;
923 }
924
925 return m_context->format_context->pb && m_context->format_context->pb->seekable;
926}
927
928void SoundFileReader::set_error(const std::string& error) const
929{
930 std::lock_guard<std::mutex> lock(m_metadata_mutex);
931 m_last_error = error;
932}
933
935{
936 std::lock_guard<std::mutex> lock(m_metadata_mutex);
937 m_last_error.clear();
938}
939
940std::vector<std::vector<double>> SoundFileReader::deinterleave_data(
941 const std::vector<double>& interleaved, uint32_t channels)
942{
943 if (channels == 1) {
944 return { interleaved };
945 }
946
947 std::vector<std::vector<double>> deinterleaved(channels);
948
949 size_t samples_per_channel = interleaved.size() / channels;
950
951 for (uint32_t ch = 0; ch < channels; ch++) {
952 deinterleaved[ch].reserve(samples_per_channel);
953 for (size_t i = 0; i < samples_per_channel; i++) {
954 deinterleaved[ch].push_back(interleaved[i * channels + ch]);
955 }
956 }
957
958 return deinterleaved;
959}
960
961} // namespace MayaFlux::IO
static std::unordered_map< std::string, Kakshya::RegionGroup > regions_to_groups(const std::vector< FileRegion > &regions)
Convert file regions to region groups.
std::vector< Kakshya::DataVariant > read_all() override
Read the entire audio file into memory.
void close() override
Close the currently open file and release resources.
std::string get_last_error() const override
Get the last error message encountered by the reader.
uint64_t get_preferred_chunk_size() const override
Get the preferred chunk size for streaming reads.
uint32_t m_target_sample_rate
Target sample rate for resampling (0 = use source rate).
bool supports_streaming() const override
Check if the reader supports streaming access.
bool open(const std::string &filepath, FileReadOptions options=FileReadOptions::ALL) override
Open an audio file for reading.
bool load_into_container(std::shared_ptr< Kakshya::SignalSourceContainer > container) override
Load file data into an existing SignalSourceContainer.
bool can_read(const std::string &filepath) const override
Check if this reader can open the given file.
std::mutex m_metadata_mutex
Mutex for thread-safe metadata access.
static std::atomic< bool > s_ffmpeg_initialized
True if FFmpeg has been initialized.
std::vector< Kakshya::DataVariant > decode_frames(std::shared_ptr< FFmpegContext > ctx, uint64_t num_frames, uint64_t offset)
Decode a specific number of frames from the file.
bool seek_internal(std::shared_ptr< FFmpegContext > &ctx, uint64_t frame_position)
Internal seek implementation.
std::string m_last_error
Last error message encountered.
void extract_regions(const std::shared_ptr< FFmpegContext > &ctx)
Extract region information from the file.
std::vector< uint64_t > get_read_position() const override
Get the current read position in the file.
void set_error(const std::string &error) const
Set the last error message.
std::atomic< uint64_t > m_current_frame_position
Current frame position for reading.
void extract_metadata(const std::shared_ptr< FFmpegContext > &ctx)
Extract metadata from the file.
bool setup_resampler(const std::shared_ptr< FFmpegContext > &ctx)
Set up the FFmpeg resampler if needed.
std::vector< Kakshya::DataVariant > read_region(const FileRegion &region) override
Read a specific region from the file.
std::vector< Kakshya::DataVariant > read_frames(uint64_t num_frames, uint64_t offset=0)
Read a specific number of frames from the file.
AudioReadOptions m_audio_options
Audio-specific read options.
std::shared_ptr< FFmpegContext > m_context
std::optional< FileMetadata > m_cached_metadata
Cached file metadata.
std::vector< FileRegion > m_cached_regions
Cached file regions (markers, loops, etc.).
void clear_error() const
Clear the last error message.
~SoundFileReader() override
Destroy the SoundFileReader object.
std::string m_filepath
Path to the currently open file.
std::vector< uint64_t > get_dimension_sizes() const override
Get the size of each dimension (e.g., frames, channels).
std::shared_ptr< Kakshya::SignalSourceContainer > create_container() override
Create a SignalSourceContainer for this file.
bool seek(const std::vector< uint64_t > &position) override
Seek to a specific position in the file.
SoundFileReader()
Construct a new SoundFileReader object.
std::vector< std::vector< double > > deinterleave_data(const std::vector< double > &interleaved, uint32_t channels)
Convert interleaved audio data to deinterleaved (planar) format.
size_t get_num_dimensions() const override
Get the number of dimensions in the audio data (typically 2: time, channel).
std::optional< FileMetadata > get_metadata() const override
Get metadata for the currently open file.
std::vector< FileRegion > get_regions() const override
Get all regions (markers, loops, etc.) from the file.
bool is_open() const override
Check if a file is currently open.
FileReadOptions m_options
File read options used for this session.
std::vector< std::string > get_supported_extensions() const override
Get supported file extensions for this reader.
static void initialize_ffmpeg()
Initialize FFmpeg libraries (thread-safe, called automatically).
static std::mutex s_ffmpeg_init_mutex
Mutex for FFmpeg initialization.
FileReadOptions
Generic options for file reading behavior.
@ EXTRACT_METADATA
Extract file metadata.
@ EXTRACT_REGIONS
Extract semantic regions (format-specific)
@ NONE
No special options.
@ PLANAR
Separate DataVariant per logical unit (LLL...RRR for stereo)
@ INTERLEAVED
Single DataVariant with interleaved data (LRLRLR for stereo)
uint64_t file_size
Size in bytes.
std::unordered_map< std::string, std::any > attributes
Type-specific metadata stored as key-value pairs (e.g., sample rate, channels)
std::chrono::system_clock::time_point modification_time
Last modification time.
std::string format
File format identifier (e.g., "wav", "mp3", "hdf5")
std::string mime_type
MIME type if applicable (e.g., "audio/wav")
Generic metadata structure for any file type.
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y)
Kakshya::Region to_region() const
Convert this FileRegion to a Region for use in processing.
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block")
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive)
Generic region descriptor for any file type.
static Region time_span(uint64_t start_frame, uint64_t end_frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a time span (e.g., a segment of frames).
Definition Region.hpp:135
void set_attribute(const std::string &key, std::any value)
Set an attribute value by key.
Definition Region.hpp:339
static Region time_point(uint64_t frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a single time point (e.g., a frame or sample).
Definition Region.hpp:114
Represents a point or span in N-dimensional space.
Definition Region.hpp:67