5#include <libavcodec/avcodec.h>
6#include <libavformat/avformat.h>
7#include <libavutil/opt.h>
8#include <libavutil/samplefmt.h>
9#include <libswresample/swresample.h>
37std::unordered_map<std::string, Kakshya::RegionGroup>
40 std::unordered_map<std::string, Kakshya::RegionGroup> groups;
42 for (
const auto& region : regions) {
43 auto& group = groups[region.type];
44 group.name = region.type;
45 group.add_region(region.to_region());
72 if (!probe.
open(filepath))
75 const AVCodec* codec =
nullptr;
77 reinterpret_cast<const void**
>(&codec));
78 return idx >= 0 && codec !=
nullptr;
103 auto demux = std::make_shared<FFmpegDemuxContext>();
104 if (!demux->open(resolved)) {
110 auto audio = std::make_shared<AudioStreamContext>();
129 std::shared_ptr<FFmpegDemuxContext> demux,
130 std::shared_ptr<AudioStreamContext> audio,
131 const std::string& filepath,
146 if (!demux || !demux->is_open()) {
147 set_error(
"open_from_demux: demux context is null or not open");
151 if (!audio || !audio->is_valid()) {
152 set_error(
"open_from_demux: audio stream context is null or not valid");
230 const std::shared_ptr<FFmpegDemuxContext>& demux,
231 const std::shared_ptr<AudioStreamContext>& audio)
const
234 demux->extract_container_metadata(meta);
235 audio->extract_stream_metadata(*demux, meta);
238 auto ftime = std::filesystem::last_write_time(
m_filepath);
240 std::chrono::seconds(
241 std::chrono::duration_cast<std::chrono::seconds>(ftime.time_since_epoch())));
248 const std::shared_ptr<FFmpegDemuxContext>& demux,
249 const std::shared_ptr<AudioStreamContext>& audio)
const
251 auto chapters = demux->extract_chapter_regions();
252 auto cues = audio->extract_cue_regions(*demux);
254 std::vector<FileRegion> all;
255 all.reserve(chapters.size() + cues.size());
256 all.insert(all.end(), chapters.begin(), chapters.end());
257 all.insert(all.end(), cues.begin(), cues.end());
285 uint64_t n = (end > start) ? (end - start) : 1;
290 const std::string& filepath,
295 set_error(
"load_bounded: unsupported file: " + filepath);
304 std::shared_ptr<AudioStreamContext> audio;
310 if (!audio || !audio->is_valid()) {
311 set_error(
"load_bounded: no valid audio stream");
318 : audio->sample_rate;
321 max_frames = effective_rate * 5;
323 const bool over_limit = audio->total_frames > max_frames;
325 if (over_limit && !truncate) {
327 "load_bounded: file has {} frames, exceeds limit of {}",
328 audio->total_frames, max_frames));
335 "load_bounded: {} has {} frames, truncating to {}",
336 filepath, audio->total_frames, max_frames);
339 const uint64_t frames_to_load = over_limit ? max_frames : audio->total_frames;
342 auto stream = std::make_shared<Kakshya::DynamicSoundStream>(
344 static_cast<uint32_t
>(audio->channels));
346 stream->get_structure().organization = planar
350 stream->set_auto_resize(
false);
351 stream->ensure_capacity(frames_to_load);
353 auto data = over_limit
360 set_error(
"load_bounded: read returned no data");
364 stream->set_all_data(data);
382 set_error(
"File closed during operation");
390 set_error(
"File closed during operation");
422 const std::shared_ptr<FFmpegDemuxContext>& demux,
423 const std::shared_ptr<AudioStreamContext>& audio,
424 uint64_t frame_position)
426 if (frame_position > audio->total_frames)
427 frame_position = audio->total_frames;
429 if (audio->sample_rate == 0) {
434 AVStream* stream = demux->get_stream(audio->stream_index);
440 int64_t ts = av_rescale_q(
441 static_cast<int64_t
>(frame_position),
442 AVRational { .num = 1, .den =
static_cast<int>(audio->sample_rate) },
445 if (!demux->seek(audio->stream_index, ts)) {
450 audio->flush_codec();
451 audio->drain_resampler_init();
461 const std::shared_ptr<FFmpegDemuxContext>& demux,
462 const std::shared_ptr<AudioStreamContext>& audio,
466 if (!audio->is_valid()) {
467 set_error(
"Invalid audio context for decoding");
472 int ch =
static_cast<int>(audio->channels);
474 std::vector<Kakshya::DataVariant> output;
477 for (
auto& v : output) {
478 v = std::vector<double>();
479 std::get<std::vector<double>>(v).reserve(num_frames);
483 output[0] = std::vector<double>();
484 std::get<std::vector<double>>(output[0]).reserve(num_frames *
static_cast<size_t>(ch));
487 uint64_t decoded = 0;
488 bool eof_reached =
false;
490 AVPacket* pkt = av_packet_alloc();
491 AVFrame* frame = av_frame_alloc();
492 if (!pkt || !frame) {
493 av_packet_free(&pkt);
494 av_frame_free(&frame);
495 set_error(
"Failed to allocate packet/frame");
500 int max_resampled =
static_cast<int>(av_rescale_rnd(
501 static_cast<int64_t
>(num_frames), out_rate, audio->sample_rate, AV_ROUND_UP));
503 AVSampleFormat tgt_fmt = use_planar ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
504 uint8_t** resample_buf =
nullptr;
507 if (av_samples_alloc_array_and_samples(
508 &resample_buf, &linesize, ch, max_resampled, tgt_fmt, 0)
510 av_packet_free(&pkt);
511 av_frame_free(&frame);
512 set_error(
"Failed to allocate resample buffer");
516 while (decoded < num_frames) {
518 int ret = av_read_frame(demux->format_context, pkt);
519 if (ret == AVERROR_EOF) {
521 avcodec_send_packet(audio->codec_context,
nullptr);
522 }
else if (ret < 0) {
524 }
else if (pkt->stream_index == audio->stream_index) {
525 avcodec_send_packet(audio->codec_context, pkt);
526 av_packet_unref(pkt);
528 av_packet_unref(pkt);
533 while (decoded < num_frames) {
534 receive_ret = avcodec_receive_frame(audio->codec_context, frame);
536 if (receive_ret == AVERROR(EAGAIN))
538 if (receive_ret == AVERROR_EOF) {
545 int out_samples = swr_convert(
547 resample_buf, max_resampled,
548 const_cast<const uint8_t**
>(frame->data),
551 if (out_samples > 0) {
552 uint64_t to_copy = std::min(
static_cast<uint64_t
>(out_samples),
553 num_frames - decoded);
555 for (
int c = 0; c < ch; ++c) {
556 auto* src =
reinterpret_cast<double*
>(resample_buf[c]);
557 auto& dst = std::get<std::vector<double>>(output[c]);
558 dst.insert(dst.end(), src, src + to_copy);
561 auto* src =
reinterpret_cast<double*
>(resample_buf[0]);
562 auto& dst = std::get<std::vector<double>>(output[0]);
563 dst.insert(dst.end(), src, src + to_copy *
static_cast<uint64_t
>(ch));
567 av_frame_unref(frame);
570 if (eof_reached && receive_ret == AVERROR_EOF)
575 int n = swr_convert(audio->swr_context, resample_buf, max_resampled,
nullptr, 0);
579 uint64_t to_copy = std::min(
static_cast<uint64_t
>(n),
580 (num_frames > decoded) ? (num_frames - decoded) : 0);
584 for (
int c = 0; c < ch; ++c) {
585 auto* src =
reinterpret_cast<double*
>(resample_buf[c]);
586 auto& dst = std::get<std::vector<double>>(output[c]);
587 dst.insert(dst.end(), src, src + to_copy);
590 auto* src =
reinterpret_cast<double*
>(resample_buf[0]);
591 auto& dst = std::get<std::vector<double>>(output[0]);
592 dst.insert(dst.end(), src, src + to_copy *
static_cast<uint64_t
>(ch));
600 av_freep(&resample_buf[0]);
601 av_freep(&resample_buf);
602 av_packet_free(&pkt);
603 av_frame_free(&frame);
621 return std::make_shared<Kakshya::SoundFileContainer>();
625 std::shared_ptr<Kakshya::SignalSourceContainer> container)
632 auto sc = std::dynamic_pointer_cast<Kakshya::SoundFileContainer>(container);
634 set_error(
"Container is not a SoundFileContainer");
638 std::shared_ptr<AudioStreamContext> audio;
650 sc->set_source_format(
m_demux->format_context->iformat->name);
652 sc->setup(audio->total_frames, audio->sample_rate, audio->channels);
655 sc->get_structure().organization = planar
665 sc->set_raw_data(data);
669 for (
const auto& [name, group] : region_groups)
670 sc->add_region_group(group);
672 sc->create_default_processor();
673 sc->mark_ready_for_processing(
true);
697 "wav",
"flac",
"mp3",
"m4a",
"aac",
"ogg",
"opus",
"wma",
698 "aiff",
"aif",
"ape",
"wv",
"tta",
"mka",
"ac3",
"dts",
699 "mp2",
"mp4",
"webm",
"caf",
"amr",
"au",
"voc",
"w64",
700 "mpc",
"mp+",
"m4b",
"m4r",
"3gp",
"3g2",
"asf",
"rm",
701 "ra",
"avi",
"mov",
"mkv",
"ogv",
"ogx",
"oga",
"spx",
702 "f4a",
"f4b",
"f4v",
"m4v",
"asx",
"wvx",
"wax"
#define MF_ERROR(comp, ctx,...)
#define MF_WARN(comp, ctx,...)
bool open(const std::string &filepath)
Open a media file and probe stream information.
static void init_ffmpeg()
Initialise FFmpeg logging level once per process.
int find_best_stream(int media_type, const void **out_codec=nullptr) const
Find the best stream of the requested media type.
RAII owner of a single AVFormatContext and associated demux state.
static std::string resolve_path(const std::string &filepath)
Resolve a filepath against the project source root if not found as-is.
static std::unordered_map< std::string, Kakshya::RegionGroup > regions_to_groups(const std::vector< FileRegion > &regions)
Convert file regions to region groups.
std::vector< Kakshya::DataVariant > read_all() override
Read the entire audio file into memory.
void close() override
Close the currently open file and release resources.
void build_regions(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio) const
Build and cache FileRegion list from both contexts.
std::string get_last_error() const override
Get the last error message encountered by the reader.
uint32_t m_target_sample_rate
Target sample rate for resampling (0 = use source rate).
bool open(const std::string &filepath, FileReadOptions options=FileReadOptions::ALL) override
Open an audio file for reading.
std::mutex m_error_mutex
Mutex for thread-safe error message access.
void build_metadata(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio) const
Build and cache FileMetadata from both contexts.
std::shared_ptr< AudioStreamContext > m_audio
Codec + resampler state.
bool load_into_container(std::shared_ptr< Kakshya::SignalSourceContainer > container) override
Load file data into an existing SignalSourceContainer.
std::shared_mutex m_context_mutex
Guards both context pointers.
std::shared_ptr< Kakshya::DynamicSoundStream > load_bounded(const std::string &filepath, uint64_t max_frames=0, bool truncate=false)
Load an audio file into a size-bounded DynamicSoundStream.
bool seek_internal(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio, uint64_t frame_position)
Seek the demuxer and flush the codec to the given frame position.
bool can_read(const std::string &filepath) const override
Check if this reader can open the given file.
std::mutex m_metadata_mutex
Mutex for thread-safe metadata access.
std::vector< Kakshya::DataVariant > decode_frames(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio, uint64_t num_frames, uint64_t offset)
Decode num_frames PCM frames starting at offset.
std::string m_last_error
Last error message encountered.
std::vector< uint64_t > get_read_position() const override
Get the current read position in the file.
void set_error(const std::string &error) const
Set the last error message.
std::atomic< uint64_t > m_current_frame_position
Current frame position for reading.
std::vector< Kakshya::DataVariant > read_region(const FileRegion ®ion) override
Read a specific region from the file.
std::vector< Kakshya::DataVariant > read_frames(uint64_t num_frames, uint64_t offset=0)
Read a specific number of frames from the file.
AudioReadOptions m_audio_options
Audio-specific read options.
std::optional< FileMetadata > m_cached_metadata
Cached file metadata.
bool open_from_demux(std::shared_ptr< FFmpegDemuxContext > demux, std::shared_ptr< AudioStreamContext > audio, const std::string &filepath, FileReadOptions options=FileReadOptions::ALL)
Open an audio stream from an already-constructed demux and stream context.
std::vector< FileRegion > m_cached_regions
Cached file regions (markers, loops, etc.).
void clear_error() const
Clear the last error message.
~SoundFileReader() override
Destroy the SoundFileReader object.
std::string m_filepath
Path to the currently open file.
std::vector< uint64_t > get_dimension_sizes() const override
Get the size of each dimension (e.g., frames, channels).
std::shared_ptr< Kakshya::SignalSourceContainer > create_container() override
Create a SignalSourceContainer for this file.
bool seek(const std::vector< uint64_t > &position) override
Seek to a specific position in the file.
SoundFileReader()
Construct a new SoundFileReader object.
size_t get_num_dimensions() const override
Get the number of dimensions in the audio data (typically 2: time, channel).
std::shared_ptr< FFmpegDemuxContext > m_demux
Container / format state.
std::optional< FileMetadata > get_metadata() const override
Get metadata for the currently open file.
std::vector< FileRegion > get_regions() const override
Get all regions (markers, loops, etc.) from the file.
bool is_open() const override
Check if a file is currently open.
FileReadOptions m_options
File read options used for this session.
std::vector< std::string > get_supported_extensions() const override
Get supported file extensions for this reader.
@ DEINTERLEAVE
Output planar (per-channel) doubles instead of interleaved.
FileReadOptions
Generic options for file reading behavior.
@ EXTRACT_METADATA
Extract file metadata.
@ EXTRACT_REGIONS
Extract semantic regions (format-specific)
@ NONE
No special options.
@ FileIO
Filesystem I/O operations.
@ IO
Networking, file handling, streaming.
@ PLANAR
Separate DataVariant per logical unit (LLL...RRR for stereo)
@ INTERLEAVED
Single DataVariant with interleaved data (LRLRLR for stereo)
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y)
Kakshya::Region to_region() const
Convert this FileRegion to a Region for use in processing.
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block")
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive)
Generic region descriptor for any file type.
static Region time_span(uint64_t start_frame, uint64_t end_frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a time span (e.g., a segment of frames).
void set_attribute(const std::string &key, std::any value)
Set an attribute value by key.
static Region time_point(uint64_t frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a single time point (e.g., a frame or sample).
Represents a point or span in N-dimensional space.