4#include <libavcodec/avcodec.h>
5#include <libavformat/avformat.h>
6#include <libavutil/opt.h>
7#include <libavutil/samplefmt.h>
8#include <libswresample/swresample.h>
36std::unordered_map<std::string, Kakshya::RegionGroup>
39 std::unordered_map<std::string, Kakshya::RegionGroup> groups;
41 for (
const auto& region : regions) {
42 auto& group = groups[region.type];
43 group.name = region.type;
44 group.add_region(region.to_region());
71 if (!probe.
open(filepath))
74 const AVCodec* codec =
nullptr;
76 reinterpret_cast<const void**
>(&codec));
77 return idx >= 0 && codec !=
nullptr;
101 auto demux = std::make_shared<FFmpegDemuxContext>();
102 if (!demux->open(filepath)) {
108 auto audio = std::make_shared<AudioStreamContext>();
127 std::shared_ptr<FFmpegDemuxContext> demux,
128 std::shared_ptr<AudioStreamContext> audio,
129 const std::string& filepath,
144 if (!demux || !demux->is_open()) {
145 set_error(
"open_from_demux: demux context is null or not open");
149 if (!audio || !audio->is_valid()) {
150 set_error(
"open_from_demux: audio stream context is null or not valid");
228 const std::shared_ptr<FFmpegDemuxContext>& demux,
229 const std::shared_ptr<AudioStreamContext>& audio)
const
232 demux->extract_container_metadata(meta);
233 audio->extract_stream_metadata(*demux, meta);
236 auto ftime = std::filesystem::last_write_time(
m_filepath);
238 std::chrono::seconds(
239 std::chrono::duration_cast<std::chrono::seconds>(ftime.time_since_epoch())));
246 const std::shared_ptr<FFmpegDemuxContext>& demux,
247 const std::shared_ptr<AudioStreamContext>& audio)
const
249 auto chapters = demux->extract_chapter_regions();
250 auto cues = audio->extract_cue_regions(*demux);
252 std::vector<FileRegion> all;
253 all.reserve(chapters.size() + cues.size());
254 all.insert(all.end(), chapters.begin(), chapters.end());
255 all.insert(all.end(), cues.begin(), cues.end());
283 uint64_t n = (end > start) ? (end - start) : 1;
300 set_error(
"File closed during operation");
308 set_error(
"File closed during operation");
327 if (position.empty()) {
340 const std::shared_ptr<FFmpegDemuxContext>& demux,
341 const std::shared_ptr<AudioStreamContext>& audio,
342 uint64_t frame_position)
344 if (frame_position > audio->total_frames)
345 frame_position = audio->total_frames;
347 if (audio->sample_rate == 0) {
352 AVStream* stream = demux->get_stream(audio->stream_index);
358 int64_t ts = av_rescale_q(
359 static_cast<int64_t
>(frame_position),
360 AVRational { 1,
static_cast<int>(audio->sample_rate) },
363 if (!demux->seek(audio->stream_index, ts)) {
368 audio->flush_codec();
369 audio->drain_resampler_init();
379 const std::shared_ptr<FFmpegDemuxContext>& demux,
380 const std::shared_ptr<AudioStreamContext>& audio,
384 if (!audio->is_valid()) {
385 set_error(
"Invalid audio context for decoding");
390 int ch =
static_cast<int>(audio->channels);
392 std::vector<Kakshya::DataVariant> output;
395 for (
auto& v : output) {
396 v = std::vector<double>();
397 std::get<std::vector<double>>(v).reserve(num_frames);
401 output[0] = std::vector<double>();
402 std::get<std::vector<double>>(output[0]).reserve(num_frames *
static_cast<size_t>(ch));
405 uint64_t decoded = 0;
406 bool eof_reached =
false;
408 AVPacket* pkt = av_packet_alloc();
409 AVFrame* frame = av_frame_alloc();
410 if (!pkt || !frame) {
411 av_packet_free(&pkt);
412 av_frame_free(&frame);
413 set_error(
"Failed to allocate packet/frame");
418 int max_resampled =
static_cast<int>(av_rescale_rnd(
419 static_cast<int64_t
>(num_frames), out_rate, audio->sample_rate, AV_ROUND_UP));
421 AVSampleFormat tgt_fmt = use_planar ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
422 uint8_t** resample_buf =
nullptr;
425 if (av_samples_alloc_array_and_samples(
426 &resample_buf, &linesize, ch, max_resampled, tgt_fmt, 0)
428 av_packet_free(&pkt);
429 av_frame_free(&frame);
430 set_error(
"Failed to allocate resample buffer");
434 while (decoded < num_frames) {
436 int ret = av_read_frame(demux->format_context, pkt);
437 if (ret == AVERROR_EOF) {
439 avcodec_send_packet(audio->codec_context,
nullptr);
440 }
else if (ret < 0) {
442 }
else if (pkt->stream_index == audio->stream_index) {
443 avcodec_send_packet(audio->codec_context, pkt);
444 av_packet_unref(pkt);
446 av_packet_unref(pkt);
451 while (decoded < num_frames) {
452 receive_ret = avcodec_receive_frame(audio->codec_context, frame);
454 if (receive_ret == AVERROR(EAGAIN))
456 if (receive_ret == AVERROR_EOF) {
463 int out_samples = swr_convert(
465 resample_buf, max_resampled,
466 const_cast<const uint8_t**
>(frame->data),
469 if (out_samples > 0) {
470 uint64_t to_copy = std::min(
static_cast<uint64_t
>(out_samples),
471 num_frames - decoded);
473 for (
int c = 0; c < ch; ++c) {
474 auto* src =
reinterpret_cast<double*
>(resample_buf[c]);
475 auto& dst = std::get<std::vector<double>>(output[c]);
476 dst.insert(dst.end(), src, src + to_copy);
479 auto* src =
reinterpret_cast<double*
>(resample_buf[0]);
480 auto& dst = std::get<std::vector<double>>(output[0]);
481 dst.insert(dst.end(), src, src + to_copy *
static_cast<uint64_t
>(ch));
485 av_frame_unref(frame);
488 if (eof_reached && receive_ret == AVERROR_EOF)
493 int n = swr_convert(audio->swr_context, resample_buf, max_resampled,
nullptr, 0);
497 uint64_t to_copy = std::min(
static_cast<uint64_t
>(n),
498 (num_frames > decoded) ? (num_frames - decoded) : 0);
502 for (
int c = 0; c < ch; ++c) {
503 auto* src =
reinterpret_cast<double*
>(resample_buf[c]);
504 auto& dst = std::get<std::vector<double>>(output[c]);
505 dst.insert(dst.end(), src, src + to_copy);
508 auto* src =
reinterpret_cast<double*
>(resample_buf[0]);
509 auto& dst = std::get<std::vector<double>>(output[0]);
510 dst.insert(dst.end(), src, src + to_copy *
static_cast<uint64_t
>(ch));
518 av_freep(&resample_buf[0]);
519 av_freep(&resample_buf);
520 av_packet_free(&pkt);
521 av_frame_free(&frame);
539 return std::make_shared<Kakshya::SoundFileContainer>();
543 std::shared_ptr<Kakshya::SignalSourceContainer> container)
550 auto sc = std::dynamic_pointer_cast<Kakshya::SoundFileContainer>(container);
552 set_error(
"Container is not a SoundFileContainer");
556 std::shared_ptr<AudioStreamContext> audio;
566 sc->setup(audio->total_frames, audio->sample_rate, audio->channels);
569 sc->get_structure().organization = planar
579 sc->set_raw_data(data);
583 for (
const auto& [name, group] : region_groups)
584 sc->add_region_group(group);
586 sc->create_default_processor();
587 sc->mark_ready_for_processing(
true);
611 "wav",
"flac",
"mp3",
"m4a",
"aac",
"ogg",
"opus",
"wma",
612 "aiff",
"aif",
"ape",
"wv",
"tta",
"mka",
"ac3",
"dts",
613 "mp2",
"mp4",
"webm",
"caf",
"amr",
"au",
"voc",
"w64",
614 "mpc",
"mp+",
"m4b",
"m4r",
"3gp",
"3g2",
"asf",
"rm",
615 "ra",
"avi",
"mov",
"mkv",
"ogv",
"ogx",
"oga",
"spx",
616 "f4a",
"f4b",
"f4v",
"m4v",
"asx",
"wvx",
"wax"
#define MF_ERROR(comp, ctx,...)
bool open(const std::string &filepath)
Open a media file and probe stream information.
static void init_ffmpeg()
Initialise FFmpeg logging level once per process.
int find_best_stream(int media_type, const void **out_codec=nullptr) const
Find the best stream of the requested media type.
RAII owner of a single AVFormatContext and associated demux state.
static std::unordered_map< std::string, Kakshya::RegionGroup > regions_to_groups(const std::vector< FileRegion > &regions)
Convert file regions to region groups.
std::vector< Kakshya::DataVariant > read_all() override
Read the entire audio file into memory.
void close() override
Close the currently open file and release resources.
void build_regions(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio) const
Build and cache FileRegion list from both contexts.
std::string get_last_error() const override
Get the last error message encountered by the reader.
uint32_t m_target_sample_rate
Target sample rate for resampling (0 = use source rate).
bool open(const std::string &filepath, FileReadOptions options=FileReadOptions::ALL) override
Open an audio file for reading.
std::mutex m_error_mutex
Mutex for thread-safe error message access.
void build_metadata(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio) const
Build and cache FileMetadata from both contexts.
std::shared_ptr< AudioStreamContext > m_audio
Codec + resampler state.
bool load_into_container(std::shared_ptr< Kakshya::SignalSourceContainer > container) override
Load file data into an existing SignalSourceContainer.
std::shared_mutex m_context_mutex
Guards both context pointers.
bool seek_internal(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio, uint64_t frame_position)
Seek the demuxer and flush the codec to the given frame position.
bool can_read(const std::string &filepath) const override
Check if this reader can open the given file.
std::mutex m_metadata_mutex
Mutex for thread-safe metadata access.
std::vector< Kakshya::DataVariant > decode_frames(const std::shared_ptr< FFmpegDemuxContext > &demux, const std::shared_ptr< AudioStreamContext > &audio, uint64_t num_frames, uint64_t offset)
Decode num_frames PCM frames starting at offset.
std::string m_last_error
Last error message encountered.
std::vector< uint64_t > get_read_position() const override
Get the current read position in the file.
void set_error(const std::string &error) const
Set the last error message.
std::atomic< uint64_t > m_current_frame_position
Current frame position for reading.
std::vector< Kakshya::DataVariant > read_region(const FileRegion &region) override
Read a specific region from the file.
std::vector< Kakshya::DataVariant > read_frames(uint64_t num_frames, uint64_t offset=0)
Read a specific number of frames from the file.
AudioReadOptions m_audio_options
Audio-specific read options.
std::optional< FileMetadata > m_cached_metadata
Cached file metadata.
bool open_from_demux(std::shared_ptr< FFmpegDemuxContext > demux, std::shared_ptr< AudioStreamContext > audio, const std::string &filepath, FileReadOptions options=FileReadOptions::ALL)
Open an audio stream from an already-constructed demux and stream context.
std::vector< FileRegion > m_cached_regions
Cached file regions (markers, loops, etc.).
void clear_error() const
Clear the last error message.
~SoundFileReader() override
Destroy the SoundFileReader object.
std::string m_filepath
Path to the currently open file.
std::vector< uint64_t > get_dimension_sizes() const override
Get the size of each dimension (e.g., frames, channels).
std::shared_ptr< Kakshya::SignalSourceContainer > create_container() override
Create a SignalSourceContainer for this file.
bool seek(const std::vector< uint64_t > &position) override
Seek to a specific position in the file.
SoundFileReader()
Construct a new SoundFileReader object.
size_t get_num_dimensions() const override
Get the number of dimensions in the audio data (typically 2: time, channel).
std::shared_ptr< FFmpegDemuxContext > m_demux
Container / format state.
std::optional< FileMetadata > get_metadata() const override
Get metadata for the currently open file.
std::vector< FileRegion > get_regions() const override
Get all regions (markers, loops, etc.) from the file.
bool is_open() const override
Check if a file is currently open.
FileReadOptions m_options
File read options used for this session.
std::vector< std::string > get_supported_extensions() const override
Get supported file extensions for this reader.
@ DEINTERLEAVE
Output planar (per-channel) doubles instead of interleaved.
FileReadOptions
Generic options for file reading behavior.
@ EXTRACT_METADATA
Extract file metadata.
@ EXTRACT_REGIONS
Extract semantic regions (format-specific)
@ NONE
No special options.
@ FileIO
Filesystem I/O operations.
@ IO
Networking, file handling, streaming.
@ PLANAR
Separate DataVariant per logical unit (LLL...RRR for stereo)
@ INTERLEAVED
Single DataVariant with interleaved data (LRLRLR for stereo)
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y)
Kakshya::Region to_region() const
Convert this FileRegion to a Region for use in processing.
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block")
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive)
Generic region descriptor for any file type.
static Region time_span(uint64_t start_frame, uint64_t end_frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a time span (e.g., a segment of frames).
void set_attribute(const std::string &key, std::any value)
Set an attribute value by key.
static Region time_point(uint64_t frame, const std::string &label="", const std::any &extra_data={})
Create a Region representing a single time point (e.g., a frame or sample).
Represents a point or span in N-dimensional space.