4#include <libavcodec/avcodec.h>
5#include <libavformat/avformat.h>
6#include <libavutil/channel_layout.h>
7#include <libavutil/opt.h>
8#include <libavutil/samplefmt.h>
9#include <libswresample/swresample.h>
56 const AVCodec* codec =
nullptr;
58 reinterpret_cast<const void**
>(&codec));
71 if (avcodec_parameters_to_context(
codec_context, stream->codecpar) < 0) {
86 if (stream->duration > 0) {
87 total_frames = av_rescale_q(stream->duration, stream->time_base,
88 AVRational { 1, (int)sample_rate });
90 double dur = (double)demux.
format_context->duration / AV_TIME_BASE;
113 AVChannelLayout out_layout;
114 av_channel_layout_copy(&out_layout, &
codec_context->ch_layout);
116 uint32_t out_rate = target_rate > 0 ? target_rate :
sample_rate;
117 AVSampleFormat out_fmt = planar_output ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
119 int ret = swr_alloc_set_opts2(
121 &out_layout, out_fmt,
static_cast<int>(out_rate),
125 av_channel_layout_uninit(&out_layout);
157 constexpr int k_drain_samples = 2048;
158 uint8_t** buf =
nullptr;
161 int alloc = av_samples_alloc_array_and_samples(
163 static_cast<int>(
channels), k_drain_samples,
164 AV_SAMPLE_FMT_DBL, 0);
166 if (alloc < 0 || !buf)
169 while (swr_convert(
swr_context, buf, k_drain_samples,
nullptr, 0) > 0) { }
192 char layout_desc[256] = {};
193 av_channel_layout_describe(&
codec_context->ch_layout, layout_desc,
sizeof(layout_desc));
194 out.
attributes[
"channel_layout"] = std::string(layout_desc);
200 AVDictionaryEntry* tag =
nullptr;
201 while ((tag = av_dict_get(stream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX)))
202 out.
attributes[std::string(
"stream_") + tag->key] = std::string(tag->value);
208 std::vector<FileRegion> regions;
216 AVDictionaryEntry* tag =
nullptr;
218 while ((tag = av_dict_get(stream->metadata,
"cue", tag, AV_DICT_IGNORE_SUFFIX))) {
224 r.
attributes[
"label"] = std::string(tag->value);
225 regions.push_back(std::move(r));
SwrContext * swr_context
Owned; freed in destructor.
bool open(const FFmpegDemuxContext &demux, bool planar_output=false, uint32_t target_rate=0)
Open the audio stream from an already-probed demux context.
std::vector< FileRegion > extract_cue_regions(const FFmpegDemuxContext &demux) const
Extract cue/marker regions from stream metadata tags.
void drain_resampler_init()
Drain any samples buffered inside the resampler.
AVCodecContext * codec_context
Owned; freed in destructor.
void extract_stream_metadata(const FFmpegDemuxContext &demux, FileMetadata &out) const
Populate stream-specific fields into an existing FileMetadata.
void flush_codec()
Flush codec internal buffers (call after a seek).
bool setup_resampler(bool planar_output, uint32_t target_rate)
void close()
Release codec and resampler resources.
AVFormatContext * format_context
Owned; freed in destructor.
static void init_ffmpeg()
Initialise FFmpeg logging level once per process.
int find_best_stream(int media_type, const void **out_codec=nullptr) const
Find the best stream of the requested media type.
AVStream * get_stream(int index) const
Access a stream by index.
bool is_open() const
True if the format context is open and stream info was found.
RAII owner of a single AVFormatContext and associated demux state.
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y).
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block").
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive).
Generic region descriptor for any file type.