MayaFlux 0.2.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
AudioStreamContext.cpp
Go to the documentation of this file.
2
3extern "C" {
4#include <libavcodec/avcodec.h>
5#include <libavformat/avformat.h>
6#include <libavutil/channel_layout.h>
7#include <libavutil/opt.h>
8#include <libavutil/samplefmt.h>
9#include <libswresample/swresample.h>
10}
11
12namespace MayaFlux::IO {
13
14// =========================================================================
15// Destructor
16// =========================================================================
17
22
24{
25 if (swr_context) {
26 swr_free(&swr_context);
27 swr_context = nullptr;
28 }
29 if (codec_context) {
30 avcodec_free_context(&codec_context);
31 codec_context = nullptr;
32 }
33 stream_index = -1;
34 total_frames = 0;
35 sample_rate = 0;
36 channels = 0;
37 m_last_error.clear();
38}
39
40// =========================================================================
41// Open
42// =========================================================================
43
45 bool planar_output,
46 uint32_t target_rate)
47{
// Opens the best audio stream of an already-opened demux context: selects the
// stream, creates and opens its decoder, records sample rate / channel count /
// estimated frame count, then configures the resampler.
// Returns true on success; every failure path returns false.
//
// Start from a clean state so a previously opened stream is released first.
48 close();
50
51 if (!demux.is_open()) {
52 m_last_error = "Demux context is not open";
53 return false;
54 }
55
// Ask the demuxer for its best audio stream; the decoder for that stream is
// handed back through the second (out) argument.
56 const AVCodec* codec = nullptr;
57 stream_index = demux.find_best_stream(AVMEDIA_TYPE_AUDIO,
58 reinterpret_cast<const void**>(&codec));
59 if (stream_index < 0 || !codec) {
60 m_last_error = "No audio stream found";
61 return false;
62 }
63
64 codec_context = avcodec_alloc_context3(codec);
65 if (!codec_context) {
66 m_last_error = "avcodec_alloc_context3 failed";
67 return false;
68 }
69
// Copy the stream's codec parameters into the freshly allocated decoder context.
// NOTE(review): `stream` is dereferenced without a null check here, while
// extract_stream_metadata() and extract_cue_regions() do check the result of
// get_stream() - confirm it cannot be null for an index from find_best_stream().
70 AVStream* stream = demux.get_stream(stream_index);
71 if (avcodec_parameters_to_context(codec_context, stream->codecpar) < 0) {
72 m_last_error = "avcodec_parameters_to_context failed";
// NOTE(review): the cleanup routine defined at the top of this file finishes
// with m_last_error.clear(); if that routine is close(), the message assigned
// on the line above is erased before the caller can read it. The same applies
// to the two close() calls further down. Consider calling close() first and
// assigning the message afterwards.
73 close();
74 return false;
75 }
76
77 if (avcodec_open2(codec_context, codec, nullptr) < 0) {
78 m_last_error = "avcodec_open2 failed";
79 close();
80 return false;
81 }
82
// Cache the decoder's native format description.
83 sample_rate = static_cast<uint32_t>(codec_context->sample_rate);
84 channels = static_cast<uint32_t>(codec_context->ch_layout.nb_channels);
85
// Estimate the total number of sample frames: prefer the stream duration
// (expressed in stream->time_base units, rescaled to 1/sample_rate), otherwise
// fall back to the container duration, which is divided by AV_TIME_BASE to get
// seconds. If neither is available total_frames keeps the value set by close().
// NOTE(review): the fallback only excludes AV_NOPTS_VALUE; any other negative
// duration would be converted from a negative double to uint64_t - confirm
// that cannot happen or test for `> 0` instead.
86 if (stream->duration > 0) {
87 total_frames = av_rescale_q(stream->duration, stream->time_base,
88 AVRational { 1, (int)sample_rate });
89 } else if (demux.format_context->duration != AV_NOPTS_VALUE) {
90 double dur = (double)demux.format_context->duration / AV_TIME_BASE;
91 total_frames = static_cast<uint64_t>(dur * sample_rate);
92 }
93
// setup_resampler() assigns its own m_last_error text when swresample fails.
94 if (!setup_resampler(planar_output, target_rate)) {
95 close();
96 return false;
97 }
98
100
101 return true;
102}
103
104// =========================================================================
105// Resampler
106// =========================================================================
107
108bool AudioStreamContext::setup_resampler(bool planar_output, uint32_t target_rate)
109{
110 if (!codec_context)
111 return false;
112
113 AVChannelLayout out_layout;
114 av_channel_layout_copy(&out_layout, &codec_context->ch_layout);
115
116 uint32_t out_rate = target_rate > 0 ? target_rate : sample_rate;
117 AVSampleFormat out_fmt = planar_output ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
118
119 int ret = swr_alloc_set_opts2(
121 &out_layout, out_fmt, static_cast<int>(out_rate),
122 &codec_context->ch_layout, codec_context->sample_fmt, codec_context->sample_rate,
123 0, nullptr);
124
125 av_channel_layout_uninit(&out_layout);
126
127 if (ret < 0 || !swr_context) {
128 m_last_error = "swr_alloc_set_opts2 failed";
129 return false;
130 }
131
132 if (swr_init(swr_context) < 0) {
133 m_last_error = "swr_init failed";
134 swr_free(&swr_context);
135 swr_context = nullptr;
136 return false;
137 }
138
139 return true;
140}
141
142// =========================================================================
143// Codec flush
144// =========================================================================
145
147{
148 if (codec_context)
149 avcodec_flush_buffers(codec_context);
150}
151
153{
154 if (!swr_context || channels == 0)
155 return;
156
157 constexpr int k_drain_samples = 2048;
158 uint8_t** buf = nullptr;
159 int linesize = 0;
160
161 int alloc = av_samples_alloc_array_and_samples(
162 &buf, &linesize,
163 static_cast<int>(channels), k_drain_samples,
164 AV_SAMPLE_FMT_DBL, 0);
165
166 if (alloc < 0 || !buf)
167 return;
168
169 while (swr_convert(swr_context, buf, k_drain_samples, nullptr, 0) > 0) { }
170
171 av_freep(&buf[0]);
172 av_freep(&buf);
173}
174
175// =========================================================================
176// Metadata
177// =========================================================================
178
180 FileMetadata& out) const
181{
182 if (!codec_context || stream_index < 0)
183 return;
184
185 out.attributes["codec"] = std::string(avcodec_get_name(codec_context->codec_id));
186 out.attributes["codec_long_name"] = std::string(codec_context->codec->long_name);
187 out.attributes["total_frames"] = total_frames;
188 out.attributes["sample_rate"] = sample_rate;
189 out.attributes["channels"] = channels;
190 out.attributes["bit_rate"] = codec_context->bit_rate;
191
192 char layout_desc[256] = {};
193 av_channel_layout_describe(&codec_context->ch_layout, layout_desc, sizeof(layout_desc));
194 out.attributes["channel_layout"] = std::string(layout_desc);
195
196 AVStream* stream = demux.get_stream(stream_index);
197 if (!stream)
198 return;
199
200 AVDictionaryEntry* tag = nullptr;
201 while ((tag = av_dict_get(stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX)))
202 out.attributes[std::string("stream_") + tag->key] = std::string(tag->value);
203}
204
206 const FFmpegDemuxContext& demux) const
207{
208 std::vector<FileRegion> regions;
209 if (stream_index < 0 || sample_rate == 0)
210 return regions;
211
212 AVStream* stream = demux.get_stream(stream_index);
213 if (!stream)
214 return regions;
215
216 AVDictionaryEntry* tag = nullptr;
217 int idx = 0;
218 while ((tag = av_dict_get(stream->metadata, "cue", tag, AV_DICT_IGNORE_SUFFIX))) {
219 FileRegion r;
220 r.type = "cue";
221 r.name = tag->value;
222 r.start_coordinates = { static_cast<uint64_t>(idx) };
223 r.end_coordinates = { static_cast<uint64_t>(idx) };
224 r.attributes["label"] = std::string(tag->value);
225 regions.push_back(std::move(r));
226 ++idx;
227 }
228 return regions;
229}
230
231} // namespace MayaFlux::IO
SwrContext * swr_context
Owned; freed in destructor.
bool open(const FFmpegDemuxContext &demux, bool planar_output=false, uint32_t target_rate=0)
Open the audio stream from an already-probed demux context.
std::vector< FileRegion > extract_cue_regions(const FFmpegDemuxContext &demux) const
Extract cue/marker regions from stream metadata tags.
void drain_resampler_init()
Drain any samples buffered inside the resampler.
AVCodecContext * codec_context
Owned; freed in destructor.
void extract_stream_metadata(const FFmpegDemuxContext &demux, FileMetadata &out) const
Populate stream-specific fields into an existing FileMetadata.
void flush_codec()
Flush codec internal buffers (call after a seek).
bool setup_resampler(bool planar_output, uint32_t target_rate)
void close()
Release codec and resampler resources.
AVFormatContext * format_context
Owned; freed in destructor.
static void init_ffmpeg()
Initialise FFmpeg logging level once per process.
int find_best_stream(int media_type, const void **out_codec=nullptr) const
Find the best stream of the requested media type.
AVStream * get_stream(int index) const
Access a stream by index.
bool is_open() const
True if the format context is open and stream info was found.
RAII owner of a single AVFormatContext and associated demux state.
std::unordered_map< std::string, std::any > attributes
Type-specific metadata stored as key-value pairs (e.g., sample rate, channels)
Generic metadata structure for any file type.
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y)
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block")
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive)
Generic region descriptor for any file type.