MayaFlux 0.2.0
Digital-First Multimedia Processing Framework
Loading...
Searching...
No Matches
VideoStreamContext.cpp
Go to the documentation of this file.
3
4extern "C" {
5#include <libavcodec/avcodec.h>
6#include <libavformat/avformat.h>
7#include <libavutil/imgutils.h>
8#include <libavutil/opt.h>
9#include <libavutil/pixdesc.h>
10#include <libswscale/swscale.h>
11}
12
13namespace MayaFlux::IO {
14
15// =========================================================================
16// Destructor
17// =========================================================================
18
23
// NOTE(review): the signature line (listing line 24) is missing from this
// extract. Given the close() calls made by the open paths further down and the
// reset logic here, this is presumably the body of VideoStreamContext::close()
// -- confirm against the real source.
//
// Frees the scaler and codec contexts when they are set, then resets the cached
// stream description and clears the last error string.
25{
// Both releases are guarded, so calling this on an already-closed context
// skips straight to the field resets.
26 if (sws_context) {
27 sws_freeContext(sws_context);
28 sws_context = nullptr;
29 }
30 if (codec_context) {
31 avcodec_free_context(&codec_context);
32 codec_context = nullptr;
33 }
// Return the cached stream description to its "nothing open" values.
34 stream_index = -1;
35 total_frames = 0;
36 width = 0;
37 height = 0;
38 out_width = 0;
39 out_height = 0;
40 frame_rate = 0.0;
// NOTE(review): listing lines 41-43 are missing from this extract; whatever
// they reset is not visible here.
44 out_linesize = 0;
45 m_last_error.clear();
46}
47
48// =========================================================================
49// Open
50// =========================================================================
51
// NOTE(review): the first signature line (listing line 52) is missing from this
// extract. The parameter list and the bool returns suggest this is
// VideoStreamContext::open(demux, target_width, target_height, target_format)
// -- confirm against the real source.
//
// Visible behaviour: resets prior state via close(), selects the best video
// stream from the demux context, allocates and opens a decoder for it, then
// derives width/height, frame_rate and total_frames from the stream.
// Returns true on success. Returns false on failure; m_last_error is set on
// every failure path whose code is visible in this extract.
53 uint32_t target_width,
54 uint32_t target_height,
55 int target_format)
56{
57 close();
// NOTE(review): listing line 58 is missing from this extract.
59
60 if (!demux.is_open()) {
61 m_last_error = "Demux context is not open";
62 return false;
63 }
64
// find_best_stream hands the decoder back through an untyped out-pointer,
// hence the reinterpret_cast on the address of `codec`.
65 const AVCodec* codec = nullptr;
66 stream_index = demux.find_best_stream(AVMEDIA_TYPE_VIDEO,
67 reinterpret_cast<const void**>(&codec));
// This return and the allocation-failure return below do not call close(),
// so stream_index keeps whatever find_best_stream returned.
68 if (stream_index < 0 || !codec) {
69 m_last_error = "No video stream found";
70 return false;
71 }
72
73 codec_context = avcodec_alloc_context3(codec);
74 if (!codec_context) {
75 m_last_error = "avcodec_alloc_context3 failed";
76 return false;
77 }
78
// NOTE(review): `stream` is dereferenced without a null check, whereas the
// metadata and keyframe code later in this file guards the same get_stream()
// call -- confirm it cannot return null for an index find_best_stream produced.
79 AVStream* stream = demux.get_stream(stream_index);
80 if (avcodec_parameters_to_context(codec_context, stream->codecpar) < 0) {
81 m_last_error = "avcodec_parameters_to_context failed";
82 close();
83 return false;
84 }
85
86 if (avcodec_open2(codec_context, codec, nullptr) < 0) {
87 m_last_error = "avcodec_open2 failed";
88 close();
89 return false;
90 }
91
// Windows-only probe: when the decoder still reports no pixel format after
// avcodec_open2, packets are decoded until one frame comes out, and the pixel
// format and dimensions are taken from that frame.
92#ifdef MAYAFLUX_PLATFORM_WINDOWS
93 uint32_t probed_width = 0;
94 uint32_t probed_height = 0;
95
96 if (codec_context->pix_fmt == AV_PIX_FMT_NONE) {
97 AVPacket* probe_pkt = av_packet_alloc();
98 AVFrame* probe_frm = av_frame_alloc();
99
100 if (probe_pkt && probe_frm) {
101 AVFormatContext* fmt = demux.format_context;
102 bool probed = false;
103
// Packets belonging to other streams are dropped; the loop ends on the first
// successfully received frame or when av_read_frame fails.
104 while (!probed && av_read_frame(fmt, probe_pkt) >= 0) {
105 if (probe_pkt->stream_index != stream_index) {
106 av_packet_unref(probe_pkt);
107 continue;
108 }
109 if (avcodec_send_packet(codec_context, probe_pkt) >= 0) {
110 if (avcodec_receive_frame(codec_context, probe_frm) >= 0) {
111 if (codec_context->pix_fmt == AV_PIX_FMT_NONE
112 && probe_frm->format != AV_PIX_FMT_NONE) {
113 codec_context->pix_fmt = static_cast<AVPixelFormat>(probe_frm->format);
114 }
115 probed_width = static_cast<uint32_t>(probe_frm->width);
116 probed_height = static_cast<uint32_t>(probe_frm->height);
117 probed = true;
118 av_frame_unref(probe_frm);
119 }
120 }
121 av_packet_unref(probe_pkt);
122 }
123 }
124
// NOTE(review): the probe reads from the shared format context and no seek
// back to the start is visible in this block; only the decoder is flushed.
// Verify that the caller rewinds the demuxer before real decoding begins.
125 av_packet_free(&probe_pkt);
126 av_frame_free(&probe_frm);
127 avcodec_flush_buffers(codec_context);
128 }
129
130 width = static_cast<uint32_t>(codec_context->width);
131 height = static_cast<uint32_t>(codec_context->height);
132
// Dimensions from the probed frame override the codec context's values and
// are written back into the codec context.
133 if (probed_width > 0 && probed_height > 0) {
134 width = probed_width;
135 height = probed_height;
136 codec_context->width = static_cast<int>(width);
137 codec_context->height = static_cast<int>(height);
138 }
139#else
140 width = static_cast<uint32_t>(codec_context->width);
141 height = static_cast<uint32_t>(codec_context->height);
142#endif
143
// Frame-rate choice: HEVC prefers r_frame_rate; every other codec prefers
// avg_frame_rate and falls back to r_frame_rate. If neither rational is valid,
// frame_rate is left untouched.
144 if (codec_context->codec_id == AV_CODEC_ID_HEVC
145 && stream->r_frame_rate.den > 0
146 && stream->r_frame_rate.num > 0) {
147 frame_rate = av_q2d(stream->r_frame_rate);
148 } else if (stream->avg_frame_rate.den > 0 && stream->avg_frame_rate.num > 0) {
149 frame_rate = av_q2d(stream->avg_frame_rate);
150 } else if (stream->r_frame_rate.den > 0 && stream->r_frame_rate.num > 0) {
151 frame_rate = av_q2d(stream->r_frame_rate);
152 }
153
// NOTE(review): listing line 154 is missing from this extract.
155
// Frame count, in order of preference: the stream's nb_frames when positive;
// else stream duration (in time_base units) times fps; else container duration
// (in AV_TIME_BASE units) times fps. Both estimates truncate towards zero.
156 if (stream->nb_frames > 0) {
157 total_frames = static_cast<uint64_t>(stream->nb_frames);
158 } else if (stream->duration != AV_NOPTS_VALUE
159 && stream->time_base.num > 0 && stream->time_base.den > 0
160 && frame_rate > 0.0) {
161 double dur = static_cast<double>(stream->duration) * av_q2d(stream->time_base);
162 total_frames = static_cast<uint64_t>(dur * frame_rate);
163 } else if (demux.format_context->duration != AV_NOPTS_VALUE && frame_rate > 0.0) {
164 double dur = static_cast<double>(demux.format_context->duration)
165 / static_cast<double>(AV_TIME_BASE);
166 total_frames = static_cast<uint64_t>(dur * frame_rate);
167 }
168
// NOTE(review): the head of this logging call (listing line 169) and three
// argument lines (179, 184, 195) are missing from this extract, so the format
// string shows more placeholders than there are visible arguments.
170 "[VideoStreamContext] stream #{} | "
171 "avg_frame_rate={}/{} ({:.6f} fps) | "
172 "r_frame_rate={}/{} ({:.6f} fps) | "
173 "chosen frame_rate={:.6f} fps | "
174 "nb_frames={} | "
175 "stream duration={} (tb={}/{}, => {:.4f}s) | "
176 "format duration={} (=> {:.4f}s) | "
177 "total_frames={} | "
178 "source={}",
180 stream->avg_frame_rate.num, stream->avg_frame_rate.den,
181 (stream->avg_frame_rate.den > 0 ? av_q2d(stream->avg_frame_rate) : 0.0),
182 stream->r_frame_rate.num, stream->r_frame_rate.den,
183 (stream->r_frame_rate.den > 0 ? av_q2d(stream->r_frame_rate) : 0.0),
185 static_cast<int64_t>(stream->nb_frames),
186 stream->duration,
187 stream->time_base.num, stream->time_base.den,
188 (stream->duration != AV_NOPTS_VALUE && stream->time_base.den > 0
189 ? static_cast<double>(stream->duration) * av_q2d(stream->time_base)
190 : -1.0),
191 demux.format_context->duration,
192 (demux.format_context->duration != AV_NOPTS_VALUE
193 ? static_cast<double>(demux.format_context->duration) / AV_TIME_BASE
194 : -1.0),
// NOTE(review): this label only checks nb_frames and stream->duration, while
// the computation above also requires a valid time_base and frame_rate, so the
// logged "source" can differ from the branch that actually set total_frames.
196 (stream->nb_frames > 0 ? "nb_frames"
197 : (stream->duration != AV_NOPTS_VALUE ? "stream_duration*fps"
198 : "format_duration*fps")));
199
// NOTE(review): the condition guarding this failure path (listing line 200) is
// missing. Presumably it is the scaler setup using the target_* arguments --
// confirm, including whether m_last_error is set before close() runs.
201 close();
202 return false;
203 }
204
205 return true;
206}
207
// NOTE(review): the first signature line (listing line 208) is missing from
// this extract; the log message below names this routine "open_device".
//
// Visible behaviour: resets prior state via close(), selects the best video
// stream, allocates and opens a decoder, remembers the requested output
// geometry/format in target_width/target_height/target_format, and records
// source width/height and frame_rate. No scaler is created in the visible code.
// Returns true on success; returns false with m_last_error set on the visible
// failure paths.
209 uint32_t tw, uint32_t th, int tf)
210{
211 close();
// NOTE(review): listing line 212 is missing from this extract.
213
214 if (!demux.is_open()) {
215 m_last_error = "Demux context is not open";
216 return false;
217 }
218
// find_best_stream hands the decoder back through an untyped out-pointer,
// hence the reinterpret_cast on the address of `codec`.
219 const AVCodec* codec = nullptr;
220 stream_index = demux.find_best_stream(AVMEDIA_TYPE_VIDEO,
221 reinterpret_cast<const void**>(&codec));
222 if (stream_index < 0 || !codec) {
223 m_last_error = "No video stream found";
224 return false;
225 }
226
227 codec_context = avcodec_alloc_context3(codec);
228 if (!codec_context) {
229 m_last_error = "avcodec_alloc_context3 failed";
230 return false;
231 }
232
// NOTE(review): `stream` is dereferenced without a null check -- confirm
// get_stream() cannot return null for an index find_best_stream produced.
233 AVStream* stream = demux.get_stream(stream_index);
234 if (avcodec_parameters_to_context(codec_context, stream->codecpar) < 0) {
235 m_last_error = "avcodec_parameters_to_context failed";
236 close();
237 return false;
238 }
239
240 if (avcodec_open2(codec_context, codec, nullptr) < 0) {
241 m_last_error = "avcodec_open2 failed";
242 close();
243 return false;
244 }
245
// Store the caller's requested output geometry and format in the members.
246 target_width = tw;
247 target_height = th;
248 target_format = tf;
249
// NOTE(review): listing line 250 is missing from this extract.
251
252 width = static_cast<uint32_t>(codec_context->width);
253 height = static_cast<uint32_t>(codec_context->height);
254
// Fall back to the stream's codec parameters when the opened decoder reports
// no size. `s` is the same lookup as `stream` above.
255 if (width == 0 || height == 0) {
256 AVStream* s = demux.get_stream(stream_index);
257 width = static_cast<uint32_t>(s->codecpar->width);
258 height = static_cast<uint32_t>(s->codecpar->height);
259 }
260
// NOTE(review): the body of this branch (listing lines 262-263) is missing, so
// what happens when the size is still unknown cannot be seen here.
261 if (width == 0 || height == 0) {
264 }
265
// NOTE(review): the rest of this condition (listing line 268) is missing; only
// the swap of width and height under some Windows-specific condition is visible.
266#ifdef MAYAFLUX_PLATFORM_WINDOWS
267 if (target_width > 0 && target_height > 0
269 std::swap(width, height);
270 }
271#endif
272
// NOTE(review): listing lines 273-274 are missing from this extract.
275
// Prefer the average frame rate and fall back to r_frame_rate; frame_rate is
// left untouched when neither rational is valid.
276 if (stream->avg_frame_rate.den > 0 && stream->avg_frame_rate.num > 0)
277 frame_rate = av_q2d(stream->avg_frame_rate);
278 else if (stream->r_frame_rate.den > 0 && stream->r_frame_rate.num > 0)
279 frame_rate = av_q2d(stream->r_frame_rate);
280
// NOTE(review): no assignment to src_pixel_format is visible in this block;
// presumably it happens on one of the missing lines -- confirm.
281 auto fmt = av_get_pix_fmt_name(static_cast<AVPixelFormat>(src_pixel_format));
282
// NOTE(review): the head of this logging call (listing line 283) is missing.
284 "[VideoStreamContext] open_device: stream #{} | {}x{} | pix_fmt={}",
285 stream_index, width, height, fmt ? fmt : "none");
286
287 return true;
288}
289
290// =========================================================================
291// Scaler
292// =========================================================================
293
// Creates the SwsContext that converts decoded frames from the source size and
// pixel format to the output size and pixel format, then derives
// out_bytes_per_pixel and a 32-byte-aligned out_linesize.
//
// Parameters: target_width / target_height / target_format are the requested
// output geometry and pixel format. How they map onto out_width, out_height and
// out_pixel_format is on lines missing from this extract (see note below).
//
// Returns false when there is no codec context (m_last_error is NOT set in that
// case), when the source pixel format is still AV_PIX_FMT_NONE, or when
// sws_getContext fails; returns true otherwise.
294bool VideoStreamContext::setup_scaler(uint32_t target_width,
295 uint32_t target_height,
296 int target_format)
297{
298 if (!codec_context)
299 return false;
300
301 if (codec_context->pix_fmt == AV_PIX_FMT_NONE) {
302 m_last_error = "setup_scaler: source pix_fmt is AV_PIX_FMT_NONE — "
303 "codec has not resolved its output format yet";
304 return false;
305 }
306
// NOTE(review): listing lines 307-310 are missing from this extract. The
// surviving line below is the else-arm of a conditional that falls back to
// RGBA; presumably the missing lines assign out_width, out_height and
// out_pixel_format from the target_* arguments -- confirm.
311 : static_cast<int>(AV_PIX_FMT_RGBA);
312
// NOTE(review): in the visible code, any existing sws_context is overwritten
// here without being freed. The frame-based rebuild path frees it before
// calling in, but a direct second call could leak -- confirm.
313 sws_context = sws_getContext(
314 static_cast<int>(width),
315 static_cast<int>(height),
316 codec_context->pix_fmt,
317 static_cast<int>(out_width),
318 static_cast<int>(out_height),
319 static_cast<AVPixelFormat>(out_pixel_format),
320 SWS_BILINEAR,
321 nullptr, nullptr, nullptr);
322
323 if (!sws_context) {
324 m_last_error = "sws_getContext failed";
325 return false;
326 }
327
// Bytes per pixel = sum of the component bit depths, rounded up to whole bytes.
328 const AVPixFmtDescriptor* desc = av_pix_fmt_desc_get(
329 static_cast<AVPixelFormat>(out_pixel_format));
330 if (desc) {
331 int bits = 0;
332 for (int c = 0; c < desc->nb_components; ++c)
333 bits += desc->comp[c].depth;
334 out_bytes_per_pixel = static_cast<uint32_t>((bits + 7) / 8);
335 } else {
// NOTE(review): the fallback statement (listing line 336) is missing.
337 }
338
// Row stride is the packed row size rounded up to the next multiple of 32 bytes.
339 out_linesize = static_cast<int>(out_width * out_bytes_per_pixel);
340 int align_remainder = out_linesize % 32;
341 if (align_remainder != 0)
342 out_linesize += 32 - align_remainder;
343
344 return true;
345}
346
// NOTE(review): the first signature line (listing line 347) is missing from
// this extract; the error string below names this routine
// "rebuild_scaler_from_frame".
//
// Rebuilds the scaler using information from a decoded frame. The frame's pixel
// format is adopted only if the codec context has none, and the frame's
// dimensions are adopted only if width/height are still zero.
//
// Parameters: frame is the decoded frame to take format/size from; tw / th / tf
// are optional overrides for output width, height and pixel format.
// Returns false for a null or malformed frame (m_last_error set); otherwise
// returns whatever setup_scaler returns.
348 const AVFrame* frame,
349 uint32_t tw, uint32_t th, int tf)
350{
351 if (!frame || frame->width <= 0 || frame->height <= 0
352 || frame->format == AV_PIX_FMT_NONE) {
353 m_last_error = "rebuild_scaler_from_frame: invalid frame";
354 return false;
355 }
356
357 if (codec_context && codec_context->pix_fmt == AV_PIX_FMT_NONE)
358 codec_context->pix_fmt = static_cast<AVPixelFormat>(frame->format);
359
// Dimensions already recorded are kept, even if this frame's size differs.
360 if (width == 0 || height == 0) {
361 width = static_cast<uint32_t>(frame->width);
362 height = static_cast<uint32_t>(frame->height);
363 }
364 src_pixel_format = codec_context ? codec_context->pix_fmt : frame->format;
365
// Release the previous scaler before a new one is created.
366 if (sws_context) {
367 sws_freeContext(sws_context);
368 sws_context = nullptr;
369 }
370
// Precedence for each output setting: explicit argument, then the stored
// target_* member, then the source size (or -1 for the format).
371 const uint32_t use_w = tw > 0 ? tw : (target_width > 0 ? target_width : width);
372 const uint32_t use_h = th > 0 ? th : (target_height > 0 ? target_height : height);
373 const int use_f = tf >= 0 ? tf : (target_format >= 0 ? target_format : -1);
374
// With a null codec_context, setup_scaler returns false without touching
// m_last_error, so the error text may be stale in that case.
375 return setup_scaler(use_w, use_h, use_f);
376}
377
378// =========================================================================
379// Codec flush
380// =========================================================================
381
// NOTE(review): the signature line (listing line 382) is missing from this
// extract; per the banner above, this is presumably
// VideoStreamContext::flush_codec() -- confirm.
//
// Discards the decoder's buffered state via avcodec_flush_buffers; does nothing
// when no codec context is open.
383{
384 if (codec_context)
385 avcodec_flush_buffers(codec_context);
386}
387
388// =========================================================================
389// Metadata
390// =========================================================================
391
// NOTE(review): the first signature line (listing line 392) is missing from
// this extract. The body reads a `demux` object and writes into `out`, which
// matches an extract_stream_metadata(demux, out) style routine -- confirm.
//
// Adds "video_*" entries describing the opened stream to out.attributes. Each
// value is stored with the static type of its right-hand side (std::string for
// names, otherwise the member's or field's own numeric type), so readers must
// retrieve it as that exact type.
// Does nothing when no codec context is open or no stream is selected.
393 FileMetadata& out) const
394{
395 if (!codec_context || stream_index < 0)
396 return;
397
398 out.attributes["video_codec"] = std::string(avcodec_get_name(codec_context->codec_id));
399 if (codec_context->codec && codec_context->codec->long_name)
400 out.attributes["video_codec_long_name"] = std::string(codec_context->codec->long_name);
401 out.attributes["video_width"] = width;
402 out.attributes["video_height"] = height;
403 out.attributes["video_frame_rate"] = frame_rate;
404 out.attributes["video_total_frames"] = total_frames;
405 out.attributes["video_bit_rate"] = codec_context->bit_rate;
406
// The pixel format key is omitted when the format has no name.
407 const char* pix_fmt_name = av_get_pix_fmt_name(codec_context->pix_fmt);
408 if (pix_fmt_name)
409 out.attributes["video_pixel_format"] = std::string(pix_fmt_name);
410
// Colour properties are exported as raw enum integers, and only when the
// decoder reports something other than "unspecified".
411 if (codec_context->color_range != AVCOL_RANGE_UNSPECIFIED)
412 out.attributes["video_color_range"] = static_cast<int>(codec_context->color_range);
413 if (codec_context->colorspace != AVCOL_SPC_UNSPECIFIED)
414 out.attributes["video_colorspace"] = static_cast<int>(codec_context->colorspace);
415 if (codec_context->color_trc != AVCOL_TRC_UNSPECIFIED)
416 out.attributes["video_color_trc"] = static_cast<int>(codec_context->color_trc);
417 if (codec_context->color_primaries != AVCOL_PRI_UNSPECIFIED)
418 out.attributes["video_color_primaries"] = static_cast<int>(codec_context->color_primaries);
419
// Everything above comes from the codec context. The entries below need the
// stream itself and are skipped if it cannot be fetched.
420 AVStream* stream = demux.get_stream(stream_index);
421 if (!stream)
422 return;
423
424 if (stream->sample_aspect_ratio.num > 0 && stream->sample_aspect_ratio.den > 0) {
425 out.attributes["video_sar_num"] = stream->sample_aspect_ratio.num;
426 out.attributes["video_sar_den"] = stream->sample_aspect_ratio.den;
427 }
428
// Copies each stream metadata tag returned by the loop under a "video_stream_"
// prefix. An empty key with AV_DICT_IGNORE_SUFFIX is presumably the usual
// FFmpeg idiom for visiting every entry -- confirm against the av_dict_get docs.
429 AVDictionaryEntry* tag = nullptr;
430 while ((tag = av_dict_get(stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX)))
431 out.attributes[std::string("video_stream_") + tag->key] = std::string(tag->value);
432}
433
// NOTE(review): the first signature line (listing line 434) is missing from
// this extract. The body returns std::vector<FileRegion> built from keyframe
// packets, which matches an extract_keyframe_regions(demux) style routine
// -- confirm.
//
// Reads packets from the demuxer until av_read_frame fails and emits one
// "keyframe" FileRegion per key-flagged packet of the selected stream. After
// the scan it seeks to timestamp 0 on that stream.
// Returns an empty vector when nothing is open, the stream cannot be fetched,
// or the packet allocation fails.
//
// NOTE(review): no seek precedes the loop, so the scan starts from the
// demuxer's current read position. Although `demux` is a const reference, the
// reads advance the shared format_context.
435 const FFmpegDemuxContext& demux) const
436{
437 std::vector<FileRegion> regions;
438 if (stream_index < 0 || !codec_context)
439 return regions;
440
441 AVStream* stream = demux.get_stream(stream_index);
442 if (!stream)
443 return regions;
444
445 AVPacket* pkt = av_packet_alloc();
446 if (!pkt)
447 return regions;
448
449 int idx = 0;
450 while (av_read_frame(demux.format_context, pkt) >= 0) {
451 if (pkt->stream_index == stream_index && (pkt->flags & AV_PKT_FLAG_KEY)) {
452 FileRegion r;
453 r.type = "keyframe";
454 r.name = "keyframe_" + std::to_string(idx);
455
// Use pts, or dts when pts is unset. If both are unset, ts stays 0.0 and the
// stored "pts" attribute is the AV_NOPTS_VALUE sentinel.
456 int64_t pts = pkt->pts != AV_NOPTS_VALUE ? pkt->pts : pkt->dts;
457 double ts = 0.0;
458 if (pts != AV_NOPTS_VALUE && stream->time_base.num > 0 && stream->time_base.den > 0)
459 ts = static_cast<double>(pts) * av_q2d(stream->time_base);
460
// NOTE(review): a negative pts/dts makes ts negative, and converting a negative
// double to uint64_t is undefined behaviour -- confirm timestamps are
// non-negative here, or clamp before the cast.
461 uint64_t frame_pos = 0;
462 if (frame_rate > 0.0)
463 frame_pos = static_cast<uint64_t>(ts * frame_rate);
464
// A keyframe is a single-frame region: start and end are the same frame index.
465 r.start_coordinates = { frame_pos };
466 r.end_coordinates = { frame_pos };
467 r.attributes["pts"] = pts;
468 r.attributes["timestamp_seconds"] = ts;
469 r.attributes["keyframe_index"] = idx;
470
471 regions.push_back(std::move(r));
472 ++idx;
473 }
474 av_packet_unref(pkt);
475 }
476
477 av_packet_free(&pkt);
478
// NOTE(review): the seek result is ignored, the position held before the scan
// is not restored, and the decoder is not flushed here -- confirm that callers
// flush the codec after this routine.
479 av_seek_frame(demux.format_context, stream_index, 0, AVSEEK_FLAG_BACKWARD);
480
481 return regions;
482}
483
484} // namespace MayaFlux::IO
#define MF_INFO(comp, ctx,...)
AVFormatContext * format_context
Owned; freed in destructor.
static void init_ffmpeg()
Initialise FFmpeg logging level once per process.
int find_best_stream(int media_type, const void **out_codec=nullptr) const
Find the best stream of the requested media type.
AVStream * get_stream(int index) const
Access a stream by index.
bool is_open() const
True if the format context is open and stream info was found.
RAII owner of a single AVFormatContext and associated demux state.
void flush_codec()
Flush codec internal buffers (call after a seek).
bool open_device(const FFmpegDemuxContext &demux, uint32_t target_width=0, uint32_t target_height=0, int target_format=-1)
Open codec only, without initialising the SwsContext scaler.
uint32_t target_width
Requested output width (0 = source).
std::vector< FileRegion > extract_keyframe_regions(const FFmpegDemuxContext &demux) const
Extract keyframe positions as FileRegion entries.
void close()
Release codec and scaler resources.
uint32_t target_height
Requested output height (0 = source).
double frame_rate
Average frame rate (fps).
bool setup_scaler(uint32_t target_width, uint32_t target_height, int target_format)
Allocate and initialise the SwsContext for pixel format conversion.
uint32_t out_height
Output height after scaling.
uint32_t height
Source height in pixels.
void extract_stream_metadata(const FFmpegDemuxContext &demux, FileMetadata &out) const
Populate stream-specific fields into an existing FileMetadata.
uint32_t width
Source width in pixels.
int src_pixel_format
Source AVPixelFormat.
int out_pixel_format
Output AVPixelFormat.
int out_linesize
Output row stride in bytes.
bool rebuild_scaler_from_frame(const AVFrame *frame, uint32_t target_width=0, uint32_t target_height=0, int target_format=-1)
Rebuild the SwsContext using the pixel format resolved from a live decoded frame.
SwsContext * sws_context
Owned; freed in destructor.
AVCodecContext * codec_context
Owned; freed in destructor.
int target_format
Requested AVPixelFormat (negative = RGBA).
uint32_t out_bytes_per_pixel
Bytes per pixel in output format.
uint32_t out_width
Output width after scaling.
bool open(const FFmpegDemuxContext &demux, uint32_t target_width=0, uint32_t target_height=0, int target_format=-1)
Open the video stream from an already-probed demux context.
@ FileIO
Filesystem I/O operations.
@ IO
Networking, file handling, streaming.
std::unordered_map< std::string, std::any > attributes
Type-specific metadata stored as key-value pairs (e.g., sample rate, channels)
Generic metadata structure for any file type.
std::vector< uint64_t > start_coordinates
N-dimensional start position (e.g., frame, x, y)
std::string name
Human-readable name for the region.
std::string type
Region type identifier (e.g., "cue", "scene", "block")
std::unordered_map< std::string, std::any > attributes
Region-specific metadata.
std::vector< uint64_t > end_coordinates
N-dimensional end position (inclusive)
Generic region descriptor for any file type.