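Decode up to num_frames PCM frames starting at offset; fewer frames are returned if the stream ends first, and an empty result is returned on failure.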
Caller must hold at least a shared lock on m_context_mutex.
383{
384 if (!audio->is_valid()) {
385 set_error(
"Invalid audio context for decoding");
386 return {};
387 }
388
390 int ch = static_cast<int>(audio->channels);
391
392 std::vector<Kakshya::DataVariant> output;
393 if (use_planar) {
394 output.resize(ch);
395 for (auto& v : output) {
396 v = std::vector<double>();
397 std::get<std::vector<double>>(v).reserve(num_frames);
398 }
399 } else {
400 output.resize(1);
401 output[0] = std::vector<double>();
402 std::get<std::vector<double>>(output[0]).reserve(num_frames * static_cast<size_t>(ch));
403 }
404
405 uint64_t decoded = 0;
406 bool eof_reached = false;
407
408 AVPacket* pkt = av_packet_alloc();
409 AVFrame* frame = av_frame_alloc();
410 if (!pkt || !frame) {
411 av_packet_free(&pkt);
412 av_frame_free(&frame);
413 set_error(
"Failed to allocate packet/frame");
414 return {};
415 }
416
418 int max_resampled = static_cast<int>(av_rescale_rnd(
419 static_cast<int64_t>(num_frames), out_rate, audio->sample_rate, AV_ROUND_UP));
420
421 AVSampleFormat tgt_fmt = use_planar ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
422 uint8_t** resample_buf = nullptr;
423 int linesize = 0;
424
425 if (av_samples_alloc_array_and_samples(
426 &resample_buf, &linesize, ch, max_resampled, tgt_fmt, 0)
427 < 0) {
428 av_packet_free(&pkt);
429 av_frame_free(&frame);
430 set_error(
"Failed to allocate resample buffer");
431 return {};
432 }
433
434 while (decoded < num_frames) {
435 if (!eof_reached) {
436 int ret = av_read_frame(demux->format_context, pkt);
437 if (ret == AVERROR_EOF) {
438 eof_reached = true;
439 avcodec_send_packet(audio->codec_context, nullptr);
440 } else if (ret < 0) {
441 eof_reached = true;
442 } else if (pkt->stream_index == audio->stream_index) {
443 avcodec_send_packet(audio->codec_context, pkt);
444 av_packet_unref(pkt);
445 } else {
446 av_packet_unref(pkt);
447 }
448 }
449
450 int receive_ret = 0;
451 while (decoded < num_frames) {
452 receive_ret = avcodec_receive_frame(audio->codec_context, frame);
453
454 if (receive_ret == AVERROR(EAGAIN))
455 break;
456 if (receive_ret == AVERROR_EOF) {
457
458 break;
459 }
460 if (receive_ret < 0)
461 break;
462
463 int out_samples = swr_convert(
464 audio->swr_context,
465 resample_buf, max_resampled,
466 const_cast<const uint8_t**>(frame->data),
467 frame->nb_samples);
468
469 if (out_samples > 0) {
470 uint64_t to_copy = std::min(static_cast<uint64_t>(out_samples),
471 num_frames - decoded);
472 if (use_planar) {
473 for (int c = 0; c < ch; ++c) {
474 auto* src = reinterpret_cast<double*>(resample_buf[c]);
475 auto& dst = std::get<std::vector<double>>(output[c]);
476 dst.insert(dst.end(), src, src + to_copy);
477 }
478 } else {
479 auto* src = reinterpret_cast<double*>(resample_buf[0]);
480 auto& dst = std::get<std::vector<double>>(output[0]);
481 dst.insert(dst.end(), src, src + to_copy * static_cast<uint64_t>(ch));
482 }
483 decoded += to_copy;
484 }
485 av_frame_unref(frame);
486 }
487
488 if (eof_reached && receive_ret == AVERROR_EOF)
489 break;
490 }
491
492 while (true) {
493 int n = swr_convert(audio->swr_context, resample_buf, max_resampled, nullptr, 0);
494 if (n <= 0)
495 break;
496
497 uint64_t to_copy = std::min(static_cast<uint64_t>(n),
498 (num_frames > decoded) ? (num_frames - decoded) : 0);
499
500 if (to_copy > 0) {
501 if (use_planar) {
502 for (int c = 0; c < ch; ++c) {
503 auto* src = reinterpret_cast<double*>(resample_buf[c]);
504 auto& dst = std::get<std::vector<double>>(output[c]);
505 dst.insert(dst.end(), src, src + to_copy);
506 }
507 } else {
508 auto* src = reinterpret_cast<double*>(resample_buf[0]);
509 auto& dst = std::get<std::vector<double>>(output[0]);
510 dst.insert(dst.end(), src, src + to_copy * static_cast<uint64_t>(ch));
511 }
512 decoded += to_copy;
513 } else {
514 break;
515 }
516 }
517
518 av_freep(&resample_buf[0]);
519 av_freep(&resample_buf);
520 av_packet_free(&pkt);
521 av_frame_free(&frame);
522
524 return output;
525}
uint32_t m_target_sample_rate
Target sample rate for resampling (0 = use source rate).
void set_error(const std::string &error) const
Set the last error message.
std::atomic< uint64_t > m_current_frame_position
Current frame position for reading.
AudioReadOptions m_audio_options
Audio-specific read options.
DEINTERLEAVE
Output planar (per-channel) doubles instead of interleaved.