OpenShot Library | libopenshot  0.5.0
FFmpegReader.cpp
Go to the documentation of this file.
1 
12 // Copyright (c) 2008-2024 OpenShot Studios, LLC, Fabrice Bellard
13 //
14 // SPDX-License-Identifier: LGPL-3.0-or-later
15 
16 #include <thread> // for std::this_thread::sleep_for
17 #include <chrono> // for std::chrono::milliseconds
18 #include <algorithm>
19 #include <cmath>
20 #include <sstream>
21 #include <unistd.h>
22 
23 #include "FFmpegUtilities.h"
24 #include "effects/CropHelpers.h"
25 
26 #include "FFmpegReader.h"
27 #include "Exceptions.h"
28 #include "MemoryTrim.h"
29 #include "Timeline.h"
30 #include "ZmqLogger.h"
31 
32 #define ENABLE_VAAPI 0
33 
34 #if USE_HW_ACCEL
35 #define MAX_SUPPORTED_WIDTH 1950
36 #define MAX_SUPPORTED_HEIGHT 1100
37 
38 #if ENABLE_VAAPI
39 #include "libavutil/hwcontext_vaapi.h"
40 
// Local declaration of the VAAPI decode context, compiled only when ENABLE_VAAPI is set.
// In this file it is used to reinterpret AVCodecContext::priv_data so that the
// VA config id (va_config) can be read when querying hardware frame constraints.
// NOTE(review): presumably this mirrors a struct private to FFmpeg; if so, field
// order and types must not be changed here - confirm against the FFmpeg version in use.
41 typedef struct VAAPIDecodeContext {
42  VAProfile va_profile;
43  VAEntrypoint va_entrypoint;
 // The only member read by this file (copied into AVVAAPIHWConfig::config_id).
44  VAConfigID va_config;
45  VAContextID va_context;
46 
 // Extra members that exist only when FF_API_STRUCT_VAAPI_CONTEXT is enabled.
47 #if FF_API_STRUCT_VAAPI_CONTEXT
48  // FF_DISABLE_DEPRECATION_WARNINGS
49  int have_old_context;
50  struct vaapi_context *old_context;
51  AVBufferRef *device_ref;
52  // FF_ENABLE_DEPRECATION_WARNINGS
53 #endif
54 
55  AVHWDeviceContext *device;
56  AVVAAPIDeviceContext *hwctx;
57 
58  AVHWFramesContext *frames;
59  AVVAAPIFramesContext *hwfc;
60 
61  enum AVPixelFormat surface_format;
62  int surface_count;
63  } VAAPIDecodeContext;
64 #endif // ENABLE_VAAPI
65 #endif // USE_HW_ACCEL
66 
67 
68 using namespace openshot;
69 
// File-level flag gating every hardware-decode code path in this file (0 = off).
// NOTE(review): the statement that assigns it is not visible in this listing.
70 int hw_de_on = 0;
71 #if USE_HW_ACCEL
 // Pixel format and device type most recently chosen by get_hw_dec_format().
 // Both are plain globals, i.e. shared by every reader instance in the process.
72  AVPixelFormat hw_de_av_pix_fmt_global = AV_PIX_FMT_NONE;
73  AVHWDeviceType hw_de_av_device_type_global = AV_HWDEVICE_TYPE_NONE;
74 #endif
75 
76 FFmpegReader::FFmpegReader(const std::string &path, bool inspect_reader)
77  : FFmpegReader(path, DurationStrategy::VideoPreferred, inspect_reader) {}
78 
// Primary constructor. Puts every piece of decoder state into its "nothing open"
// value (null FFmpeg pointers, stream indexes of -1, zeroed counters), gives both
// frame caches an initial size, and optionally inspects the file.
//
// path              - media file path, stored in the `path` member.
// duration_strategy - stored; consulted by PickDurationSeconds().
// inspect_reader    - when true, Open() and then Close() are called immediately so
//                     that `info` is populated before the constructor returns.
79 FFmpegReader::FFmpegReader(const std::string &path, DurationStrategy duration_strategy, bool inspect_reader)
80  : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
81  path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
82  seek_audio_frame_found(0), seek_video_frame_found(0),
83  last_seek_max_frame(-1), seek_stagnant_count(0),
84  is_duration_known(false), largest_frame_processed(0),
85  current_video_frame(0), packet(NULL), duration_strategy(duration_strategy),
86  audio_pts(0), video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
87  pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
88  hold_packet(false) {
89 
90  // Initialize FFMpeg, and register all formats and codecs
 // NOTE(review): the statement(s) this comment introduces are not visible in this listing.
93 
94  // Init timestamp offsets
 // NO_PTS_OFFSET (-99999) is used as the "not determined yet" sentinel for all three.
95  pts_offset_seconds = NO_PTS_OFFSET;
96  video_pts_seconds = NO_PTS_OFFSET;
97  audio_pts_seconds = NO_PTS_OFFSET;
98 
99  // Init cache
 // Both caches start with the same frame budget: at least CACHE_MIN_FRAMES, otherwise
 // four frames per processor. `info` has not been filled from a file at this point.
100  const int init_working_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, OPEN_MP_NUM_PROCESSORS * 4);
101  const int init_final_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, OPEN_MP_NUM_PROCESSORS * 4);
102  working_cache.SetMaxBytesFromInfo(init_working_cache_frames, info.width, info.height, info.sample_rate, info.channels);
103  final_cache.SetMaxBytesFromInfo(init_final_cache_frames, info.width, info.height, info.sample_rate, info.channels);
104 
105  // Open and Close the reader, to populate its attributes (such as height, width, etc...)
106  if (inspect_reader) {
107  Open();
108  Close();
109  }
110 }
111 
113  if (is_open)
114  // Auto close reader if not already done
115  Close();
116 }
117 
118 // This struct holds the associated video frame and starting sample # for an audio packet.
119 bool AudioLocation::is_near(AudioLocation location, int samples_per_frame, int64_t amount) {
120  // Is frame even close to this one?
121  if (abs(location.frame - frame) >= 2)
122  // This is too far away to be considered
123  return false;
124 
125  // Note that samples_per_frame can vary slightly frame to frame when the
126  // audio sampling rate is not an integer multiple of the video fps.
127  int64_t diff = samples_per_frame * (location.frame - frame) + location.sample_start - sample_start;
128  if (abs(diff) <= amount)
129  // close
130  return true;
131 
132  // not close
133  return false;
134 }
135 
136 #if USE_HW_ACCEL
137 
// Pixel-format selection callback (assigned to AVCodecContext::get_format in this file).
//
// Walks `pix_fmts` up to its AV_PIX_FMT_NONE terminator and returns the first entry
// whose hardware format matches the decoder number held in `selected`:
//   1 = VAAPI, 6 = VDPAU          (only compiled on Linux)
//   3 = DXVA2, 4 = D3D11          (only compiled on Windows)
//   5 = VideoToolbox              (only compiled on Apple platforms)
//   2 = CUDA,  7 = QSV            (all platforms)
// On a match the chosen pixel format and device type are also stored in the globals
// hw_de_av_pix_fmt_global / hw_de_av_device_type_global.
// When nothing matches, a debug message is logged and AV_PIX_FMT_NONE is returned.
// `ctx` is not referenced by any line visible here.
138 // Get hardware pix format
139 static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
140 {
141  const enum AVPixelFormat *p;
142 
143  // Prefer only the format matching the selected hardware decoder
 // NOTE(review): the declaration of `selected` is not visible in this listing;
 // presumably it is read from the HARDWARE_DECODER setting - confirm.
145 
146  for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
147  switch (*p) {
148 #if defined(__linux__)
149  // Linux pix formats
150  case AV_PIX_FMT_VAAPI:
151  if (selected == 1) {
152  hw_de_av_pix_fmt_global = AV_PIX_FMT_VAAPI;
153  hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VAAPI;
154  return *p;
155  }
156  break;
157  case AV_PIX_FMT_VDPAU:
158  if (selected == 6) {
159  hw_de_av_pix_fmt_global = AV_PIX_FMT_VDPAU;
160  hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VDPAU;
161  return *p;
162  }
163  break;
164 #endif
165 #if defined(_WIN32)
166  // Windows pix formats
167  case AV_PIX_FMT_DXVA2_VLD:
168  if (selected == 3) {
169  hw_de_av_pix_fmt_global = AV_PIX_FMT_DXVA2_VLD;
170  hw_de_av_device_type_global = AV_HWDEVICE_TYPE_DXVA2;
171  return *p;
172  }
173  break;
174  case AV_PIX_FMT_D3D11:
175  if (selected == 4) {
176  hw_de_av_pix_fmt_global = AV_PIX_FMT_D3D11;
177  hw_de_av_device_type_global = AV_HWDEVICE_TYPE_D3D11VA;
178  return *p;
179  }
180  break;
181 #endif
182 #if defined(__APPLE__)
183  // Apple pix formats
184  case AV_PIX_FMT_VIDEOTOOLBOX:
185  if (selected == 5) {
186  hw_de_av_pix_fmt_global = AV_PIX_FMT_VIDEOTOOLBOX;
187  hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
188  return *p;
189  }
190  break;
191 #endif
192  // Cross-platform pix formats
193  case AV_PIX_FMT_CUDA:
194  if (selected == 2) {
195  hw_de_av_pix_fmt_global = AV_PIX_FMT_CUDA;
196  hw_de_av_device_type_global = AV_HWDEVICE_TYPE_CUDA;
197  return *p;
198  }
199  break;
200  case AV_PIX_FMT_QSV:
201  if (selected == 7) {
202  hw_de_av_pix_fmt_global = AV_PIX_FMT_QSV;
203  hw_de_av_device_type_global = AV_HWDEVICE_TYPE_QSV;
204  return *p;
205  }
206  break;
207  default:
208  // This is only here to silence unused-enum warnings
209  break;
210  }
211  }
 // No offered format matched the selected decoder: log it and report "no format".
212  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::get_hw_dec_format (Unable to decode this file using hardware decode)");
213  return AV_PIX_FMT_NONE;
214 }
215 
216 int FFmpegReader::IsHardwareDecodeSupported(int codecid)
217 {
218  int ret;
219  switch (codecid) {
220  case AV_CODEC_ID_H264:
221  case AV_CODEC_ID_MPEG2VIDEO:
222  case AV_CODEC_ID_VC1:
223  case AV_CODEC_ID_WMV1:
224  case AV_CODEC_ID_WMV2:
225  case AV_CODEC_ID_WMV3:
226  ret = 1;
227  break;
228  default :
229  ret = 0;
230  break;
231  }
232  return ret;
233 }
234 #endif // USE_HW_ACCEL
235 
237  // Open reader if not already open
238  if (!is_open) {
239  // Prevent async calls to the following code
240  const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
241 
242  // Initialize format context
243  pFormatCtx = NULL;
244  {
246  ZmqLogger::Instance()->AppendDebugMethod("Decode hardware acceleration settings", "hw_de_on", hw_de_on, "HARDWARE_DECODER", openshot::Settings::Instance()->HARDWARE_DECODER);
247  }
248 
249  // Open video file
250  if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0)
251  throw InvalidFile("FFmpegReader could not open media file.", path);
252 
253  // Retrieve stream information
254  if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
255  throw NoStreamsFound("No streams found in file.", path);
256 
257  videoStream = -1;
258  audioStream = -1;
259 
260  // Init end-of-file detection variables
261  packet_status.reset(true);
262 
263  // Loop through each stream, and identify the video and audio stream index
264  for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
265  // Is this a video stream?
266  if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
267  videoStream = i;
268  packet_status.video_eof = false;
269  packet_status.packets_eof = false;
270  packet_status.end_of_file = false;
271  }
272  // Is this an audio stream?
273  if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
274  audioStream = i;
275  packet_status.audio_eof = false;
276  packet_status.packets_eof = false;
277  packet_status.end_of_file = false;
278  }
279  }
280  if (videoStream == -1 && audioStream == -1)
281  throw NoStreamsFound("No video or audio streams found in this file.", path);
282 
283  // Is there a video stream?
284  if (videoStream != -1) {
285  // Set the stream index
286  info.video_stream_index = videoStream;
287 
288  // Set the codec and codec context pointers
289  pStream = pFormatCtx->streams[videoStream];
290 
291  // Find the codec ID from stream
292  const AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(pStream);
293 
294  // Get codec and codec context from stream
295  const AVCodec *pCodec = avcodec_find_decoder(codecId);
296  AVDictionary *opts = NULL;
297  int retry_decode_open = 2;
298  // If hw accel is selected but hardware cannot handle repeat with software decoding
299  do {
300  pCodecCtx = AV_GET_CODEC_CONTEXT(pStream, pCodec);
301 #if USE_HW_ACCEL
302  if (hw_de_on && (retry_decode_open==2)) {
303  // Up to here no decision is made if hardware or software decode
304  hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
305  }
306 #endif
307  retry_decode_open = 0;
308 
309  // Set number of threads equal to number of processors (not to exceed 16)
310  pCodecCtx->thread_count = std::min(FF_VIDEO_NUM_PROCESSORS, 16);
311 
312  if (pCodec == NULL) {
313  throw InvalidCodec("A valid video codec could not be found for this file.", path);
314  }
315 
316  // Init options
317  av_dict_set(&opts, "strict", "experimental", 0);
318 #if USE_HW_ACCEL
319  if (hw_de_on && hw_de_supported) {
320  // Open Hardware Acceleration
321  int i_decoder_hw = 0;
322  char adapter[256];
323  char *adapter_ptr = NULL;
324  int adapter_num;
326  ZmqLogger::Instance()->AppendDebugMethod("Hardware decoding device number", "adapter_num", adapter_num);
327 
328  // Set hardware pix format (callback)
329  pCodecCtx->get_format = get_hw_dec_format;
330 
331  if (adapter_num < 3 && adapter_num >=0) {
332 #if defined(__linux__)
333  snprintf(adapter,sizeof(adapter),"/dev/dri/renderD%d", adapter_num+128);
334  adapter_ptr = adapter;
336  switch (i_decoder_hw) {
337  case 1:
338  hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
339  break;
340  case 2:
341  hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
342  break;
343  case 6:
344  hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
345  break;
346  case 7:
347  hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
348  break;
349  default:
350  hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
351  break;
352  }
353 
354 #elif defined(_WIN32)
355  adapter_ptr = NULL;
357  switch (i_decoder_hw) {
358  case 2:
359  hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
360  break;
361  case 3:
362  hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
363  break;
364  case 4:
365  hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
366  break;
367  case 7:
368  hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
369  break;
370  default:
371  hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
372  break;
373  }
374 #elif defined(__APPLE__)
375  adapter_ptr = NULL;
377  switch (i_decoder_hw) {
378  case 5:
379  hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
380  break;
381  case 7:
382  hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
383  break;
384  default:
385  hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
386  break;
387  }
388 #endif
389 
390  } else {
391  adapter_ptr = NULL; // Just to be sure
392  }
393 
394  // Check if it is there and writable
395 #if defined(__linux__)
396  if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
397 #elif defined(_WIN32)
398  if( adapter_ptr != NULL ) {
399 #elif defined(__APPLE__)
400  if( adapter_ptr != NULL ) {
401 #endif
402  ZmqLogger::Instance()->AppendDebugMethod("Decode Device present using device");
403  }
404  else {
405  adapter_ptr = NULL; // use default
406  ZmqLogger::Instance()->AppendDebugMethod("Decode Device not present using default");
407  }
408 
409  hw_device_ctx = NULL;
410  // Here the first hardware initialisations are made
411  if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
412  const char* hw_name = av_hwdevice_get_type_name(hw_de_av_device_type);
413  std::string hw_msg = "HW decode active: ";
414  hw_msg += (hw_name ? hw_name : "unknown");
415  ZmqLogger::Instance()->Log(hw_msg);
416  if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
417  throw InvalidCodec("Hardware device reference create failed.", path);
418  }
419 
420  /*
421  av_buffer_unref(&ist->hw_frames_ctx);
422  ist->hw_frames_ctx = av_hwframe_ctx_alloc(hw_device_ctx);
423  if (!ist->hw_frames_ctx) {
424  av_log(avctx, AV_LOG_ERROR, "Error creating a CUDA frames context\n");
425  return AVERROR(ENOMEM);
426  }
427 
428  frames_ctx = (AVHWFramesContext*)ist->hw_frames_ctx->data;
429 
430  frames_ctx->format = AV_PIX_FMT_CUDA;
431  frames_ctx->sw_format = avctx->sw_pix_fmt;
432  frames_ctx->width = avctx->width;
433  frames_ctx->height = avctx->height;
434 
435  av_log(avctx, AV_LOG_DEBUG, "Initializing CUDA frames context: sw_format = %s, width = %d, height = %d\n",
436  av_get_pix_fmt_name(frames_ctx->sw_format), frames_ctx->width, frames_ctx->height);
437 
438 
439  ret = av_hwframe_ctx_init(pCodecCtx->hw_device_ctx);
440  ret = av_hwframe_ctx_init(ist->hw_frames_ctx);
441  if (ret < 0) {
442  av_log(avctx, AV_LOG_ERROR, "Error initializing a CUDA frame pool\n");
443  return ret;
444  }
445  */
446  }
447  else {
448  ZmqLogger::Instance()->Log("HW decode active: no (falling back to software)");
449  throw InvalidCodec("Hardware device create failed.", path);
450  }
451  }
452 #endif // USE_HW_ACCEL
453 
454  // Disable per-frame threading for album arts
455  // Using FF_THREAD_FRAME adds one frame decoding delay per thread,
456  // but there's only one frame in this case.
457  if (HasAlbumArt())
458  {
459  pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
460  }
461 
462  // Open video codec
463  int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &opts);
464  if (avcodec_return < 0) {
465  std::stringstream avcodec_error_msg;
466  avcodec_error_msg << "A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
467  throw InvalidCodec(avcodec_error_msg.str(), path);
468  }
469 
470 #if USE_HW_ACCEL
471  if (hw_de_on && hw_de_supported) {
472  AVHWFramesConstraints *constraints = NULL;
473  void *hwconfig = NULL;
474  hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);
475 
476 // TODO: needs va_config!
477 #if ENABLE_VAAPI
478  ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
479  constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
480 #endif // ENABLE_VAAPI
481  if (constraints) {
482  if (pCodecCtx->coded_width < constraints->min_width ||
483  pCodecCtx->coded_height < constraints->min_height ||
484  pCodecCtx->coded_width > constraints->max_width ||
485  pCodecCtx->coded_height > constraints->max_height) {
486  ZmqLogger::Instance()->AppendDebugMethod("DIMENSIONS ARE TOO LARGE for hardware acceleration\n");
487  hw_de_supported = 0;
488  retry_decode_open = 1;
489  AV_FREE_CONTEXT(pCodecCtx);
490  if (hw_device_ctx) {
491  av_buffer_unref(&hw_device_ctx);
492  hw_device_ctx = NULL;
493  }
494  }
495  else {
496  // All is just peachy
497  ZmqLogger::Instance()->AppendDebugMethod("\nDecode hardware acceleration is used\n", "Min width :", constraints->min_width, "Min Height :", constraints->min_height, "MaxWidth :", constraints->max_width, "MaxHeight :", constraints->max_height, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
498  retry_decode_open = 0;
499  }
500  av_hwframe_constraints_free(&constraints);
501  if (hwconfig) {
502  av_freep(&hwconfig);
503  }
504  }
505  else {
506  int max_h, max_w;
507  //max_h = ((getenv( "LIMIT_HEIGHT_MAX" )==NULL) ? MAX_SUPPORTED_HEIGHT : atoi(getenv( "LIMIT_HEIGHT_MAX" )));
509  //max_w = ((getenv( "LIMIT_WIDTH_MAX" )==NULL) ? MAX_SUPPORTED_WIDTH : atoi(getenv( "LIMIT_WIDTH_MAX" )));
511  ZmqLogger::Instance()->AppendDebugMethod("Constraints could not be found using default limit\n");
512  //cerr << "Constraints could not be found using default limit\n";
513  if (pCodecCtx->coded_width < 0 ||
514  pCodecCtx->coded_height < 0 ||
515  pCodecCtx->coded_width > max_w ||
516  pCodecCtx->coded_height > max_h ) {
517  ZmqLogger::Instance()->AppendDebugMethod("DIMENSIONS ARE TOO LARGE for hardware acceleration\n", "Max Width :", max_w, "Max Height :", max_h, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
518  hw_de_supported = 0;
519  retry_decode_open = 1;
520  AV_FREE_CONTEXT(pCodecCtx);
521  if (hw_device_ctx) {
522  av_buffer_unref(&hw_device_ctx);
523  hw_device_ctx = NULL;
524  }
525  }
526  else {
527  ZmqLogger::Instance()->AppendDebugMethod("\nDecode hardware acceleration is used\n", "Max Width :", max_w, "Max Height :", max_h, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
528  retry_decode_open = 0;
529  }
530  }
531  } // if hw_de_on && hw_de_supported
532  else {
533  ZmqLogger::Instance()->AppendDebugMethod("\nDecode in software is used\n");
534  }
535 #else
536  retry_decode_open = 0;
537 #endif // USE_HW_ACCEL
538  } while (retry_decode_open); // retry_decode_open
539  // Free options
540  av_dict_free(&opts);
541 
542  // Update the File Info struct with video details (if a video stream is found)
543  UpdateVideoInfo();
544  }
545 
546  // Is there an audio stream?
547  if (audioStream != -1) {
548  // Set the stream index
549  info.audio_stream_index = audioStream;
550 
551  // Get a pointer to the codec context for the audio stream
552  aStream = pFormatCtx->streams[audioStream];
553 
554  // Find the codec ID from stream
555  AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(aStream);
556 
557  // Get codec and codec context from stream
558  const AVCodec *aCodec = avcodec_find_decoder(codecId);
559  aCodecCtx = AV_GET_CODEC_CONTEXT(aStream, aCodec);
560 
561  // Audio encoding does not typically use more than 2 threads (most codecs use 1 thread)
562  aCodecCtx->thread_count = std::min(FF_AUDIO_NUM_PROCESSORS, 2);
563 
564  bool audio_opened = false;
565  if (aCodec != NULL) {
566  // Init options
567  AVDictionary *opts = NULL;
568  av_dict_set(&opts, "strict", "experimental", 0);
569 
570  // Open audio codec
571  audio_opened = (avcodec_open2(aCodecCtx, aCodec, &opts) >= 0);
572 
573  // Free options
574  av_dict_free(&opts);
575  }
576 
577  if (audio_opened) {
578  // Update the File Info struct with audio details (if an audio stream is found)
579  UpdateAudioInfo();
580 
581  // Disable malformed audio stream metadata (prevents divide-by-zero / invalid resampling math)
582  const bool invalid_audio_info =
583  (info.channels <= 0) ||
584  (info.sample_rate <= 0) ||
585  (info.audio_timebase.num <= 0) ||
586  (info.audio_timebase.den <= 0) ||
587  (aCodecCtx->sample_fmt == AV_SAMPLE_FMT_NONE);
588  if (invalid_audio_info) {
590  "FFmpegReader::Open (Disable invalid audio stream)",
591  "channels", info.channels,
592  "sample_rate", info.sample_rate,
593  "audio_timebase.num", info.audio_timebase.num,
594  "audio_timebase.den", info.audio_timebase.den,
595  "sample_fmt", static_cast<int>(aCodecCtx ? aCodecCtx->sample_fmt : AV_SAMPLE_FMT_NONE));
596  info.has_audio = false;
598  audioStream = -1;
599  packet_status.audio_eof = true;
600  if (aCodecCtx) {
601  if (avcodec_is_open(aCodecCtx)) {
602  avcodec_flush_buffers(aCodecCtx);
603  }
604  AV_FREE_CONTEXT(aCodecCtx);
605  aCodecCtx = nullptr;
606  }
607  aStream = nullptr;
608  }
609  } else {
610  // Keep decoding video, but disable bad/unsupported audio stream.
612  "FFmpegReader::Open (Audio codec unavailable; disabling audio)",
613  "audioStream", audioStream);
614  info.has_audio = false;
616  audioStream = -1;
617  packet_status.audio_eof = true;
618  if (aCodecCtx) {
619  AV_FREE_CONTEXT(aCodecCtx);
620  aCodecCtx = nullptr;
621  }
622  aStream = nullptr;
623  }
624  }
625 
626  // Guard invalid frame-rate / timebase values from malformed streams.
627  if (info.fps.num <= 0 || info.fps.den <= 0) {
629  "FFmpegReader::Open (Invalid FPS detected; applying fallback)",
630  "fps.num", info.fps.num,
631  "fps.den", info.fps.den);
632  info.fps.num = 30;
633  info.fps.den = 1;
634  }
635  if (info.video_timebase.num <= 0 || info.video_timebase.den <= 0) {
637  "FFmpegReader::Open (Invalid video_timebase detected; applying fallback)",
638  "video_timebase.num", info.video_timebase.num,
639  "video_timebase.den", info.video_timebase.den);
641  }
642 
643  // Add format metadata (if any)
644  AVDictionaryEntry *tag = NULL;
645  while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
646  QString str_key = tag->key;
647  QString str_value = tag->value;
648  info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
649  }
650 
651  // Process video stream side data (rotation, spherical metadata, etc)
652  for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
653  AVStream* st = pFormatCtx->streams[i];
654  if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
655  // Only inspect the first video stream
656  for (int j = 0; j < st->nb_side_data; j++) {
657  AVPacketSideData *sd = &st->side_data[j];
658 
659  // Handle rotation metadata (unchanged)
660  if (sd->type == AV_PKT_DATA_DISPLAYMATRIX &&
661  sd->size >= 9 * sizeof(int32_t) &&
662  !info.metadata.count("rotate"))
663  {
664  double rotation = -av_display_rotation_get(
665  reinterpret_cast<int32_t *>(sd->data));
666  if (std::isnan(rotation)) rotation = 0;
667  info.metadata["rotate"] = std::to_string(rotation);
668  }
669  // Handle spherical video metadata
670  else if (sd->type == AV_PKT_DATA_SPHERICAL) {
671  // Always mark as spherical
672  info.metadata["spherical"] = "1";
673 
674  // Cast the raw bytes to an AVSphericalMapping
675  const AVSphericalMapping* map =
676  reinterpret_cast<const AVSphericalMapping*>(sd->data);
677 
678  // Projection enum → string
679  const char* proj_name = av_spherical_projection_name(map->projection);
680  info.metadata["spherical_projection"] = proj_name
681  ? proj_name
682  : "unknown";
683 
684  // Convert 16.16 fixed-point to float degrees
685  auto to_deg = [](int32_t v){
686  return (double)v / 65536.0;
687  };
688  info.metadata["spherical_yaw"] = std::to_string(to_deg(map->yaw));
689  info.metadata["spherical_pitch"] = std::to_string(to_deg(map->pitch));
690  info.metadata["spherical_roll"] = std::to_string(to_deg(map->roll));
691  }
692  }
693  break;
694  }
695  }
696 
697  // Init previous audio location to zero
698  previous_packet_location.frame = -1;
699  previous_packet_location.sample_start = 0;
700 
701  // Adjust cache size based on size of frame and audio
702  const int working_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, int(OPEN_MP_NUM_PROCESSORS * info.fps.ToDouble() * 2));
703  const int final_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, OPEN_MP_NUM_PROCESSORS * 2);
704  working_cache.SetMaxBytesFromInfo(working_cache_frames, info.width, info.height, info.sample_rate, info.channels);
706 
707  // Scan PTS for any offsets (i.e. non-zero starting streams). At least 1 stream must start at zero timestamp.
708  // This method allows us to shift timestamps to ensure at least 1 stream is starting at zero.
709  UpdatePTSOffset();
710 
711  // Override an invalid framerate
712  if (info.fps.ToFloat() > 240.0f || (info.fps.num <= 0 || info.fps.den <= 0) || info.video_length <= 0) {
713  // Calculate FPS, duration, video bit rate, and video length manually
714  // by scanning through all the video stream packets
715  CheckFPS();
716  }
717 
718  // Mark as "open"
719  is_open = true;
720 
721  // Seek back to beginning of file (if not already seeking)
722  if (!is_seeking) {
723  Seek(1);
724  }
725  }
726 }
727 
729  // Close all objects, if reader is 'open'
730  if (is_open) {
731  // Prevent async calls to the following code
732  const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
733 
734  // Mark as "closed"
735  is_open = false;
736 
737  // Keep track of most recent packet
738  AVPacket *recent_packet = packet;
739 
740  // Drain any packets from the decoder
741  packet = NULL;
742  int attempts = 0;
743  int max_attempts = 128;
744  while (packet_status.packets_decoded() < packet_status.packets_read() && attempts < max_attempts) {
745  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::Close (Drain decoder loop)",
746  "packets_read", packet_status.packets_read(),
747  "packets_decoded", packet_status.packets_decoded(),
748  "attempts", attempts);
749  if (packet_status.video_decoded < packet_status.video_read) {
750  ProcessVideoPacket(info.video_length);
751  }
752  if (packet_status.audio_decoded < packet_status.audio_read) {
753  ProcessAudioPacket(info.video_length);
754  }
755  attempts++;
756  }
757 
758  // Remove packet
759  if (recent_packet) {
760  RemoveAVPacket(recent_packet);
761  }
762 
763  // Close the video codec
764  if (info.has_video) {
765  if(avcodec_is_open(pCodecCtx)) {
766  avcodec_flush_buffers(pCodecCtx);
767  }
768  AV_FREE_CONTEXT(pCodecCtx);
769 #if USE_HW_ACCEL
770  if (hw_de_on) {
771  if (hw_device_ctx) {
772  av_buffer_unref(&hw_device_ctx);
773  hw_device_ctx = NULL;
774  }
775  }
776 #endif // USE_HW_ACCEL
777  if (img_convert_ctx) {
778  sws_freeContext(img_convert_ctx);
779  img_convert_ctx = nullptr;
780  }
781  if (pFrameRGB_cached) {
782  AV_FREE_FRAME(&pFrameRGB_cached);
783  }
784  }
785 
786  // Close the audio codec
787  if (info.has_audio) {
788  if(avcodec_is_open(aCodecCtx)) {
789  avcodec_flush_buffers(aCodecCtx);
790  }
791  AV_FREE_CONTEXT(aCodecCtx);
792  if (avr_ctx) {
793  SWR_CLOSE(avr_ctx);
794  SWR_FREE(&avr_ctx);
795  avr_ctx = nullptr;
796  }
797  }
798 
799  // Clear final cache
800  final_cache.Clear();
801  working_cache.Clear();
802 
803  // Close the video file
804  avformat_close_input(&pFormatCtx);
805  av_freep(&pFormatCtx);
806 
807  // Do not trim here; trimming is handled on explicit cache clears
808 
809  // Reset some variables
810  last_frame = 0;
811  hold_packet = false;
812  largest_frame_processed = 0;
813  seek_audio_frame_found = 0;
814  seek_video_frame_found = 0;
815  current_video_frame = 0;
816  last_video_frame.reset();
817  last_final_video_frame.reset();
818  }
819 }
820 
821 bool FFmpegReader::HasAlbumArt() {
822  // Check if the video stream we use is an attached picture
823  // This won't return true if the file has a cover image as a secondary stream
824  // like an MKV file with an attached image file
825  return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
826  && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
827 }
828 
// Choose one duration, in seconds, from the candidates recorded on this reader
// (video stream, audio stream, container/format, inferred) according to
// `duration_strategy`. A candidate is usable only when it is strictly positive.
// Returns 0.0 when no candidate is usable. Does not modify the reader (const).
829 double FFmpegReader::PickDurationSeconds() const {
830  auto has_value = [](double value) { return value > 0.0; };
831 
832  switch (duration_strategy) {
 // NOTE(review): the case label for this branch is not visible in this listing.
 // Preference order here: video stream, then audio stream, then format.
834  if (has_value(video_stream_duration_seconds))
835  return video_stream_duration_seconds;
836  if (has_value(audio_stream_duration_seconds))
837  return audio_stream_duration_seconds;
838  if (has_value(format_duration_seconds))
839  return format_duration_seconds;
840  break;
 // NOTE(review): the case label for this branch is not visible in this listing.
 // Preference order here: audio stream, then video stream, then format.
842  if (has_value(audio_stream_duration_seconds))
843  return audio_stream_duration_seconds;
844  if (has_value(video_stream_duration_seconds))
845  return video_stream_duration_seconds;
846  if (has_value(format_duration_seconds))
847  return format_duration_seconds;
848  break;
 // NOTE(review): a case label appears to be missing before `default` in this listing.
 // This branch returns the largest usable value among video, audio and format.
850  default:
851  {
852  double longest = 0.0;
853  if (has_value(video_stream_duration_seconds))
854  longest = std::max(longest, video_stream_duration_seconds);
855  if (has_value(audio_stream_duration_seconds))
856  longest = std::max(longest, audio_stream_duration_seconds);
857  if (has_value(format_duration_seconds))
858  longest = std::max(longest, format_duration_seconds);
859  if (has_value(longest))
860  return longest;
861  }
862  break;
863  }
864 
 // Fallbacks once the strategy produced nothing: the format duration is tried again
 // (every branch above already tried it), and then the inferred duration.
865  if (has_value(format_duration_seconds))
866  return format_duration_seconds;
867  if (has_value(inferred_duration_seconds))
868  return inferred_duration_seconds;
869 
870  return 0.0;
871 }
872 
873 void FFmpegReader::ApplyDurationStrategy() {
874  const double fps_value = info.fps.ToDouble();
875  const double chosen_seconds = PickDurationSeconds();
876 
877  if (chosen_seconds <= 0.0 || fps_value <= 0.0) {
878  info.duration = 0.0f;
879  info.video_length = 0;
880  is_duration_known = false;
881  return;
882  }
883 
884  const int64_t frames = static_cast<int64_t>(std::llround(chosen_seconds * fps_value));
885  if (frames <= 0) {
886  info.duration = 0.0f;
887  info.video_length = 0;
888  is_duration_known = false;
889  return;
890  }
891 
892  info.video_length = frames;
893  info.duration = static_cast<float>(static_cast<double>(frames) / fps_value);
894  is_duration_known = true;
895 }
896 
// Fill the audio-related fields of `info` from the opened audio stream
// (aStream / aCodecCtx) and the format context.
//
// A default channel layout is applied to the codec attributes first when needed.
// Everything after that runs only while info.sample_rate is still <= 0, so a
// repeated call on an already populated reader returns early.
// Also records candidate durations, supplies placeholder video settings for
// audio-only files, calls ApplyDurationStrategy(), and copies the audio stream's
// metadata into info.metadata.
897 void FFmpegReader::UpdateAudioInfo() {
 // Channel count as reported by the codec attributes (field name depends on the FFmpeg API in use).
898  const int codec_channels =
899 #if HAVE_CH_LAYOUT
900  AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
901 #else
902  AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
903 #endif
904 
905  // Set default audio channel layout (if needed)
 // With HAVE_CH_LAYOUT the replacement layout is always stereo, whatever the channel count;
 // the legacy branch instead asks FFmpeg for the default layout of that channel count.
906 #if HAVE_CH_LAYOUT
907  if (codec_channels > 0 &&
908  !av_channel_layout_check(&(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout)))
909  AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout = (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
910 #else
911  if (codec_channels > 0 && AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout == 0)
912  AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout = av_get_default_channel_layout(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels);
913 #endif
914 
915  if (info.sample_rate > 0) {
916  // Skip init - if info struct already populated
917  return;
918  }
919 
 // Helper: keep the larger of the stored value and `seconds`; non-positive values are ignored.
920  auto record_duration = [](double &target, double seconds) {
921  if (seconds > 0.0)
922  target = std::max(target, seconds);
923  };
924 
925  // Set values of FileInfo struct
926  info.has_audio = true;
 // -1 is stored as the file size when the format context has no I/O context.
927  info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
928  info.acodec = aCodecCtx->codec->name;
929 #if HAVE_CH_LAYOUT
930  info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
931  info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.u.mask;
932 #else
933  info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
934  info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout;
935 #endif
936 
937  // If channel layout is not set, guess based on the number of channels
 // NOTE(review): the statements inside the two branches below are not visible in this
 // listing; presumably they assign a mono and a stereo layout respectively - confirm.
938  if (info.channel_layout == 0) {
939  if (info.channels == 1) {
941  } else if (info.channels == 2) {
943  }
944  }
945 
946  info.sample_rate = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->sample_rate;
947  info.audio_bit_rate = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->bit_rate;
948  if (info.audio_bit_rate <= 0) {
949  // Get bitrate from format
950  info.audio_bit_rate = pFormatCtx->bit_rate;
951  }
952 
953  // Set audio timebase
954  info.audio_timebase.num = aStream->time_base.num;
955  info.audio_timebase.den = aStream->time_base.den;
956 
957  // Get timebase of audio stream (if valid) and greater than the current duration
 // Stream duration is expressed in audio timebase units, hence the multiplication.
958  if (aStream->duration > 0) {
959  record_duration(audio_stream_duration_seconds, aStream->duration * info.audio_timebase.ToDouble());
960  }
961  if (pFormatCtx->duration > 0) {
962  // Use the format's duration when stream duration is missing or shorter
 // Format duration is expressed in AV_TIME_BASE units, hence the division.
963  record_duration(format_duration_seconds, static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
964  }
965 
966  // Calculate duration from filesize and bitrate (if any)
 // NOTE(review): this divides the file size by video_bit_rate (not the audio bit rate)
 // with no byte-to-bit conversion - confirm this is the intended estimate.
967  if (info.duration <= 0.0f && info.video_bit_rate > 0 && info.file_size > 0) {
968  // Estimate from bitrate, total bytes, and framerate
969  record_duration(inferred_duration_seconds, static_cast<double>(info.file_size) / info.video_bit_rate);
970  }
971 
972  // Set video timebase (if no video stream was found)
973  if (!info.has_video) {
974  // Set a few important default video settings (so audio can be divided into frames)
 // Placeholder values: 30 fps, a 1/30 timebase and a 720x480 frame.
975  info.fps.num = 30;
976  info.fps.den = 1;
977  info.video_timebase.num = 1;
978  info.video_timebase.den = 30;
979  info.width = 720;
980  info.height = 480;
981 
982  // Use timeline to set correct width & height (if any)
983  Clip *parent = static_cast<Clip *>(ParentClip());
984  if (parent) {
985  if (parent->ParentTimeline()) {
986  // Set max width/height based on parent clip's timeline (if attached to a timeline)
987  info.width = parent->ParentTimeline()->preview_width;
988  info.height = parent->ParentTimeline()->preview_height;
989  }
990  }
991  }
992 
 // Turn the recorded candidate durations into info.duration / info.video_length.
993  ApplyDurationStrategy();
994 
995  // Add audio metadata (if any found)
 // Values are whitespace-trimmed; a key already present in info.metadata is overwritten.
996  AVDictionaryEntry *tag = NULL;
997  while ((tag = av_dict_get(aStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
998  QString str_key = tag->key;
999  QString str_value = tag->value;
1000  info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
1001  }
1002 }
1003 
1004 void FFmpegReader::UpdateVideoInfo() {
1005  if (info.vcodec.length() > 0) {
1006  // Skip init - if info struct already populated
1007  return;
1008  }
1009 
1010  auto record_duration = [](double &target, double seconds) {
1011  if (seconds > 0.0)
1012  target = std::max(target, seconds);
1013  };
1014 
1015  // Set values of FileInfo struct
1016  info.has_video = true;
1017  info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
1018  info.height = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->height;
1019  info.width = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->width;
1020  info.vcodec = pCodecCtx->codec->name;
1021  info.video_bit_rate = (pFormatCtx->bit_rate / 8);
1022 
1023  // Frame rate from the container and codec
1024  AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
1025  if (!check_fps) {
1026  info.fps.num = framerate.num;
1027  info.fps.den = framerate.den;
1028  }
1029 
1030  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::UpdateVideoInfo", "info.fps.num", info.fps.num, "info.fps.den", info.fps.den);
1031 
1032  // TODO: remove excessive debug info in the next releases
1033  // The debug info below is just for comparison and troubleshooting on users side during the transition period
1034  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::UpdateVideoInfo (pStream->avg_frame_rate)", "num", pStream->avg_frame_rate.num, "den", pStream->avg_frame_rate.den);
1035 
1036  if (pStream->sample_aspect_ratio.num != 0) {
1037  info.pixel_ratio.num = pStream->sample_aspect_ratio.num;
1038  info.pixel_ratio.den = pStream->sample_aspect_ratio.den;
1039  } else if (AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.num != 0) {
1040  info.pixel_ratio.num = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.num;
1041  info.pixel_ratio.den = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.den;
1042  } else {
1043  info.pixel_ratio.num = 1;
1044  info.pixel_ratio.den = 1;
1045  }
1046  info.pixel_format = AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx);
1047 
1048  // Calculate the DAR (display aspect ratio)
1050 
1051  // Reduce size fraction
1052  size.Reduce();
1053 
1054  // Set the ratio based on the reduced fraction
1055  info.display_ratio.num = size.num;
1056  info.display_ratio.den = size.den;
1057 
1058  // Get scan type and order from codec context/params
1059  if (!check_interlace) {
1060  check_interlace = true;
1061  AVFieldOrder field_order = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->field_order;
1062  switch(field_order) {
1063  case AV_FIELD_PROGRESSIVE:
1064  info.interlaced_frame = false;
1065  break;
1066  case AV_FIELD_TT:
1067  case AV_FIELD_TB:
1068  info.interlaced_frame = true;
1069  info.top_field_first = true;
1070  break;
1071  case AV_FIELD_BT:
1072  case AV_FIELD_BB:
1073  info.interlaced_frame = true;
1074  info.top_field_first = false;
1075  break;
1076  case AV_FIELD_UNKNOWN:
1077  // Check again later?
1078  check_interlace = false;
1079  break;
1080  }
1081  // check_interlace will prevent these checks being repeated,
1082  // unless it was cleared because we got an AV_FIELD_UNKNOWN response.
1083  }
1084 
1085  // Set the video timebase
1086  info.video_timebase.num = pStream->time_base.num;
1087  info.video_timebase.den = pStream->time_base.den;
1088 
1089  // Set the duration in seconds, and video length (# of frames)
1090  record_duration(video_stream_duration_seconds, pStream->duration * info.video_timebase.ToDouble());
1091 
1092  // Check for valid duration (if found)
1093  if (pFormatCtx->duration >= 0) {
1094  // Use the format's duration as another candidate
1095  record_duration(format_duration_seconds, static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
1096  }
1097 
1098  // Calculate duration from filesize and bitrate (if any)
1099  if (info.video_bit_rate > 0 && info.file_size > 0) {
1100  // Estimate from bitrate, total bytes, and framerate
1101  record_duration(inferred_duration_seconds, static_cast<double>(info.file_size) / info.video_bit_rate);
1102  }
1103 
1104  // Certain "image" formats do not have a valid duration
1105  if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1106  pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
1107  // Force an "image" duration
1108  record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1109  info.has_single_image = true;
1110  }
1111  // Static GIFs can have no usable duration; fall back to a small default
1112  if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1113  pFormatCtx && pFormatCtx->iformat && strcmp(pFormatCtx->iformat->name, "gif") == 0) {
1114  record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1115  info.has_single_image = true;
1116  }
1117 
1118  ApplyDurationStrategy();
1119 
1120  // Normalize FFmpeg-decoded still images (e.g. JPG/JPEG) to match image-reader behavior.
1121  // This keeps timing/flags consistent regardless of which reader path was used.
1122  if (!info.has_single_image && audioStream < 0) {
1123  const AVCodecID codec_id = AV_FIND_DECODER_CODEC_ID(pStream);
1124  const bool likely_still_codec =
1125  codec_id == AV_CODEC_ID_MJPEG ||
1126  codec_id == AV_CODEC_ID_PNG ||
1127  codec_id == AV_CODEC_ID_BMP ||
1128  codec_id == AV_CODEC_ID_TIFF ||
1129  codec_id == AV_CODEC_ID_WEBP ||
1130  codec_id == AV_CODEC_ID_JPEG2000;
1131  const bool likely_image_demuxer =
1132  pFormatCtx && pFormatCtx->iformat && pFormatCtx->iformat->name &&
1133  strstr(pFormatCtx->iformat->name, "image2");
1134  const bool single_frame_clip = info.video_length <= 1;
1135 
1136  if (single_frame_clip && (likely_still_codec || likely_image_demuxer)) {
1137  info.has_single_image = true;
1138  record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1139  ApplyDurationStrategy();
1140  }
1141  }
1142 
1143  // Add video metadata (if any)
1144  AVDictionaryEntry *tag = NULL;
1145  while ((tag = av_dict_get(pStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
1146  QString str_key = tag->key;
1147  QString str_value = tag->value;
1148  info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
1149  }
1150 }
1151 
1153  return this->is_duration_known;
1154 }
1155 
1156 std::shared_ptr<Frame> FFmpegReader::GetFrame(int64_t requested_frame) {
1157  last_seek_max_frame = -1;
1158  seek_stagnant_count = 0;
1159  // Check for open reader (or throw exception)
1160  if (!is_open)
1161  throw ReaderClosed("The FFmpegReader is closed. Call Open() before calling this method.", path);
1162 
1163  // Adjust for a requested frame that is too small or too large
1164  if (requested_frame < 1)
1165  requested_frame = 1;
1166  if (requested_frame > info.video_length && is_duration_known)
1167  requested_frame = info.video_length;
1168  if (info.has_video && info.video_length == 0)
1169  // Invalid duration of video file
1170  throw InvalidFile("Could not detect the duration of the video or audio stream.", path);
1171 
1172  // Debug output
1173  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "requested_frame", requested_frame, "last_frame", last_frame);
1174 
1175  // Check the cache for this frame
1176  std::shared_ptr<Frame> frame = final_cache.GetFrame(requested_frame);
1177  if (frame) {
1178  // Debug output
1179  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "returned cached frame", requested_frame);
1180 
1181  // Return the cached frame
1182  return frame;
1183  } else {
1184 
1185  // Prevent async calls to the remainder of this code
1186  const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
1187 
1188  // Check the cache a 2nd time (due to the potential previous lock)
1189  frame = final_cache.GetFrame(requested_frame);
1190  if (frame) {
1191  // Debug output
1192  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "returned cached frame on 2nd look", requested_frame);
1193 
1194  } else {
1195  // Frame is not in cache
1196  // Reset seek count
1197  seek_count = 0;
1198 
1199  // Are we within X frames of the requested frame?
1200  int64_t diff = requested_frame - last_frame;
1201  if (diff >= 1 && diff <= 20) {
1202  // Continue walking the stream
1203  frame = ReadStream(requested_frame);
1204  } else {
1205  // Greater than 30 frames away, or backwards, we need to seek to the nearest key frame
1206  if (enable_seek) {
1207  // Only seek if enabled
1208  Seek(requested_frame);
1209 
1210  } else if (!enable_seek && diff < 0) {
1211  // Start over, since we can't seek, and the requested frame is smaller than our position
1212  // Since we are seeking to frame 1, this actually just closes/re-opens the reader
1213  Seek(1);
1214  }
1215 
1216  // Then continue walking the stream
1217  frame = ReadStream(requested_frame);
1218  }
1219  }
1220  return frame;
1221  }
1222 }
1223 
1224 // Read the stream until we find the requested Frame
1225 std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
1226  // Allocate video frame
1227  bool check_seek = false;
1228  int packet_error = -1;
1229  int64_t no_progress_count = 0;
1230  int64_t prev_packets_read = packet_status.packets_read();
1231  int64_t prev_packets_decoded = packet_status.packets_decoded();
1232  int64_t prev_video_decoded = packet_status.video_decoded;
1233  double prev_video_pts_seconds = video_pts_seconds;
1234 
1235  // Debug output
1236  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame);
1237 
1238  // Loop through the stream until the correct frame is found
1239  while (true) {
1240  // Check if working frames are 'finished'
1241  if (!is_seeking) {
1242  // Check for final frames
1243  CheckWorkingFrames(requested_frame);
1244  }
1245 
1246  // Check if requested 'final' frame is available (and break out of loop if found)
1247  bool is_cache_found = (final_cache.GetFrame(requested_frame) != NULL);
1248  if (is_cache_found) {
1249  break;
1250  }
1251 
1252  if (!hold_packet || !packet) {
1253  // Get the next packet
1254  packet_error = GetNextPacket();
1255  if (packet_error < 0 && !packet) {
1256  // No more packets to be found
1257  packet_status.packets_eof = true;
1258  }
1259  }
1260 
1261  // Debug output
1262  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (GetNextPacket)", "requested_frame", requested_frame,"packets_read", packet_status.packets_read(), "packets_decoded", packet_status.packets_decoded(), "is_seeking", is_seeking);
1263 
1264  // Check the status of a seek (if any)
1265  if (is_seeking) {
1266  check_seek = CheckSeek();
1267  } else {
1268  check_seek = false;
1269  }
1270 
1271  if (check_seek) {
1272  // Packet may become NULL on Close inside Seek if CheckSeek returns false
1273  // Jump to the next iteration of this loop
1274  continue;
1275  }
1276 
1277  // Video packet
1278  if ((info.has_video && packet && packet->stream_index == videoStream) ||
1279  (info.has_video && packet_status.video_decoded < packet_status.video_read) ||
1280  (info.has_video && !packet && !packet_status.video_eof)) {
1281  // Process Video Packet
1282  ProcessVideoPacket(requested_frame);
1283  }
1284  // Audio packet
1285  if ((info.has_audio && packet && packet->stream_index == audioStream) ||
1286  (info.has_audio && !packet && packet_status.audio_decoded < packet_status.audio_read) ||
1287  (info.has_audio && !packet && !packet_status.audio_eof)) {
1288  // Process Audio Packet
1289  ProcessAudioPacket(requested_frame);
1290  }
1291 
1292  // Remove unused packets (sometimes we purposely ignore video or audio packets,
1293  // if the has_video or has_audio properties are manually overridden)
1294  if ((!info.has_video && packet && packet->stream_index == videoStream) ||
1295  (!info.has_audio && packet && packet->stream_index == audioStream)) {
1296  // Keep track of deleted packet counts
1297  if (packet->stream_index == videoStream) {
1298  packet_status.video_decoded++;
1299  } else if (packet->stream_index == audioStream) {
1300  packet_status.audio_decoded++;
1301  }
1302 
1303  // Remove unused packets (sometimes we purposely ignore video or audio packets,
1304  // if the has_video or has_audio properties are manually overridden)
1305  RemoveAVPacket(packet);
1306  packet = NULL;
1307  }
1308 
1309  // Determine end-of-stream (waiting until final decoder threads finish)
1310  // Force end-of-stream in some situations
1311  packet_status.end_of_file = packet_status.packets_eof && packet_status.video_eof && packet_status.audio_eof;
1312  if ((packet_status.packets_eof && packet_status.packets_read() == packet_status.packets_decoded()) || packet_status.end_of_file) {
1313  // Force EOF (end of file) variables to true, if decoder does not support EOF detection.
1314  // If we have no more packets, and all known packets have been decoded
1315  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (force EOF)", "packets_read", packet_status.packets_read(), "packets_decoded", packet_status.packets_decoded(), "packets_eof", packet_status.packets_eof, "video_eof", packet_status.video_eof, "audio_eof", packet_status.audio_eof, "end_of_file", packet_status.end_of_file);
1316  if (!packet_status.video_eof) {
1317  packet_status.video_eof = true;
1318  }
1319  if (!packet_status.audio_eof) {
1320  packet_status.audio_eof = true;
1321  }
1322  packet_status.end_of_file = true;
1323  break;
1324  }
1325 
1326  // Detect decoder stalls with no progress at EOF and force completion so
1327  // missing frames can be finalized from prior image data.
1328  const bool has_progress =
1329  (packet_status.packets_read() != prev_packets_read) ||
1330  (packet_status.packets_decoded() != prev_packets_decoded) ||
1331  (packet_status.video_decoded != prev_video_decoded) ||
1332  (video_pts_seconds != prev_video_pts_seconds);
1333 
1334  if (has_progress) {
1335  no_progress_count = 0;
1336  } else {
1337  no_progress_count++;
1338  if (no_progress_count >= 2000
1339  && packet_status.packets_eof
1340  && !packet
1341  && !hold_packet) {
1342  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (force EOF after stall)",
1343  "requested_frame", requested_frame,
1344  "no_progress_count", no_progress_count,
1345  "packets_read", packet_status.packets_read(),
1346  "packets_decoded", packet_status.packets_decoded(),
1347  "video_decoded", packet_status.video_decoded,
1348  "audio_decoded", packet_status.audio_decoded);
1349  packet_status.video_eof = true;
1350  packet_status.audio_eof = true;
1351  packet_status.end_of_file = true;
1352  break;
1353  }
1354  }
1355  prev_packets_read = packet_status.packets_read();
1356  prev_packets_decoded = packet_status.packets_decoded();
1357  prev_video_decoded = packet_status.video_decoded;
1358  prev_video_pts_seconds = video_pts_seconds;
1359  } // end while
1360 
1361  // Debug output
1362  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (Completed)",
1363  "packets_read", packet_status.packets_read(),
1364  "packets_decoded", packet_status.packets_decoded(),
1365  "end_of_file", packet_status.end_of_file,
1366  "largest_frame_processed", largest_frame_processed,
1367  "Working Cache Count", working_cache.Count());
1368 
1369  // Have we reached end-of-stream (or the final frame)?
1370  if (!packet_status.end_of_file && requested_frame >= info.video_length) {
1371  // Force end-of-stream
1372  packet_status.end_of_file = true;
1373  }
1374  if (packet_status.end_of_file) {
1375  // Mark any other working frames as 'finished'
1376  CheckWorkingFrames(requested_frame);
1377  }
1378 
1379  // Return requested frame (if found)
1380  std::shared_ptr<Frame> frame = final_cache.GetFrame(requested_frame);
1381  if (frame)
1382  // Return prepared frame
1383  return frame;
1384  else {
1385 
1386  // Check if largest frame is still cached
1387  frame = final_cache.GetFrame(largest_frame_processed);
1388  int samples_in_frame = Frame::GetSamplesPerFrame(requested_frame, info.fps,
1390  if (frame) {
1391  // Copy and return the largest processed frame (assuming it was the last in the video file)
1392  std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1393 
1394  // Use solid color (if no image data found)
1395  if (!frame->has_image_data) {
1396  // Use solid black frame if no image data available
1397  f->AddColor(info.width, info.height, "#000");
1398  }
1399  // Silence audio data (if any), since we are repeating the last frame
1400  frame->AddAudioSilence(samples_in_frame);
1401 
1402  return frame;
1403  } else {
1404  // The largest processed frame is no longer in cache. Prefer the most recent
1405  // finalized image first, then decoded image, to avoid black flashes.
1406  std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1407  if (last_final_video_frame && last_final_video_frame->has_image_data
1408  && last_final_video_frame->number <= requested_frame) {
1409  f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
1410  } else if (last_video_frame && last_video_frame->has_image_data
1411  && last_video_frame->number <= requested_frame) {
1412  f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
1413  } else {
1414  f->AddColor(info.width, info.height, "#000");
1415  }
1416  f->AddAudioSilence(samples_in_frame);
1417  return f;
1418  }
1419  }
1420 
1421 }
1422 
1423 // Get the next packet (if any)
1424 int FFmpegReader::GetNextPacket() {
1425  int found_packet = 0;
1426  AVPacket *next_packet;
1427  next_packet = new AVPacket();
1428  found_packet = av_read_frame(pFormatCtx, next_packet);
1429 
1430  if (packet) {
1431  // Remove previous packet before getting next one
1432  RemoveAVPacket(packet);
1433  packet = NULL;
1434  }
1435  if (found_packet >= 0) {
1436  // Update current packet pointer
1437  packet = next_packet;
1438 
1439  // Keep track of packet stats
1440  if (packet->stream_index == videoStream) {
1441  packet_status.video_read++;
1442  } else if (packet->stream_index == audioStream) {
1443  packet_status.audio_read++;
1444  }
1445  } else {
1446  // No more packets found
1447  delete next_packet;
1448  packet = NULL;
1449  }
1450  // Return if packet was found (or error number)
1451  return found_packet;
1452 }
1453 
1454 // Get an AVFrame (if any)
1455 bool FFmpegReader::GetAVFrame() {
1456  int frameFinished = 0;
1457 
1458  // Decode video frame
1459  AVFrame *next_frame = AV_ALLOCATE_FRAME();
1460 
1461 #if IS_FFMPEG_3_2
1462  int send_packet_err = 0;
1463  int64_t send_packet_pts = 0;
1464  if ((packet && packet->stream_index == videoStream) || !packet) {
1465  send_packet_err = avcodec_send_packet(pCodecCtx, packet);
1466 
1467  if (packet && send_packet_err >= 0) {
1468  send_packet_pts = GetPacketPTS();
1469  hold_packet = false;
1470  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet succeeded)", "send_packet_err", send_packet_err, "send_packet_pts", send_packet_pts);
1471  }
1472  }
1473 
1474  #if USE_HW_ACCEL
1475  // Get the format from the variables set in get_hw_dec_format
1476  hw_de_av_pix_fmt = hw_de_av_pix_fmt_global;
1477  hw_de_av_device_type = hw_de_av_device_type_global;
1478  #endif // USE_HW_ACCEL
1479  if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1480  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) + "])", "send_packet_err", send_packet_err, "send_packet_pts", send_packet_pts);
1481  if (send_packet_err == AVERROR(EAGAIN)) {
1482  hold_packet = true;
1483  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()", "send_packet_pts", send_packet_pts);
1484  }
1485  if (send_packet_err == AVERROR(EINVAL)) {
1486  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush", "send_packet_pts", send_packet_pts);
1487  }
1488  if (send_packet_err == AVERROR(ENOMEM)) {
1489  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors", "send_packet_pts", send_packet_pts);
1490  }
1491  }
1492 
1493  // Always try and receive a packet, if not EOF.
1494  // Even if the above avcodec_send_packet failed to send,
1495  // we might still need to receive a packet.
1496  int receive_frame_err = 0;
1497  AVFrame *next_frame2;
1498 #if USE_HW_ACCEL
1499  if (hw_de_on && hw_de_supported) {
1500  next_frame2 = AV_ALLOCATE_FRAME();
1501  }
1502  else
1503 #endif // USE_HW_ACCEL
1504  {
1505  next_frame2 = next_frame;
1506  }
1507  pFrame = AV_ALLOCATE_FRAME();
1508  while (receive_frame_err >= 0) {
1509  receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1510 
1511  if (receive_frame_err != 0) {
1512  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])", "receive_frame_err", receive_frame_err, "send_packet_pts", send_packet_pts);
1513 
1514  if (receive_frame_err == AVERROR_EOF) {
1516  "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)", "send_packet_pts", send_packet_pts);
1517  avcodec_flush_buffers(pCodecCtx);
1518  packet_status.video_eof = true;
1519  }
1520  if (receive_frame_err == AVERROR(EINVAL)) {
1522  "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)", "send_packet_pts", send_packet_pts);
1523  avcodec_flush_buffers(pCodecCtx);
1524  }
1525  if (receive_frame_err == AVERROR(EAGAIN)) {
1527  "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)", "send_packet_pts", send_packet_pts);
1528  }
1529  if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1531  "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)", "send_packet_pts", send_packet_pts);
1532  }
1533 
1534  // Break out of decoding loop
1535  // Nothing ready for decoding yet
1536  break;
1537  }
1538 
1539 #if USE_HW_ACCEL
1540  if (hw_de_on && hw_de_supported) {
1541  int err;
1542  if (next_frame2->format == hw_de_av_pix_fmt) {
1543  next_frame->format = AV_PIX_FMT_YUV420P;
1544  if ((err = av_hwframe_transfer_data(next_frame,next_frame2,0)) < 0) {
1545  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (Failed to transfer data to output frame)", "hw_de_on", hw_de_on);
1546  }
1547  if ((err = av_frame_copy_props(next_frame,next_frame2)) < 0) {
1548  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (Failed to copy props to output frame)", "hw_de_on", hw_de_on);
1549  }
1550  }
1551  }
1552  else
1553 #endif // USE_HW_ACCEL
1554  { // No hardware acceleration used -> no copy from GPU memory needed
1555  next_frame = next_frame2;
1556  }
1557 
1558  // TODO also handle possible further frames
1559  // Use only the first frame like avcodec_decode_video2
1560  frameFinished = 1;
1561  packet_status.video_decoded++;
1562 
1563  // Allocate image (align 32 for simd)
1564  if (AV_ALLOCATE_IMAGE(pFrame, (AVPixelFormat)(pStream->codecpar->format), info.width, info.height) <= 0) {
1565  throw OutOfMemory("Failed to allocate image buffer", path);
1566  }
1567  av_image_copy(pFrame->data, pFrame->linesize, (const uint8_t**)next_frame->data, next_frame->linesize,
1568  (AVPixelFormat)(pStream->codecpar->format), info.width, info.height);
1569 
1570  // Get display PTS from video frame, often different than packet->pts.
1571  // Sending packets to the decoder (i.e. packet->pts) is async,
1572  // and retrieving packets from the decoder (frame->pts) is async. In most decoders
1573  // sending and retrieving are separated by multiple calls to this method.
1574  if (next_frame->pts != AV_NOPTS_VALUE) {
1575  // This is the current decoded frame (and should be the pts used) for
1576  // processing this data
1577  video_pts = next_frame->pts;
1578  } else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
1579  // Some videos only set this timestamp (fallback)
1580  video_pts = next_frame->pkt_dts;
1581  }
1582 
1584  "FFmpegReader::GetAVFrame (Successful frame received)", "video_pts", video_pts, "send_packet_pts", send_packet_pts);
1585 
1586  // break out of loop after each successful image returned
1587  break;
1588  }
1589 #if USE_HW_ACCEL
1590  if (hw_de_on && hw_de_supported) {
1591  AV_FREE_FRAME(&next_frame2);
1592  }
1593  #endif // USE_HW_ACCEL
1594 #else
1595  avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1596 
1597  // always allocate pFrame (because we do that in the ffmpeg >= 3.2 as well); it will always be freed later
1598  pFrame = AV_ALLOCATE_FRAME();
1599 
1600  // is frame finished
1601  if (frameFinished) {
1602  // AVFrames are clobbered on the each call to avcodec_decode_video, so we
1603  // must make a copy of the image data before this method is called again.
1604  avpicture_alloc((AVPicture *) pFrame, pCodecCtx->pix_fmt, info.width, info.height);
1605  av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt, info.width,
1606  info.height);
1607  }
1608 #endif // IS_FFMPEG_3_2
1609 
1610  // deallocate the frame
1611  AV_FREE_FRAME(&next_frame);
1612 
1613  // Did we get a video frame?
1614  return frameFinished;
1615 }
1616 
1617 // Check the current seek position and determine if we need to seek again
1618 bool FFmpegReader::CheckSeek() {
1619  // Are we seeking for a specific frame?
1620  if (is_seeking) {
1621  const int64_t kSeekRetryMax = 5;
1622  const int kSeekStagnantMax = 2;
1623 
1624  // Determine if both an audio and video packet have been decoded since the seek happened.
1625  // If not, allow the ReadStream method to keep looping
1626  if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1627  return false;
1628 
1629  // Check for both streams
1630  if ((info.has_video && !seek_video_frame_found) || (info.has_audio && !seek_audio_frame_found))
1631  return false;
1632 
1633  // Determine max seeked frame
1634  int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
1635  // Track stagnant seek results (no progress between retries)
1636  if (max_seeked_frame == last_seek_max_frame) {
1637  seek_stagnant_count++;
1638  } else {
1639  last_seek_max_frame = max_seeked_frame;
1640  seek_stagnant_count = 0;
1641  }
1642 
1643  // determine if we are "before" the requested frame
1644  if (max_seeked_frame >= seeking_frame) {
1645  // SEEKED TOO FAR
1646  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckSeek (Too far, seek again)",
1647  "is_video_seek", is_video_seek,
1648  "max_seeked_frame", max_seeked_frame,
1649  "seeking_frame", seeking_frame,
1650  "seeking_pts", seeking_pts,
1651  "seek_video_frame_found", seek_video_frame_found,
1652  "seek_audio_frame_found", seek_audio_frame_found);
1653 
1654  // Seek again... to the nearest Keyframe
1655  if (seek_count < kSeekRetryMax) {
1656  Seek(seeking_frame - (10 * seek_count * seek_count));
1657  } else if (seek_stagnant_count >= kSeekStagnantMax) {
1658  // Stagnant seek: force a much earlier target and keep seeking.
1659  Seek(seeking_frame - (10 * kSeekRetryMax * kSeekRetryMax));
1660  } else {
1661  // Retry budget exhausted: keep seeking from a conservative offset.
1662  Seek(seeking_frame - (10 * seek_count * seek_count));
1663  }
1664  } else {
1665  // SEEK WORKED
1666  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckSeek (Successful)",
1667  "is_video_seek", is_video_seek,
1668  "packet->pts", GetPacketPTS(),
1669  "seeking_pts", seeking_pts,
1670  "seeking_frame", seeking_frame,
1671  "seek_video_frame_found", seek_video_frame_found,
1672  "seek_audio_frame_found", seek_audio_frame_found);
1673 
1674  // Seek worked, and we are "before" the requested frame
1675  is_seeking = false;
1676  seeking_frame = 0;
1677  seeking_pts = -1;
1678  }
1679  }
1680 
1681  // return the pts to seek to (if any)
1682  return is_seeking;
1683 }
1684 
1685 // Process a video packet
1686 void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1687  // Get the AVFrame from the current packet
1688  // This sets the video_pts to the correct timestamp
1689  int frame_finished = GetAVFrame();
1690 
1691  // Check if the AVFrame is finished and set it
1692  if (!frame_finished) {
1693  // No AVFrame decoded yet, bail out
1694  if (pFrame) {
1695  RemoveAVFrame(pFrame);
1696  }
1697  return;
1698  }
1699 
1700  // Calculate current frame #
1701  int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
1702 
1703  // Track 1st video packet after a successful seek
1704  if (!seek_video_frame_found && is_seeking)
1705  seek_video_frame_found = current_frame;
1706 
1707  // Create or get the existing frame object. Requested frame needs to be created
1708  // in working_cache at least once. Seek can clear the working_cache, so we must
1709  // add the requested frame back to the working_cache here. If it already exists,
1710  // it will be moved to the top of the working_cache.
1711  working_cache.Add(CreateFrame(requested_frame));
1712 
1713  // Debug output
1714  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessVideoPacket (Before)", "requested_frame", requested_frame, "current_frame", current_frame);
1715 
1716  // Init some things local (for OpenMP)
1717  PixelFormat pix_fmt = AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx);
1718  int height = info.height;
1719  int width = info.width;
1720  int64_t video_length = info.video_length;
1721 
1722  // Create or reuse a RGB Frame (since most videos are not in RGB, we must convert it)
1723  AVFrame *pFrameRGB = pFrameRGB_cached;
1724  if (!pFrameRGB) {
1725  pFrameRGB = AV_ALLOCATE_FRAME();
1726  if (pFrameRGB == nullptr)
1727  throw OutOfMemory("Failed to allocate frame buffer", path);
1728  pFrameRGB_cached = pFrameRGB;
1729  }
1730  AV_RESET_FRAME(pFrameRGB);
1731  uint8_t *buffer = nullptr;
1732 
1733  // Determine the max size of this source image (based on the timeline's size, the scaling mode,
1734  // and the scaling keyframes). This is a performance improvement, to keep the images as small as possible,
1735  // without losing quality. NOTE: We cannot go smaller than the timeline itself, or the add_layer timeline
1736  // method will scale it back to timeline size before scaling it smaller again. This needs to be fixed in
1737  // the future.
1738  int max_width = info.width;
1739  int max_height = info.height;
1740 
1741  Clip *parent = static_cast<Clip *>(ParentClip());
1742  if (parent) {
1743  if (parent->ParentTimeline()) {
1744  // Set max width/height based on parent clip's timeline (if attached to a timeline)
1745  max_width = parent->ParentTimeline()->preview_width;
1746  max_height = parent->ParentTimeline()->preview_height;
1747  }
1748  if (parent->scale == SCALE_FIT || parent->scale == SCALE_STRETCH) {
1749  // Best fit or Stretch scaling (based on max timeline size * scaling keyframes)
1750  float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
1751  float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
1752  max_width = std::max(float(max_width), max_width * max_scale_x);
1753  max_height = std::max(float(max_height), max_height * max_scale_y);
1754 
1755  } else if (parent->scale == SCALE_CROP) {
1756  // Cropping scale mode (based on max timeline size * cropped size * scaling keyframes)
1757  float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
1758  float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
1759  QSize width_size(max_width * max_scale_x,
1760  round(max_width / (float(info.width) / float(info.height))));
1761  QSize height_size(round(max_height / (float(info.height) / float(info.width))),
1762  max_height * max_scale_y);
1763  // respect aspect ratio
1764  if (width_size.width() >= max_width && width_size.height() >= max_height) {
1765  max_width = std::max(max_width, width_size.width());
1766  max_height = std::max(max_height, width_size.height());
1767  } else {
1768  max_width = std::max(max_width, height_size.width());
1769  max_height = std::max(max_height, height_size.height());
1770  }
1771 
1772  } else {
1773  // Scale video to equivalent unscaled size
1774  // Since the preview window can change sizes, we want to always
1775  // scale against the ratio of original video size to timeline size
1776  float preview_ratio = 1.0;
1777  if (parent->ParentTimeline()) {
1778  Timeline *t = (Timeline *) parent->ParentTimeline();
1779  preview_ratio = t->preview_width / float(t->info.width);
1780  }
1781  float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
1782  float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
1783  max_width = info.width * max_scale_x * preview_ratio;
1784  max_height = info.height * max_scale_y * preview_ratio;
1785  }
1786 
1787  // If a crop effect is resizing the image, request enough pixels to preserve detail
1788  ApplyCropResizeScale(parent, info.width, info.height, max_width, max_height);
1789  }
1790 
1791  // Determine if image needs to be scaled (for performance reasons)
1792  int original_height = height;
1793  if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1794  // Override width and height (but maintain aspect ratio)
1795  float ratio = float(width) / float(height);
1796  int possible_width = round(max_height * ratio);
1797  int possible_height = round(max_width / ratio);
1798 
1799  if (possible_width <= max_width) {
1800  // use calculated width, and max_height
1801  width = possible_width;
1802  height = max_height;
1803  } else {
1804  // use max_width, and calculated height
1805  width = max_width;
1806  height = possible_height;
1807  }
1808  }
1809 
1810  // Determine required buffer size and allocate buffer
1811  const int bytes_per_pixel = 4;
1812  int raw_buffer_size = (width * height * bytes_per_pixel) + 128;
1813 
1814  // Aligned memory allocation (for speed)
1815  constexpr size_t ALIGNMENT = 32; // AVX2
1816  int buffer_size = ((raw_buffer_size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
1817  buffer = (unsigned char*) aligned_malloc(buffer_size, ALIGNMENT);
1818 
1819  // Copy picture data from one AVFrame (or AVPicture) to another one.
1820  AV_COPY_PICTURE_DATA(pFrameRGB, buffer, PIX_FMT_RGBA, width, height);
1821 
1822  int scale_mode = SWS_FAST_BILINEAR;
1823  if (openshot::Settings::Instance()->HIGH_QUALITY_SCALING) {
1824  scale_mode = SWS_BICUBIC;
1825  }
1826  img_convert_ctx = sws_getCachedContext(img_convert_ctx, info.width, info.height, AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx), width, height, PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
1827  if (!img_convert_ctx)
1828  throw OutOfMemory("Failed to initialize sws context", path);
1829 
1830  // Resize / Convert to RGB
1831  sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
1832  original_height, pFrameRGB->data, pFrameRGB->linesize);
1833 
1834  // Create or get the existing frame object
1835  std::shared_ptr<Frame> f = CreateFrame(current_frame);
1836 
1837  // Add Image data to frame
1838  if (!ffmpeg_has_alpha(AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx))) {
1839  // Add image with no alpha channel, Speed optimization
1840  f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
1841  } else {
1842  // Add image with alpha channel (this will be converted to premultipled when needed, but is slower)
1843  f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
1844  }
1845 
1846  // Update working cache
1847  working_cache.Add(f);
1848 
1849  // Keep track of last last_video_frame
1850  last_video_frame = f;
1851 
1852  // Free the RGB image
1853  AV_RESET_FRAME(pFrameRGB);
1854 
1855  // Remove frame and packet
1856  RemoveAVFrame(pFrame);
1857 
1858  // Get video PTS in seconds
1859  video_pts_seconds = (double(video_pts) * info.video_timebase.ToDouble()) + pts_offset_seconds;
1860 
1861  // Debug output
1862  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessVideoPacket (After)", "requested_frame", requested_frame, "current_frame", current_frame, "f->number", f->number, "video_pts_seconds", video_pts_seconds);
1863 }
1864 
1865 // Process an audio packet
1866 void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
1867  AudioLocation location;
1868  // Calculate location of current audio packet
1869  if (packet && packet->pts != AV_NOPTS_VALUE) {
1870  // Determine related video frame and starting sample # from audio PTS
1871  location = GetAudioPTSLocation(packet->pts);
1872 
1873  // Track 1st audio packet after a successful seek
1874  if (!seek_audio_frame_found && is_seeking)
1875  seek_audio_frame_found = location.frame;
1876  }
1877 
1878  // Create or get the existing frame object. Requested frame needs to be created
1879  // in working_cache at least once. Seek can clear the working_cache, so we must
1880  // add the requested frame back to the working_cache here. If it already exists,
1881  // it will be moved to the top of the working_cache.
1882  working_cache.Add(CreateFrame(requested_frame));
1883 
1884  // Debug output
1885  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (Before)",
1886  "requested_frame", requested_frame,
1887  "target_frame", location.frame,
1888  "starting_sample", location.sample_start);
1889 
1890  // Init an AVFrame to hold the decoded audio samples
1891  int frame_finished = 0;
1892  AVFrame *audio_frame = AV_ALLOCATE_FRAME();
1893  AV_RESET_FRAME(audio_frame);
1894 
1895  int packet_samples = 0;
1896  int data_size = 0;
1897 
1898 #if IS_FFMPEG_3_2
1899  int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
1900  if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1901  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (Packet not sent)");
1902  }
1903  else {
1904  int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
1905  if (receive_frame_err >= 0) {
1906  frame_finished = 1;
1907  }
1908  if (receive_frame_err == AVERROR_EOF) {
1909  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (EOF detected from decoder)");
1910  packet_status.audio_eof = true;
1911  }
1912  if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
1913  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (invalid frame received or EOF from decoder)");
1914  avcodec_flush_buffers(aCodecCtx);
1915  }
1916  if (receive_frame_err != 0) {
1917  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (frame not ready yet from decoder)");
1918  }
1919  }
1920 #else
1921  int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
1922 #endif
1923 
1924  if (frame_finished) {
1925  packet_status.audio_decoded++;
1926 
1927  // This can be different than the current packet, so we need to look
1928  // at the current AVFrame from the audio decoder. This timestamp should
1929  // be used for the remainder of this function
1930  audio_pts = audio_frame->pts;
1931 
1932  // Determine related video frame and starting sample # from audio PTS
1933  location = GetAudioPTSLocation(audio_pts);
1934 
1935  // determine how many samples were decoded
1936  int plane_size = -1;
1937 #if HAVE_CH_LAYOUT
1938  int nb_channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
1939 #else
1940  int nb_channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
1941 #endif
1942  data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
1943  audio_frame->nb_samples, (AVSampleFormat) (AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx)), 1);
1944 
1945  // Calculate total number of samples
1946  packet_samples = audio_frame->nb_samples * nb_channels;
1947  } else {
1948  if (audio_frame) {
1949  // Free audio frame
1950  AV_FREE_FRAME(&audio_frame);
1951  }
1952  }
1953 
1954  // Estimate the # of samples and the end of this packet's location (to prevent GAPS for the next timestamp)
1955  int pts_remaining_samples = packet_samples / info.channels; // Adjust for zero based array
1956 
1957  // Bail if no samples found
1958  if (pts_remaining_samples == 0) {
1959  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (No samples, bailing)",
1960  "packet_samples", packet_samples,
1961  "info.channels", info.channels,
1962  "pts_remaining_samples", pts_remaining_samples);
1963  return;
1964  }
1965 
1966  while (pts_remaining_samples) {
1967  // Get Samples per frame (for this frame number)
1968  int samples_per_frame = Frame::GetSamplesPerFrame(previous_packet_location.frame, info.fps, info.sample_rate, info.channels);
1969 
1970  // Calculate # of samples to add to this frame
1971  int samples = samples_per_frame - previous_packet_location.sample_start;
1972  if (samples > pts_remaining_samples)
1973  samples = pts_remaining_samples;
1974 
1975  // Decrement remaining samples
1976  pts_remaining_samples -= samples;
1977 
1978  if (pts_remaining_samples > 0) {
1979  // next frame
1980  previous_packet_location.frame++;
1981  previous_packet_location.sample_start = 0;
1982  } else {
1983  // Increment sample start
1984  previous_packet_location.sample_start += samples;
1985  }
1986  }
1987 
1988  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (ReSample)",
1989  "packet_samples", packet_samples,
1990  "info.channels", info.channels,
1991  "info.sample_rate", info.sample_rate,
1992  "aCodecCtx->sample_fmt", AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx));
1993 
1994  // Create output frame
1995  AVFrame *audio_converted = AV_ALLOCATE_FRAME();
1996  AV_RESET_FRAME(audio_converted);
1997  audio_converted->nb_samples = audio_frame->nb_samples;
1998  av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
1999 
2000  SWRCONTEXT *avr = avr_ctx;
2001  // setup resample context if needed
2002  if (!avr) {
2003  avr = SWR_ALLOC();
2004 #if HAVE_CH_LAYOUT
2005  av_opt_set_chlayout(avr, "in_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
2006  av_opt_set_chlayout(avr, "out_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
2007 #else
2008  av_opt_set_int(avr, "in_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
2009  av_opt_set_int(avr, "out_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
2010  av_opt_set_int(avr, "in_channels", info.channels, 0);
2011  av_opt_set_int(avr, "out_channels", info.channels, 0);
2012 #endif
2013  av_opt_set_int(avr, "in_sample_fmt", AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx), 0);
2014  av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
2015  av_opt_set_int(avr, "in_sample_rate", info.sample_rate, 0);
2016  av_opt_set_int(avr, "out_sample_rate", info.sample_rate, 0);
2017  SWR_INIT(avr);
2018  avr_ctx = avr;
2019  }
2020 
2021  // Convert audio samples
2022  int nb_samples = SWR_CONVERT(avr, // audio resample context
2023  audio_converted->data, // output data pointers
2024  audio_converted->linesize[0], // output plane size, in bytes. (0 if unknown)
2025  audio_converted->nb_samples, // maximum number of samples that the output buffer can hold
2026  audio_frame->data, // input data pointers
2027  audio_frame->linesize[0], // input plane size, in bytes (0 if unknown)
2028  audio_frame->nb_samples); // number of input samples to convert
2029 
2030 
2031  int64_t starting_frame_number = -1;
2032  for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {
2033  // Array of floats (to hold samples for each channel)
2034  starting_frame_number = location.frame;
2035  int channel_buffer_size = nb_samples;
2036  auto *channel_buffer = (float *) (audio_converted->data[channel_filter]);
2037 
2038  // Loop through samples, and add them to the correct frames
2039  int start = location.sample_start;
2040  int remaining_samples = channel_buffer_size;
2041  while (remaining_samples > 0) {
2042  // Get Samples per frame (for this frame number)
2043  int samples_per_frame = Frame::GetSamplesPerFrame(starting_frame_number, info.fps, info.sample_rate, info.channels);
2044 
2045  // Calculate # of samples to add to this frame
2046  int samples = std::fmin(samples_per_frame - start, remaining_samples);
2047 
2048  // Create or get the existing frame object
2049  std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
2050 
2051  // Add samples for current channel to the frame.
2052  f->AddAudio(true, channel_filter, start, channel_buffer, samples, 1.0f);
2053 
2054  // Debug output
2055  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (f->AddAudio)",
2056  "frame", starting_frame_number,
2057  "start", start,
2058  "samples", samples,
2059  "channel", channel_filter,
2060  "samples_per_frame", samples_per_frame);
2061 
2062  // Add or update cache
2063  working_cache.Add(f);
2064 
2065  // Decrement remaining samples
2066  remaining_samples -= samples;
2067 
2068  // Increment buffer (to next set of samples)
2069  if (remaining_samples > 0)
2070  channel_buffer += samples;
2071 
2072  // Increment frame number
2073  starting_frame_number++;
2074 
2075  // Reset starting sample #
2076  start = 0;
2077  }
2078  }
2079 
2080  // Free AVFrames
2081  av_free(audio_converted->data[0]);
2082  AV_FREE_FRAME(&audio_converted);
2083  AV_FREE_FRAME(&audio_frame);
2084 
2085  // Get audio PTS in seconds
2086  audio_pts_seconds = (double(audio_pts) * info.audio_timebase.ToDouble()) + pts_offset_seconds;
2087 
2088  // Debug output
2089  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (After)",
2090  "requested_frame", requested_frame,
2091  "starting_frame", location.frame,
2092  "end_frame", starting_frame_number - 1,
2093  "audio_pts_seconds", audio_pts_seconds);
2094 
2095 }
2096 
2097 
2098 // Seek to a specific frame. This is not always frame accurate, it's more of an estimation on many codecs.
2099 void FFmpegReader::Seek(int64_t requested_frame) {
2100  // Adjust for a requested frame that is too small or too large
2101  if (requested_frame < 1)
2102  requested_frame = 1;
2103  if (requested_frame > info.video_length)
2104  requested_frame = info.video_length;
2105  if (requested_frame > largest_frame_processed && packet_status.end_of_file) {
2106  // Not possible to search past largest_frame once EOF is reached (no more packets)
2107  return;
2108  }
2109 
2110  // Debug output
2111  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::Seek",
2112  "requested_frame", requested_frame,
2113  "seek_count", seek_count,
2114  "last_frame", last_frame);
2115 
2116  // Clear working cache (since we are seeking to another location in the file)
2117  working_cache.Clear();
2118 
2119  // Reset the last frame variable
2120  video_pts = 0.0;
2121  video_pts_seconds = NO_PTS_OFFSET;
2122  audio_pts = 0.0;
2123  audio_pts_seconds = NO_PTS_OFFSET;
2124  hold_packet = false;
2125  last_frame = 0;
2126  current_video_frame = 0;
2127  largest_frame_processed = 0;
2128  last_final_video_frame.reset();
2129  bool has_audio_override = info.has_audio;
2130  bool has_video_override = info.has_video;
2131 
2132  // Init end-of-file detection variables
2133  packet_status.reset(false);
2134 
2135  // Increment seek count
2136  seek_count++;
2137 
2138  // If seeking near frame 1, we need to close and re-open the file (this is more reliable than seeking)
2139  int buffer_amount = 12;
2140  if (requested_frame - buffer_amount < 20) {
2141  // prevent Open() from seeking again
2142  is_seeking = true;
2143 
2144  // Close and re-open file (basically seeking to frame 1)
2145  Close();
2146  Open();
2147 
2148  // Update overrides (since closing and re-opening might update these)
2149  info.has_audio = has_audio_override;
2150  info.has_video = has_video_override;
2151 
2152  // Not actually seeking, so clear these flags
2153  is_seeking = false;
2154  if (seek_count == 1) {
2155  // Don't redefine this on multiple seek attempts for a specific frame
2156  seeking_frame = 1;
2157  seeking_pts = ConvertFrameToVideoPTS(1);
2158  }
2159  seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
2160  seek_video_frame_found = 0; // used to detect which frames to throw away after a seek
2161 
2162  } else {
2163  // Seek to nearest key-frame (aka, i-frame)
2164  bool seek_worked = false;
2165  int64_t seek_target = 0;
2166 
2167  // Seek video stream (if any), except album arts
2168  if (!seek_worked && info.has_video && !HasAlbumArt()) {
2169  seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
2170  if (av_seek_frame(pFormatCtx, info.video_stream_index, seek_target, AVSEEK_FLAG_BACKWARD) < 0) {
2171  ZmqLogger::Instance()->Log(std::string(pFormatCtx->AV_FILENAME) + ": error while seeking video stream");
2172  } else {
2173  // VIDEO SEEK
2174  is_video_seek = true;
2175  seek_worked = true;
2176  }
2177  }
2178 
2179  // Seek audio stream (if not already seeked... and if an audio stream is found)
2180  if (!seek_worked && info.has_audio) {
2181  seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
2182  if (av_seek_frame(pFormatCtx, info.audio_stream_index, seek_target, AVSEEK_FLAG_BACKWARD) < 0) {
2183  ZmqLogger::Instance()->Log(std::string(pFormatCtx->AV_FILENAME) + ": error while seeking audio stream");
2184  } else {
2185  // AUDIO SEEK
2186  is_video_seek = false;
2187  seek_worked = true;
2188  }
2189  }
2190 
2191  // Was the seek successful?
2192  if (seek_worked) {
2193  // Flush audio buffer
2194  if (info.has_audio)
2195  avcodec_flush_buffers(aCodecCtx);
2196 
2197  // Flush video buffer
2198  if (info.has_video)
2199  avcodec_flush_buffers(pCodecCtx);
2200 
2201  // Reset previous audio location to zero
2202  previous_packet_location.frame = -1;
2203  previous_packet_location.sample_start = 0;
2204 
2205  // init seek flags
2206  is_seeking = true;
2207  if (seek_count == 1) {
2208  // Don't redefine this on multiple seek attempts for a specific frame
2209  seeking_pts = seek_target;
2210  seeking_frame = requested_frame;
2211  }
2212  seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
2213  seek_video_frame_found = 0; // used to detect which frames to throw away after a seek
2214 
2215  } else {
2216  // seek failed
2217  seeking_pts = 0;
2218  seeking_frame = 0;
2219 
2220  // prevent Open() from seeking again
2221  is_seeking = true;
2222 
2223  // Close and re-open file (basically seeking to frame 1)
2224  Close();
2225  Open();
2226 
2227  // Not actually seeking, so clear these flags
2228  is_seeking = false;
2229 
2230  // disable seeking for this reader (since it failed)
2231  enable_seek = false;
2232 
2233  // Update overrides (since closing and re-opening might update these)
2234  info.has_audio = has_audio_override;
2235  info.has_video = has_video_override;
2236  }
2237  }
2238 }
2239 
2240 // Get the PTS for the current video packet
2241 int64_t FFmpegReader::GetPacketPTS() {
2242  if (packet) {
2243  int64_t current_pts = packet->pts;
2244  if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
2245  current_pts = packet->dts;
2246 
2247  // Return adjusted PTS
2248  return current_pts;
2249  } else {
2250  // No packet, return NO PTS
2251  return AV_NOPTS_VALUE;
2252  }
2253 }
2254 
2255 // Update PTS Offset (if any)
2256 void FFmpegReader::UpdatePTSOffset() {
2257  if (pts_offset_seconds != NO_PTS_OFFSET) {
2258  // Skip this method if we have already set PTS offset
2259  return;
2260  }
2261  pts_offset_seconds = 0.0;
2262  double video_pts_offset_seconds = 0.0;
2263  double audio_pts_offset_seconds = 0.0;
2264 
2265  bool has_video_pts = false;
2266  if (!info.has_video) {
2267  // Mark as checked
2268  has_video_pts = true;
2269  }
2270  bool has_audio_pts = false;
2271  if (!info.has_audio) {
2272  // Mark as checked
2273  has_audio_pts = true;
2274  }
2275 
2276  // Loop through the stream (until a packet from all streams is found)
2277  while (!has_video_pts || !has_audio_pts) {
2278  // Get the next packet (if any)
2279  if (GetNextPacket() < 0)
2280  // Break loop when no more packets found
2281  break;
2282 
2283  // Get PTS of this packet
2284  int64_t pts = GetPacketPTS();
2285 
2286  // Video packet
2287  if (!has_video_pts && packet->stream_index == videoStream) {
2288  // Get the video packet start time (in seconds)
2289  video_pts_offset_seconds = 0.0 - (pts * info.video_timebase.ToDouble());
2290 
2291  // Is timestamp close to zero (within X seconds)
2292  // Ignore wildly invalid timestamps (i.e. -234923423423)
2293  if (std::abs(video_pts_offset_seconds) <= 10.0) {
2294  has_video_pts = true;
2295  }
2296  }
2297  else if (!has_audio_pts && packet->stream_index == audioStream) {
2298  // Get the audio packet start time (in seconds)
2299  audio_pts_offset_seconds = 0.0 - (pts * info.audio_timebase.ToDouble());
2300 
2301  // Is timestamp close to zero (within X seconds)
2302  // Ignore wildly invalid timestamps (i.e. -234923423423)
2303  if (std::abs(audio_pts_offset_seconds) <= 10.0) {
2304  has_audio_pts = true;
2305  }
2306  }
2307  }
2308 
2309  // Choose timestamp origin:
2310  // - If video exists, anchor timeline frame mapping to video start.
2311  // This avoids AAC priming / audio preroll shifting video frame 1 to frame 2.
2312  // - If no video exists (audio-only readers), use audio start.
2313  if (info.has_video && has_video_pts) {
2314  pts_offset_seconds = video_pts_offset_seconds;
2315  } else if (!info.has_video && has_audio_pts) {
2316  pts_offset_seconds = audio_pts_offset_seconds;
2317  } else if (has_video_pts && has_audio_pts) {
2318  // Fallback when stream flags are unusual but both timestamps exist.
2319  pts_offset_seconds = video_pts_offset_seconds;
2320  }
2321 }
2322 
2323 // Convert PTS into Frame Number
2324 int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
2325  // Apply PTS offset
2326  int64_t previous_video_frame = current_video_frame;
2327  const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2328  const double video_timebase_value =
2331  : (1.0 / 30.0);
2332 
2333  // Get the video packet start time (in seconds)
2334  double video_seconds = (double(pts) * video_timebase_value) + pts_offset_seconds;
2335 
2336  // Divide by the video timebase, to get the video frame number (frame # is decimal at this point)
2337  int64_t frame = round(video_seconds * fps_value) + 1;
2338 
2339  // Keep track of the expected video frame #
2340  if (current_video_frame == 0)
2341  current_video_frame = frame;
2342  else {
2343 
2344  // Sometimes frames are duplicated due to identical (or similar) timestamps
2345  if (frame == previous_video_frame) {
2346  // return -1 frame number
2347  frame = -1;
2348  } else {
2349  // Increment expected frame
2350  current_video_frame++;
2351  }
2352  }
2353 
2354  // Return frame #
2355  return frame;
2356 }
2357 
2358 // Convert Frame Number into Video PTS
2359 int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2360  const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2361  const double video_timebase_value =
2364  : (1.0 / 30.0);
2365 
2366  // Get timestamp of this frame (in seconds)
2367  double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2368 
2369  // Calculate the # of video packets in this timestamp
2370  int64_t video_pts = round(seconds / video_timebase_value);
2371 
2372  // Apply PTS offset (opposite)
2373  return video_pts;
2374 }
2375 
2376 // Convert Frame Number into Video PTS
2377 int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2378  const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2379  const double audio_timebase_value =
2382  : (1.0 / 48000.0);
2383 
2384  // Get timestamp of this frame (in seconds)
2385  double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2386 
2387  // Calculate the # of audio packets in this timestamp
2388  int64_t audio_pts = round(seconds / audio_timebase_value);
2389 
2390  // Apply PTS offset (opposite)
2391  return audio_pts;
2392 }
2393 
2394 // Calculate Starting video frame and sample # for an audio PTS
2395 AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2396  const double audio_timebase_value =
2399  : (1.0 / 48000.0);
2400  const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2401 
2402  // Get the audio packet start time (in seconds)
2403  double audio_seconds = (double(pts) * audio_timebase_value) + pts_offset_seconds;
2404 
2405  // Divide by the video timebase, to get the video frame number (frame # is decimal at this point)
2406  double frame = (audio_seconds * fps_value) + 1;
2407 
2408  // Frame # as a whole number (no more decimals)
2409  int64_t whole_frame = int64_t(frame);
2410 
2411  // Remove the whole number, and only get the decimal of the frame
2412  double sample_start_percentage = frame - double(whole_frame);
2413 
2414  // Get Samples per frame
2415  int samples_per_frame = Frame::GetSamplesPerFrame(whole_frame, info.fps, info.sample_rate, info.channels);
2416 
2417  // Calculate the sample # to start on
2418  int sample_start = round(double(samples_per_frame) * sample_start_percentage);
2419 
2420  // Protect against broken (i.e. negative) timestamps
2421  if (whole_frame < 1)
2422  whole_frame = 1;
2423  if (sample_start < 0)
2424  sample_start = 0;
2425 
2426  // Prepare final audio packet location
2427  AudioLocation location = {whole_frame, sample_start};
2428 
2429  // Compare to previous audio packet (and fix small gaps due to varying PTS timestamps)
2430  if (previous_packet_location.frame != -1) {
2431  if (location.is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2432  int64_t orig_frame = location.frame;
2433  int orig_start = location.sample_start;
2434 
2435  // Update sample start, to prevent gaps in audio
2436  location.sample_start = previous_packet_location.sample_start;
2437  location.frame = previous_packet_location.frame;
2438 
2439  // Debug output
2440  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)", "Source Frame", orig_frame, "Source Audio Sample", orig_start, "Target Frame", location.frame, "Target Audio Sample", location.sample_start, "pts", pts);
2441 
2442  } else {
2443  // Debug output
2444  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAudioPTSLocation (Audio Gap Ignored - too big)", "Previous location frame", previous_packet_location.frame, "Target Frame", location.frame, "Target Audio Sample", location.sample_start, "pts", pts);
2445  }
2446  }
2447 
2448  // Set previous location
2449  previous_packet_location = location;
2450 
2451  // Return the associated video frame and starting sample #
2452  return location;
2453 }
2454 
2455 // Create a new Frame (or return an existing one) and add it to the working queue.
2456 std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2457  // Check working cache
2458  std::shared_ptr<Frame> output = working_cache.GetFrame(requested_frame);
2459 
2460  if (!output) {
2461  // (re-)Check working cache
2462  output = working_cache.GetFrame(requested_frame);
2463  if(output) return output;
2464 
2465  // Create a new frame on the working cache
2466  output = std::make_shared<Frame>(requested_frame, info.width, info.height, "#000000", Frame::GetSamplesPerFrame(requested_frame, info.fps, info.sample_rate, info.channels), info.channels);
2467  output->SetPixelRatio(info.pixel_ratio.num, info.pixel_ratio.den); // update pixel ratio
2468  output->ChannelsLayout(info.channel_layout); // update audio channel layout from the parent reader
2469  output->SampleRate(info.sample_rate); // update the frame's sample rate of the parent reader
2470 
2471  working_cache.Add(output);
2472 
2473  // Set the largest processed frame (if this is larger)
2474  if (requested_frame > largest_frame_processed)
2475  largest_frame_processed = requested_frame;
2476  }
2477  // Return frame
2478  return output;
2479 }
2480 
2481 // Determine if frame is partial due to seek
2482 bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2483 
2484  // Sometimes a seek gets partial frames, and we need to remove them
2485  bool seek_trash = false;
2486  int64_t max_seeked_frame = seek_audio_frame_found; // determine max seeked frame
2487  if (seek_video_frame_found > max_seeked_frame) {
2488  max_seeked_frame = seek_video_frame_found;
2489  }
2490  if ((info.has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2491  (info.has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
2492  seek_trash = true;
2493  }
2494 
2495  return seek_trash;
2496 }
2497 
2498 // Check the working queue, and move finished frames to the finished queue
2499 void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
2500 
2501  // Prevent async calls to the following code
2502  const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
2503 
2504  // Get a list of current working queue frames in the cache (in-progress frames)
2505  std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.GetFrames();
2506  std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2507 
2508  // Loop through all working queue frames (sorted by frame #)
2509  for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2510  {
2511  // Get working frame
2512  std::shared_ptr<Frame> f = *working_itr;
2513 
2514  // Was a frame found? Is frame requested yet?
2515  if (!f || f->number > requested_frame) {
2516  // If not, skip to next one
2517  continue;
2518  }
2519 
2520  // Calculate PTS in seconds (of working frame), and the most recent processed pts value
2521  double frame_pts_seconds = (double(f->number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
2522  double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2523 
2524  // Determine if video and audio are ready (based on timestamps)
2525  bool is_video_ready = false;
2526  bool is_audio_ready = false;
2527  double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
2528  if ((frame_pts_seconds <= video_pts_seconds)
2529  || (recent_pts_diff > 1.5)
2530  || packet_status.video_eof || packet_status.end_of_file) {
2531  // Video stream is past this frame (so it must be done)
2532  // OR video stream is too far behind, missing, or end-of-file
2533  is_video_ready = true;
2534  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (video ready)",
2535  "frame_number", f->number,
2536  "frame_pts_seconds", frame_pts_seconds,
2537  "video_pts_seconds", video_pts_seconds,
2538  "recent_pts_diff", recent_pts_diff);
2539  if (info.has_video && !f->has_image_data) {
2540  // Frame has no image data. Prefer timeline-previous frames to preserve
2541  // visual order, especially when decode/prefetch is out-of-order.
2542  std::shared_ptr<Frame> previous_frame_instance = final_cache.GetFrame(f->number - 1);
2543  if (previous_frame_instance && previous_frame_instance->has_image_data) {
2544  f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2545  }
2546 
2547  // Fall back to last finalized timeline image (survives cache churn).
2548  if (!f->has_image_data
2549  && last_final_video_frame
2550  && last_final_video_frame->has_image_data
2551  && last_final_video_frame->number <= f->number) {
2552  f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2553  }
2554 
2555  // Fall back to the last decoded image only when it is not from the future.
2556  if (!f->has_image_data
2557  && last_video_frame
2558  && last_video_frame->has_image_data
2559  && last_video_frame->number <= f->number) {
2560  f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2561  }
2562 
2563  // Last-resort fallback if no prior image is available.
2564  if (!f->has_image_data) {
2566  "FFmpegReader::CheckWorkingFrames (no previous image found; using black frame)",
2567  "frame_number", f->number);
2568  f->AddColor("#000000");
2569  }
2570  }
2571  }
2572 
2573  double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2574  if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2575  || (recent_pts_diff > 1.5)
2576  || packet_status.audio_eof || packet_status.end_of_file) {
2577  // Audio stream is past this frame (so it must be done)
2578  // OR audio stream is too far behind, missing, or end-of-file
2579  // Adding a bit of margin here, to allow for partial audio packets
2580  is_audio_ready = true;
2581  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (audio ready)",
2582  "frame_number", f->number,
2583  "frame_pts_seconds", frame_pts_seconds,
2584  "audio_pts_seconds", audio_pts_seconds,
2585  "audio_pts_diff", audio_pts_diff,
2586  "recent_pts_diff", recent_pts_diff);
2587  }
2588  bool is_seek_trash = IsPartialFrame(f->number);
2589 
2590  // Adjust for available streams
2591  if (!info.has_video) is_video_ready = true;
2592  if (!info.has_audio) is_audio_ready = true;
2593 
2594  // Debug output
2595  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames",
2596  "frame_number", f->number,
2597  "is_video_ready", is_video_ready,
2598  "is_audio_ready", is_audio_ready,
2599  "video_eof", packet_status.video_eof,
2600  "audio_eof", packet_status.audio_eof,
2601  "end_of_file", packet_status.end_of_file);
2602 
2603  // Check if working frame is final
2604  if ((!packet_status.end_of_file && is_video_ready && is_audio_ready) || packet_status.end_of_file || is_seek_trash) {
2605  // Debug output
2606  ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (mark frame as final)",
2607  "requested_frame", requested_frame,
2608  "f->number", f->number,
2609  "is_seek_trash", is_seek_trash,
2610  "Working Cache Count", working_cache.Count(),
2611  "Final Cache Count", final_cache.Count(),
2612  "end_of_file", packet_status.end_of_file);
2613 
2614  if (!is_seek_trash) {
2615  // Move frame to final cache
2616  final_cache.Add(f);
2617  if (f->has_image_data) {
2618  last_final_video_frame = f;
2619  }
2620 
2621  // Remove frame from working cache
2622  working_cache.Remove(f->number);
2623 
2624  // Update last frame processed
2625  last_frame = f->number;
2626  } else {
2627  // Seek trash, so delete the frame from the working cache, and never add it to the final cache.
2628  working_cache.Remove(f->number);
2629  }
2630 
2631  }
2632  }
2633 
2634  // Clear vector of frames
2635  working_frames.clear();
2636  working_frames.shrink_to_fit();
2637 }
2638 
2639 // Check for the correct frames per second (FPS) value by scanning the 1st few seconds of video packets.
2640 void FFmpegReader::CheckFPS() {
2641  if (check_fps) {
2642  // Do not check FPS more than 1 time
2643  return;
2644  } else {
2645  check_fps = true;
2646  }
2647 
2648  int frames_per_second[3] = {0,0,0};
2649  int max_fps_index = sizeof(frames_per_second) / sizeof(frames_per_second[0]);
2650  int fps_index = 0;
2651 
2652  int all_frames_detected = 0;
2653  int starting_frames_detected = 0;
2654 
2655  // Loop through the stream
2656  while (true) {
2657  // Get the next packet (if any)
2658  if (GetNextPacket() < 0)
2659  // Break loop when no more packets found
2660  break;
2661 
2662  // Video packet
2663  if (packet->stream_index == videoStream) {
2664  // Get the video packet start time (in seconds)
2665  double video_seconds = (double(GetPacketPTS()) * info.video_timebase.ToDouble()) + pts_offset_seconds;
2666  fps_index = int(video_seconds); // truncate float timestamp to int (second 1, second 2, second 3)
2667 
2668  // Is this video packet from the first few seconds?
2669  if (fps_index >= 0 && fps_index < max_fps_index) {
2670  // Yes, keep track of how many frames per second (over the first few seconds)
2671  starting_frames_detected++;
2672  frames_per_second[fps_index]++;
2673  }
2674 
2675  // Track all video packets detected
2676  all_frames_detected++;
2677  }
2678  }
2679 
2680  // Calculate FPS (based on the first few seconds of video packets)
2681  float avg_fps = 30.0;
2682  if (starting_frames_detected > 0 && fps_index > 0) {
2683  avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
2684  }
2685 
2686  // Verify average FPS is a reasonable value
2687  if (avg_fps < 8.0) {
2688  // Invalid FPS assumed, so switching to a sane default FPS instead
2689  avg_fps = 30.0;
2690  }
2691 
2692  // Update FPS (truncate average FPS to Integer)
2693  info.fps = Fraction(int(avg_fps), 1);
2694 
2695  // Update Duration and Length
2696  if (all_frames_detected > 0) {
2697  // Use all video frames detected to calculate # of frames
2698  info.video_length = all_frames_detected;
2699  info.duration = all_frames_detected / avg_fps;
2700  } else {
2701  // Use previous duration to calculate # of frames
2702  info.video_length = info.duration * avg_fps;
2703  }
2704 
2705  // Update video bit rate
2707 }
2708 
2709 // Remove AVFrame from cache (and deallocate its memory)
2710 void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
2711  // Remove pFrame (if exists)
2712  if (remove_frame) {
2713  // Free memory
2714  av_freep(&remove_frame->data[0]);
2715 #ifndef WIN32
2716  AV_FREE_FRAME(&remove_frame);
2717 #endif
2718  }
2719 }
2720 
2721 // Remove AVPacket from cache (and deallocate its memory)
2722 void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
2723  // deallocate memory for packet
2724  AV_FREE_PACKET(remove_packet);
2725 
2726  // Delete the object
2727  delete remove_packet;
2728 }
2729 
2730 // Generate JSON string of this object
2731 std::string FFmpegReader::Json() const {
2732 
2733  // Return formatted string
2734  return JsonValue().toStyledString();
2735 }
2736 
2737 // Generate Json::Value for this object
2738 Json::Value FFmpegReader::JsonValue() const {
2739 
2740  // Create root json object
2741  Json::Value root = ReaderBase::JsonValue(); // get parent properties
2742  root["type"] = "FFmpegReader";
2743  root["path"] = path;
2744  switch (duration_strategy) {
2746  root["duration_strategy"] = "VideoPreferred";
2747  break;
2749  root["duration_strategy"] = "AudioPreferred";
2750  break;
2752  default:
2753  root["duration_strategy"] = "LongestStream";
2754  break;
2755  }
2756 
2757  // return JsonValue
2758  return root;
2759 }
2760 
2761 // Load JSON string into this object
2762 void FFmpegReader::SetJson(const std::string value) {
2763 
2764  // Parse JSON string into JSON objects
2765  try {
2766  const Json::Value root = openshot::stringToJson(value);
2767  // Set all values that match
2768  SetJsonValue(root);
2769  }
2770  catch (const std::exception& e) {
2771  // Error parsing JSON (or missing keys)
2772  throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
2773  }
2774 }
2775 
2776 // Load Json::Value into this object
2777 void FFmpegReader::SetJsonValue(const Json::Value root) {
2778 
2779  // Set parent data
2781 
2782  // Set data from Json (if key is found)
2783  if (!root["path"].isNull())
2784  path = root["path"].asString();
2785  if (!root["duration_strategy"].isNull()) {
2786  const std::string strategy = root["duration_strategy"].asString();
2787  if (strategy == "VideoPreferred") {
2788  duration_strategy = DurationStrategy::VideoPreferred;
2789  } else if (strategy == "AudioPreferred") {
2790  duration_strategy = DurationStrategy::AudioPreferred;
2791  } else {
2792  duration_strategy = DurationStrategy::LongestStream;
2793  }
2794  }
2795 }
openshot::stringToJson
const Json::Value stringToJson(const std::string value)
Definition: Json.cpp:16
openshot::CacheMemory::Clear
void Clear()
Clear the cache of all frames.
Definition: CacheMemory.cpp:224
AV_FIND_DECODER_CODEC_ID
#define AV_FIND_DECODER_CODEC_ID(av_stream)
Definition: FFmpegUtilities.h:211
openshot::ReaderInfo::sample_rate
int sample_rate
The number of audio samples per second (44100 is a common sample rate)
Definition: ReaderBase.h:60
openshot::FFmpegReader::FFmpegReader
FFmpegReader(const std::string &path, bool inspect_reader=true)
Constructor for FFmpegReader.
Definition: FFmpegReader.cpp:76
openshot::Fraction::ToFloat
float ToFloat()
Return this fraction as a float (i.e. 1/2 = 0.5)
Definition: Fraction.cpp:35
openshot::Settings::HARDWARE_DECODER
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported)
Definition: Settings.h:62
openshot::Coordinate::Y
double Y
The Y value of the coordinate (usually representing the value of the property being animated)
Definition: Coordinate.h:41
openshot::CacheMemory::Count
int64_t Count()
Count the frames in the queue.
Definition: CacheMemory.cpp:240
FFmpegUtilities.h
Header file for FFmpegUtilities.
openshot::ReaderBase::JsonValue
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
Definition: ReaderBase.cpp:106
openshot::InvalidCodec
Exception when no valid codec is found for a file.
Definition: Exceptions.h:178
openshot::TimelineBase::preview_width
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
Definition: TimelineBase.h:44
openshot::PacketStatus::reset
void reset(bool eof)
Definition: FFmpegReader.h:70
openshot::CacheMemory::GetFrame
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
Definition: CacheMemory.cpp:84
openshot::FFmpegReader::GetFrame
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
Definition: FFmpegReader.cpp:1156
AV_COPY_PICTURE_DATA
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
Definition: FFmpegUtilities.h:223
openshot::CacheMemory::Add
void Add(std::shared_ptr< openshot::Frame > frame)
Add a Frame to the cache.
Definition: CacheMemory.cpp:47
PixelFormat
#define PixelFormat
Definition: FFmpegUtilities.h:107
AV_ALLOCATE_FRAME
#define AV_ALLOCATE_FRAME()
Definition: FFmpegUtilities.h:203
openshot::ReaderBase::SetJsonValue
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
Definition: ReaderBase.cpp:157
SWR_CONVERT
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
Definition: FFmpegUtilities.h:149
openshot
This namespace is the default namespace for all code in the openshot library.
Definition: Compressor.h:28
openshot::Point::co
Coordinate co
This is the primary coordinate.
Definition: Point.h:66
openshot::Clip::scale_y
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1)
Definition: Clip.h:317
openshot::AudioLocation
This struct holds the associated video frame and starting sample # for an audio packet.
Definition: AudioLocation.h:25
openshot::AudioLocation::frame
int64_t frame
Definition: AudioLocation.h:26
openshot::ZmqLogger::Log
void Log(std::string message)
Log message to all subscribers of this logger (if any)
Definition: ZmqLogger.cpp:103
openshot::Clip
This class represents a clip (used to arrange readers on the timeline)
Definition: Clip.h:89
openshot::DurationStrategy::AudioPreferred
@ AudioPreferred
Prefer the audio stream's duration, fallback to video then container.
openshot::Fraction
This class represents a fraction.
Definition: Fraction.h:30
openshot::AudioLocation::sample_start
int sample_start
Definition: AudioLocation.h:27
AV_FREE_FRAME
#define AV_FREE_FRAME(av_frame)
Definition: FFmpegUtilities.h:207
MemoryTrim.h
Cross-platform helper to encourage returning freed memory to the OS.
openshot::Keyframe::GetMaxPoint
Point GetMaxPoint() const
Get max point (by Y coordinate)
Definition: KeyFrame.cpp:245
openshot::ReaderBase::info
openshot::ReaderInfo info
Information about the current media file.
Definition: ReaderBase.h:88
openshot::ReaderInfo::interlaced_frame
bool interlaced_frame
Definition: ReaderBase.h:56
Timeline.h
Header file for Timeline class.
openshot::Clip::ParentTimeline
void ParentTimeline(openshot::TimelineBase *new_timeline) override
Set associated Timeline pointer.
Definition: Clip.cpp:446
openshot::FFmpegReader::~FFmpegReader
virtual ~FFmpegReader()
Destructor.
Definition: FFmpegReader.cpp:112
openshot::ReaderInfo::audio_bit_rate
int audio_bit_rate
The bit rate of the audio stream (in bytes)
Definition: ReaderBase.h:59
openshot::CacheMemory::Remove
void Remove(int64_t frame_number)
Remove a specific frame.
Definition: CacheMemory.cpp:158
AV_FREE_PACKET
#define AV_FREE_PACKET(av_packet)
Definition: FFmpegUtilities.h:208
openshot::ReaderInfo::duration
float duration
Length of time (in seconds)
Definition: ReaderBase.h:43
openshot::ReaderInfo::has_video
bool has_video
Determines if this file has a video stream.
Definition: ReaderBase.h:40
openshot::FFmpegReader::JsonValue
Json::Value JsonValue() const override
Generate Json::Value for this object.
Definition: FFmpegReader.cpp:2738
openshot::PacketStatus::audio_read
int64_t audio_read
Definition: FFmpegReader.h:51
openshot::ReaderInfo::width
int width
The width of the video (in pixels)
Definition: ReaderBase.h:46
openshot::LAYOUT_STEREO
@ LAYOUT_STEREO
Definition: ChannelLayouts.h:31
openshot::FFmpegReader::SetJson
void SetJson(const std::string value) override
Load JSON string into this object.
Definition: FFmpegReader.cpp:2762
openshot::PacketStatus::packets_eof
bool packets_eof
Definition: FFmpegReader.h:57
hw_de_av_pix_fmt_global
AVPixelFormat hw_de_av_pix_fmt_global
Definition: FFmpegReader.cpp:72
openshot::PacketStatus::audio_decoded
int64_t audio_decoded
Definition: FFmpegReader.h:52
openshot::Fraction::ToDouble
double ToDouble() const
Return this fraction as a double (i.e. 1/2 = 0.5)
Definition: Fraction.cpp:40
openshot::PacketStatus::video_read
int64_t video_read
Definition: FFmpegReader.h:49
hw_de_on
int hw_de_on
Definition: FFmpegReader.cpp:70
openshot::CacheBase::SetMaxBytesFromInfo
void SetMaxBytesFromInfo(int64_t number_of_frames, int width, int height, int sample_rate, int channels)
Set maximum bytes to a different amount based on a ReaderInfo struct.
Definition: CacheBase.cpp:28
AV_ALLOCATE_IMAGE
#define AV_ALLOCATE_IMAGE(av_frame, pix_fmt, width, height)
Definition: FFmpegUtilities.h:204
openshot::LAYOUT_MONO
@ LAYOUT_MONO
Definition: ChannelLayouts.h:30
openshot::Clip::scale_x
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1)
Definition: Clip.h:316
AV_GET_CODEC_ATTRIBUTES
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
Definition: FFmpegUtilities.h:218
openshot::ReaderInfo::video_length
int64_t video_length
The number of frames in the video stream.
Definition: ReaderBase.h:53
hw_de_av_device_type_global
AVHWDeviceType hw_de_av_device_type_global
Definition: FFmpegReader.cpp:73
openshot::ReaderInfo::height
int height
The height of the video (in pixels)
Definition: ReaderBase.h:45
openshot::PacketStatus::video_eof
bool video_eof
Definition: FFmpegReader.h:55
openshot::Fraction::num
int num
Numerator for the fraction.
Definition: Fraction.h:32
if
if(!codec) codec
ZmqLogger.h
Header file for ZeroMQ-based Logger class.
openshot::Fraction::den
int den
Denominator for the fraction.
Definition: Fraction.h:33
OPEN_MP_NUM_PROCESSORS
#define OPEN_MP_NUM_PROCESSORS
Definition: OpenMPUtilities.h:23
AV_RESET_FRAME
#define AV_RESET_FRAME(av_frame)
Definition: FFmpegUtilities.h:206
openshot::AudioLocation::is_near
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
Definition: FFmpegReader.cpp:119
SWR_CLOSE
#define SWR_CLOSE(ctx)
Definition: FFmpegUtilities.h:152
openshot::Fraction::Reciprocal
Fraction Reciprocal() const
Return the reciprocal as a Fraction.
Definition: Fraction.cpp:78
openshot::ReaderInfo::has_audio
bool has_audio
Determines if this file has an audio stream.
Definition: ReaderBase.h:41
openshot::Settings::DE_LIMIT_HEIGHT_MAX
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
Definition: Settings.h:77
openshot::InvalidJSON
Exception for invalid JSON.
Definition: Exceptions.h:223
openshot::FFmpegReader::enable_seek
bool enable_seek
Definition: FFmpegReader.h:254
openshot::ReaderInfo::file_size
int64_t file_size
Size of file (in bytes)
Definition: ReaderBase.h:44
openshot::Timeline
This class represents a timeline.
Definition: Timeline.h:154
openshot::FFmpegReader::Open
void Open() override
Open File - which is called by the constructor automatically.
Definition: FFmpegReader.cpp:236
openshot::OutOfMemory
Exception when memory could not be allocated.
Definition: Exceptions.h:354
openshot::SCALE_CROP
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap)
Definition: Enums.h:37
SWR_INIT
#define SWR_INIT(ctx)
Definition: FFmpegUtilities.h:154
SWRCONTEXT
#define SWRCONTEXT
Definition: FFmpegUtilities.h:155
openshot::PacketStatus::audio_eof
bool audio_eof
Definition: FFmpegReader.h:56
openshot::ReaderInfo::has_single_image
bool has_single_image
Determines if this file only contains a single image.
Definition: ReaderBase.h:42
openshot::FFmpegReader::final_cache
CacheMemory final_cache
Final cache object used to hold final frames.
Definition: FFmpegReader.h:250
openshot::ReaderInfo::video_timebase
openshot::Fraction video_timebase
The video timebase determines how long each frame stays on the screen.
Definition: ReaderBase.h:55
openshot::Settings::Instance
static Settings * Instance()
Create or get an instance of this settings singleton (invoke the class with this method)
Definition: Settings.cpp:23
CropHelpers.h
Shared helpers for Crop effect scaling logic.
openshot::ReaderInfo::metadata
std::map< std::string, std::string > metadata
An optional map/dictionary of metadata for this reader.
Definition: ReaderBase.h:65
openshot::DurationStrategy::LongestStream
@ LongestStream
Use the longest value from video, audio, or container.
openshot::FFmpegReader
This class uses the FFmpeg libraries, to open video files and audio files, and return openshot::Frame...
Definition: FFmpegReader.h:103
path
path
Definition: FFmpegWriter.cpp:1469
openshot::Frame::GetSamplesPerFrame
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number)
Definition: Frame.cpp:484
openshot::InvalidFile
Exception for files that can not be found or opened.
Definition: Exceptions.h:193
openshot::ReaderInfo::audio_stream_index
int audio_stream_index
The index of the audio stream.
Definition: ReaderBase.h:63
openshot::ZmqLogger::Instance
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
Definition: ZmqLogger.cpp:35
openshot::DurationStrategy
DurationStrategy
This enumeration determines which duration source to favor.
Definition: Enums.h:60
openshot::ReaderInfo::audio_timebase
openshot::Fraction audio_timebase
The audio timebase determines how long each audio packet should be played.
Definition: ReaderBase.h:64
openshot::FFmpegReader::Close
void Close() override
Close File.
Definition: FFmpegReader.cpp:728
openshot::SCALE_FIT
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping)
Definition: Enums.h:38
openshot::PacketStatus::packets_read
int64_t packets_read()
Definition: FFmpegReader.h:60
openshot::ReaderInfo::pixel_format
int pixel_format
The pixel format (i.e. YUV420P, RGB24, etc...)
Definition: ReaderBase.h:47
openshot::ZmqLogger::AppendDebugMethod
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
Definition: ZmqLogger.cpp:178
openshot::ReaderInfo::vcodec
std::string vcodec
The name of the video codec used to encode / decode the video stream.
Definition: ReaderBase.h:52
openshot::PacketStatus::packets_decoded
int64_t packets_decoded()
Definition: FFmpegReader.h:65
AV_GET_CODEC_TYPE
#define AV_GET_CODEC_TYPE(av_stream)
Definition: FFmpegUtilities.h:210
openshot::ReaderClosed
Exception when a reader is closed, and a frame is requested.
Definition: Exceptions.h:369
openshot::ReaderInfo::channel_layout
openshot::ChannelLayout channel_layout
The channel layout (mono, stereo, 5 point surround, etc...)
Definition: ReaderBase.h:62
AV_FREE_CONTEXT
#define AV_FREE_CONTEXT(av_context)
Definition: FFmpegUtilities.h:209
PIX_FMT_RGBA
#define PIX_FMT_RGBA
Definition: FFmpegUtilities.h:110
AV_GET_CODEC_PIXEL_FORMAT
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
Definition: FFmpegUtilities.h:219
AVCODEC_REGISTER_ALL
#define AVCODEC_REGISTER_ALL
Definition: FFmpegUtilities.h:199
SWR_FREE
#define SWR_FREE(ctx)
Definition: FFmpegUtilities.h:153
openshot::Settings::DE_LIMIT_WIDTH_MAX
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
Definition: Settings.h:80
openshot::ReaderInfo::fps
openshot::Fraction fps
Frames per second, as a fraction (i.e. 24/1 = 24 fps)
Definition: ReaderBase.h:48
AV_GET_SAMPLE_FORMAT
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
Definition: FFmpegUtilities.h:221
FF_AUDIO_NUM_PROCESSORS
#define FF_AUDIO_NUM_PROCESSORS
Definition: OpenMPUtilities.h:25
openshot::ReaderInfo::video_bit_rate
int video_bit_rate
The bit rate of the video stream (in bytes)
Definition: ReaderBase.h:49
openshot::PacketStatus::end_of_file
bool end_of_file
Definition: FFmpegReader.h:58
FF_VIDEO_NUM_PROCESSORS
#define FF_VIDEO_NUM_PROCESSORS
Definition: OpenMPUtilities.h:24
openshot::Clip::scale
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
Definition: Clip.h:177
openshot::ReaderInfo::top_field_first
bool top_field_first
Definition: ReaderBase.h:57
openshot::ChannelLayout
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
Definition: ChannelLayouts.h:28
SWR_ALLOC
#define SWR_ALLOC()
Definition: FFmpegUtilities.h:151
openshot::ReaderInfo::pixel_ratio
openshot::Fraction pixel_ratio
The pixel ratio of the video stream as a fraction (i.e. some pixels are not square)
Definition: ReaderBase.h:50
AV_REGISTER_ALL
#define AV_REGISTER_ALL
Definition: FFmpegUtilities.h:198
openshot::DurationStrategy::VideoPreferred
@ VideoPreferred
Prefer the video stream's duration, fallback to audio then container.
openshot::CacheMemory::GetFrames
std::vector< std::shared_ptr< openshot::Frame > > GetFrames()
Get an array of all Frames.
Definition: CacheMemory.cpp:100
AV_GET_CODEC_CONTEXT
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
Definition: FFmpegUtilities.h:212
openshot::ReaderInfo::video_stream_index
int video_stream_index
The index of the video stream.
Definition: ReaderBase.h:54
openshot::FFmpegReader::SetJsonValue
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
Definition: FFmpegReader.cpp:2777
openshot::SCALE_STRETCH
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit)
Definition: Enums.h:39
openshot::ReaderInfo::acodec
std::string acodec
The name of the audio codec used to encode / decode the audio stream.
Definition: ReaderBase.h:58
openshot::NoStreamsFound
Exception when no streams are found in the file.
Definition: Exceptions.h:291
openshot::ReaderInfo::display_ratio
openshot::Fraction display_ratio
The ratio of width to height of the video stream (i.e. 640x480 has a ratio of 4/3)
Definition: ReaderBase.h:51
openshot::ReaderInfo::channels
int channels
The number of audio channels used in the audio stream.
Definition: ReaderBase.h:61
openshot::FFmpegReader::Json
std::string Json() const override
Generate JSON string of this object.
Definition: FFmpegReader.cpp:2731
openshot::FFmpegReader::GetIsDurationKnown
bool GetIsDurationKnown()
Return true if frame can be read with GetFrame()
Definition: FFmpegReader.cpp:1152
openshot::ApplyCropResizeScale
void ApplyCropResizeScale(Clip *clip, int source_width, int source_height, int &max_width, int &max_height)
Scale the requested max_width / max_height based on the Crop resize amount, capped by source size.
Definition: CropHelpers.cpp:40
openshot::PacketStatus::video_decoded
int64_t video_decoded
Definition: FFmpegReader.h:50
opts
AVDictionary * opts
Definition: FFmpegWriter.cpp:1476
Exceptions.h
Header file for all Exception classes.
openshot::Settings::HW_DE_DEVICE_SET
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first)
Definition: Settings.h:83
FFmpegReader.h
Header file for FFmpegReader class.
openshot::ReaderBase::getFrameMutex
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
Definition: ReaderBase.h:79
openshot::ReaderBase::ParentClip
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Definition: ReaderBase.cpp:240