How can you combine multiple video files with FFMPEG and merging the audio track as well

149 Views Asked by At

I'm trying to combine multiple MP4 files in Delphi with the FFMPEG video library. I have the headers unit with all the functions. All videos are MPEG-4, and so is the destination output file.

I found this question on Stack Overflow asking the same question. To combine video files while keeping the audio and video tracks. I have translated the answers to Delphi, and while the code is executed successfully, the output file is invalid and cannot be played.

Here is my implementation:

var
  Files: TArray<PAnsiChar>;
  Output: PAnsiChar;

  I, S: integer;

  i_fmt_ctx: PAVFormatContext;
  i_video_stream: PAVStream;
  o_fmt_ctx: PAVFormatContext;
  o_video_stream: PAVStream;

  P: PPAVStream;
begin
  SetLength(Files, 2);
  Files[0] := PAnsiChar('.\Clips\file9.mp4');
  Files[1] := PAnsiChar('.\Clips\file10.mp4');
  Output := '.\Output\out.mp4';

  avcodec_register_all();   
  av_register_all();

  (* should set to NULL so that avformat_open_input() allocate a new one *)
  i_fmt_ctx := nil;

  if avformat_open_input(@i_fmt_ctx, Files[0], nil, nil) <> 0 then
    raise Exception.Create('Could not open file');

  if avformat_find_stream_info(i_fmt_ctx, nil) < 0 then
    raise Exception.Create('Could not find stream info');
                
  (* Find 1st video stream *)
  i_video_stream := nil;
  P := i_fmt_ctx.streams;
  for i := 0 to i_fmt_ctx.nb_streams-1 do begin
    if P^.codec.codec_type = AVMEDIA_TYPE_VIDEO then
      begin
        i_video_stream := P^;
        Break;
      end;
    Inc(P);
  end;
  if i_video_stream = nil then
    raise Exception.Create('Could not find video stream');

  avformat_alloc_output_context2(@o_fmt_ctx, nil, nil, Output);

  (*
  since all input files are supposed to be identical (framerate, dimension, color format, ...)
  we can safely set output codec values from first input file
  *)
  o_video_stream := avformat_new_stream(o_fmt_ctx, nil);
  
  var c: PAVCodecContext;
  c := o_video_stream.codec;
  c.bit_rate := 400000;
  c.codec_id := i_video_stream.codec.codec_id;
  c.codec_type := i_video_stream.codec.codec_type;
  c.time_base.num := i_video_stream.time_base.num;
  c.time_base.den := i_video_stream.time_base.den;
  //fprintf(stderr, "time_base.num = %d time_base.den = %d\n", c->time_base.num, c->time_base.den);
  c.width := i_video_stream.codec.width;
  c.height := i_video_stream.codec.height;
  c.pix_fmt := i_video_stream.codec.pix_fmt;
  //printf("%d %d %d", c->width, c->height, c->pix_fmt);
  c.flags := i_video_stream.codec.flags;
  c.flags := c.flags or CODEC_FLAG_GLOBAL_HEADER;
  c.me_range := i_video_stream.codec.me_range;
  c.max_qdiff := i_video_stream.codec.max_qdiff;

  c.qmin := i_video_stream.codec.qmin;
  c.qmax := i_video_stream.codec.qmax;

  c.qcompress := i_video_stream.codec.qcompress;

  c.extradata := i_video_stream.codec.extradata;
  c.extradata_size := i_video_stream.codec.extradata_size;

  avio_open(@o_fmt_ctx.pb, Output, AVIO_FLAG_WRITE);

  (* yes! this is redundant *)
  avformat_close_input(@i_fmt_ctx);

  avformat_write_header(o_fmt_ctx, nil);

  var last_pts: integer; last_pts := 0;
  var last_dts: integer; last_dts := 0;
  for i := 1 to High(Files) do begin
    i_fmt_ctx := nil;

    if avformat_open_input(@i_fmt_ctx, Files[i], nil, nil) <> 0 then
      raise Exception.Create('Could not open input file');

    if avformat_find_stream_info(i_fmt_ctx, nil) < 0 then
      raise Exception.Create('Could not find stream info');

    av_dump_format(i_fmt_ctx, 0, Files[i], 0);
    
    (* we only use first video stream of each input file *)
    i_video_stream := nil;

    P := i_fmt_ctx.streams;
    for S := 0 to i_fmt_ctx.nb_streams-1 do
      begin
        if (P^.codec.codec_type = AVMEDIA_TYPE_VIDEO) then
          begin
            i_video_stream := P^;
            break;
          end;
        
        Inc(P);
      end;

    if i_video_stream = nil then
      raise Exception.Create('Could not find video stream');
    
    var pts, dts: int64;
    pts := 0; dts := 0;
    while true do begin
      var i_pkt: TAVPacket;
      av_init_packet( @i_pkt );
      i_pkt.size := 0;
      i_pkt.data := nil;

      if av_read_frame(i_fmt_ctx, @i_pkt) < 0 then
        break;
      (*
        pts and dts should increase monotonically
        pts should be >= dts
      *)
      i_pkt.flags := i_pkt.flags or AV_PKT_FLAG_KEY;
      pts := i_pkt.pts;
      Inc(i_pkt.pts, last_pts);
      dts := i_pkt.dts;
      Inc(i_pkt.dts, last_dts);
      i_pkt.stream_index := 0;

      // Write
      av_interleaved_write_frame(o_fmt_ctx, @i_pkt);
    end;

    Inc(last_dts, dts);
    Inc(last_pts, pts);  
  
    avformat_close_input(@i_fmt_ctx)
  end;

  av_write_trailer(o_fmt_ctx);

  avcodec_close(o_fmt_ctx.streams^.codec);
  av_freep(&o_fmt_ctx.streams^.codec);
  av_freep(&o_fmt_ctx.streams);

  avio_close(o_fmt_ctx.pb);
  av_free(o_fmt_ctx);

Which is a translation of Михаил Чеботарев's answer.

Even if the code worked, I see no handling of the AVMEDIA_TYPE_AUDIO stream, which means this answer is 1/2 of the problem, since It only combines the video stream.

Another approach I tried was using the UBitmaps2Video FFMPEG implementation, which is successfully able to merge the video files, but only the video stream, no audio.

I tried manually converting the audio stream with the Bass Audio Library. It was able to read the audio and write It in a single WAV file, which then I converted to MP3. Finally muxing the combined video file and the MP3 file with MuxStreams2. Unfortunately, the audio and video do not align properly. I was unable to pinpoint the issue.

Currently, the only functional option is using the precompiled FFMPEG Executables and using ShellExecute with the according parameters to combine the videos. This more exactly:

ffmpeg -f concat -safe 0 -i video-list.txt -c copy output.mp4

But I would still rather use the FFMPEG headers in Delphi to combine the videos that way, as that gives the option for Progress indicatiors, more control of the playback and the ability to pause the thread at any point.

So, why does my implementation to merge video files not work. And what is a good method to include the audio stream as well?

0

There are 0 best solutions below