Read existing MP4 File and write into a new MP4 file using libavcodec

282 Views Asked by At

I am new to the libavcodec space.

I am trying to read the video and audio streams from an existing MP4 file, mux the two streams, and write the muxed data into a new MP4 file using libavcodec in C++. Essentially, I am aiming to split the original (existing) MP4 file into small 1-second clips that can then be played back in a video player. I would like to preserve the original file's video stream (i.e. its color, resolution, etc.) and its audio stream (i.e. its bit rate, etc.). I am trying to achieve this using libavcodec in C++, but there does not seem to be any tutorial or documentation online that points me in that direction.

So far, I have looked at and tried to implement a solution using this tutorial (tutorial#1): https://github.com/leandromoreira/ffmpeg-libav-tutorial/blob/master/0_hello_world.c

However, tutorial#1 aimed to save each video frame from the existing (original) mp4 video stream into individual .pgm files, which meant that the .pgm files would store a grayscale image of each video frame.

Since I want to preserve the colors of the original (existing) mp4 file, I looked at this tutorial (tutorial#2), which aimed to convert the grayscale video frame into color using the swscale library: https://www.youtube.com/watch?v=Y7SUm7Xf1sc&ab_channel=Bartholomew. However, in tutorial#2, they exported the output from the swscale library to a GUI library to be viewed in a GUI application and did not show how to write the output data into a new mp4 file that can be played back by a video player.

So then I looked at this tutorial (tutorial#3), which showed how to create an MP4 file using libavcodec: "C++ FFmpeg create mp4 file". However, the problem with that solution is that I was not able to take a video frame from the original mp4 video and store it in another mp4 file. I kept getting errors when attempting to do so, and I did not succeed in taking the data from the original (existing) mp4 file and storing it in a new mp4 file.

Here is the code that I have written so far:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include<fstream>
#include <iostream>
extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/mathematics.h>
#include <libswscale/swscale.h>
#include <libavfilter/buffersrc.h>
#include <libavfilter/buffersink.h>
#include <libavutil/time.h>
#include <libavutil/opt.h>
}
#pragma comment(lib, "avfilter.lib")
#ifdef av_err2str
#undef av_err2str
#include <string>
// Formats an FFmpeg error code into an owned std::string so the text stays
// valid after the local scratch buffer goes out of scope.
av_always_inline std::string av_err2string(int errnum) {
    char message[AV_ERROR_MAX_STRING_SIZE];
    av_make_error_string(message, sizeof(message), errnum);
    return std::string(message);
}
#define av_err2str(err) av_err2string(err).c_str()
#endif  // av_err2str

#include <chrono>
#include <thread>


// decode packets into frames
static int decode_packet(AVPacket *pPacket, AVCodecContext *pCodecContext, AVFrame *pFrame);

/**
 * Encodes one raw video frame and muxes every packet the encoder hands back.
 *
 * The frame's pts is overwritten with a running frame counter expressed in the
 * encoder time base, so successive calls produce increasing timestamps. The
 * counter is function-static, so this helper serves a single output stream.
 *
 * @param frame                Raw picture to encode; its pts is overwritten.
 * @param outputCodecContext   Opened encoder context.
 * @param outputPacket         Caller-owned scratch packet; unreferenced on return.
 * @param outputFormatContext  Muxer whose header has already been written; the
 *                             packets are written to its first stream.
 * @param outputCodec          Unused; kept so existing callers still compile.
 */
static void pushFrame(AVFrame* frame, AVCodecContext* outputCodecContext, AVPacket * outputPacket, AVFormatContext* outputFormatContext, AVCodec *outputCodec) {
    (void)outputCodec;

    if (!frame || !outputCodecContext || !outputPacket || !outputFormatContext ||
        outputFormatContext->nb_streams == 0) {
        std::cerr << "pushFrame: missing frame, encoder, packet or output stream" << std::endl;
        return;
    }
    AVStream* const targetStream = outputFormatContext->streams[0];

    // Stamp the frame with its index converted from "frames" to encoder ticks;
    // fall back to the raw index when the encoder has no frame rate configured.
    static int64_t framesSubmitted = 0;
    const AVRational fps = outputCodecContext->framerate;
    if (fps.num > 0 && fps.den > 0) {
        frame->pts = av_rescale_q(framesSubmitted, av_inv_q(fps), outputCodecContext->time_base);
    } else {
        frame->pts = framesSubmitted;
    }
    ++framesSubmitted;

    int err = avcodec_send_frame(outputCodecContext, frame);
    if (err < 0) {
        std::cout << "Failed to send frame" << err << std::endl;
        return;
    }

    // One input frame may yield zero, one or several packets: drain them all.
    while ((err = avcodec_receive_packet(outputCodecContext, outputPacket)) == 0) {
        // Debug aid: dump only the very first encoded packet to disk.
        static bool firstPacketDumped = false;
        if (!firstPacketDumped) {
            firstPacketDumped = true;
            if (FILE* fp = fopen("dump_first_frame1.dat", "wb")) {
                fwrite(outputPacket->data, outputPacket->size, 1, fp);
                fclose(fp);
            }
        }

        // The muxer expects timestamps in the stream time base, not the encoder's.
        av_packet_rescale_ts(outputPacket, outputCodecContext->time_base, targetStream->time_base);
        outputPacket->stream_index = targetStream->index;

        err = av_interleaved_write_frame(outputFormatContext, outputPacket);
        av_packet_unref(outputPacket);
        if (err < 0) {
            std::cerr << "Error while writing a packet to the output: " << av_err2str(err) << std::endl;
            return;
        }
    }

    // EAGAIN (encoder wants more input) and EOF are the normal ways out of the loop.
    if (err != AVERROR(EAGAIN) && err != AVERROR_EOF) {
        std::cerr << "Error while receiving a packet from the encoder: " << av_err2str(err) << std::endl;
    }
}

int main()
{

    char* filename = "c++.mp4";

    AVFormatContext *pFormatContext = avformat_alloc_context();

    AVOutputFormat* outputFormat = NULL;

    AVFormatContext* outputFormatContext = nullptr;

    AVCodecContext* outputCodecContext = nullptr;

    if (!pFormatContext) {
        std::cerr<<"ERROR could not allocate memory for Format Context\n";
        return -1;
    }

    if (avformat_open_input(&pFormatContext, filename , NULL, NULL) != 0) {
        std::cerr<<"ERROR could not open the file\n";
            return -1;
    }

    std::cout<<"format: "<<pFormatContext->iformat->name<<" , duration:"<<(double)(pFormatContext->duration/AV_TIME_BASE)<<"seconds, bit_rate:"<<pFormatContext->bit_rate<<std::endl;

    outputFormat = av_guess_format("mp4", "test.mp4", nullptr);

    if (!outputFormat)
    {
        std::cout << "can't retrieve output format." << std::endl;
        return -1;
    }

    if(avformat_alloc_output_context2(&outputFormatContext, outputFormat, nullptr, "test.mp4"))
    {
        std::cout << "can't create output context" << std::endl;
        return -1;
    }
    
    if (avformat_find_stream_info(pFormatContext,  NULL) < 0) {
        std::cerr<<"ERROR could not get the stream info\n";
            return -1;
    }
    
    AVCodec *outputCodec = NULL;

    outputCodec = avcodec_find_encoder(outputFormat->video_codec);

    
    if (!outputCodec)
    {
        std::cout << "can't create output codec" << std::endl;
        return -1;
    }   
    

    AVStream* outputStream = avformat_new_stream(outputFormatContext, outputCodec);

    if (!outputStream)
    {
        std::cout << "can't find output format" << std::endl;
        return -1;
    }

    outputCodecContext = avcodec_alloc_context3(outputCodec);

    if (!outputCodecContext)
    {
        std::cout << "can't create output codec context" << std::endl;
        return -1;
    }

    AVCodec *pCodec = NULL;

    AVCodecParameters *pCodecParameters =  NULL;

    int video_stream_index = -1;

    AVStream* stream = NULL;
    
    // loop though all the streams and print its main information
    for (int i = 0; i < pFormatContext->nb_streams; i++)
     {
        
        AVCodecParameters *pLocalCodecParameters =  NULL;
        pLocalCodecParameters = pFormatContext->streams[i]->codecpar;

        AVCodec *pLocalCodec = NULL;
        pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);
        if (pLocalCodec==NULL) {
            std::cerr<<"ERROR unsupported codec!\n";
                // In this example if the codec is not found we just skip it
                continue;
            }


        if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {
                if (video_stream_index == -1) {
                    video_stream_index = i;
                    pCodec = pLocalCodec;
                    pCodecParameters = pLocalCodecParameters;
                    stream = pFormatContext->streams[i];
                    std::cout<<"codec id: "<<stream->codecpar->codec_id<<std::endl;
                    std::cout<<"codec type: "<<stream->codecpar->codec_type<<std::endl;
                    std::cout<<"width: "<<stream->codecpar->width<<std::endl;
                    std::cout<<"height: "<<stream->codecpar->height<<std::endl;
                    std::cout<<"stream format: "<<stream->codecpar->format<<std::endl;
                    std::cout<<"bitrate: "<<stream->codecpar->bit_rate<<std::endl;
                    outputStream->codecpar->codec_id = outputFormat->video_codec;
                    // outputStream->codecpar->codec_id = stream->codecpar->codec_id;
                    outputStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
                    outputStream->codecpar->width = stream->codecpar->width;
                    outputStream->codecpar->height = stream->codecpar->height;
                    outputStream->codecpar->format = AV_PIX_FMT_YUV420P;
                    outputStream->codecpar->bit_rate = stream->codecpar->bit_rate;
                    
                    avcodec_parameters_to_context(outputCodecContext, outputStream->codecpar);
                }       

                std::cout<<"Video Codec: resolution " << pLocalCodecParameters->width << " x "<<pLocalCodecParameters->height<<std::endl;
            } 
        else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {
                std::cout<<"Audio Codec: "<<pLocalCodecParameters->channels<<" channels, sample rate "<<pLocalCodecParameters->sample_rate<<std::endl;
            }

        std::cout<<"\tCodec name: " << pLocalCodec->name << " ID: " <<pLocalCodec->id<< " bit_rate: "<<pLocalCodecParameters->bit_rate<<std::endl;

    }   

    if (video_stream_index == -1) {
        std::cerr<<"File "<<filename<<" does not contain a video stream!";
            return -1;
    }   
    
    AVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);
    if (!pCodecContext)
    {
        std::cerr<<"failed to allocated memory for AVCodecContext\n";
            return -1;
    }

    // outputStream->codecpar->codec_id = outputFormat->video_codec;
    // outputStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
    // outputStream->codecpar->width = 300;
    // outputStream->codecpar->height = 300;
    // outputStream->codecpar->format = AV_PIX_FMT_YUV420P;
    // outputStream->codecpar->bit_rate = 200 * 1000;
    outputCodecContext->time_base = (AVRational){ 1, 1 };
    outputCodecContext->max_b_frames = 2;
    outputCodecContext->gop_size = 12;
    outputCodecContext->framerate = (AVRational){ 30, 1 };

    if (avcodec_parameters_to_context(pCodecContext, pCodecParameters) < 0)
    {
        std::cerr<<"failed to copy codec params to codec context\n";
            return -1;
    }

    // std::cout<<"pCodecContext->time_base: "<<sizeof(pCodecContext->time_base)<<std::endl;
    // outputCodecContext->time_base = pCodecContext->time_base;
    // outputCodecContext->max_b_frames = pCodecContext->max_b_frames;
    // outputCodecContext->gop_size = pCodecContext->gop_size;
    // outputCodecContext->framerate = pCodecContext->framerate;

    if (outputStream->codecpar->codec_id == AV_CODEC_ID_H264) {
        // av_opt_set(pCodecContext, "preset", "ultrafast", 0);
        av_opt_set(outputCodecContext, "preset", "ultrafast", 0);
    }
    else if (outputStream->codecpar->codec_id == AV_CODEC_ID_H265)
    {
        // av_opt_set(pCodecContext, "preset", "ultrafast", 0);
        av_opt_set(outputCodecContext, "preset", "ultrafast", 0);
    }

    // avcodec_parameters_from_context(stream->codecpar, pCodecContext);
    avcodec_parameters_from_context(outputStream->codecpar, outputCodecContext);

    if (avcodec_open2(pCodecContext, pCodec, NULL) < 0)
    {
        std::cerr<<"failed to open codec through avcodec_open2\n";
            return -1;
    }

    if (avcodec_open2(outputCodecContext, outputCodec, NULL) < 0)
    {
        std::cerr<<"failed to open output codec through avcodec_open2\n";
            return -1;
    }


    if (!(outputFormat->flags & AVFMT_NOFILE)) {
        if (avio_open(&outputFormatContext->pb, "test.mp4", AVIO_FLAG_WRITE) < 0) {
            std::cout << "Failed to open file" << std::endl;
            return -1;
        }
    }

    if (avformat_write_header(outputFormatContext, NULL) < 0) {
        std::cout << "Failed to write header" << std::endl;
        return -1;
    }

    av_dump_format(outputFormatContext, 0, "test.mp4", 1);


    AVFrame *pFrame = av_frame_alloc();
    if (!pFrame)
    {
        std::cerr<<"failed to allocate memory for AVFrame\n";
            return -1;
    }
    
    // https://ffmpeg.org/doxygen/trunk/structAVPacket.html
    AVPacket *pPacket = av_packet_alloc();
    if (!pPacket)
    {
            std::cerr<<"failed to allocate memory for AVPacket\n";
            return -1;
    }

    int response = 0;
    int how_many_packets_to_process = 300;

    // fill the Packet with data from the Stream
    // https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61
    while (av_read_frame(pFormatContext, pPacket) >= 0)
    {
            // if it's the video stream
            if (pPacket->stream_index == video_stream_index) {
            std::cout<<"AVPacket->pts "<<pPacket->pts;
                // if(av_write_frame(outputFormatContext, pPacket)<0)
                //  std::cout<<"error writing output frame\n";
                // pushFrame(pFrame, outputCodecContext, pPacket, outputFormatContext, outputCodec);
                response = decode_packet(pPacket, pCodecContext, pFrame);
                if (response < 0)
                    break;
                // stop it, otherwise we'll be saving hundreds of frames
                if (--how_many_packets_to_process <= 0) break;
            }
            // https://ffmpeg.org/doxygen/trunk/group__lavc__packet.html#ga63d5a489b419bd5d45cfd09091cbcbc2
            av_packet_unref(pPacket);
    }   

    if(av_write_trailer(outputFormatContext)<0)
        std::cout <<"Error writing output trailer\n";


    return 0;
}

/**
 * Re-encodes one decoded frame with the encoder that matches the source codec
 * and writes the raw encoded packet data to "<FrameNo, 6 digits>.mp4".
 *
 * A throw-away encoder is created per call and flushed after the single frame,
 * so encoders that buffer input still emit their packet.
 *
 * @param pCodecCtx  Decoder context the frame came from (supplies codec id and pixel format).
 * @param pFrame     Decoded frame to encode.
 * @param FrameNo    Frame number used to build the output file name.
 * @return 0 on success, a negative value on failure.
 */
int save_frame_as_mpeg(AVCodecContext* pCodecCtx, AVFrame* pFrame, int FrameNo) {
    if (!pCodecCtx || !pFrame) {
        return -1;
    }

    const AVCodec* mpegCodec = avcodec_find_encoder(pCodecCtx->codec_id);
    if (!mpegCodec) {
        std::cout<<"failed to open mpegCodec\n";
        return -1;
    }
    AVCodecContext* mpegContext = avcodec_alloc_context3(mpegCodec);
    if (!mpegContext) {
        std::cout<<"failed to open mpegContext\n";
        return -1;
    }

    AVPacket* packet = av_packet_alloc();
    FILE* MPEGFile = nullptr;

    // Single exit helper: closes the file and frees the packet and the encoder.
    auto finish = [&](int code) {
        if (MPEGFile) {
            fclose(MPEGFile);
        }
        av_packet_free(&packet);
        avcodec_free_context(&mpegContext);
        return code;
    };

    if (!packet) {
        return finish(-1);
    }

    mpegContext->pix_fmt = pCodecCtx->pix_fmt;
    mpegContext->height = pFrame->height;
    mpegContext->width = pFrame->width;
    mpegContext->time_base = AVRational{ 1,10 };

    int ret = avcodec_open2(mpegContext, mpegCodec, NULL);
    if (ret < 0) {
        return finish(ret);
    }

    ret = avcodec_send_frame(mpegContext, pFrame);
    if (ret >= 0) {
        // A null frame signals end of stream and forces out any buffered packet.
        ret = avcodec_send_frame(mpegContext, NULL);
    }
    if (ret < 0) {
        std::cout<<"failed to send frame for mpegContext\n";
        return finish(ret);
    }

    char MPEGFName[256];
    snprintf(MPEGFName, sizeof(MPEGFName), "%06d.mp4", FrameNo);

    // Drain every packet; the output file is created lazily on the first one.
    while ((ret = avcodec_receive_packet(mpegContext, packet)) >= 0) {
        if (!MPEGFile) {
            MPEGFile = fopen(MPEGFName, "wb");
            if (!MPEGFile) {
                std::cout<<"failed to open "<<MPEGFName<<" for writing\n";
                return finish(-1);
            }
        }
        const size_t written = fwrite(packet->data, 1, packet->size, MPEGFile);
        const bool complete = (written == static_cast<size_t>(packet->size));
        av_packet_unref(packet);
        if (!complete) {
            std::cout<<"failed to write "<<MPEGFName<<"\n";
            return finish(-1);
        }
    }

    // After a flush the only normal terminator is end-of-stream.
    if (ret != AVERROR_EOF || !MPEGFile) {
        std::cout<<"failed to receive packet for mpegContext\terrocode: "<<ret<<std::endl;
        return finish(ret < 0 && ret != AVERROR_EOF ? ret : -1);
    }

    return finish(0);
}


/**
 * Encodes one decoded frame as a still image and writes the encoded packet
 * data to "c:\folder\dvr-<FrameNo, 6 digits>.jpg".
 *
 * A throw-away encoder is created per call, flushed after the single frame and
 * released on every exit path.
 *
 * NOTE(review): the encoder selected is AV_CODEC_ID_JPEG2000 while the file is
 * named ".jpg"; confirm whether a JPEG 2000 codestream is really what is wanted.
 *
 * @param pCodecCtx  Decoder context the frame came from (supplies the pixel format).
 * @param pFrame     Decoded frame to encode.
 * @param FrameNo    Frame number used to build the output file name.
 * @return 0 on success, a negative value on failure.
 */
int save_frame_as_jpeg(AVCodecContext* pCodecCtx, AVFrame* pFrame, int FrameNo) {
    if (!pCodecCtx || !pFrame) {
        return -1;
    }

    const AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_JPEG2000);
    if (!jpegCodec) {
        return -1;
    }
    AVCodecContext* jpegContext = avcodec_alloc_context3(jpegCodec);
    if (!jpegContext) {
        return -1;
    }

    AVPacket* packet = av_packet_alloc();
    FILE* JPEGFile = nullptr;

    // Single exit helper: closes the file and frees the packet and the encoder.
    auto finish = [&](int code) {
        if (JPEGFile) {
            fclose(JPEGFile);
        }
        av_packet_free(&packet);
        avcodec_free_context(&jpegContext);
        return code;
    };

    if (!packet) {
        return finish(-1);
    }

    jpegContext->pix_fmt = pCodecCtx->pix_fmt;
    jpegContext->height = pFrame->height;
    jpegContext->width = pFrame->width;
    jpegContext->time_base = AVRational{ 1,10 };

    int ret = avcodec_open2(jpegContext, jpegCodec, NULL);
    if (ret < 0) {
        return finish(ret);
    }

    ret = avcodec_send_frame(jpegContext, pFrame);
    if (ret >= 0) {
        // A null frame signals end of stream and forces out any buffered packet.
        ret = avcodec_send_frame(jpegContext, NULL);
    }
    if (ret < 0) {
        return finish(ret);
    }

    char JPEGFName[256];
    snprintf(JPEGFName, sizeof(JPEGFName), "c:\\folder\\dvr-%06d.jpg", FrameNo);

    // Drain every packet; the output file is created lazily on the first one.
    while ((ret = avcodec_receive_packet(jpegContext, packet)) >= 0) {
        if (!JPEGFile) {
            JPEGFile = fopen(JPEGFName, "wb");
            if (!JPEGFile) {
                // Typically the hard-coded target directory does not exist.
                std::cerr<<"failed to open "<<JPEGFName<<" for writing\n";
                return finish(-1);
            }
        }
        const size_t written = fwrite(packet->data, 1, packet->size, JPEGFile);
        const bool complete = (written == static_cast<size_t>(packet->size));
        av_packet_unref(packet);
        if (!complete) {
            return finish(-1);
        }
    }

    // After a flush the only normal terminator is end-of-stream, and at least
    // one packet must have been written.
    if (ret != AVERROR_EOF) {
        return finish(ret);
    }
    if (!JPEGFile) {
        return finish(-1);
    }

    return finish(0);
}

/**
 * Feeds one compressed packet to the decoder and handles every frame it yields:
 * each frame is saved through save_frame_as_jpeg() and save_frame_as_mpeg(),
 * then its properties are printed.
 *
 * @param pPacket        Compressed packet to decode.
 * @param pCodecContext  Opened decoder context.
 * @param pFrame         Caller-owned scratch frame, unreferenced after each use.
 * @return 0 when the packet was consumed (including "decoder needs more input"
 *         and end of stream), the libav error code if decoding failed, or -1
 *         if a frame could not be saved.
 */
static int decode_packet(AVPacket *pPacket, AVCodecContext *pCodecContext, AVFrame *pFrame)
{
  // Supply raw packet data as input to the decoder
  // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3
  int response = avcodec_send_packet(pCodecContext, pPacket);
  if (response < 0) {
    std::cerr<<"Error while sending a packet to the decoder: "<<av_err2str(response)<<std::endl;
    return response;
  }

  for (;;)
  {
    // Fetch the next decoded frame, if one is ready
    // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c
    response = avcodec_receive_frame(pCodecContext, pFrame);
    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
      // Not an error: the decoder needs more input or is fully drained.
      break;
    }
    if (response < 0) {
      std::cerr<<"Error while receiving a frame from the decoder: "<<av_err2str(response)<<std::endl;
      return response;
    }

    if (save_frame_as_jpeg(pCodecContext, pFrame, pCodecContext->frame_number) < 0)
    {
      std::cerr<<"Failed to save frame as jpeg\n";
      // Drop the frame's buffers before bailing out so they are not held.
      av_frame_unref(pFrame);
      return -1;
    }

    if (save_frame_as_mpeg(pCodecContext, pFrame, pCodecContext->frame_number) < 0)
    {
      std::cerr<<"Failed to save frame as mpeg\n";
      av_frame_unref(pFrame);
      return -1;
    }

    std::cout<<
        "Frame "<<pCodecContext->frame_number<< "type= "<<av_get_picture_type_char(pFrame->pict_type)<<" size= "<<pFrame->pkt_size<<" bytes, format= "<<pFrame->format<<" "<<pFrame->pts<<"pts key_frame "<<pFrame->key_frame<< " [DTS"<<pFrame->coded_picture_number<<" ]\n";

    // Warn when the frame is not planar YUV 4:2:0, the only format this
    // code was written against.
    if (pFrame->format != AV_PIX_FMT_YUV420P)
    {
      std::cout<<"Warning: the generated file may not be a grayscale image, but could e.g. be just the R component if the video format is RGB\n";
    }

    av_frame_unref(pFrame);
  }
  return 0;
}

The question I am seeking an answer to is: how can I use libavcodec to split an mp4 file into 1-second clips (with each clip also in mp4 format)?

0

There are 0 best solutions below