Bad src image ptrs converting YUV to RGB after H264 decoding with libav and c++

100 Views Asked by At

I am getting "bad src image ptrs" errors when trying to convert my frames to RGB with sws_scale after decoding frames from an H264 file, and I cannot figure out what is going wrong.

I checked what is causing the error and found the check_image_pointers function in swscale.c which validates that the planes and line sizes needed for the pixel format (av_pix_fmt_desc_get) are present in the given data which seems not to be the case with my data.

The written pgm files look ok to me, also replaying the file works.

I printed the corresponding data of my frame. The problem seems to be that planes 1 and 2 have line sizes of 0, although all 3 planes seem to have data. The line size of plane 0 is three times the image width, which is also confusing to me.

Here is my output:

Have videoStreamIndex 0 codec id: 27
saving frame 1 C:\\tmp\\output-frame-1.pgm colorspace 2 pix_fmt 0 w: 3840 h: 2160
Required:
plane 0 : 0
plane 1 : 1
plane 2 : 2
plane 3 : 0
Present:
Frame plane 0: 1 , 11520
Frame plane 1: 1 , 0
Frame plane 2: 1 , 0
Frame plane 3: 0 , 0
Frame plane 4: 0 , 0
Frame plane 5: 0 , 0
Frame plane 6: 0 , 0
Frame plane 7: 0 , 0

Here is the whole code of my application; the issue occurs in the decode method:

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cstdint>
#include <string>
#include <iostream>
#include <chrono>

// #include <opencv2/highgui.hpp>
// #include <opencv2/opencv.hpp>

extern "C"
{

#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include "libavutil/imgutils.h"
}

#define INBUF_SIZE 4096
// Decodes the H.264 video stream of a file and, for every decoded frame,
// prints diagnostics, runs an sws_scale conversion to RGB24 and writes the
// first plane of the decoded frame to a PGM file.
//
// The input is opened twice: once through libavformat (constructor) to locate
// the H.264 stream, and once through fopen (readAndDecode) to feed raw bytes
// to the bitstream parser.
class H264Decoder
{
public:
    // Opens `inputFilename`, locates the first H.264 stream, and allocates the
    // parser, codec context, frame and packet used by the other methods.
    // Throws std::runtime_error if any of these steps fails.
    // `outputFilenamePrefix` is the prefix of the PGM files written by decode().
    H264Decoder(const std::string &inputFilename, const std::string &outputFilenamePrefix)
    {

        // Open input file
        if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0)
        {
            throw std::runtime_error("Could not open input file");
        }

        if (avformat_find_stream_info(formatContext, nullptr) < 0)
        {
            throw std::runtime_error("Could not find stream information");
        }

        // Find H.264 video stream (the first stream whose codec id matches wins)
        for (unsigned i = 0; i < formatContext->nb_streams; i++)
        {
            if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264)
            {
                videoStreamIndex = i;
                std::cout << "Have videoStreamIndex " << videoStreamIndex << " codec id: " << formatContext->streams[i]->codecpar->codec_id << std::endl;
                break;
            }
        }

        // -1 is the initial value of videoStreamIndex, i.e. the loop found nothing
        if (videoStreamIndex == -1)
        {
            throw std::runtime_error("H.264 video stream not found");
        }

        // Initialize codec and codec context
        const AVCodec *codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
        if (!codec)
        {
            throw std::runtime_error("Codec not found");
        }

        // Parser used by readAndDecode() to split the raw byte stream into packets
        parser = av_parser_init(codec->id);
        if (!parser)
        {
            throw std::runtime_error("parser not found");
        }

        codecContext = avcodec_alloc_context3(codec);
        if (!codecContext)
        {
            throw std::runtime_error("Could not allocate codec context");
        }

        if (avcodec_open2(codecContext, codec, nullptr) < 0)
        {
            throw std::runtime_error("Could not open codec");
        }

        // Initialize frame
        // NOTE(review): frame->format is written before the null check below,
        // so a failed av_frame_alloc() would be dereferenced first.
        frame = av_frame_alloc();
        frame->format = AV_PIX_FMT_YUV420P;
        if (!frame)
        {
            throw std::runtime_error("Could not allocate frame");
        }

        inputPacket = av_packet_alloc();
        if (!inputPacket)
        {
            throw std::runtime_error("Could not allocate packet");
        }

        inputFilename_ = inputFilename;
        outputFilenamePrefix_ = outputFilenamePrefix;
    }

    // Sends the current `inputPacket` to the decoder and processes every frame
    // the decoder returns for it: prints diagnostics, converts the frame with
    // sws_scale and saves plane 0 as a PGM file.
    // Terminates the process with exit(1) on send/receive errors.
    void decode()
    {
        char buf[1024];
        int ret;

        ret = avcodec_send_packet(codecContext, inputPacket);
        if (ret < 0)
        {
            fprintf(stderr, "Error sending a packet for decoding\n");
            exit(1);
        }

        // One packet may yield zero or more frames; keep receiving until the
        // decoder reports EAGAIN (needs more input) or EOF.
        while (ret >= 0)
        {
            ret = avcodec_receive_frame(codecContext, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                return;
            else if (ret < 0)
            {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            /* the picture is allocated by the decoder. no need to
               free it */
            // Output file name: "<prefix>-<frame number>.pgm"
            snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);

            std::cout << "saving frame " << codecContext->frame_num << " " << buf << " colorspace " << frame->colorspace << " pix_fmt " << codecContext->pix_fmt << " w: " << frame->width << " h: " << frame->height << std::endl;

            // NOTE(review): a scaling context is created for every frame and no
            // sws_freeContext() call is visible in this class.
            SwsContext *sws_ctx = NULL;

            // Same width/height on both sides: only the pixel format changes
            // (decoder pix_fmt -> RGB24).
            sws_ctx = sws_getContext(codecContext->width,
                                     codecContext->height,
                                     codecContext->pix_fmt,
                                     codecContext->width,
                                     codecContext->height,
                                     AV_PIX_FMT_RGB24,
                                     SWS_BICUBIC,
                                     NULL,
                                     NULL,
                                     NULL);

            // Destination frame plus a separately allocated RGB24 buffer.
            // NOTE(review): neither frame2 nor frame2_buffer is freed anywhere
            // in this class, so both are allocated anew for every frame.
            AVFrame *frame2 = av_frame_alloc();
            int num_bytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
            uint8_t *frame2_buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
            // NOTE(review): the linesize argument here is frame->linesize (the
            // decoded frame), not frame2->linesize. As written, the RGB24
            // strides are stored into `frame`, and frame2->linesize is not
            // written by this call. The sws_scale call below then reads
            // frame->linesize as the source strides.
            av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

            // Diagnostics: which plane each of the first four component slots
            // of the decoder's pixel format descriptor refers to ...
            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(codecContext->pix_fmt);
            std::cout << "Required:" << std::endl;
            for (int i = 0; i < 4; i++)
            {
                int plane = desc->comp[i].plane;
                std::cout << "plane " << i << " : " << plane << std::endl;
            }
            // ... versus which data pointers are non-null in the decoded frame
            // and what line size each one currently carries.
            std::cout << "Present:" << std::endl;
            for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i)
            {
                std::cout << "Frame plane " << i << ": " << static_cast<bool>(frame->data[i]) << " , " << frame->linesize[i] << std::endl;
            }

            // Convert the whole picture (rows 0 .. height) from `frame` into `frame2`.
            // The return value of sws_scale is not inspected.
            sws_scale(sws_ctx, frame->data,
                      frame->linesize, 0, codecContext->height,
                      frame2->data, frame2->linesize);

            // cv::Mat img(frame2->height, frame2->width, CV_8UC3, frame2->data[0]);
            // cv::imshow("Image", img);

            // Write plane 0 of the decoded frame as a greyscale PGM image.
            pgm_save(frame->data[0], frame->linesize[0],
                     frame->width, frame->height, buf);
        }
    }

    // Releases the format context, codec context, frame and packet.
    // NOTE(review): avformat_free_context() is called right after
    // avformat_close_input() on the same pointer; the second call looks
    // redundant - confirm against the libavformat documentation.
    // NOTE(review): `parser` is not released here.
    ~H264Decoder()
    {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
        avcodec_free_context(&codecContext);
        av_frame_free(&frame);
        av_packet_free(&inputPacket);
    }

    // Reads the input file in INBUF_SIZE chunks, lets the parser split the
    // bytes into packets and calls decode() for each complete packet. Prints
    // the number of decoded frames and the elapsed time at the end.
    void readAndDecode()
    {
        FILE *f;
        // Extra AV_INPUT_BUFFER_PADDING_SIZE bytes follow the readable area.
        // NOTE(review): the padding bytes are not initialised in this function.
        uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
        uint8_t *data;
        size_t data_size;
        int ret;
        int eof;
        // NOTE(review): the result of fopen() is not checked and the file is
        // not closed anywhere in this function.
        f = fopen(inputFilename_.c_str(), "rb");
        auto start = std::chrono::high_resolution_clock::now();
        do
        {
            /* read raw data from the input file */
            data_size = fread(inbuf, 1, INBUF_SIZE, f);
            if (ferror(f))
                break;
            // A zero-byte read is treated as end of file.
            eof = !data_size;

            /* use the parser to split the data into frames */
            data = inbuf;
            // At EOF the parser is still called (with data_size == 0) so it can
            // hand out whatever it has buffered.
            while (data_size > 0 || eof)
            {
                ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size,
                                       data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                if (ret < 0)
                {
                    fprintf(stderr, "Error while parsing\n");
                    exit(1);
                }
                // `ret` is the number of input bytes the parser consumed.
                data += ret;
                data_size -= ret;

                // A non-zero packet size means the parser produced a packet.
                if (inputPacket->size)
                {
                    decode();
                }
                else if (eof)
                {
                    break;
                }
            }
        } while (!eof);
        auto diff = std::chrono::high_resolution_clock::now() - start;
        std::cout << "Decoded " << codecContext->frame_num << " frames in " << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms" << std::endl;
    }

private:
    AVFormatContext *formatContext = nullptr; // demuxer context, used to find the H.264 stream
    AVCodecContext *codecContext = nullptr;   // decoder context
    AVCodecParserContext *parser;             // bitstream parser; has no default initialiser
    AVFrame *frame = nullptr;                 // receives decoded frames
    AVFrame *frameRgb = nullptr;              // not referenced by any method of this class
    AVPacket *inputPacket = nullptr;          // packet filled by the parser and sent to the decoder
    int videoStreamIndex = -1;                // index of the H.264 stream, -1 while not found
    std::string inputFilename_;
    std::string outputFilenamePrefix_;

    // Writes a binary PGM ("P5") image of xsize x ysize pixels with max value 255.
    // `buf` points at the first row and `wrap` is the distance in bytes
    // between the starts of consecutive rows. Calls exit(1) if the file
    // cannot be opened.
    static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char *filename)
    {
        FILE *f = fopen(filename, "wb");
        if (!f)
        {
            std::cout << "Error opening file for saving PGM" << std::endl;
            exit(1);
        }

        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        // Only the first xsize bytes of every row are written; the rest of the
        // row (up to `wrap`) is skipped.
        for (int i = 0; i < ysize; i++)
            fwrite(buf + i * wrap, 1, xsize, f);

        fclose(f);
    }
};

/**
 * Entry point: decodes the file named by the first command line argument.
 *
 * @param argc number of command line arguments (including the program name)
 * @param argv argv[1] must be the path of the input file
 * @return 0 on success, 1 if the argument is missing or decoding throws
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        std::cout << "Please provide input file name as parameter" << std::endl;
        // Stop here: argv[1] is a null pointer when no argument was given, and
        // building a std::string from it is undefined behaviour.
        return 1;
    }

    std::string inputFilename = argv[1];
    std::string outputFilenamePrefix = "C:\\tmp\\output-frame";

    try
    {

        H264Decoder decoder(inputFilename, outputFilenamePrefix);
        decoder.readAndDecode();
    }
    catch (const std::exception &e)
    {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}
2

There are 2 best solutions below

0
Sebastian DELLING On BEST ANSWER

The issue was me trying to initialize frame2. I overrode the linesize of the yuv frame instead of the rgb frame: av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

I removed the complete initialization of frame2 and used av_image_alloc to allocate the buffers for the rgb frame.

Here is my current working code in case anyone wants to use it as a reference. Conversion changed from RGB to BGR to show it with OpenCV.

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>

#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}

#define INBUF_SIZE 4096
class H264Decoder {
public:
    H264Decoder(const std::string& inputFilename, const std::string& outputFilenamePrefix, uint16_t outputWidth,
                uint16_t outputHeight, bool show, bool save)
            : doShow(show), doSave(save), inputFilename_(inputFilename), outputFilenamePrefix_(outputFilenamePrefix),
              outputHeight(outputHeight), outputWidth(outputWidth) {
        // Open input file
        if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0) {
            throw std::runtime_error("Could not open input file");
        }

        if (avformat_find_stream_info(formatContext, nullptr) < 0) {
            throw std::runtime_error("Could not find stream information");
        }

        // Find H.264 video stream
        for (unsigned i = 0; i < formatContext->nb_streams; i++) {
            if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) {
                videoStreamIndex = i;
                break;
            }
        }

        if (videoStreamIndex == -1) {
            throw std::runtime_error("H.264 video stream not found");
        }

        // Initialize codec and codec context
        codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
        if (!codec) {
            throw std::runtime_error("Codec not found");
        }

        parser = av_parser_init(codec->id);
        if (!parser) {
            throw std::runtime_error("parser not found");
        }

        codecContext = avcodec_alloc_context3(codec);
        if (!codecContext) {
            throw std::runtime_error("Could not allocate codec context");
        }

        if (avcodec_open2(codecContext, codec, nullptr) < 0) {
            throw std::runtime_error("Could not open codec");
        }

        // Initialize frame
        frame = av_frame_alloc();
        if (!frame) {
            throw std::runtime_error("Could not allocate frame");
        }

        frameRgb = av_frame_alloc();
        if (!frameRgb) {
            throw std::runtime_error("Could not allocate frame");
        }
        av_image_alloc(frameRgb->data, frameRgb->linesize, outputWidth, outputHeight, AV_PIX_FMT_BGR24, 32);

        inputPacket = av_packet_alloc();
        if (!inputPacket) {
            throw std::runtime_error("Could not allocate packet");
        }

    }

    void decode() {
        char buf[1024];
        int ret;

        ret = avcodec_send_packet(codecContext, inputPacket);
        if (ret < 0) {
            fprintf(stderr, "Error sending a packet for decoding\n");
            exit(1);
        }

        while (ret >= 0) {
            ret = avcodec_receive_frame(codecContext, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                return;
            else if (ret < 0) {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);

            SwsContext* sws_ctx = NULL;

            sws_ctx = sws_getContext(codecContext->width, codecContext->height, codecContext->pix_fmt, outputWidth,
                                     outputHeight, AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL, NULL);
            if (doSave) {
                pgm_save(frame->data[0], frame->linesize[0], frame->width, frame->height, buf);
            }

            sws_scale(sws_ctx, frame->data, frame->linesize, 0, codecContext->height, frameRgb->data,
                      frameRgb->linesize);

            if (doShow) {
                cv::Mat img(outputHeight, outputWidth, CV_8UC3, frameRgb->data[0]);
                cv::imshow("Image", img);
                cv::waitKey(1);
            }
        }
    }

    ~H264Decoder() {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
        avcodec_free_context(&codecContext);
        av_frame_free(&frame);
        av_frame_free(&frameRgb);
        av_packet_free(&inputPacket);
        av_freep(&frameRgb->data[0]);
    }

    void readAndDecode() {
        FILE* f;
        uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
        uint8_t* data;
        size_t data_size;
        int ret;
        int eof;
        f = fopen(inputFilename_.c_str(), "rb");
        if (!f) {
            std::cout << "Error opening file" << std::endl;;
            exit(1);
        }
        memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
        auto start = std::chrono::high_resolution_clock::now();
        do {
            /* read raw data from the input file */
            data_size = fread(inbuf, 1, INBUF_SIZE, f);
            if (ferror(f))
                break;
            eof = !data_size;

            /* use the parser to split the data into frames */
            data = inbuf;
            while (data_size > 0 || eof) {
                ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size, data, data_size,
                                       AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                if (ret < 0) {
                    fprintf(stderr, "Error while parsing\n");
                    exit(1);
                }
                data += ret;
                data_size -= ret;

                if (inputPacket->size) {
                    decode();
                } else if (eof) {
                    break;
                }
            }
        } while (!eof);
        auto diff = std::chrono::high_resolution_clock::now() - start;
        std::cout << "Decoded " << codecContext->frame_num << " frames in "
                  << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms "
                  << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() / codecContext->frame_num
                  << " ms/frame " << std::endl;
    }

private:
    bool doShow{false};
    bool doSave{true};
    const AVCodec* codec;
    AVFormatContext* formatContext = nullptr;
    AVCodecContext* codecContext = nullptr;
    AVCodecParserContext* parser;
    AVFrame* frame = nullptr;
    AVFrame* frameRgb = nullptr;
    AVPacket* inputPacket = nullptr;
    int videoStreamIndex = -1;
    std::string inputFilename_;
    std::string outputFilenamePrefix_;
    uint16_t outputHeight = 1280;
    uint16_t outputWidth = 1632;

    static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename) {
        FILE* f = fopen(filename, "wb");
        if (!f) {
            std::cout << "Error opening file for saving PGM" << std::endl;
            exit(1);
        }

        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        for (int i = 0; i < ysize; i++)
            fwrite(buf + i * wrap, 1, xsize, f);

        fclose(f);
    }
};

int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cout << "Please provide input file name as parameter" << std::endl;
        exit(1);
    }

    std::string inputFilename = argv[1];
    std::string outputFilenamePrefix = "C:\\tmp\\pics\\output-frame";

    try {
        H264Decoder decoder(inputFilename, outputFilenamePrefix, 1632, 1280, true, false);
        decoder.readAndDecode();
    } catch (const std::exception& e) {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}

0
navaneeth mohan On

Since your error says bad src image it means there's something wrong with the way you are allocating your YUV frame or you aren't passing your arguments to sws_scale correctly. Since you say that your written pgm files work correctly, I'm leaning more towards the latter reason.

In your call to sws_scale try one of the following:

  1. (const uint8_t * const *)frame instead of frame->data
  2. frame->data[0] instead of frame->data.

Question: I see your class has a AVFrame *frameRgb. Is this supposed to be the output frame? Then what is AVFrame *frame2? Question: What version of FFMPEG are you using? Question: Could you please use FFPROBE on your output pgm file to confirm that it is indeed a YUV420P?

Here's a C++ wrapper that works for me when rescaling video frames. Unlike your case, the input frame is not a member variable. But, the output frame is a member variable and it's allocated in the constructor. The part about pts and pkt_dts are probably not relevant for your case.

VideoRescaler.hpp

// Converts video frames from one size / pixel format to another and keeps the
// result in a frame it holds itself.
// NOTE(review): no destructor is declared here, so nothing in this
// declaration releases swsCtx_ or outFrame_ - confirm where they are freed.
struct VideoRescaler
{
    struct SwsContext *swsCtx_;   // scaling context used by Rescale()
    AVFrame *outFrame_;           // destination frame written by Rescale()
    // Converts inFrame into outFrame_.
    void Rescale(AVFrame *inFrame);
    // i* parameters describe the input picture, o* parameters the output picture.
    VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt, int oWidth, int oHeight, enum AVPixelFormat oPixFmt);
    int iWidth_,iHeight_,oWidth_,oHeight_;   // input / output dimensions
    enum AVPixelFormat iPixFmt_,oPixFmt_;    // input / output pixel formats
};

VideoRescaler.cpp

// Creates the output frame (with buffers for oWidth x oHeight in oPixFmt) and
// the scaling context that converts from the input geometry/format to it.
// Prints a message and terminates the process with exit(1) if any allocation
// fails, matching the error handling used by Rescale().
VideoRescaler::VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt, int oWidth, int oHeight, enum AVPixelFormat oPixFmt) : 
iWidth_(iWidth),iHeight_(iHeight),oWidth_(oWidth),oHeight_(oHeight),iPixFmt_(iPixFmt),oPixFmt_(oPixFmt)
{
    outFrame_ = av_frame_alloc();
    if(!outFrame_)
    {
        // Without this check the field assignments below would write through
        // a null pointer.
        printf("Failed to allocate output frame\n");
        exit(1);
    }
    outFrame_->width = oWidth_;
    outFrame_->height = oHeight_;
    outFrame_->format = oPixFmt_;

    // Alignment 0 lets libavutil choose a suitable value.
    const int bufRet = av_frame_get_buffer(outFrame_,0);
    if(bufRet < 0)
    {
        printf("Failed to allocate output frame buffers: %d\n",bufRet);
        exit(1);
    }

    swsCtx_ = sws_getContext(
        iWidth_, iHeight_, iPixFmt_,
        oWidth_, oHeight_, oPixFmt_,
        SWS_BILINEAR, NULL, NULL, NULL
    );
    if(!swsCtx_)
    {
        // Returned when the size/format combination is not supported.
        printf("Failed to create scaling context\n");
        exit(1);
    }

    printf("INIT RESCALER %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);
}


// Converts inFrame into outFrame_ using the scaling context created by the
// constructor, then copies pts and pkt_dts to the output frame.
// Terminates the process with exit(1) if inFrame is null or the conversion
// does not produce a full output picture.
void VideoRescaler::Rescale(AVFrame *inFrame)
{
    if(!inFrame)
    {
        printf("Failed to rescale frame: no input frame\n");
        exit(1);
    }

    printf("RESCALING %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);
    // Pass the plane pointer array itself. Casting the AVFrame pointer to
    // uint8_t** only works by accident of `data` being the first member.
    const int ret = sws_scale(
        swsCtx_,
        inFrame->data,
        inFrame->linesize,
        0,
        inFrame->height,
        outFrame_->data,
        outFrame_->linesize
    );

    if(ret != outFrame_->height)
    {
        // av_err2str is a C compound-literal macro and is not usable from C++;
        // av_strerror with a local buffer is the portable equivalent. Only
        // negative values are error codes - a non-negative value is the
        // number of rows written.
        char errText[AV_ERROR_MAX_STRING_SIZE] = "unexpected number of output rows";
        if(ret < 0)
        {
            av_strerror(ret, errText, sizeof(errText));
        }
        printf("Failed to rescale frame %d:%s\n",ret,errText);
        exit(1);
    }
    outFrame_->pts = inFrame->pts;
    outFrame_->pkt_dts = inFrame->pkt_dts;
}