Bad src image ptrs converting YUV to RGB after H264 decoding with libav and c++

Question

Bad src image ptrs converting YUV to RGB after H264 decoding with libav and c++

100 Views Asked by Sebastian DELLING At 30 October 2023 at 11:23

I am getting "bad src image ptrs" errors when trying to convert my frames to RGB with sws_scale after decoding frames from an H264 file, and I cannot figure out what is going wrong.

I checked what is causing the error and found the check_image_pointers function in swscale.c which validates that the planes and line sizes needed for the pixel format (av_pix_fmt_desc_get) are present in the given data which seems not to be the case with my data.

The written pgm files look ok to me, also replaying the file works.

I printed the corresponding data of my frame. The problem seems to be that planes 1 and 2 have line sizes of 0, although all 3 planes seem to have data. The line size of plane 0 is three times the image width, which is also confusing to me.

Here is my output:

Have videoStreamIndex 0 codec id: 27
saving frame 1 C:\\tmp\\output-frame-1.pgm colorspace 2 pix_fmt 0 w: 3840 h: 2160
Required:
plane 0 : 0
plane 1 : 1
plane 2 : 2
plane 3 : 0
Present:
Frame plane 0: 1 , 11520
Frame plane 1: 1 , 0
Frame plane 2: 1 , 0
Frame plane 3: 0 , 0
Frame plane 4: 0 , 0
Frame plane 5: 0 , 0
Frame plane 6: 0 , 0
Frame plane 7: 0 , 0

Here is the whole code of my application; the issue occurs in the decode method:

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cstdint>
#include <string>
#include <iostream>
#include <chrono>

// #include <opencv2/highgui.hpp>
// #include <opencv2/opencv.hpp>

extern "C"
{

#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include "libavutil/imgutils.h"
}

#define INBUF_SIZE 4096
// Decodes the H.264 video of a file frame by frame: each decoded frame is
// converted to RGB24 with sws_scale and its first plane is written out as a
// PGM image named "<outputFilenamePrefix>-<frame number>.pgm".
//
// The constructor uses libavformat only to locate the H.264 stream; the actual
// data is read a second time with fopen() in readAndDecode() and split into
// packets by the codec parser.
class H264Decoder
{
public:
    // Opens inputFilename, locates the first H.264 stream and sets up the
    // decoder, parser, frame and packet.
    // Throws std::runtime_error when any of these steps fails.
    H264Decoder(const std::string &inputFilename, const std::string &outputFilenamePrefix)
    {

        // Open input file
        if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0)
        {
            throw std::runtime_error("Could not open input file");
        }

        if (avformat_find_stream_info(formatContext, nullptr) < 0)
        {
            throw std::runtime_error("Could not find stream information");
        }

        // Find H.264 video stream
        for (unsigned i = 0; i < formatContext->nb_streams; i++)
        {
            if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264)
            {
                videoStreamIndex = i;
                std::cout << "Have videoStreamIndex " << videoStreamIndex << " codec id: " << formatContext->streams[i]->codecpar->codec_id << std::endl;
                break;
            }
        }

        // videoStreamIndex keeps its initial value of -1 when no stream matched.
        if (videoStreamIndex == -1)
        {
            throw std::runtime_error("H.264 video stream not found");
        }

        // Initialize codec and codec context
        const AVCodec *codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
        if (!codec)
        {
            throw std::runtime_error("Codec not found");
        }

        parser = av_parser_init(codec->id);
        if (!parser)
        {
            throw std::runtime_error("parser not found");
        }

        codecContext = avcodec_alloc_context3(codec);
        if (!codecContext)
        {
            throw std::runtime_error("Could not allocate codec context");
        }

        if (avcodec_open2(codecContext, codec, nullptr) < 0)
        {
            throw std::runtime_error("Could not open codec");
        }

        // Initialize frame
        // NOTE(review): frame is dereferenced on the next line before the null
        // check below, so a failed allocation would crash instead of throwing.
        frame = av_frame_alloc();
        frame->format = AV_PIX_FMT_YUV420P;
        if (!frame)
        {
            throw std::runtime_error("Could not allocate frame");
        }

        inputPacket = av_packet_alloc();
        if (!inputPacket)
        {
            throw std::runtime_error("Could not allocate packet");
        }

        inputFilename_ = inputFilename;
        outputFilenamePrefix_ = outputFilenamePrefix;
    }

    // Sends the current inputPacket to the decoder and processes every frame
    // the decoder returns for it: prints diagnostics, converts the frame to
    // RGB24 and saves plane 0 as a PGM file.
    // Terminates the process with exit(1) on send/receive errors.
    void decode()
    {
        char buf[1024];
        int ret;

        ret = avcodec_send_packet(codecContext, inputPacket);
        if (ret < 0)
        {
            fprintf(stderr, "Error sending a packet for decoding\n");
            exit(1);
        }

        // One packet may yield zero or more frames; keep receiving until the
        // decoder asks for more input (EAGAIN) or reports end of stream.
        while (ret >= 0)
        {
            ret = avcodec_receive_frame(codecContext, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                return;
            else if (ret < 0)
            {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            /* the picture is allocated by the decoder. no need to
               free it */
            snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);

            std::cout << "saving frame " << codecContext->frame_num << " " << buf << " colorspace " << frame->colorspace << " pix_fmt " << codecContext->pix_fmt << " w: " << frame->width << " h: " << frame->height << std::endl;

            // NOTE(review): a new scaler context is created for every decoded
            // frame and is not released anywhere in this method.
            SwsContext *sws_ctx = NULL;

            sws_ctx = sws_getContext(codecContext->width,
                                     codecContext->height,
                                     codecContext->pix_fmt,
                                     codecContext->width,
                                     codecContext->height,
                                     AV_PIX_FMT_RGB24,
                                     SWS_BICUBIC,
                                     NULL,
                                     NULL,
                                     NULL);

            // Destination frame and pixel buffer for the RGB24 conversion.
            // NOTE(review): neither frame2 nor frame2_buffer is freed in this
            // method.
            AVFrame *frame2 = av_frame_alloc();
            int num_bytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
            uint8_t *frame2_buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
            // NOTE(review): the second argument is frame->linesize (the decoded
            // YUV frame), not frame2->linesize. The RGB24 line sizes therefore
            // land in the source frame, which matches the reported output
            // (plane 0 = 3 * width, planes 1 and 2 = 0) and leaves
            // frame2->linesize unset for the sws_scale call below.
            av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

            // Diagnostics: which plane each component of the source pixel
            // format lives in, versus which planes/line sizes the frame has.
            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(codecContext->pix_fmt);
            std::cout << "Required:" << std::endl;
            for (int i = 0; i < 4; i++)
            {
                int plane = desc->comp[i].plane;
                std::cout << "plane " << i << " : " << plane << std::endl;
            }
            std::cout << "Present:" << std::endl;
            for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i)
            {
                std::cout << "Frame plane " << i << ": " << static_cast<bool>(frame->data[i]) << " , " << frame->linesize[i] << std::endl;
            }

            // Convert the whole picture (slice starting at row 0, full height).
            sws_scale(sws_ctx, frame->data,
                      frame->linesize, 0, codecContext->height,
                      frame2->data, frame2->linesize);

            // cv::Mat img(frame2->height, frame2->width, CV_8UC3, frame2->data[0]);
            // cv::imshow("Image", img);

            // Only plane 0 of the decoded frame is written to the PGM file.
            pgm_save(frame->data[0], frame->linesize[0],
                     frame->width, frame->height, buf);
        }
    }

    // Releases the libav objects owned by this instance.
    // NOTE(review): presumably avformat_close_input() already frees the format
    // context and resets the pointer, making the avformat_free_context() call
    // redundant - confirm against the libavformat documentation. The parser
    // created in the constructor is not released here.
    ~H264Decoder()
    {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
        avcodec_free_context(&codecContext);
        av_frame_free(&frame);
        av_packet_free(&inputPacket);
    }

    // Reads the input file in INBUF_SIZE chunks, lets the parser split the
    // bytes into packets, calls decode() for every packet produced and finally
    // prints the number of decoded frames and the elapsed time.
    void readAndDecode()
    {
        FILE *f;
        // Read buffer with AV_INPUT_BUFFER_PADDING_SIZE extra bytes at the end.
        uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
        uint8_t *data;
        size_t data_size;
        int ret;
        int eof;
        // NOTE(review): the result of fopen() is not checked, and the file is
        // never closed in this method.
        f = fopen(inputFilename_.c_str(), "rb");
        auto start = std::chrono::high_resolution_clock::now();
        do
        {
            /* read raw data from the input file */
            data_size = fread(inbuf, 1, INBUF_SIZE, f);
            if (ferror(f))
                break;
            // A read of zero bytes is treated as end of file.
            eof = !data_size;

            /* use the parser to split the data into frames */
            data = inbuf;
            // At eof the parser is still called with data_size == 0 until it
            // stops producing packets.
            while (data_size > 0 || eof)
            {
                ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size,
                                       data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                if (ret < 0)
                {
                    fprintf(stderr, "Error while parsing\n");
                    exit(1);
                }
                // ret is the number of input bytes the parser consumed.
                data += ret;
                data_size -= ret;

                if (inputPacket->size)
                {
                    decode();
                }
                else if (eof)
                {
                    break;
                }
            }
        } while (!eof);
        auto diff = std::chrono::high_resolution_clock::now() - start;
        std::cout << "Decoded " << codecContext->frame_num << " frames in " << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms" << std::endl;
    }

private:
    AVFormatContext *formatContext = nullptr;
    AVCodecContext *codecContext = nullptr;
    // NOTE(review): unlike the neighbouring pointers, parser has no initializer.
    AVCodecParserContext *parser;
    AVFrame *frame = nullptr;
    // Not used anywhere in this class.
    AVFrame *frameRgb = nullptr;
    AVPacket *inputPacket = nullptr;
    // Index of the H.264 stream inside formatContext; -1 until one is found.
    int videoStreamIndex = -1;
    std::string inputFilename_;
    std::string outputFilenamePrefix_;

    // Writes a binary PGM ("P5") image with maximum gray value 255.
    //   buf      - first byte of the image plane
    //   wrap     - distance in bytes between the starts of consecutive rows
    //   xsize    - number of bytes written per row (image width)
    //   ysize    - number of rows (image height)
    //   filename - path of the file to create
    // Terminates the process with exit(1) if the file cannot be opened.
    static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char *filename)
    {
        FILE *f = fopen(filename, "wb");
        if (!f)
        {
            std::cout << "Error opening file for saving PGM" << std::endl;
            exit(1);
        }

        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        // Rows are written one at a time because wrap may exceed xsize.
        for (int i = 0; i < ysize; i++)
            fwrite(buf + i * wrap, 1, xsize, f);

        fclose(f);
    }
};

// Entry point: decodes the H.264 file given as the first command-line argument
// and writes the frames with the prefix "C:\tmp\output-frame".
// Returns 0 on success and 1 when the argument is missing or decoding setup
// throws.
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        std::cout << "Please provide input file name as parameter" << std::endl;
        // Stop here: without this return argv[1] would be read although it
        // does not name an input file.
        return 1;
    }

    std::string inputFilename = argv[1];
    std::string outputFilenamePrefix = "C:\\tmp\\output-frame";

    try
    {

        H264Decoder decoder(inputFilename, outputFilenamePrefix);
        decoder.readAndDecode();
    }
    catch (const std::exception &e)
    {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}

Original Q&A

There are 2 best solutions below

navaneeth mohan On 30 October 2023 at 15:00

Since your error says bad src image it means there's something wrong with the way you are allocating your YUV frame or you aren't passing your arguments to sws_scale correctly. Since you say that your written pgm files work correctly, I'm leaning more towards the latter reason.

In your call to sws_scale try one of the following:

(const uint8_t * const *)frame instead of frame->data
frame->data[0] instead of frame->data.

Question: I see your class has an AVFrame *frameRgb. Is this supposed to be the output frame? If so, what is AVFrame *frame2 for? Question: What version of FFmpeg are you using? Question: Could you please run ffprobe on your output pgm file to confirm that it is indeed YUV420P?

Here's a C++ wrapper that works for me when rescaling video frames. Unlike in your case, the input frame is not a member variable, but the output frame is a member variable and is allocated in the constructor. The part about pts and pkt_dts is probably not relevant for your case.

VideoRescaler.hpp

// Converts video frames from one size / pixel format to another with
// libswscale. The converted picture is kept in outFrame_.
struct VideoRescaler
{
    // Scaler context created by the constructor.
    struct SwsContext *swsCtx_;
    // Destination frame that Rescale() writes into.
    AVFrame *outFrame_;

    // Sets up a rescaler from (iWidth x iHeight, iPixFmt) to
    // (oWidth x oHeight, oPixFmt).
    VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt,
                  int oWidth, int oHeight, enum AVPixelFormat oPixFmt);

    // Converts inFrame into outFrame_.
    void Rescale(AVFrame *inFrame);

    // Input and output dimensions.
    int iWidth_;
    int iHeight_;
    int oWidth_;
    int oHeight_;

    // Input and output pixel formats.
    enum AVPixelFormat iPixFmt_;
    enum AVPixelFormat oPixFmt_;
};

VideoRescaler.cpp

// Stores the conversion parameters, allocates the output frame together with
// its pixel buffers and creates the bilinear scaler context.
//   iWidth, iHeight, iPixFmt - geometry and pixel format of the input frames
//   oWidth, oHeight, oPixFmt - geometry and pixel format of the output frame
// Prints a message and terminates the process with exit(1) when an allocation
// fails, mirroring the error handling used by Rescale().
VideoRescaler::VideoRescaler(int iWidth, int iHeight, enum AVPixelFormat iPixFmt, int oWidth, int oHeight, enum AVPixelFormat oPixFmt) :
iWidth_(iWidth),iHeight_(iHeight),oWidth_(oWidth),oHeight_(oHeight),iPixFmt_(iPixFmt),oPixFmt_(oPixFmt)
{
    outFrame_ = av_frame_alloc();
    if (!outFrame_)
    {
        printf("Failed to allocate output frame\n");
        exit(1);
    }
    outFrame_->width = oWidth_;
    outFrame_->height = oHeight_;
    outFrame_->format = oPixFmt_;

    // Allocate the pixel buffers for the geometry/format set above; without
    // them sws_scale would write through null plane pointers.
    const int bufferResult = av_frame_get_buffer(outFrame_, 0);
    if (bufferResult < 0)
    {
        printf("Failed to allocate output frame buffers %d\n", bufferResult);
        exit(1);
    }

    swsCtx_ = sws_getContext(
        iWidth_, iHeight_, iPixFmt_,
        oWidth_, oHeight_, oPixFmt_,
        SWS_BILINEAR, NULL, NULL, NULL
    );
    if (!swsCtx_)
    {
        printf("Failed to create scaler context\n");
        exit(1);
    }

    printf("INIT RESCALER %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);
}


// Converts inFrame into outFrame_ and copies the timestamps (pts, pkt_dts)
// from the input to the output frame.
//   inFrame - decoded source frame; must not be null
// Prints a message and terminates the process with exit(1) when inFrame is
// null or when sws_scale does not produce outFrame_->height rows.
void VideoRescaler::Rescale(AVFrame *inFrame)
{
    if (!inFrame)
    {
        printf("Failed to rescale frame: no input frame\n");
        exit(1);
    }

    printf("RESCALING %d,%d-->%d,%d\n",iWidth_,iHeight_,oWidth_,oHeight_);

    // Pass the plane array explicitly instead of casting the AVFrame pointer,
    // which only works as long as `data` is the first member of AVFrame.
    const int ret = sws_scale(
        swsCtx_,
        inFrame->data,
        inFrame->linesize,
        0,
        inFrame->height,
        outFrame_->data,
        outFrame_->linesize
    );

    if(ret != outFrame_->height)
    {
        // Format the code into a local buffer; the av_err2str() macro relies
        // on a C compound literal and is not portable C++.
        char errorText[AV_ERROR_MAX_STRING_SIZE] = {0};
        av_make_error_string(errorText, sizeof(errorText), ret);
        printf("Failed to rescale frame %d:%s\n",ret,errorText);
        exit(1);
    }
    outFrame_->pts = inFrame->pts;
    outFrame_->pkt_dts = inFrame->pkt_dts;
}

**Sebastian DELLING** · Accepted Answer · 2023-10-31T12:47:57.890000

The issue was in my initialization of frame2: I overwrote the linesize of the YUV frame instead of that of the RGB frame: av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

I removed the complete initialization of frame2 and used av_image_alloc to allocate the buffers for the rgb frame.

Here is my current working code in case anyone wants to use it as a reference. Conversion changed from RGB to BGR to show it with OpenCV.

#include <chrono>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}

#define INBUF_SIZE 4096
class H264Decoder {
public:
    H264Decoder(const std::string& inputFilename, const std::string& outputFilenamePrefix, uint16_t outputWidth,
                uint16_t outputHeight, bool show, bool save)
            : doShow(show), doSave(save), inputFilename_(inputFilename), outputFilenamePrefix_(outputFilenamePrefix),
              outputHeight(outputHeight), outputWidth(outputWidth) {
        // Open input file
        if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0) {
            throw std::runtime_error("Could not open input file");
        }

        if (avformat_find_stream_info(formatContext, nullptr) < 0) {
            throw std::runtime_error("Could not find stream information");
        }

        // Find H.264 video stream
        for (unsigned i = 0; i < formatContext->nb_streams; i++) {
            if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) {
                videoStreamIndex = i;
                break;
            }
        }

        if (videoStreamIndex == -1) {
            throw std::runtime_error("H.264 video stream not found");
        }

        // Initialize codec and codec context
        codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
        if (!codec) {
            throw std::runtime_error("Codec not found");
        }

        parser = av_parser_init(codec->id);
        if (!parser) {
            throw std::runtime_error("parser not found");
        }

        codecContext = avcodec_alloc_context3(codec);
        if (!codecContext) {
            throw std::runtime_error("Could not allocate codec context");
        }

        if (avcodec_open2(codecContext, codec, nullptr) < 0) {
            throw std::runtime_error("Could not open codec");
        }

        // Initialize frame
        frame = av_frame_alloc();
        if (!frame) {
            throw std::runtime_error("Could not allocate frame");
        }

        frameRgb = av_frame_alloc();
        if (!frameRgb) {
            throw std::runtime_error("Could not allocate frame");
        }
        av_image_alloc(frameRgb->data, frameRgb->linesize, outputWidth, outputHeight, AV_PIX_FMT_BGR24, 32);

        inputPacket = av_packet_alloc();
        if (!inputPacket) {
            throw std::runtime_error("Could not allocate packet");
        }

    }

    void decode() {
        char buf[1024];
        int ret;

        ret = avcodec_send_packet(codecContext, inputPacket);
        if (ret < 0) {
            fprintf(stderr, "Error sending a packet for decoding\n");
            exit(1);
        }

        while (ret >= 0) {
            ret = avcodec_receive_frame(codecContext, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                return;
            else if (ret < 0) {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);

            SwsContext* sws_ctx = NULL;

            sws_ctx = sws_getContext(codecContext->width, codecContext->height, codecContext->pix_fmt, outputWidth,
                                     outputHeight, AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL, NULL);
            if (doSave) {
                pgm_save(frame->data[0], frame->linesize[0], frame->width, frame->height, buf);
            }

            sws_scale(sws_ctx, frame->data, frame->linesize, 0, codecContext->height, frameRgb->data,
                      frameRgb->linesize);

            if (doShow) {
                cv::Mat img(outputHeight, outputWidth, CV_8UC3, frameRgb->data[0]);
                cv::imshow("Image", img);
                cv::waitKey(1);
            }
        }
    }

    ~H264Decoder() {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
        avcodec_free_context(&codecContext);
        av_frame_free(&frame);
        av_frame_free(&frameRgb);
        av_packet_free(&inputPacket);
        av_freep(&frameRgb->data[0]);
    }

    void readAndDecode() {
        FILE* f;
        uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
        uint8_t* data;
        size_t data_size;
        int ret;
        int eof;
        f = fopen(inputFilename_.c_str(), "rb");
        if (!f) {
            std::cout << "Error opening file" << std::endl;;
            exit(1);
        }
        memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
        auto start = std::chrono::high_resolution_clock::now();
        do {
            /* read raw data from the input file */
            data_size = fread(inbuf, 1, INBUF_SIZE, f);
            if (ferror(f))
                break;
            eof = !data_size;

            /* use the parser to split the data into frames */
            data = inbuf;
            while (data_size > 0 || eof) {
                ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size, data, data_size,
                                       AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                if (ret < 0) {
                    fprintf(stderr, "Error while parsing\n");
                    exit(1);
                }
                data += ret;
                data_size -= ret;

                if (inputPacket->size) {
                    decode();
                } else if (eof) {
                    break;
                }
            }
        } while (!eof);
        auto diff = std::chrono::high_resolution_clock::now() - start;
        std::cout << "Decoded " << codecContext->frame_num << " frames in "
                  << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms "
                  << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() / codecContext->frame_num
                  << " ms/frame " << std::endl;
    }

private:
    bool doShow{false};
    bool doSave{true};
    const AVCodec* codec;
    AVFormatContext* formatContext = nullptr;
    AVCodecContext* codecContext = nullptr;
    AVCodecParserContext* parser;
    AVFrame* frame = nullptr;
    AVFrame* frameRgb = nullptr;
    AVPacket* inputPacket = nullptr;
    int videoStreamIndex = -1;
    std::string inputFilename_;
    std::string outputFilenamePrefix_;
    uint16_t outputHeight = 1280;
    uint16_t outputWidth = 1632;

    static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename) {
        FILE* f = fopen(filename, "wb");
        if (!f) {
            std::cout << "Error opening file for saving PGM" << std::endl;
            exit(1);
        }

        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        for (int i = 0; i < ysize; i++)
            fwrite(buf + i * wrap, 1, xsize, f);

        fclose(f);
    }
};

int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cout << "Please provide input file name as parameter" << std::endl;
        exit(1);
    }

    std::string inputFilename = argv[1];
    std::string outputFilenamePrefix = "C:\\tmp\\pics\\output-frame";

    try {
        H264Decoder decoder(inputFilename, outputFilenamePrefix, 1632, 1280, true, false);
        decoder.readAndDecode();
    } catch (const std::exception& e) {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}

Bad src image ptrs converting YUV to RGB after H264 decoding with libav and c++

There are 2 best solutions below

Related Questions in C++

Related Questions in FFMPEG

Related Questions in RGB

Related Questions in YUV

Related Questions in LIBAVCODEC

Trending Questions

Popular # Hashtags

Popular Questions