Save H.265 frames to an MP4 container and compress them in real time in C++

lincolnxlw · Sep 16, 2024

Hi,

I am trying to write a class that saves H.265 frames from a DepthAI pipeline into an MP4 container and also compresses them in real time. It should be used like this:

// Poll the encoded-frame queue without blocking; hand any frame that arrived to the writer.
auto h265_packet = encoded_queue->tryGet<dai::ImgFrame>();
if (h265_packet) {
  const auto& encoded_bytes = h265_packet->getData();
  ffmpeg_encoder.write(encoded_bytes.data(), encoded_bytes.size());
}

With the following code I am able to save the frames to an MP4 container, but the compression part does not work, no matter which preset and CRF values I provide. What did I miss?

Header:

#ifndef FFMPEG_ENCODER_H
#define FFMPEG_ENCODER_H

extern "C" {
#include <libavformat/avformat.h>
}

/// Writes caller-supplied video packets into a container file through libavformat.
///
/// Typical use: open() once, write() per packet, then close() (also run by the destructor).
class FfmpegEncoder {
public:
    /// Settings consumed by open().
    struct Params {
        const char* preset = "fast";  ///< Value for the codec's private "preset" option.
        bool use_crf = true;          ///< Intended to choose between `crf` and `bitrate` rate control.
        int crf = 23;                 ///< Value for the codec's private "crf" option.
        int bitrate = 4000000;        ///< Target bitrate (presumably bits per second - confirm).
        int image_width = 1920;       ///< Frame width in pixels.
        int image_height = 1080;      ///< Frame height in pixels.
        int fps = 30;                 ///< Frames per second; the codec time base is 1/fps.
    };

    FfmpegEncoder();
    ~FfmpegEncoder();

    // The object owns raw libav pointers that close() frees; a copy would free them twice.
    FfmpegEncoder(const FfmpegEncoder&) = delete;
    FfmpegEncoder& operator=(const FfmpegEncoder&) = delete;

    /// Opens `filename` for writing and prepares one video stream. Returns false on failure.
    bool open(const char* filename, const Params& params);
    /// Finalises the file (if one is open) and releases all libav resources.
    void close();
    /// Appends one packet of `size` bytes starting at `data`. Returns false on failure.
    bool write(const unsigned char* data, int size);
    /// True between a successful open() and the next close().
    bool isOpen() const { return mIsOpen; }

private:
    /// Raw libav handles; all are released and reset by close().
    struct Context {
        AVFormatContext* format_context = nullptr;
        AVCodecContext* codec_context = nullptr;
        AVStream* stream = nullptr;
        int frame_index = 0;  ///< Number of packets written since open().
    };

    Context mContext;
    bool mIsOpen = false;
    int64_t startTime = 0;  ///< Clock reading captured by open(); zero until then.
};

#endif // FFMPEG_ENCODER_H

Source:

#include <station_depthai_bringup/ffmpeg_encode.h>

#include <chrono>
#include <iostream>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/error.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
}

/// Creates a closed encoder; call open() before write().
/// startTime is zeroed so the member never holds an indeterminate value.
FfmpegEncoder::FfmpegEncoder() : startTime(0) {
}

/// Finalises any open output and releases the libav resources by delegating to close().
FfmpegEncoder::~FfmpegEncoder() {
    this->close();
}

bool FfmpegEncoder::open(const char *filename, const Params& params) {
    close();

    auto codec = AV_CODEC_ID_H265;

    do {
        avformat_alloc_output_context2(&mContext.format_context, nullptr, nullptr, filename);
        if (!mContext.format_context) {
            std::cout << "Could not allocate output format" << std::endl;
            break;
        }

        AVCodec* av_codec = avcodec_find_encoder(codec);
        if (!av_codec) {
            std::cout << "Could not find codec " << codec << std::endl;
            break;
        }

        mContext.stream = avformat_new_stream(mContext.format_context, nullptr);
        if (!mContext.stream) {
            std::cout << "Could not create stream" << std::endl;
            break;
        }
        mContext.stream->id = 0;
        mContext.stream->codecpar->codec_id = av_codec->id;
        mContext.stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
        mContext.stream->codecpar->width = params.image_width;
        mContext.stream->codecpar->height = params.image_height;
        mContext.stream->codecpar->format = AV_PIX_FMT_NONE;
        mContext.stream->time_base = (AVRational){1, 1000 * 1000};

        mContext.codec_context = avcodec_alloc_context3(av_codec);
        if (!mContext.codec_context) {
            std::cout << "Could not allocate codec context" << std::endl;
            break;
        }
        mContext.codec_context->codec_id = av_codec->id;
        mContext.codec_context->bit_rate = 0;
        mContext.codec_context->width = params.image_width;
        mContext.codec_context->height = params.image_height;
        mContext.codec_context->time_base = (AVRational){1, params.fps};
        mContext.codec_context->gop_size = 12;
        mContext.codec_context->max_b_frames = 2;
        mContext.codec_context->pix_fmt = AV_PIX_FMT_YUV420P;

        av_opt_set(mContext.codec_context->priv_data, "preset", params.preset, 0);
        av_opt_set_int(mContext.codec_context->priv_data, "crf", params.crf, 0);

        if (mContext.format_context->oformat->flags & AVFMT_GLOBALHEADER) {
            mContext.codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
        }

        int ret = avcodec_open2(mContext.codec_context, av_codec, nullptr);
        if (ret < 0) {
            std::cout << "Could not open codec" << std::endl;
            break;
        }

        // Verify CRF value
        int64_t crf_value;
        if (av_opt_get_int(mContext.codec_context->priv_data, "crf", 0, &crf_value) >= 0) {
            std::cout << "  CRF: " << crf_value << std::endl;
        } else {
            std::cout << "  CRF: Not set or invalid" << std::endl;
        }
        // Verify preset value
        char *preset_val = nullptr;
        if (av_opt_get(mContext.codec_context->priv_data, "preset", 0, (uint8_t**)&preset_val) >= 0) {
            std::cout << "  Preset: " << preset_val << std::endl;
            av_free(preset_val);  // Free the allocated memory after printing
        } else {
            std::cout << "  Preset: Not set or invalid" << std::endl;
        }

        ret = avcodec_parameters_from_context(mContext.stream->codecpar, mContext.codec_context);
        if (ret < 0) {
            std::cout << "Could not copy codec parameters" << std::endl;
            break;
        }

        av_dump_format(mContext.format_context, 0, filename, 1);

        ret = avio_open(&mContext.format_context->pb, filename, AVIO_FLAG_WRITE);
        if (ret < 0) {
            std::cout << "Could not open " << filename << std::endl;
            break;
        }

        ret = avformat_write_header(mContext.format_context, nullptr);
        if (ret < 0) {
            std::cout << "Could not write header" << std::endl;
            break;
        }

        mContext.frame_index = 0;  // Initialize frame index
        mIsOpen = true;  // Mark the encoder as open
        startTime = std::chrono::high_resolution_clock::now().time_since_epoch().count();
        return true;
    } while (false);

    close();  // On failure, ensure the context is closed
    return false;
}

void FfmpegEncoder::close() {
    if (mIsOpen) {
        av_write_trailer(mContext.format_context);

        int ret = avio_close(mContext.format_context->pb);
        if (ret != 0) {
            std::cout << "Failed to close file" << std::endl;
        }
    }

    if (mContext.codec_context) {
        avcodec_free_context(&mContext.codec_context);
    }

    if (mContext.format_context) {
        avformat_free_context(mContext.format_context);
    }

    mContext = {};  // Reset context
    mIsOpen = false;  // Mark the encoder as closed
}

bool FfmpegEncoder::write(const unsigned char *data, int size) {
    if (!mIsOpen) {
        return false;
    }

    AVPacket packet;
    av_init_packet(&packet);

    packet.data = (uint8_t*)data;
    packet.size = size;
    packet.stream_index = mContext.stream->index;

    int64_t currentTime = std::chrono::high_resolution_clock::now().time_since_epoch().count();
    packet.pts = (currentTime - startTime) / 1000;  // Set presentation timestamp in microseconds
    packet.dts = packet.pts;  // Set decoding timestamp
    packet.duration = 1;  // Set duration

    int ret = av_interleaved_write_frame(mContext.format_context, &packet);
    if (ret < 0) {
        std::cout << "Error while writing output packet: " << ret << std::endl;
        return false;
    }

    mContext.frame_index++;  // Increment frame index
    return true;
}

erik · Sep 16, 2024

Hi @lincolnxlw ,
I don't understand completely - you're trying to compress (on-device encoded) H265 stream?

lincolnxlw · Sep 16, 2024

Hi erik

Correct. I want to save to mp4 and compress at runtime so I don't need to run something like ffmpeg -i input.h265 -c:v libx265 -crf 30 -preset fast output.mp4 afterwards.

Thanks

Lincoln

erik · Sep 17, 2024

@lincolnxlw I still don't understand - the video stream is already encoded (compressed), why would you be re-encoding the encoded stream with ffmpeg?

lincolnxlw · Sep 17, 2024

erik

Then what is the best way to achieve a further size reduction, like running ffmpeg -i input.h265 -c:v libx265 -crf 30 -preset fast output.mp4, but in real time?

In this link I have a video saved directly in H.265 format from a DepthAI pipeline. The video size is 31.3 MB. After running the ffmpeg command on it afterwards, the size dropped to 2.59 MB while the FPS and resolution stayed the same. How can I achieve this in C++ while the pipeline is running?

Thanks

Lincoln

erik · Sep 18, 2024

@lincolnxlw you can adjust the bitrate on depthai side as well, see setBitrateKbps (along with other settings):
https://docs.luxonis.com/software/depthai-components/nodes/video_encoder/#Usage

lincolnxlw · Sep 19, 2024

erik, it works! Thanks