Hi,
I am trying to write a class that saves H.265 frames from a DepthAI pipeline into an MP4 container and also compresses them in real time. It should be used like this:
// Intended call site: poll the queue for a frame without waiting
// (presumably tryGet yields a null-like value when nothing is queued — confirm
// against the DepthAI API).
auto h265_packet = encoded_queue->tryGet<dai::ImgFrame>();
if (h265_packet) {
// Hand the frame's byte buffer and its length to the writer.
ffmpeg_encoder.write(h265_packet->getData().data(), h265_packet->getData().size());
}
I can save the frames to an MP4 container with the following code, but the compression part does not work, no matter which preset and CRF values I provide. What did I miss?
Header:
#ifndef FFMPEG_ENCODER_H
#define FFMPEG_ENCODER_H
extern "C" {
#include <libavformat/avformat.h>
}
/// Writes caller-supplied packets into a container file through libavformat.
///
/// The object owns raw FFmpeg handles (format and codec contexts) that are
/// released by close(). Copying is disabled because a copy would hold the same
/// raw pointers and release them a second time.
class FfmpegEncoder {
public:
    /// Settings consumed by open().
    struct Params {
        const char* preset = "fast";   ///< Value for the codec's "preset" option.
        bool use_crf = true;           ///< Presumably selects CRF over a target bitrate — confirm against open().
        int crf = 23;                  ///< Value for the codec's "crf" option.
        int bitrate = 4000000;         ///< Target bitrate; presumably bits per second.
        int image_width = 1920;        ///< Frame width in pixels.
        int image_height = 1080;       ///< Frame height in pixels.
        int fps = 30;                  ///< Frames per second; used as the codec time base denominator.
    };

    FfmpegEncoder();
    ~FfmpegEncoder();

    // Non-copyable: the raw FFmpeg contexts have a single owner.
    FfmpegEncoder(const FfmpegEncoder&) = delete;
    FfmpegEncoder& operator=(const FfmpegEncoder&) = delete;

    /// Opens `filename` for output; returns false on failure.
    bool open(const char* filename, const Params& params);

    /// Finalizes the file (if open) and releases every FFmpeg handle.
    void close();

    /// Writes one packet of `size` bytes starting at `data`; returns false on failure.
    bool write(const unsigned char* data, int size);

    /// True between a successful open() and the next close().
    bool isOpen() const { return mIsOpen; }

private:
    /// Raw FFmpeg state; reset to all-null by close().
    struct Context {
        AVFormatContext* format_context = nullptr;
        AVCodecContext* codec_context = nullptr;
        AVStream* stream = nullptr;   // Owned by format_context, not freed separately.
        int frame_index = 0;          // Count of packets written since open().
    };

    Context mContext;
    bool mIsOpen = false;
    int64_t startTime = 0;   // Clock reading taken in open(); was previously left uninitialized.
};
#endif // FFMPEG_ENCODER_H
Source:
#include <station_depthai_bringup/ffmpeg_encode.h>
#include <chrono>
#include <cstdint>
#include <iostream>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/error.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
}
// Nothing to set up here: the members carry their own in-class initializers.
FfmpegEncoder::FfmpegEncoder() = default;
// Destruction goes through close() so an open file is finalized and all
// FFmpeg handles are released.
FfmpegEncoder::~FfmpegEncoder()
{
    this->close();
}
bool FfmpegEncoder::open(const char *filename, const Params& params) {
close();
auto codec = AV_CODEC_ID_H265;
do {
avformat_alloc_output_context2(&mContext.format_context, nullptr, nullptr, filename);
if (!mContext.format_context) {
std::cout << "Could not allocate output format" << std::endl;
break;
}
AVCodec* av_codec = avcodec_find_encoder(codec);
if (!av_codec) {
std::cout << "Could not find codec " << codec << std::endl;
break;
}
mContext.stream = avformat_new_stream(mContext.format_context, nullptr);
if (!mContext.stream) {
std::cout << "Could not create stream" << std::endl;
break;
}
mContext.stream->id = 0;
mContext.stream->codecpar->codec_id = av_codec->id;
mContext.stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
mContext.stream->codecpar->width = params.image_width;
mContext.stream->codecpar->height = params.image_height;
mContext.stream->codecpar->format = AV_PIX_FMT_NONE;
mContext.stream->time_base = (AVRational){1, 1000 * 1000};
mContext.codec_context = avcodec_alloc_context3(av_codec);
if (!mContext.codec_context) {
std::cout << "Could not allocate codec context" << std::endl;
break;
}
mContext.codec_context->codec_id = av_codec->id;
mContext.codec_context->bit_rate = 0;
mContext.codec_context->width = params.image_width;
mContext.codec_context->height = params.image_height;
mContext.codec_context->time_base = (AVRational){1, params.fps};
mContext.codec_context->gop_size = 12;
mContext.codec_context->max_b_frames = 2;
mContext.codec_context->pix_fmt = AV_PIX_FMT_YUV420P;
av_opt_set(mContext.codec_context->priv_data, "preset", params.preset, 0);
av_opt_set_int(mContext.codec_context->priv_data, "crf", params.crf, 0);
if (mContext.format_context->oformat->flags & AVFMT_GLOBALHEADER) {
mContext.codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
int ret = avcodec_open2(mContext.codec_context, av_codec, nullptr);
if (ret < 0) {
std::cout << "Could not open codec" << std::endl;
break;
}
// Verify CRF value
int64_t crf_value;
if (av_opt_get_int(mContext.codec_context->priv_data, "crf", 0, &crf_value) >= 0) {
std::cout << " CRF: " << crf_value << std::endl;
} else {
std::cout << " CRF: Not set or invalid" << std::endl;
}
// Verify preset value
char *preset_val = nullptr;
if (av_opt_get(mContext.codec_context->priv_data, "preset", 0, (uint8_t**)&preset_val) >= 0) {
std::cout << " Preset: " << preset_val << std::endl;
av_free(preset_val); // Free the allocated memory after printing
} else {
std::cout << " Preset: Not set or invalid" << std::endl;
}
ret = avcodec_parameters_from_context(mContext.stream->codecpar, mContext.codec_context);
if (ret < 0) {
std::cout << "Could not copy codec parameters" << std::endl;
break;
}
av_dump_format(mContext.format_context, 0, filename, 1);
ret = avio_open(&mContext.format_context->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
std::cout << "Could not open " << filename << std::endl;
break;
}
ret = avformat_write_header(mContext.format_context, nullptr);
if (ret < 0) {
std::cout << "Could not write header" << std::endl;
break;
}
mContext.frame_index = 0; // Initialize frame index
mIsOpen = true; // Mark the encoder as open
startTime = std::chrono::high_resolution_clock::now().time_since_epoch().count();
return true;
} while (false);
close(); // On failure, ensure the context is closed
return false;
}
void FfmpegEncoder::close() {
if (mIsOpen) {
av_write_trailer(mContext.format_context);
int ret = avio_close(mContext.format_context->pb);
if (ret != 0) {
std::cout << "Failed to close file" << std::endl;
}
}
if (mContext.codec_context) {
avcodec_free_context(&mContext.codec_context);
}
if (mContext.format_context) {
avformat_free_context(mContext.format_context);
}
mContext = {}; // Reset context
mIsOpen = false; // Mark the encoder as closed
}
bool FfmpegEncoder::write(const unsigned char *data, int size) {
if (!mIsOpen) {
return false;
}
AVPacket packet;
av_init_packet(&packet);
packet.data = (uint8_t*)data;
packet.size = size;
packet.stream_index = mContext.stream->index;
int64_t currentTime = std::chrono::high_resolution_clock::now().time_since_epoch().count();
packet.pts = (currentTime - startTime) / 1000; // Set presentation timestamp in microseconds
packet.dts = packet.pts; // Set decoding timestamp
packet.duration = 1; // Set duration
int ret = av_interleaved_write_frame(mContext.format_context, &packet);
if (ret < 0) {
std::cout << "Error while writing output packet: " << ret << std::endl;
return false;
}
mContext.frame_index++; // Increment frame index
return true;
}