scar-chat7/client/media/video_encoder.cpp
2025-12-07 20:12:08 -07:00

249 lines
7.0 KiB
C++

#include "video_encoder.h"
#include <iostream>
#include <cstring>
namespace scar {
VideoEncoder::VideoEncoder()
: codec_ctx_(nullptr),
frame_(nullptr),
packet_(nullptr),
sws_ctx_(nullptr),
width_(0),
height_(0),
frame_counter_(0),
initialized_(false) {
}
// Release all FFmpeg resources (codec context, frame, packet, swscale
// context) via cleanup(); safe even if initialize() was never called or
// failed part-way, since cleanup() null-checks every handle.
VideoEncoder::~VideoEncoder() {
cleanup();
}
// Initialize (or re-initialize) the encoder for a given geometry and rate.
//
// @param width       Frame width in pixels (must be positive and even —
//                    YUV420P chroma subsampling requires even dimensions).
// @param height      Frame height in pixels (must be positive and even).
// @param fps         Target frame rate; must be > 0 (used as time base).
// @param bitrate     Target bitrate in bits per second; must be > 0.
// @param spa_format  PipeWire SPA_VIDEO_FORMAT_* value describing the
//                    source pixel layout; 0 means "unknown, assume BGRx".
// @return true on success; false on any failure (state is cleaned up).
bool VideoEncoder::initialize(int width, int height, int fps, int bitrate, uint32_t spa_format) {
cleanup(); // Clean up any previous state
// Validate parameters up front: odd dimensions break YUV420P chroma
// subsampling (avcodec_open2 would fail with an opaque error), and
// fps <= 0 would produce an invalid AVRational time base.
if (width <= 0 || height <= 0 || (width % 2) != 0 || (height % 2) != 0) {
std::cerr << "Invalid dimensions " << width << "x" << height
          << " (must be positive and even for YUV420P)" << std::endl;
return false;
}
if (fps <= 0 || bitrate <= 0) {
std::cerr << "Invalid fps (" << fps << ") or bitrate (" << bitrate << ")" << std::endl;
return false;
}
width_ = width;
height_ = height;
// Find H.264 encoder: prefer software libx264, fall back to hardware.
const AVCodec* codec = avcodec_find_encoder_by_name("libx264");
if (!codec) {
codec = avcodec_find_encoder_by_name("h264_vaapi");
if (!codec) {
codec = avcodec_find_encoder_by_name("h264_nvenc");
}
if (!codec) {
std::cerr << "H.264 encoder not found" << std::endl;
return false;
}
}
std::cout << "Using encoder: " << codec->name << std::endl;
// Create codec context
codec_ctx_ = avcodec_alloc_context3(codec);
if (!codec_ctx_) {
std::cerr << "Failed to allocate codec context" << std::endl;
return false;
}
// Set encoding parameters
codec_ctx_->width = width_;
codec_ctx_->height = height_;
codec_ctx_->time_base = AVRational{1, fps};
codec_ctx_->framerate = AVRational{fps, 1};
codec_ctx_->bit_rate = bitrate;
codec_ctx_->gop_size = fps; // One I-frame per second
codec_ctx_->max_b_frames = 0; // Disable B-frames for low latency
codec_ctx_->pix_fmt = AV_PIX_FMT_YUV420P;
// Low-latency x264 options (ignored harmlessly by non-x264 encoders).
av_opt_set(codec_ctx_->priv_data, "preset", "ultrafast", 0);
av_opt_set(codec_ctx_->priv_data, "tune", "zerolatency", 0);
// Open codec
if (avcodec_open2(codec_ctx_, codec, nullptr) < 0) {
std::cerr << "Failed to open codec" << std::endl;
cleanup();
return false;
}
// Allocate the reusable YUV420P frame the encoder consumes.
frame_ = av_frame_alloc();
if (!frame_) {
std::cerr << "Failed to allocate frame" << std::endl;
cleanup();
return false;
}
frame_->format = codec_ctx_->pix_fmt;
frame_->width = width_;
frame_->height = height_;
if (av_frame_get_buffer(frame_, 0) < 0) {
std::cerr << "Failed to allocate frame buffer" << std::endl;
cleanup();
return false;
}
// Allocate the reusable output packet.
packet_ = av_packet_alloc();
if (!packet_) {
std::cerr << "Failed to allocate packet" << std::endl;
cleanup();
return false;
}
// Map the PipeWire SPA video format to the matching FFmpeg pixel format.
// Numeric values mirror SPA_VIDEO_FORMAT_* in <spa/param/video/format.h>;
// named constants here replace the previous magic numbers.
constexpr uint32_t kSpaVideoFormatRGBA = 87;
constexpr uint32_t kSpaVideoFormatRGBx = 88;
constexpr uint32_t kSpaVideoFormatBGRA = 89;
constexpr uint32_t kSpaVideoFormatBGRx = 90;
AVPixelFormat src_format = AV_PIX_FMT_BGR0; // Default when format unknown
int bytes_per_pixel = 4; // All supported formats are 4 bytes/pixel
if (spa_format != 0) {
switch (spa_format) {
case kSpaVideoFormatBGRx: src_format = AV_PIX_FMT_BGR0; bytes_per_pixel = 4; break;
case kSpaVideoFormatRGBx: src_format = AV_PIX_FMT_RGB0; bytes_per_pixel = 4; break;
case kSpaVideoFormatBGRA: src_format = AV_PIX_FMT_BGRA; bytes_per_pixel = 4; break;
case kSpaVideoFormatRGBA: src_format = AV_PIX_FMT_RGBA; bytes_per_pixel = 4; break;
default:
std::cout << "Unknown SPA format " << spa_format << ", defaulting to BGR0" << std::endl;
break;
}
}
std::cout << "Encoder using source format: " << av_get_pix_fmt_name(src_format)
<< " (" << bytes_per_pixel << " bytes/pixel)" << std::endl;
// Initialize swscale context for source format -> YUV420P conversion.
sws_ctx_ = sws_getContext(
width_, height_, src_format,
width_, height_, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR, nullptr, nullptr, nullptr
);
if (!sws_ctx_) {
std::cerr << "Failed to create swscale context" << std::endl;
cleanup();
return false;
}
initialized_ = true;
frame_counter_ = 0;
std::cout << "Video encoder initialized: " << width_ << "x" << height_
<< " @ " << fps << " fps, " << bitrate << " bps" << std::endl;
return true;
}
std::vector<uint8_t> VideoEncoder::encode(const uint8_t* frame_data, uint32_t frame_size) {
if (!initialized_) {
std::cerr << "Encoder not initialized" << std::endl;
return {};
}
// Validate frame size (should be width * height * 4 for BGR0)
uint32_t expected_size = width_ * height_ * 4;
if (frame_size != expected_size) {
std::cerr << "Invalid frame size: " << frame_size << " (expected " << expected_size << ")" << std::endl;
return {};
}
// Make frame writable
if (av_frame_make_writable(frame_) < 0) {
std::cerr << "Failed to make frame writable" << std::endl;
return {};
}
// Convert BGR0 to YUV420P
const uint8_t* src_data[1] = { frame_data };
int src_linesize[1] = { width_ * 4 };
sws_scale(
sws_ctx_,
src_data, src_linesize, 0, height_,
frame_->data, frame_->linesize
);
frame_->pts = frame_counter_++;
// Send frame to encoder
int ret = avcodec_send_frame(codec_ctx_, frame_);
if (ret < 0) {
std::cerr << "Error sending frame to encoder" << std::endl;
return {};
}
// Receive encoded packet
ret = avcodec_receive_packet(codec_ctx_, packet_);
if (ret == AVERROR(EAGAIN)) {
// Encoder needs more frames
return {};
} else if (ret < 0) {
std::cerr << "Error receiving packet from encoder" << std::endl;
return {};
}
// Copy packet data to vector
std::vector<uint8_t> encoded_data(packet_->data, packet_->data + packet_->size);
av_packet_unref(packet_);
return encoded_data;
}
// Drain the encoder: signal end-of-stream and collect every buffered
// packet into one contiguous byte vector. After this call the codec is in
// EOF state; re-initialize() before encoding again.
std::vector<uint8_t> VideoEncoder::flush() {
std::vector<uint8_t> drained;
if (!initialized_) {
return drained;
}
// A null frame tells the encoder no more input is coming.
avcodec_send_frame(codec_ctx_, nullptr);
for (;;) {
const int rc = avcodec_receive_packet(codec_ctx_, packet_);
if (rc < 0) {
// AVERROR_EOF / AVERROR(EAGAIN) / any real error: nothing left.
break;
}
drained.insert(drained.end(), packet_->data, packet_->data + packet_->size);
av_packet_unref(packet_);
}
return drained;
}
// Release all FFmpeg resources and mark the encoder uninitialized.
// Idempotent: safe to call repeatedly and on a partially-initialized
// encoder (every FFmpeg free helper below tolerates null).
//
// Cleanup vs. previous version: av_packet_free(), av_frame_free() and
// avcodec_free_context() take a T** and already set the pointer to null
// (and are no-ops when it is null), so the redundant null-checks and
// `= nullptr` assignments after them have been removed.
void VideoEncoder::cleanup() {
if (sws_ctx_) {
// sws_freeContext() takes the pointer by value, so null it ourselves.
sws_freeContext(sws_ctx_);
sws_ctx_ = nullptr;
}
av_packet_free(&packet_);
av_frame_free(&frame_);
avcodec_free_context(&codec_ctx_);
initialized_ = false;
}
} // namespace scar