// scar-chat7/client/media/video_decoder.cpp
// (242 lines, 7.1 KiB, C++)

#include "video_decoder.h"
#include <iostream>
#include <cstring>
extern "C" {
#include <libavutil/imgutils.h>
}
namespace scar {
VideoDecoder::VideoDecoder()
: codec_ctx_(nullptr),
frame_(nullptr),
sws_ctx_(nullptr),
width_(0),
height_(0),
initialized_(false) {
}
// Frees whatever FFmpeg state the decoder still holds by delegating to cleanup().
VideoDecoder::~VideoDecoder() {
    cleanup();
}
bool VideoDecoder::initialize() {
cleanup(); // Clean up any previous state
// Find H.264 decoder
const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!codec) {
std::cerr << "H.264 decoder not found" << std::endl;
return false;
}
std::cout << "Using H.264 decoder" << std::endl;
// Create codec context
codec_ctx_ = avcodec_alloc_context3(codec);
if (!codec_ctx_) {
std::cerr << "Failed to allocate decoder context" << std::endl;
return false;
}
// Open codec
if (avcodec_open2(codec_ctx_, codec, nullptr) < 0) {
std::cerr << "Failed to open decoder" << std::endl;
cleanup();
return false;
}
// Allocate frame
frame_ = av_frame_alloc();
if (!frame_) {
std::cerr << "Failed to allocate frame" << std::endl;
cleanup();
return false;
}
initialized_ = true;
std::cout << "Video decoder initialized" << std::endl;
return true;
}
// 2025-12-07 20:12:08 -07:00
std::vector<uint8_t> VideoDecoder::decode(const std::vector<uint8_t>& encoded_data,
int& out_width, int& out_height, int& out_linesize) {
std::lock_guard<std::mutex> lock(decode_mutex_);
if (!initialized_) {
std::cerr << "Decoder not initialized" << std::endl;
return {};
}
if (encoded_data.empty()) {
std::cerr << "Empty encoded data" << std::endl;
return {};
}
static int decode_count = 0;
decode_count++;
// Create packet and copy data (FFmpeg will manage the memory)
AVPacket* pkt = av_packet_alloc();
if (!pkt) {
std::cerr << "Failed to allocate packet" << std::endl;
return {};
}
// Allocate buffer and copy data - FFmpeg will own and free this memory
if (av_new_packet(pkt, encoded_data.size()) < 0) {
std::cerr << "Failed to allocate packet buffer" << std::endl;
av_packet_free(&pkt);
return {};
}
memcpy(pkt->data, encoded_data.data(), encoded_data.size());
// Send packet to decoder
int ret = avcodec_send_packet(codec_ctx_, pkt);
if (ret < 0) {
char errbuf[256];
av_strerror(ret, errbuf, sizeof(errbuf));
std::cerr << "Error sending packet to decoder: " << errbuf << std::endl;
av_packet_free(&pkt);
return {};
}
// Receive decoded frame
ret = avcodec_receive_frame(codec_ctx_, frame_);
// Free packet immediately after sending
av_packet_free(&pkt);
if (ret == AVERROR(EAGAIN)) {
// Decoder needs more data
return {};
} else if (ret < 0) {
char errbuf[256];
av_strerror(ret, errbuf, sizeof(errbuf));
std::cerr << "Error receiving frame from decoder: " << errbuf << std::endl;
return {};
}
std::cout << "Decode #" << decode_count << ": frame " << frame_->width << "x" << frame_->height << std::endl;
// Update dimensions if this is the first frame or they changed
if (width_ != frame_->width || height_ != frame_->height) {
width_ = frame_->width;
height_ = frame_->height;
// Recreate swscale context for new dimensions
if (sws_ctx_) {
sws_freeContext(sws_ctx_);
}
sws_ctx_ = sws_getContext(
width_, height_, AV_PIX_FMT_YUV420P,
width_, height_, AV_PIX_FMT_RGB24,
SWS_FAST_BILINEAR, nullptr, nullptr, nullptr
);
if (!sws_ctx_) {
std::cerr << "Failed to create swscale context" << std::endl;
return {};
}
std::cout << "Decoder dimensions: " << width_ << "x" << height_ << std::endl;
}
2025-12-07 20:12:08 -07:00
// CRITICAL: Use FFmpeg's default alignment (32 bytes) for proper SIMD operations
// Alignment=1 was causing heap corruption because sws_scale expects aligned buffers
int buffer_size = av_image_get_buffer_size(AV_PIX_FMT_RGB24, width_, height_, 32);
if (buffer_size < 0) {
std::cerr << "Failed to get buffer size" << std::endl;
av_frame_unref(frame_);
return {};
}
2025-12-07 20:12:08 -07:00
std::cout << "Allocating aligned buffer: " << buffer_size << " bytes" << std::endl;
2025-12-07 20:12:08 -07:00
// Allocate buffer with extra space for alignment
std::vector<uint8_t> rgb_buffer(buffer_size);
2025-12-07 20:12:08 -07:00
// Use av_image_fill_arrays with same alignment as buffer allocation
uint8_t* dest[4] = {nullptr};
int dest_linesize[4] = {0};
2025-12-07 20:12:08 -07:00
int fill_ret = av_image_fill_arrays(dest, dest_linesize, rgb_buffer.data(),
AV_PIX_FMT_RGB24, width_, height_, 32);
if (fill_ret < 0 || fill_ret > buffer_size) {
std::cerr << "av_image_fill_arrays failed or returned size > buffer_size: "
<< fill_ret << " vs " << buffer_size << std::endl;
av_frame_unref(frame_);
return {};
}
2025-12-07 20:12:08 -07:00
std::cout << "Allocated buffer: " << buffer_size << " bytes, "
<< "linesize[0]=" << dest_linesize[0]
<< ", width=" << width_ << ", height=" << height_ << std::endl;
std::cout << "Calling sws_scale..." << std::endl;
int converted_height = sws_scale(
sws_ctx_,
frame_->data, frame_->linesize, 0, height_,
dest, dest_linesize
);
std::cout << "sws_scale returned: " << converted_height << std::endl;
if (converted_height != height_) {
std::cerr << "sws_scale failed: converted " << converted_height << " lines, expected " << height_ << std::endl;
av_frame_unref(frame_);
return {};
}
2025-12-07 20:12:08 -07:00
// Verify sws_scale didn't write beyond our buffer
size_t bytes_written = dest_linesize[0] * converted_height;
if (bytes_written > rgb_buffer.size()) {
std::cerr << "BUFFER OVERFLOW: sws_scale wrote " << bytes_written
<< " bytes but buffer is only " << rgb_buffer.size() << " bytes!" << std::endl;
av_frame_unref(frame_);
return {};
}
// Clean up frame
av_frame_unref(frame_);
// Set output dimensions
out_width = width_;
out_height = height_;
2025-12-07 20:12:08 -07:00
out_linesize = dest_linesize[0]; // Use FFmpeg's actual linesize
2025-12-07 20:12:08 -07:00
std::cout << "About to return buffer, size=" << rgb_buffer.size()
<< ", linesize=" << out_linesize << std::endl;
2025-12-07 20:12:08 -07:00
// Return by value - compiler will use RVO (Return Value Optimization)
return rgb_buffer;
}
// Releases the scaler, the reusable frame and the codec context (whichever of
// them exist) and returns the decoder to the idle, uninitialized state with a
// cached frame size of 0x0. Safe to call when nothing has been allocated.
void VideoDecoder::cleanup() {
    // sws_freeContext() takes the pointer by value, so it is cleared by hand.
    if (sws_ctx_ != nullptr) {
        sws_freeContext(sws_ctx_);
    }
    sws_ctx_ = nullptr;

    // av_frame_free() and avcodec_free_context() take the address of the
    // pointer and reset it to null themselves.
    if (frame_ != nullptr) {
        av_frame_free(&frame_);
    }
    if (codec_ctx_ != nullptr) {
        avcodec_free_context(&codec_ctx_);
    }

    width_ = 0;
    height_ = 0;
    initialized_ = false;
}
} // namespace scar