// scar-chat7/client/media/video_decoder.cpp

#include "video_decoder.h"

#include <atomic>
#include <cstring>
#include <iostream>
#include <limits>

extern "C" {
#include <libavutil/imgutils.h>
}

namespace scar {

VideoDecoder::VideoDecoder()
    : codec_ctx_(nullptr),
      frame_(nullptr),
      sws_ctx_(nullptr),
      width_(0),
      height_(0),
      initialized_(false) {
}

/// Release whatever resources the decoder still holds.
VideoDecoder::~VideoDecoder() {
    this->cleanup();
}

bool VideoDecoder::initialize() {
    cleanup(); // Clean up any previous state

    // Find H.264 decoder
    const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!codec) {
        std::cerr << "H.264 decoder not found" << std::endl;
        return false;
    }

    std::cout << "Using H.264 decoder" << std::endl;

    // Create codec context
    codec_ctx_ = avcodec_alloc_context3(codec);
    if (!codec_ctx_) {
        std::cerr << "Failed to allocate decoder context" << std::endl;
        return false;
    }

    // Open codec
    if (avcodec_open2(codec_ctx_, codec, nullptr) < 0) {
        std::cerr << "Failed to open decoder" << std::endl;
        cleanup();
        return false;
    }

    // Allocate frame
    frame_ = av_frame_alloc();
    if (!frame_) {
        std::cerr << "Failed to allocate frame" << std::endl;
        cleanup();
        return false;
    }

    initialized_ = true;

    std::cout << "Video decoder initialized" << std::endl;

    return true;
}

std::vector<uint8_t> VideoDecoder::decode(const std::vector<uint8_t>& encoded_data, int& out_width, int& out_height) {
    std::lock_guard<std::mutex> lock(decode_mutex_);

    if (!initialized_) {
        std::cerr << "Decoder not initialized" << std::endl;
        return {};
    }

    if (encoded_data.empty()) {
        std::cerr << "Empty encoded data" << std::endl;
        return {};
    }

    static int decode_count = 0;
    decode_count++;

    // Create packet and copy data (FFmpeg will manage the memory)
    AVPacket* pkt = av_packet_alloc();
    if (!pkt) {
        std::cerr << "Failed to allocate packet" << std::endl;
        return {};
    }

    // Allocate buffer and copy data - FFmpeg will own and free this memory
    if (av_new_packet(pkt, encoded_data.size()) < 0) {
        std::cerr << "Failed to allocate packet buffer" << std::endl;
        av_packet_free(&pkt);
        return {};
    }

    memcpy(pkt->data, encoded_data.data(), encoded_data.size());

    // Send packet to decoder
    int ret = avcodec_send_packet(codec_ctx_, pkt);
    if (ret < 0) {
        char errbuf[256];
        av_strerror(ret, errbuf, sizeof(errbuf));
        std::cerr << "Error sending packet to decoder: " << errbuf << std::endl;
        av_packet_free(&pkt);
        return {};
    }

    // Receive decoded frame
    ret = avcodec_receive_frame(codec_ctx_, frame_);

    // Free packet immediately after sending
    av_packet_free(&pkt);

    if (ret == AVERROR(EAGAIN)) {
        // Decoder needs more data
        return {};
    } else if (ret < 0) {
        char errbuf[256];
        av_strerror(ret, errbuf, sizeof(errbuf));
        std::cerr << "Error receiving frame from decoder: " << errbuf << std::endl;
        return {};
    }

    std::cout << "Decode #" << decode_count << ": frame " << frame_->width << "x" << frame_->height << std::endl;

    // Update dimensions if this is the first frame or they changed
    if (width_ != frame_->width || height_ != frame_->height) {
        width_ = frame_->width;
        height_ = frame_->height;

        // Recreate swscale context for new dimensions
        if (sws_ctx_) {
            sws_freeContext(sws_ctx_);
        }

        sws_ctx_ = sws_getContext(
            width_, height_, AV_PIX_FMT_YUV420P,
            width_, height_, AV_PIX_FMT_RGB24,
            SWS_FAST_BILINEAR, nullptr, nullptr, nullptr
        );

        if (!sws_ctx_) {
            std::cerr << "Failed to create swscale context" << std::endl;
            return {};
        }

        std::cout << "Decoder dimensions: " << width_ << "x" << height_ << std::endl;
    }

    // Calculate aligned linesize for RGB24 (4-byte alignment)
    int linesize = width_ * 3;
    linesize = (linesize + 3) & ~3;

    // Use av_image_get_buffer_size to get the exact size FFmpeg expects
    int buffer_size = av_image_get_buffer_size(AV_PIX_FMT_RGB24, width_, height_, 1);
    if (buffer_size < 0) {
        std::cerr << "Failed to get buffer size" << std::endl;
        av_frame_unref(frame_);
        return {};
    }

    std::cout << "FFmpeg buffer size: " << buffer_size << " vs our calculation: " << (linesize * height_) << std::endl;

    // Allocate using FFmpeg's calculated size
    std::vector<uint8_t> rgb_buffer(buffer_size);

    std::cout << "Allocated buffer: " << buffer_size << " bytes (linesize=" << linesize
              << ", width=" << width_ << ", height=" << height_ << ")" << std::endl;

    // Use av_image_fill_arrays to properly set up the buffer pointers
    uint8_t* dest[4] = {nullptr};
    int dest_linesize[4] = {0};

    av_image_fill_arrays(dest, dest_linesize, rgb_buffer.data(),
                         AV_PIX_FMT_RGB24, width_, height_, 1);

    std::cout << "av_image_fill_arrays: linesize[0]=" << dest_linesize[0] << std::endl;

    std::cout << "Calling sws_scale..." << std::endl;
    int converted_height = sws_scale(
        sws_ctx_,
        frame_->data, frame_->linesize, 0, height_,
        dest, dest_linesize
    );
    std::cout << "sws_scale returned: " << converted_height << std::endl;

    if (converted_height != height_) {
        std::cerr << "sws_scale failed: converted " << converted_height << " lines, expected " << height_ << std::endl;
        av_frame_unref(frame_);
        return {};
    }

    // Clean up frame
    av_frame_unref(frame_);

    // Set output dimensions
    out_width = width_;
    out_height = height_;

    std::cout << "About to return buffer, size=" << rgb_buffer.size() << std::endl;

    // Use move semantics explicitly to avoid any copy
    return std::move(rgb_buffer);
}

/// Free the scaler, frame and codec context (whichever exist) and return the
/// decoder to its uninitialized state. Safe to call repeatedly.
void VideoDecoder::cleanup() {
    // Mark the decoder unusable and forget the cached frame geometry.
    initialized_ = false;
    width_ = 0;
    height_ = 0;

    if (sws_ctx_ != nullptr) {
        sws_freeContext(sws_ctx_);
        sws_ctx_ = nullptr;
    }

    if (frame_ != nullptr) {
        av_frame_free(&frame_);
        frame_ = nullptr;
    }

    if (codec_ctx_ != nullptr) {
        avcodec_free_context(&codec_ctx_);
        codec_ctx_ = nullptr;
    }
}

} // namespace scar