AAC的音频编码和解码实现

呱牛
2022-11-29 15:17:19
音视频
ffmpeg 音视频

折腾了几天AAC的编码和解码。最开始用的是ffmpeg的接口，实现起来不难，但调试时总是遇到各种问题；最后改用faac-1.28/faad2-2.7实现了AAC的编码和解码功能。选择这两个库，是因为手里有另一套代码工程已经用它们实现了AAC的编码和解码，所以就直接拿来用了。代码参考：https://gitee.com/anyRTC/anyRTC-RTMP-OpenSource/blob/master/ArLiveLite/codec/aacencode.cc

libfaad2-2.7

libfaad2默认开启了PS_DEC，因此单声道会被改成双声道，导致解码出错！相关代码如下：

/* Excerpt from libfaad2-2.7: this branch is compiled only when PS_DEC or
 * DRM_PS is defined. */
#if (defined(PS_DEC) || defined(DRM_PS))
    /* check if we have a mono file */
    if (*channels == 1)
    {
        /* upMatrix to 2 channels for implicit signalling of PS; the channel
         * count reported through *channels becomes 2 for a mono input */
        *channels = 2;
    }
#endif

使用FFMPEG实现的aac编解码程序，没有实际调通，代码仅供参考！

问题包括：

1、内置的aac-codec不支持AV_SAMPLE_FMT_S16；

2、av_frame_get_buffer可能会报错；

//aac_codec.h
#ifndef __AAC_CODEC_H__
#define __AAC_CODEC_H__

#include <stdint.h>  /* uint8_t, used by the init_aac_codec() prototype */

/* Release the shared frame and codec context. Always returns 0. */
extern int close_aac_codec(void);

/*
 * Decode inlen bytes from inbuf into outbuf and store the number of bytes
 * written in *outlen. Returns 0 on success and -1 on failure (including when
 * the codec has not been initialised).
 */
extern int aac_decode_frame(unsigned char* inbuf, unsigned int inlen, unsigned char* outbuf, unsigned int* outlen);

/*
 * Encode inlen bytes of PCM from inbuf into outbuf and store the number of
 * bytes written in *outlen. Returns 0 on success and -1 when the codec has
 * not been initialised.
 */
extern int aac_encode_frame(unsigned char* inbuf, unsigned int inlen, unsigned char* outbuf, unsigned int* outlen);

/*
 * Open the AAC encoder (encode_flag != 0) or decoder (encode_flag == 0) for
 * the given sample rate, channel count and bit rate. Returns 0 on success and
 * a negative value when the codec cannot be found, allocated or opened.
 */
extern int init_aac_codec(uint8_t encode_flag, int sample_rate, int channels, int bitrate);

#endif

//aac_codec.cpp


#ifdef __cplusplus
extern "C" {
#endif
#include "libavutil/time.h"
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libavdevice/avdevice.h"
#include "libswresample/swresample.h"
#include "libswscale/swscale.h"

#ifdef __cplusplus
}
#endif
 
#include "BroadcastJson.h"
#include "logutils.h"
#include "aac_codec.h"



/* Global timestamp for the audio frames; aac_encode_frame() stamps each frame
 * with it and then advances it by the frame's nb_samples. */
static int64_t pts = 0; 
/* Codec context published by init_aac_codec() and freed by close_aac_codec(). */
static AVCodecContext *input_codec_context = NULL;
/* Frame allocated by init_aac_codec(), reused on every encode/decode call and
 * freed by close_aac_codec(). */
static AVFrame        *input_codec_frame = NULL;

static void init_packet(AVPacket *packet)
{
    av_init_packet(packet);
    /* Set the packet data and size so that it is recognized as being empty. */
    packet->data = NULL;
    packet->size = 0;
}

/*
sample_rate:16000
channels:1
bitrate:64000
*/
int init_aac_codec(uint8_t encode_flag, int sample_rate, int channels, int bitrate){

    AVCodec *input_codec;
    AVCodecContext *avctx;
    int error;
    AVCodecID codec_id = AV_CODEC_ID_AAC;
    /* Find a decoder for the audio stream. */
    if (encode_flag){ 
       input_codec = avcodec_find_encoder_by_name("libfdk_aac");
       //input_codec = avcodec_find_encoder(codec_id);
        if (!input_codec) {
            LOGD("Could not find input encodec\n"); 
            return -1;
        }
    }else{ 
        input_codec = avcodec_find_decoder_by_name("libfdk_aac");
        //input_codec = avcodec_find_decoder(codec_id);
        if (!input_codec) {

            LOGD("Could not find input decodec\n"); 
            return -1;
        }
    }

    //first close aaccodec.
    close_aac_codec();

    /* Allocate a new decoding context. */
    avctx = avcodec_alloc_context3(input_codec);
    if (!avctx) {
        LOGD("Could not allocate a decoding context\n"); 
        return -1;
    }
    avctx->strict_std_compliance =FF_COMPLIANCE_EXPERIMENTAL;   
    //avctx->codec_id = codec_id;
    avctx->sample_fmt = AV_SAMPLE_FMT_S16;//AV_SAMPLE_FMT_FLTP 
    avctx->sample_rate = sample_rate;
    avctx->channels = channels;
    avctx->channel_layout = av_get_default_channel_layout(channels);
    // 音频编码规格
    avctx->profile = FF_PROFILE_AAC_HE_V2;
    avctx->bit_rate = bitrate;
 
    /* Open the decoder for the audio stream to use it later. */
    if ((error = avcodec_open2(avctx, input_codec, NULL)) < 0) {
        LOGD("Could not open input codec (error '%d')\n", error);
        avcodec_free_context(&avctx); 
        return error;
    }

    /* frame containing input raw audio */
    input_codec_frame = av_frame_alloc();
    if (!input_codec_frame) {
        LOGD("Could not allocate audio frame\n");
        return error;
    }
 
    input_codec_frame->sample_rate = sample_rate; 
    input_codec_frame->channels = channels;
    if (encode_flag){
        input_codec_frame->nb_samples     = avctx->frame_size;//(sample_rate*channels*2*20)/8000;//
    }else{ 
        input_codec_frame->nb_samples     = (sample_rate/1000)*20*channels;//avctx->frame_size;//(sample_rate*channels*2*20)/8000;//  
 
    }
    input_codec_frame->format         = avctx->sample_fmt;
    input_codec_frame->channel_layout = avctx->channel_layout;
    /* allocate the data buffers */
    int ret = av_frame_get_buffer(input_codec_frame, 0);
    if (ret < 0) {
        LOGD("Could not allocate audio data buffers ret:%d avctx->frame_size:%d\n", ret, avctx->frame_size);
        avcodec_free_context(&avctx); 
        return error;
    }


    /* Save the decoder context for easier access later. */
    input_codec_context = avctx;

    LOGD("init_aac_codec init sample_rate:%d,  channels:%d, bitrate:%d.", sample_rate, channels, bitrate);

    return 0;
}
 
int aac_encode_frame(unsigned char* inbuf, unsigned int inlen, unsigned char* outbuf, unsigned int* outlen)
{
    // 
    AVPacket output_packet; 
    uint16_t *samples ;
    int  data_present = 0;
    int index = 0;
    int ret = 0;
  
    init_packet(&output_packet);

    if (input_codec_frame == NULL){
        return -1;
    }
    /* make sure the frame is writable -- makes a copy if the encoder
     * kept a reference internally */
    ret = av_frame_make_writable(input_codec_frame);
    samples = (uint16_t*)input_codec_frame->data[0];
    memcpy((void *)samples, (void*)inbuf, inlen);

    /* Set a timestamp based on the sample rate for the container. */
    if (input_codec_frame) {
        input_codec_frame->pts = pts;
        pts += input_codec_frame->nb_samples;
    }

    /* send the packet with the compressed data to the decoder */
    ret = avcodec_send_frame(input_codec_context, input_codec_frame);
    if (ret < 0) {
        LOGD("Error submitting the packet to the decoder\n");
        exit(1);
    }

    /* read all the output frames (in general there may be any number of them */
    while (ret >= 0) {
        ret = avcodec_receive_packet(input_codec_context, &output_packet);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        else if (ret < 0) {
            LOGD("Error during decoding\n");
            exit(1);
        }
        int  data_size = av_get_bytes_per_sample(input_codec_context->sample_fmt);
        if (data_size < 0) {
            /* This should not occur, checking just for paranoia */
            LOGD("Failed to calculate data size\n");
            return -1;
        }
        memcpy(outbuf+index, output_packet.data, output_packet.size);
        index+=output_packet.size;
    }
    *outlen = index;
    return 0;
}

int aac_decode_frame(unsigned char* inbuf, unsigned int inlen, unsigned char* outbuf, unsigned int* outlen)
{
    AVPacket output_packet; 
    uint16_t *samples ;
    int  data_present = 0;
    int index = 0;
    int ret = 0;
  
    if (input_codec_frame == NULL){
        return -1;
    }
    init_packet(&output_packet);
    /* make sure the frame is writable -- makes a copy if the encoder
     * kept a reference internally */
    ret = av_frame_make_writable(input_codec_frame);
    samples = (uint16_t*)input_codec_frame->data[0];
    memcpy((void *)samples, (void*)inbuf, inlen);

    /* send the packet with the compressed data to the decoder */
    ret = avcodec_send_frame(input_codec_context, input_codec_frame);
    if (ret < 0) {
        LOGD("Error submitting the packet to the decoder, inlen:%d\n", inlen);
        return -1;
    }

    /* read all the output frames (in general there may be any number of them */
    while (ret >= 0) {
        ret = avcodec_receive_packet(input_codec_context, &output_packet);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        else if (ret < 0) {
            LOGD("Error during decoding\n");
            return -1;
        }
        int data_size = av_get_bytes_per_sample(input_codec_context->sample_fmt);
        if (data_size < 0) {
            /* This should not occur, checking just for paranoia */
            LOGD("Failed to calculate data size\n");
            return -1;
        }
        memcpy(outbuf+index, output_packet.data, output_packet.size);
        index+=output_packet.size;
    }
    *outlen = index;
    return 0;
}

int close_aac_codec(void){

    if (input_codec_frame != NULL){
        av_frame_free(&input_codec_frame); 
        input_codec_frame = NULL;
    }
    if (input_codec_context != NULL){
        avcodec_free_context(&input_codec_context);
    }
    return 0;
}

呱牛笔记