diff --git a/app/src/main/cpp/audio_resample.cpp b/app/src/main/cpp/audio_resample.cpp index 9e157a0..048df48 100644 --- a/app/src/main/cpp/audio_resample.cpp +++ b/app/src/main/cpp/audio_resample.cpp @@ -8,10 +8,17 @@ extern "C" { #endif #include "libavformat/avformat.h" -#include -#include -#include -#include +#include "libavformat/avio.h" + +#include "libavcodec/avcodec.h" + +#include "libavutil/audio_fifo.h" +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/frame.h" +#include "libavutil/opt.h" + +#include "libswresample/swresample.h" #ifdef __cplusplus } #endif @@ -20,6 +27,14 @@ extern "C" { #define ALOGE(Format, ...) LOGE("audio_resample", Format, ##__VA_ARGS__) +/* The output bit rate in bit/s */ +#define OUTPUT_BIT_RATE 96000 +/* The number of output channels */ +#define OUTPUT_CHANNELS 2 + +/* Global timestamp for the audio frames. */ +static int64_t pts = 0; + static void log_error(const char *functionName, int errorNumber) { int buffer_len = 1024; char *buffer = new char [buffer_len]; @@ -28,260 +43,771 @@ static void log_error(const char *functionName, int errorNumber) { delete []buffer; } -static int get_format_from_sample_fmt(const char **fmt, enum AVSampleFormat sample_fmt) +/** + * Open an input file and the required decoder. + * + */ +static int open_input_file(const char *filename, + AVFormatContext **input_format_context, + AVCodecContext **input_codec_context) { - *fmt = nullptr; - - struct sample_fmt_entry { - enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le; - } sample_fmt_entries[] = { - { AV_SAMPLE_FMT_U8, "u8", "u8" }, - { AV_SAMPLE_FMT_S16, "s16be", "s16le" }, - { AV_SAMPLE_FMT_S32, "s32be", "s32le" }, - { AV_SAMPLE_FMT_FLT, "f32be", "f32le" }, - { AV_SAMPLE_FMT_DBL, "f64be", "f64le" }, - }; - - for (int i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) { - struct sample_fmt_entry *entry = &sample_fmt_entries[i]; - if (sample_fmt == entry->sample_fmt) { - *fmt = AV_NE(entry->fmt_be, entry->fmt_le); - return 0; - } + AVCodecContext *avctx; + AVCodec *input_codec; + int error; + + /* Open the input file to read from it. */ + if ((error = avformat_open_input(input_format_context, filename, nullptr, + nullptr)) < 0) { + fprintf(stderr, "Could not open input file '%s' (error '%s')\n", + filename, av_err2str(error)); + *input_format_context = nullptr; + return error; + } + + /* Get information on the input file (number of streams etc.). */ + if ((error = avformat_find_stream_info(*input_format_context, nullptr)) < 0) { + fprintf(stderr, "Could not open find stream info (error '%s')\n", + av_err2str(error)); + avformat_close_input(input_format_context); + return error; + } + + /* Make sure that there is only one stream in the input file. */ + if ((*input_format_context)->nb_streams != 1) { + fprintf(stderr, "Expected one audio input stream, but found %d\n", + (*input_format_context)->nb_streams); + avformat_close_input(input_format_context); + return AVERROR_EXIT; + } + + /* Find a decoder for the audio stream. */ + if (!(input_codec = avcodec_find_decoder((*input_format_context)->streams[0]->codecpar->codec_id))) { + fprintf(stderr, "Could not find input codec\n"); + avformat_close_input(input_format_context); + return AVERROR_EXIT; + } + + /* Allocate a new decoding context. */ + avctx = avcodec_alloc_context3(input_codec); + if (!avctx) { + fprintf(stderr, "Could not allocate a decoding context\n"); + avformat_close_input(input_format_context); + return AVERROR(ENOMEM); + } + + /* Initialize the stream parameters with demuxer information. */ + error = avcodec_parameters_to_context(avctx, (*input_format_context)->streams[0]->codecpar); + if (error < 0) { + avformat_close_input(input_format_context); + avcodec_free_context(&avctx); + return error; + } + + /* Open the decoder for the audio stream to use it later. */ + if ((error = avcodec_open2(avctx, input_codec, nullptr)) < 0) { + fprintf(stderr, "Could not open input codec (error '%s')\n", + av_err2str(error)); + avcodec_free_context(&avctx); + avformat_close_input(input_format_context); + return error; } - ALOGE("Sample format %s not supported as output format, msg=%s\n", - av_get_sample_fmt_name(sample_fmt), strerror(errno)); - return AVERROR(EINVAL); + /* Save the decoder context for easier access later. */ + *input_codec_context = avctx; + + return 0; } -int init_audio_codec(AVFormatContext *fmt_ctx, AVCodecContext **avcodec_ctx, bool is_encoder) { - AVCodecContext *codec_ctx = nullptr; - for (int i = 0; i < fmt_ctx->nb_streams; ++i) { - if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - codec_ctx = fmt_ctx->streams[i]->codec; - } +/** + * Open an output file and the required encoder. + * Also set some basic encoder parameters. + * Some of these parameters are based on the input file's parameters. + * + */ +static int open_output_file(const char *filename, + int sample_rate, + AVCodecContext *input_codec_context, + AVFormatContext **output_format_context, + AVCodecContext **output_codec_context) +{ + AVCodecContext *avctx = nullptr; + AVIOContext *output_io_context = nullptr; + AVStream *stream = nullptr; + AVCodec *output_codec = nullptr; + int error; + + /* Open the output file to write to it. */ + if ((error = avio_open(&output_io_context, filename, + AVIO_FLAG_WRITE)) < 0) { + fprintf(stderr, "Could not open output file '%s' (error '%s')\n", + filename, av_err2str(error)); + return error; } - AVCodec *codec = is_encoder ? avcodec_find_encoder(codec_ctx->codec_id) - : avcodec_find_decoder(codec_ctx->codec_id); - if (!codec) { - ALOGE("can't found codec id=%d\n", codec_ctx->codec_id); - return -1; + + /* Create a new format context for the output container format. */ + if (!(*output_format_context = avformat_alloc_context())) { + fprintf(stderr, "Could not allocate output format context\n"); + return AVERROR(ENOMEM); } - int ret = avcodec_open2(codec_ctx, codec, nullptr); - if (ret < 0) - ALOGE("avcodec_open2 fail:%d", ret); - *avcodec_ctx = codec_ctx; - return ret; + + /* Associate the output file (pointer) with the container format context. */ + (*output_format_context)->pb = output_io_context; + + /* Guess the desired container format based on the file extension. */ + if (!((*output_format_context)->oformat = av_guess_format(nullptr, filename, + nullptr))) { + fprintf(stderr, "Could not find output file format\n"); + goto cleanup; + } + + if (!((*output_format_context)->url = av_strdup(filename))) { + fprintf(stderr, "Could not allocate url.\n"); + error = AVERROR(ENOMEM); + goto cleanup; + } + + /* Find the encoder to be used by its name. */ + if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC))) { + fprintf(stderr, "Could not find an AAC encoder.\n"); + goto cleanup; + } + + /* Create a new audio stream in the output file container. */ + if (!(stream = avformat_new_stream(*output_format_context, nullptr))) { + fprintf(stderr, "Could not create new stream\n"); + error = AVERROR(ENOMEM); + goto cleanup; + } + + avctx = avcodec_alloc_context3(output_codec); + if (!avctx) { + fprintf(stderr, "Could not allocate an encoding context\n"); + error = AVERROR(ENOMEM); + goto cleanup; + } + + /* Set the basic encoder parameters. + * The input file's sample rate is used to avoid a sample rate conversion. */ + avctx->channels = OUTPUT_CHANNELS; + avctx->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS); + avctx->sample_rate = sample_rate; //input_codec_context->sample_rate; + avctx->sample_fmt = output_codec->sample_fmts[0]; + avctx->bit_rate = OUTPUT_BIT_RATE; + + /* Allow the use of the experimental AAC encoder. */ + avctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; + + /* Set the sample rate for the container. */ + stream->time_base.den = input_codec_context->sample_rate; + stream->time_base.num = 1; + + /* Some container formats (like MP4) require global headers to be present. + * Mark the encoder so that it behaves accordingly. */ + if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER) + avctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + + /* Open the encoder for the audio stream to use it later. */ + if ((error = avcodec_open2(avctx, output_codec, nullptr)) < 0) { + fprintf(stderr, "Could not open output codec (error '%s')\n", + av_err2str(error)); + goto cleanup; + } + + error = avcodec_parameters_from_context(stream->codecpar, avctx); + if (error < 0) { + fprintf(stderr, "Could not initialize stream parameters\n"); + goto cleanup; + } + + /* Save the encoder context for easier access later. */ + *output_codec_context = avctx; + + return 0; + +cleanup: + avcodec_free_context(&avctx); + avio_closep(&(*output_format_context)->pb); + avformat_free_context(*output_format_context); + *output_format_context = nullptr; + return error < 0 ? error : AVERROR_EXIT; } -int init_audio_decoder(AVFormatContext *fmt_ctx, AVCodecContext **avcodec_ctx) { - return init_audio_codec(fmt_ctx, avcodec_ctx, false); +/** + * Initialize one data packet for reading or writing. + * @param packet Packet to be initialized + */ +static void init_packet(AVPacket *packet) +{ + av_init_packet(packet); + packet->data = nullptr; + packet->size = 0; } -int init_audio_encoder(AVFormatContext *fmt_ctx, AVCodecContext **avcodec_ctx) { - return init_audio_codec(fmt_ctx, avcodec_ctx, true); +/** + * Initialize one audio frame for reading from the input file. + * + */ +static int init_input_frame(AVFrame **frame) +{ + if (!(*frame = av_frame_alloc())) { + fprintf(stderr, "Could not allocate input frame\n"); + return AVERROR(ENOMEM); + } + return 0; } -int init_audio_muxer(AVFormatContext *ifmt_ctx, AVFormatContext **ofmt_ctx, const char* filename, - int sample_rate, int channels, int64_t channel_layout, AVSampleFormat sampleFormat) { - int ret; - AVFormatContext *fmt_ctx = *ofmt_ctx; - avformat_alloc_output_context2(&fmt_ctx, nullptr, nullptr, filename); - if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) { - ret = avio_open(&fmt_ctx->pb, filename, AVIO_FLAG_WRITE); - if (ret < 0) { - ALOGE("Could not open output file %s\n", filename); - return ret; - } +/** + * Initialize the audio resampler based on the input and output codec settings. + * + */ +static int init_resampler(AVCodecContext *input_codec_context, + AVCodecContext *output_codec_context, + SwrContext **resample_context) +{ + int error; + + /* + * Create a resampler context for the conversion. + * Set the conversion parameters. + * Default channel layouts based on the number of channels + * are assumed for simplicity (they are sometimes not detected + * properly by the demuxer and/or decoder). + */ + *resample_context = swr_alloc_set_opts(nullptr, + av_get_default_channel_layout(output_codec_context->channels), + output_codec_context->sample_fmt, + output_codec_context->sample_rate, + av_get_default_channel_layout(input_codec_context->channels), + input_codec_context->sample_fmt, + input_codec_context->sample_rate, + 0, nullptr); + if (!*resample_context) { + fprintf(stderr, "Could not allocate resample context\n"); + return AVERROR(ENOMEM); } - av_dump_format(fmt_ctx, 0, filename, 1); - - for (int i = 0; i < ifmt_ctx->nb_streams; i++) { - AVStream* in_stream = ifmt_ctx->streams[i]; - AVCodecParameters* codecpar = in_stream->codecpar; - AVCodec* encoder = avcodec_find_encoder(codecpar->codec_id); - AVStream* out_stream = avformat_new_stream(fmt_ctx, encoder); - avcodec_parameters_copy(out_stream->codecpar, codecpar); - - if(out_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - out_stream->codecpar->channels = channels; - out_stream->codecpar->sample_rate = sample_rate; - out_stream->codecpar->channel_layout = channel_layout; - out_stream->codec->sample_fmt = sampleFormat; - avcodec_parameters_to_context(out_stream->codec, out_stream->codecpar); - out_stream->time_base = in_stream->codec->time_base; - out_stream->duration = in_stream->duration; - break; - } + /* + * Perform a sanity check so that the number of converted samples is + * not greater than the number of samples to be converted. + * If the sample rates differ, this case has to be handled differently + */ + av_assert0(output_codec_context->sample_rate == input_codec_context->sample_rate); + + /* Open the re-sampler with the specified parameters. */ + if ((error = swr_init(*resample_context)) < 0) { + fprintf(stderr, "Could not open resample context\n"); + swr_free(resample_context); + return error; } + return 0; +} - ret = avformat_write_header(fmt_ctx, nullptr); - if (ret < 0) { - log_error("Error occurred when opening output file", ret); +/** + * Initialize a FIFO buffer for the audio samples to be encoded. + * + */ +static int init_fifo(AVAudioFifo **fifo, AVCodecContext *output_codec_context) +{ + /* Create the FIFO buffer based on the specified output sample format. */ + if (!(*fifo = av_audio_fifo_alloc(output_codec_context->sample_fmt, + output_codec_context->channels, 1))) { + fprintf(stderr, "Could not allocate FIFO\n"); + return AVERROR(ENOMEM); } - *ofmt_ctx = fmt_ctx; - return ret; + return 0; } -void init_out_frame(AVFrame **frame, AVCodecContext *codec_ctx, int nb_samples) { - AVFrame *out_frame = *frame; - av_frame_free(&out_frame); - out_frame = av_frame_alloc(); - out_frame->nb_samples = nb_samples; - out_frame->format = codec_ctx->sample_fmt; - out_frame->sample_rate = codec_ctx->sample_rate; - out_frame->channel_layout = codec_ctx->channel_layout; - av_frame_get_buffer(out_frame,0); - av_frame_make_writable(out_frame); - *frame = out_frame; +/** + * Write the header of the output file container. + * + */ +static int write_output_file_header(AVFormatContext *output_format_context) +{ + int error; + if ((error = avformat_write_header(output_format_context, nullptr)) < 0) { + fprintf(stderr, "Could not write output file header (error '%s')\n", + av_err2str(error)); + return error; + } + return 0; } -int resampling(const char *src_filename, const char *dst_filename, int dst_rate) +/** + * Decode one audio frame from the input file. + * + */ +static int decode_audio_frame(AVFrame *frame, + AVFormatContext *input_format_context, + AVCodecContext *input_codec_context, + int *data_present, int *finished) { - int src_rate; - int64_t src_ch_layout; - enum AVSampleFormat src_sample_fmt; - - int dst_bufsize; - int dst_linesize; - int dst_nb_channels; - int dst_nb_samples, max_dst_nb_samples; - int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO; - enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16; - - int ret; - const char *fmt; - AVPacket packet; - AVPacket *opacket; - AVFrame *frame; - int got_frame_ptr; - int got_packet_ptr; - struct SwrContext *swr_ctx; - AVFormatContext *iformat_ctx = nullptr; - AVFormatContext *oformat_ctx = nullptr; - AVCodecContext *icodec_ctx = nullptr; - AVCodecContext *ocodec_ctx = nullptr; - - ret = avformat_open_input(&iformat_ctx, src_filename, nullptr, nullptr); - if (ret < 0) { - ALOGE("open input fail, path=%s, ret=%d", src_filename, ret); - goto end; - } - avformat_find_stream_info(iformat_ctx, nullptr); - frame = av_frame_alloc(); - opacket = av_packet_alloc(); - init_audio_decoder(iformat_ctx, &icodec_ctx); - src_rate = icodec_ctx->sample_rate; - src_ch_layout = (int64_t) icodec_ctx->channel_layout; - src_sample_fmt = icodec_ctx->sample_fmt; - - /* create resample context */ - swr_ctx = swr_alloc(); - if (!swr_ctx) { - ALOGE("Could not allocate resample context...\n"); - ret = AVERROR(ENOMEM); - goto end; - } - - /* set options */ - av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0); - av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0); - av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0); - - av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0); - av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0); - av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0); - - /* initialize the resampling context */ - if ((ret = swr_init(swr_ctx)) < 0) { - log_error("Failed to initialize the resampling context", ret); - goto end; - } - - dst_nb_samples = (int) av_rescale_rnd(src_rate, dst_rate, src_rate, AV_ROUND_UP); - max_dst_nb_samples = dst_nb_samples; - dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout); - - ret = init_audio_muxer(iformat_ctx, &oformat_ctx, dst_filename, dst_rate, dst_nb_channels, dst_ch_layout, dst_sample_fmt); - if (ret < 0) { - goto end; - } - init_audio_encoder(oformat_ctx, &ocodec_ctx); - - while (av_read_frame(iformat_ctx, &packet) >= 0) { - - ret = avcodec_decode_audio4(icodec_ctx, frame, &got_frame_ptr, &packet); - if (ret < 0) { - ALOGE("decode audio error:%d\n", ret); - continue; - } - ALOGE("decode succ, pts=%ld\n", frame->pts); - - /* compute destination number of samples */ - dst_nb_samples = (int) av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) + - frame->nb_samples, dst_rate, src_rate, AV_ROUND_UP); - if (dst_nb_samples > max_dst_nb_samples) { - init_out_frame(&frame, ocodec_ctx, dst_nb_samples); - max_dst_nb_samples = dst_nb_samples; + /* Packet used for temporary storage. */ + AVPacket input_packet; + int error; + init_packet(&input_packet); + + /* Read one audio frame from the input file into a temporary packet. */ + if ((error = av_read_frame(input_format_context, &input_packet)) < 0) { + /* If we are at the end of the file, flush the decoder below. */ + if (error == AVERROR_EOF) + *finished = 1; + else { + fprintf(stderr, "Could not read frame (error '%s')\n", + av_err2str(error)); + return error; } + } - /* convert to destination format */ - int samples_result = swr_convert(swr_ctx, frame->data, dst_nb_samples, (const uint8_t **)frame->data, frame->nb_samples); - if (samples_result < 0) { - ALOGE("Error while converting..."); - goto end; - } - dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, - samples_result, dst_sample_fmt, 1); - if (dst_bufsize < 0) { - ALOGE("Could not get sample buffer size..."); - goto end; - } - ALOGE("resample size=%d", dst_bufsize); + /* Send the audio frame stored in the temporary packet to the decoder. + * The input audio stream decoder is used to do this. */ + if ((error = avcodec_send_packet(input_codec_context, &input_packet)) < 0) { + fprintf(stderr, "Could not send packet for decoding (error '%s')\n", + av_err2str(error)); + return error; + } - ret = avcodec_encode_audio2(ocodec_ctx, opacket, frame, &got_packet_ptr); - if (ret < 0) { - log_error("encode audio error", ret); - continue; - } + /* Receive one frame from the decoder. */ + error = avcodec_receive_frame(input_codec_context, frame); + /* If the decoder asks for more data to be able to decode a frame, + * return indicating that no data is present. */ + if (error == AVERROR(EAGAIN)) { + error = 0; + goto cleanup; + /* If the end of the input file is reached, stop decoding. */ + } else if (error == AVERROR_EOF) { + *finished = 1; + error = 0; + goto cleanup; + } else if (error < 0) { + fprintf(stderr, "Could not decode frame (error '%s')\n", + av_err2str(error)); + goto cleanup; + /* Default case: Return decoded data. */ + } else { + *data_present = 1; + goto cleanup; + } + +cleanup: + av_packet_unref(&input_packet); + return error; +} + +/** + * Initialize a temporary storage for the specified number of audio samples. + * The conversion requires temporary storage due to the different format. + * The number of audio samples to be allocated is specified in frame_size. + * + */ +static int init_converted_samples(uint8_t ***converted_input_samples, + AVCodecContext *output_codec_context, + int frame_size) +{ + int error; + + /* Allocate as many pointers as there are audio channels. + * Each pointer will later point to the audio samples of the corresponding channels + */ + if (!(*converted_input_samples = (uint8_t **) calloc(output_codec_context->channels, + sizeof(**converted_input_samples)))) { + fprintf(stderr, "Could not allocate converted input sample pointers\n"); + return AVERROR(ENOMEM); + } + + /* Allocate memory for the samples of all channels in one consecutive + * block for convenience. */ + if ((error = av_samples_alloc(*converted_input_samples, nullptr, + output_codec_context->channels, + frame_size, + output_codec_context->sample_fmt, 0)) < 0) { + fprintf(stderr, + "Could not allocate converted input samples (error '%s')\n", + av_err2str(error)); + av_freep(&(*converted_input_samples)[0]); + free(*converted_input_samples); + return error; + } + return 0; +} + +/** + * Convert the input audio samples into the output sample format. + * The conversion happens on a per-frame basis, the size of which is + * specified by frame_size. + * + */ +static int convert_samples(const uint8_t **input_data, + uint8_t **converted_data, const int frame_size, + SwrContext *resample_context) +{ + int error; + + /* Convert the samples using the resampler. */ + if ((error = swr_convert(resample_context, + converted_data, frame_size, + input_data , frame_size)) < 0) { + fprintf(stderr, "Could not convert input samples (error '%s')\n", + av_err2str(error)); + return error; + } + + return 0; +} + +/** + * Add converted input audio samples to the FIFO buffer for later processing. + * + */ +static int add_samples_to_fifo(AVAudioFifo *fifo, + uint8_t **converted_input_samples, + const int frame_size) +{ + int error; + + /* Make the FIFO as large as it needs to be to hold both, + * the old and the new samples. */ + if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0) { + fprintf(stderr, "Could not reallocate FIFO\n"); + return error; + } + + /* Store the new samples in the FIFO buffer. */ + if (av_audio_fifo_write(fifo, (void **)converted_input_samples, + frame_size) < frame_size) { + fprintf(stderr, "Could not write data to FIFO\n"); + return AVERROR_EXIT; + } + return 0; +} + +/** + * Read one audio frame from the input file, decode, convert and store + * it in the FIFO buffer. + * + */ +static int read_decode_convert_and_store(AVAudioFifo *fifo, + AVFormatContext *input_format_context, + AVCodecContext *input_codec_context, + AVCodecContext *output_codec_context, + SwrContext *resampler_context, + int *finished) +{ + /* Temporary storage of the input samples of the frame read from the file. */ + AVFrame *input_frame = nullptr; + /* Temporary storage for the converted input samples. */ + uint8_t **converted_input_samples = nullptr; + int data_present = 0; + int ret = AVERROR_EXIT; + + /* Initialize temporary storage for one input frame. */ + if (init_input_frame(&input_frame)) + goto cleanup; + /* Decode one frame worth of audio samples. */ + if (decode_audio_frame(input_frame, input_format_context, + input_codec_context, &data_present, finished)) + goto cleanup; + /* If we are at the end of the file and there are no more samples + * in the decoder which are delayed, we are actually finished. + * This must not be treated as an error. */ + if (*finished) { + ret = 0; + goto cleanup; + } + /* If there is decoded data, convert and store it. */ + if (data_present) { + /* Initialize the temporary storage for the converted input samples. */ + if (init_converted_samples(&converted_input_samples, output_codec_context, + input_frame->nb_samples)) + goto cleanup; + + /* Convert the input samples to the desired output sample format. + * This requires a temporary storage provided by converted_input_samples. */ + if (convert_samples((const uint8_t**)input_frame->extended_data, converted_input_samples, + input_frame->nb_samples, resampler_context)) + goto cleanup; + + /* Add the converted input samples to the FIFO buffer for later processing. */ + if (add_samples_to_fifo(fifo, converted_input_samples, + input_frame->nb_samples)) + goto cleanup; + ret = 0; + } + ret = 0; + +cleanup: + if (converted_input_samples) { + av_freep(&converted_input_samples[0]); + free(converted_input_samples); + } + av_frame_free(&input_frame); + + return ret; +} - AVStream *in_stream = iformat_ctx->streams[0]; - AVStream *out_stream = oformat_ctx->streams[0]; - opacket->pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, - static_cast(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX)); - opacket->dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, - static_cast(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX)); - opacket->duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base); - opacket->pos = -1; +/** + * Initialize one input frame for writing to the output file. + * The frame will be exactly frame_size samples large. + * + */ +static int init_output_frame(AVFrame **frame, + AVCodecContext *output_codec_context, + int frame_size) +{ + int error; - av_interleaved_write_frame(oformat_ctx, opacket); + /* Create a new frame to store the audio samples. */ + if (!(*frame = av_frame_alloc())) { + fprintf(stderr, "Could not allocate output frame\n"); + return AVERROR_EXIT; + } - av_packet_unref(opacket); - av_packet_unref(&packet); + /* Set the frame's parameters, especially its size and format. + * av_frame_get_buffer needs this to allocate memory for the + * audio samples of the frame. + * Default channel layouts based on the number of channels + * are assumed for simplicity. */ + (*frame)->nb_samples = frame_size; + (*frame)->channel_layout = output_codec_context->channel_layout; + (*frame)->format = output_codec_context->sample_fmt; + (*frame)->sample_rate = output_codec_context->sample_rate; + + /* Allocate the samples of the created frame. This call will make + * sure that the audio frame can hold as many samples as specified. */ + if ((error = av_frame_get_buffer(*frame, 0)) < 0) { + fprintf(stderr, "Could not allocate output frame samples (error '%s')\n", + av_err2str(error)); + av_frame_free(frame); + return error; } - if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0) - goto end; - ALOGE("Resampling succeeded. Play the output file with the command:\n" - "ffplay -f %s -channel_layout %" PRId64 " -channels %d -ar %d %s\n", - fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename); + return 0; +} -end: +/** + * Encode one frame worth of audio to the output file. + * @param frame Samples to be encoded + * @param output_format_context Format context of the output file + * @param output_codec_context Codec context of the output file + * @param[out] data_present Indicates whether data has been + * encoded + * @return Error code (0 if successful) + */ +static int encode_audio_frame(AVFrame *frame, + AVFormatContext *output_format_context, + AVCodecContext *output_codec_context, + int *data_present) +{ + /* Packet used for temporary storage. */ + AVPacket output_packet; + int error; + init_packet(&output_packet); + + /* Set a timestamp based on the sample rate for the container. */ + if (frame) { + frame->pts = pts; + pts += frame->nb_samples; + } - av_packet_free(&opacket); - av_frame_free(&frame); + /* Send the audio frame stored in the temporary packet to the encoder. + * The output audio stream encoder is used to do this. */ + error = avcodec_send_frame(output_codec_context, frame); + /* The encoder signals that it has nothing more to encode. */ + if (error == AVERROR_EOF) { + error = 0; + goto cleanup; + } else if (error < 0) { + fprintf(stderr, "Could not send packet for encoding (error '%s')\n", + av_err2str(error)); + return error; + } - swr_free(&swr_ctx); - avformat_close_input(&iformat_ctx); + /* Receive one encoded frame from the encoder. */ + error = avcodec_receive_packet(output_codec_context, &output_packet); + /* If the encoder asks for more data to be able to provide an + * encoded frame, return indicating that no data is present. */ + if (error == AVERROR(EAGAIN)) { + error = 0; + goto cleanup; + /* If the last frame has been encoded, stop encoding. */ + } else if (error == AVERROR_EOF) { + error = 0; + goto cleanup; + } else if (error < 0) { + fprintf(stderr, "Could not encode frame (error '%s')\n", + av_err2str(error)); + goto cleanup; + /* Default case: Return encoded data. */ + } else { + *data_present = 1; + } + + /* Write one audio frame from the temporary packet to the output file. */ + if (*data_present && + (error = av_write_frame(output_format_context, &output_packet)) < 0) { + fprintf(stderr, "Could not write frame (error '%s')\n", + av_err2str(error)); + goto cleanup; + } + + cleanup: + av_packet_unref(&output_packet); + return error; +} + +/** + * Load one audio frame from the FIFO buffer, encode and write it to the + * output file. + * + */ +static int load_encode_and_write(AVAudioFifo *fifo, + AVFormatContext *output_format_context, + AVCodecContext *output_codec_context) +{ + /* Temporary storage of the output samples of the frame written to the file. */ + AVFrame *output_frame; + /* Use the maximum number of possible samples per frame. + * If there is less than the maximum possible frame size in the FIFO + * buffer use this number. Otherwise, use the maximum possible frame size. */ + const int frame_size = FFMIN(av_audio_fifo_size(fifo), + output_codec_context->frame_size); + int data_written; + + /* Initialize temporary storage for one output frame. */ + if (init_output_frame(&output_frame, output_codec_context, frame_size)) + return AVERROR_EXIT; + + /* Read as many samples from the FIFO buffer as required to fill the frame. + * The samples are stored in the frame temporarily. */ + if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) { + fprintf(stderr, "Could not read data from FIFO\n"); + av_frame_free(&output_frame); + return AVERROR_EXIT; + } - if (oformat_ctx) { - av_write_trailer(oformat_ctx); - if (!(oformat_ctx->oformat->flags & AVFMT_NOFILE)) { - avio_close(oformat_ctx->pb); + /* Encode one frame worth of audio samples. */ + if (encode_audio_frame(output_frame, output_format_context, + output_codec_context, &data_written)) { + av_frame_free(&output_frame); + return AVERROR_EXIT; + } + av_frame_free(&output_frame); + return 0; +} + +/** + * Write the trailer of the output file container. + * + */ +static int write_output_file_trailer(AVFormatContext *output_format_context) +{ + int error; + if ((error = av_write_trailer(output_format_context)) < 0) { + fprintf(stderr, "Could not write output file trailer (error '%s')\n", + av_err2str(error)); + return error; + } + return 0; +} + +int resampling(const char *src_file, const char *dst_file, int sampleRate) +{ + AVFormatContext *input_format_context = nullptr, *output_format_context = nullptr; + AVCodecContext *input_codec_context = nullptr, *output_codec_context = nullptr; + SwrContext *resample_context = nullptr; + AVAudioFifo *fifo = nullptr; + int ret = AVERROR_EXIT; + + /* Open the input file for reading. */ + if (open_input_file(src_file, &input_format_context, + &input_codec_context)) + goto cleanup; + /* Open the output file for writing. */ + if (open_output_file(dst_file, sampleRate, input_codec_context, + &output_format_context, &output_codec_context)) + goto cleanup; + /* Initialize the re-sampler to be able to convert audio sample formats. */ + if (init_resampler(input_codec_context, output_codec_context, + &resample_context)) + goto cleanup; + /* Initialize the FIFO buffer to store audio samples to be encoded. */ + if (init_fifo(&fifo, output_codec_context)) + goto cleanup; + /* Write the header of the output file container. */ + if (write_output_file_header(output_format_context)) + goto cleanup; + + /* Loop as long as we have input samples to read or output samples + * to write; abort as soon as we have neither. */ + while (1) { + /* Use the encoder's desired frame size for processing. */ + const int output_frame_size = output_codec_context->frame_size; + int finished = 0; + + /* Make sure that there is one frame worth of samples in the FIFO + * buffer so that the encoder can do its work. + * Since the decoder's and the encoder's frame size may differ, we + * need to FIFO buffer to store as many frames worth of input samples + * that they make up at least one frame worth of output samples. */ + while (av_audio_fifo_size(fifo) < output_frame_size) { + /* Decode one frame worth of audio samples, convert it to the + * output sample format and put it into the FIFO buffer. */ + if (read_decode_convert_and_store(fifo, input_format_context, + input_codec_context, + output_codec_context, + resample_context, &finished)) + goto cleanup; + + /* If we are at the end of the input file, we continue + * encoding the remaining audio samples to the output file. */ + if (finished) + break; + } + + /* If we have enough samples for the encoder, we encode them. + * At the end of the file, we pass the remaining samples to + * the encoder. */ + while (av_audio_fifo_size(fifo) >= output_frame_size || + (finished && av_audio_fifo_size(fifo) > 0)) + /* Take one frame worth of audio samples from the FIFO buffer, + * encode it and write it to the output file. */ + if (load_encode_and_write(fifo, output_format_context, + output_codec_context)) + goto cleanup; + + /* If we are at the end of the input file and have encoded + * all remaining samples, we can exit this loop and finish. */ + if (finished) { + int data_written; + /* Flush the encoder as it may have delayed frames. */ + do { + data_written = 0; + if (encode_audio_frame(nullptr, output_format_context, + output_codec_context, &data_written)) + goto cleanup; + } while (data_written); + break; } - avformat_free_context(oformat_ctx); } + + /* Write the trailer of the output file container. */ + if (write_output_file_trailer(output_format_context)) + goto cleanup; + ret = 0; + +cleanup: + if (fifo) + av_audio_fifo_free(fifo); + swr_free(&resample_context); + if (output_codec_context) + avcodec_free_context(&output_codec_context); + if (output_format_context) { + avio_closep(&output_format_context->pb); + avformat_free_context(output_format_context); + } + if (input_codec_context) + avcodec_free_context(&input_codec_context); + if (input_format_context) + avformat_close_input(&input_format_context); + return ret; }