diff --git a/app/src/main/cpp/audio_resample.cpp b/app/src/main/cpp/audio_resample.cpp
index 9e157a0..048df48 100644
--- a/app/src/main/cpp/audio_resample.cpp
+++ b/app/src/main/cpp/audio_resample.cpp
@@ -8,10 +8,17 @@
 extern "C" {
 #endif
 #include "libavformat/avformat.h"
-#include <libavutil/opt.h>
-#include <libavutil/channel_layout.h>
-#include <libavutil/samplefmt.h>
-#include <libswresample/swresample.h>
+#include "libavformat/avio.h"
+
+#include "libavcodec/avcodec.h"
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/frame.h"
+#include "libavutil/opt.h"
+
+#include "libswresample/swresample.h"
 #ifdef __cplusplus
 }
 #endif
@@ -20,6 +27,14 @@ extern "C" {
 
 #define ALOGE(Format, ...) LOGE("audio_resample", Format, ##__VA_ARGS__)
 
+/* The output bit rate in bit/s */
+#define OUTPUT_BIT_RATE 96000
+/* The number of output channels */
+#define OUTPUT_CHANNELS 2
+
+/* Global timestamp for the audio frames. */
+static int64_t pts = 0;
+
 static void log_error(const char *functionName, int errorNumber) {
     int buffer_len = 1024;
     char *buffer = new char [buffer_len];
@@ -28,260 +43,771 @@ static void log_error(const char *functionName, int errorNumber) {
     delete []buffer;
 }
 
-static int get_format_from_sample_fmt(const char **fmt, enum AVSampleFormat sample_fmt)
+/**
+ * Open an input file and the required decoder.
+ *
+ */
+static int open_input_file(const char *filename,
+                           AVFormatContext **input_format_context,
+                           AVCodecContext **input_codec_context)
 {
-    *fmt = nullptr;
-
-    struct sample_fmt_entry {
-        enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le;
-    } sample_fmt_entries[] = {
-            { AV_SAMPLE_FMT_U8,  "u8",    "u8"    },
-            { AV_SAMPLE_FMT_S16, "s16be", "s16le" },
-            { AV_SAMPLE_FMT_S32, "s32be", "s32le" },
-            { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
-            { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
-    };
-
-    for (int i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) {
-        struct sample_fmt_entry *entry = &sample_fmt_entries[i];
-        if (sample_fmt == entry->sample_fmt) {
-            *fmt = AV_NE(entry->fmt_be, entry->fmt_le);
-            return 0;
-        }
+    AVCodecContext *avctx;
+    AVCodec *input_codec;
+    int error;
+
+    /* Open the input file to read from it. */
+    if ((error = avformat_open_input(input_format_context, filename, nullptr,
+                                     nullptr)) < 0) {
+        fprintf(stderr, "Could not open input file '%s' (error '%s')\n",
+                filename, av_err2str(error));
+        *input_format_context = nullptr;
+        return error;
+    }
+
+    /* Get information on the input file (number of streams etc.). */
+    if ((error = avformat_find_stream_info(*input_format_context, nullptr)) < 0) {
+        fprintf(stderr, "Could not open find stream info (error '%s')\n",
+                av_err2str(error));
+        avformat_close_input(input_format_context);
+        return error;
+    }
+
+    /* Make sure that there is only one stream in the input file. */
+    if ((*input_format_context)->nb_streams != 1) {
+        fprintf(stderr, "Expected one audio input stream, but found %d\n",
+                (*input_format_context)->nb_streams);
+        avformat_close_input(input_format_context);
+        return AVERROR_EXIT;
+    }
+
+    /* Find a decoder for the audio stream. */
+    if (!(input_codec = avcodec_find_decoder((*input_format_context)->streams[0]->codecpar->codec_id))) {
+        fprintf(stderr, "Could not find input codec\n");
+        avformat_close_input(input_format_context);
+        return AVERROR_EXIT;
+    }
+
+    /* Allocate a new decoding context. */
+    avctx = avcodec_alloc_context3(input_codec);
+    if (!avctx) {
+        fprintf(stderr, "Could not allocate a decoding context\n");
+        avformat_close_input(input_format_context);
+        return AVERROR(ENOMEM);
+    }
+
+    /* Initialize the stream parameters with demuxer information. */
+    error = avcodec_parameters_to_context(avctx, (*input_format_context)->streams[0]->codecpar);
+    if (error < 0) {
+        avformat_close_input(input_format_context);
+        avcodec_free_context(&avctx);
+        return error;
+    }
+
+    /* Open the decoder for the audio stream to use it later. */
+    if ((error = avcodec_open2(avctx, input_codec, nullptr)) < 0) {
+        fprintf(stderr, "Could not open input codec (error '%s')\n",
+                av_err2str(error));
+        avcodec_free_context(&avctx);
+        avformat_close_input(input_format_context);
+        return error;
     }
 
-    ALOGE("Sample format %s not supported as output format, msg=%s\n",
-            av_get_sample_fmt_name(sample_fmt), strerror(errno));
-    return AVERROR(EINVAL);
+    /* Save the decoder context for easier access later. */
+    *input_codec_context = avctx;
+
+    return 0;
 }
 
-int init_audio_codec(AVFormatContext *fmt_ctx, AVCodecContext **avcodec_ctx, bool is_encoder) {
-    AVCodecContext *codec_ctx = nullptr;
-    for (int i = 0; i < fmt_ctx->nb_streams; ++i) {
-        if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            codec_ctx = fmt_ctx->streams[i]->codec;
-        }
+/**
+ * Open the output file, create its single audio stream and open an AAC
+ * encoder for it (OUTPUT_CHANNELS channels, OUTPUT_BIT_RATE bit/s).
+ * @param sample_rate Sample rate used for the encoder and the stream time base
+ * @return 0 on success, otherwise a negative AVERROR code (nothing stays open)
+ */
+static int open_output_file(const char *filename,
+                            int sample_rate,
+                            AVCodecContext *input_codec_context,
+                            AVFormatContext **output_format_context,
+                            AVCodecContext **output_codec_context)
+{
+    AVCodecContext *avctx          = nullptr;
+    AVIOContext *output_io_context = nullptr;
+    AVStream *stream               = nullptr;
+    AVCodec *output_codec          = nullptr;
+    int error;
+
+    /* Open the output file to write to it. */
+    if ((error = avio_open(&output_io_context, filename,
+                           AVIO_FLAG_WRITE)) < 0) {
+        fprintf(stderr, "Could not open output file '%s' (error '%s')\n",
+                filename, av_err2str(error));
+        return error;
     }
-    AVCodec *codec = is_encoder ? avcodec_find_encoder(codec_ctx->codec_id)
-            : avcodec_find_decoder(codec_ctx->codec_id);
-    if (!codec) {
-        ALOGE("can't found codec id=%d\n", codec_ctx->codec_id);
-        return -1;
+
+    /* Create a new format context for the output container format. */
+    if (!(*output_format_context = avformat_alloc_context())) {
+        fprintf(stderr, "Could not allocate output format context\n");
+        avio_closep(&output_io_context);
+        return AVERROR(ENOMEM);
     }
-    int ret = avcodec_open2(codec_ctx, codec, nullptr);
-    if (ret < 0)
-        ALOGE("avcodec_open2 fail:%d", ret);
-    *avcodec_ctx = codec_ctx;
-    return ret;
+
+    /* Associate the output file (pointer) with the container format context. */
+    (*output_format_context)->pb = output_io_context;
+
+    /* Guess the desired container format based on the file extension. */
+    if (!((*output_format_context)->oformat = av_guess_format(nullptr, filename,
+                                                              nullptr))) {
+        fprintf(stderr, "Could not find output file format\n");
+        goto cleanup;
+    }
+
+    if (!((*output_format_context)->url = av_strdup(filename))) {
+        fprintf(stderr, "Could not allocate url.\n");
+        error = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    /* Find the AAC encoder by its codec ID. */
+    if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC))) {
+        fprintf(stderr, "Could not find an AAC encoder.\n");
+        goto cleanup;
+    }
+
+    /* Create a new audio stream in the output file container. */
+    if (!(stream = avformat_new_stream(*output_format_context, nullptr))) {
+        fprintf(stderr, "Could not create new stream\n");
+        error = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    avctx = avcodec_alloc_context3(output_codec);
+    if (!avctx) {
+        fprintf(stderr, "Could not allocate an encoding context\n");
+        error = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    /* Set the basic encoder parameters; the caller chooses the sample rate. */
+    avctx->channels       = OUTPUT_CHANNELS;
+    avctx->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);
+    avctx->sample_rate    = sample_rate;
+    avctx->sample_fmt     = output_codec->sample_fmts[0];
+    avctx->bit_rate       = OUTPUT_BIT_RATE;
+
+    /* Allow the use of the experimental AAC encoder. */
+    avctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
+
+    /* Timestamps count samples at the encoder rate, so use 1/sample_rate. */
+    stream->time_base.den = sample_rate;
+    stream->time_base.num = 1;
+
+    /* Some container formats (like MP4) require global headers to be present.
+     * Mark the encoder so that it behaves accordingly. */
+    if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER)
+        avctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
+    /* Open the encoder for the audio stream to use it later. */
+    if ((error = avcodec_open2(avctx, output_codec, nullptr)) < 0) {
+        fprintf(stderr, "Could not open output codec (error '%s')\n",
+                av_err2str(error));
+        goto cleanup;
+    }
+
+    error = avcodec_parameters_from_context(stream->codecpar, avctx);
+    if (error < 0) {
+        fprintf(stderr, "Could not initialize stream parameters\n");
+        goto cleanup;
+    }
+
+    /* Save the encoder context for easier access later. */
+    *output_codec_context = avctx;
+
+    return 0;
+
+cleanup:
+    avcodec_free_context(&avctx);
+    avio_closep(&(*output_format_context)->pb);
+    avformat_free_context(*output_format_context);
+    *output_format_context = nullptr;
+    return error < 0 ? error : AVERROR_EXIT;
 }
 
-int init_audio_decoder(AVFormatContext *fmt_ctx, AVCodecContext **avcodec_ctx) {
-    return init_audio_codec(fmt_ctx, avcodec_ctx, false);
+/**
+ * Reset a packet to an empty state before it is used for reading or writing.
+ * @param packet Packet to reset; its data pointer and size are cleared
+ */
+static void init_packet(AVPacket *packet)
+{
+    av_init_packet(packet);
+    packet->size = 0;
+    packet->data = nullptr;
 }
 
-int init_audio_encoder(AVFormatContext *fmt_ctx, AVCodecContext **avcodec_ctx) {
-    return init_audio_codec(fmt_ctx, avcodec_ctx, true);
+/**
+ * Allocate an empty AVFrame that will receive decoded input audio.
+ * @return 0 on success, AVERROR(ENOMEM) if the frame cannot be allocated
+ */
+static int init_input_frame(AVFrame **frame)
+{
+    *frame = av_frame_alloc();
+    if (*frame)
+        return 0;
+    fprintf(stderr, "Could not allocate input frame\n");
+    return AVERROR(ENOMEM);
 }
 
-int init_audio_muxer(AVFormatContext *ifmt_ctx, AVFormatContext **ofmt_ctx, const char* filename,
-                     int sample_rate, int channels, int64_t channel_layout, AVSampleFormat sampleFormat) {
-    int ret;
-    AVFormatContext *fmt_ctx = *ofmt_ctx;
-    avformat_alloc_output_context2(&fmt_ctx, nullptr, nullptr, filename);
-    if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
-        ret = avio_open(&fmt_ctx->pb, filename, AVIO_FLAG_WRITE);
-        if (ret < 0) {
-            ALOGE("Could not open output file %s\n", filename);
-            return ret;
-        }
+/**
+ * Initialize the audio resampler based on the input and output codec settings.
+ *
+ */
+static int init_resampler(AVCodecContext *input_codec_context,
+                          AVCodecContext *output_codec_context,
+                          SwrContext **resample_context)
+{
+    int error;
+
+    /*
+     * Create a resampler context for the conversion.
+     * Set the conversion parameters.
+     * Default channel layouts based on the number of channels
+     * are assumed for simplicity (they are sometimes not detected
+     * properly by the demuxer and/or decoder).
+     */
+    *resample_context = swr_alloc_set_opts(nullptr,
+                                           av_get_default_channel_layout(output_codec_context->channels),
+                                           output_codec_context->sample_fmt,
+                                           output_codec_context->sample_rate,
+                                           av_get_default_channel_layout(input_codec_context->channels),
+                                           input_codec_context->sample_fmt,
+                                           input_codec_context->sample_rate,
+                                           0, nullptr);
+    if (!*resample_context) {
+        fprintf(stderr, "Could not allocate resample context\n");
+        return AVERROR(ENOMEM);
     }
-    av_dump_format(fmt_ctx, 0, filename, 1);
-
-    for (int i = 0; i < ifmt_ctx->nb_streams; i++) {
-        AVStream* in_stream = ifmt_ctx->streams[i];
-        AVCodecParameters* codecpar = in_stream->codecpar;
-        AVCodec* encoder = avcodec_find_encoder(codecpar->codec_id);
-        AVStream* out_stream = avformat_new_stream(fmt_ctx, encoder);
-        avcodec_parameters_copy(out_stream->codecpar, codecpar);
-
-        if(out_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            out_stream->codecpar->channels = channels;
-            out_stream->codecpar->sample_rate = sample_rate;
-            out_stream->codecpar->channel_layout = channel_layout;
-            out_stream->codec->sample_fmt = sampleFormat;
-            avcodec_parameters_to_context(out_stream->codec, out_stream->codecpar);
-            out_stream->time_base = in_stream->codec->time_base;
-            out_stream->duration = in_stream->duration;
-            break;
-        }
+    /*
+    * Perform a sanity check so that the number of converted samples is
+    * not greater than the number of samples to be converted.
+    * If the sample rates differ, this case has to be handled differently
+    */
+    av_assert0(output_codec_context->sample_rate == input_codec_context->sample_rate);
+
+    /* Open the re-sampler with the specified parameters. */
+    if ((error = swr_init(*resample_context)) < 0) {
+        fprintf(stderr, "Could not open resample context\n");
+        swr_free(resample_context);
+        return error;
     }
+    return 0;
+}
 
-    ret = avformat_write_header(fmt_ctx, nullptr);
-    if (ret < 0) {
-        log_error("Error occurred when opening output file", ret);
+/**
+ * Initialize a FIFO buffer for the audio samples to be encoded.
+ *
+ */
+static int init_fifo(AVAudioFifo **fifo, AVCodecContext *output_codec_context)
+{
+    /* Create the FIFO buffer based on the specified output sample format. */
+    if (!(*fifo = av_audio_fifo_alloc(output_codec_context->sample_fmt,
+                                      output_codec_context->channels, 1))) {
+        fprintf(stderr, "Could not allocate FIFO\n");
+        return AVERROR(ENOMEM);
     }
-    *ofmt_ctx = fmt_ctx;
-    return ret;
+    return 0;
 }
 
-void init_out_frame(AVFrame **frame, AVCodecContext *codec_ctx, int nb_samples) {
-    AVFrame *out_frame = *frame;
-    av_frame_free(&out_frame);
-    out_frame = av_frame_alloc();
-    out_frame->nb_samples = nb_samples;
-    out_frame->format = codec_ctx->sample_fmt;
-    out_frame->sample_rate = codec_ctx->sample_rate;
-    out_frame->channel_layout = codec_ctx->channel_layout;
-    av_frame_get_buffer(out_frame,0);
-    av_frame_make_writable(out_frame);
-    *frame = out_frame;
+/**
+ * Write the container header of the output file.
+ * @param output_format_context Muxer context whose header is written
+ * @return 0 on success, otherwise the negative error from avformat_write_header()
+ */
+static int write_output_file_header(AVFormatContext *output_format_context)
+{
+    const int error = avformat_write_header(output_format_context, nullptr);
+    if (error >= 0)
+        return 0;
+    fprintf(stderr, "Could not write output file header (error '%s')\n",
+            av_err2str(error));
+    return error;
 }
 
-int resampling(const char *src_filename, const char *dst_filename, int dst_rate)
+/**
+ * Decode one audio frame from the input file.
+ *
+ */
+static int decode_audio_frame(AVFrame *frame,
+                              AVFormatContext *input_format_context,
+                              AVCodecContext *input_codec_context,
+                              int *data_present, int *finished)
 {
-    int src_rate;
-    int64_t src_ch_layout;
-    enum AVSampleFormat src_sample_fmt;
-
-    int dst_bufsize;
-    int dst_linesize;
-    int dst_nb_channels;
-    int dst_nb_samples, max_dst_nb_samples;
-    int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;
-    enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
-
-    int ret;
-    const char *fmt;
-    AVPacket packet;
-    AVPacket *opacket;
-    AVFrame *frame;
-    int got_frame_ptr;
-    int got_packet_ptr;
-    struct SwrContext *swr_ctx;
-    AVFormatContext *iformat_ctx = nullptr;
-    AVFormatContext *oformat_ctx = nullptr;
-    AVCodecContext  *icodec_ctx  = nullptr;
-    AVCodecContext  *ocodec_ctx  = nullptr;
-
-    ret = avformat_open_input(&iformat_ctx, src_filename, nullptr, nullptr);
-    if (ret < 0) {
-        ALOGE("open input fail, path=%s, ret=%d", src_filename, ret);
-        goto end;
-    }
-    avformat_find_stream_info(iformat_ctx, nullptr);
-    frame = av_frame_alloc();
-    opacket = av_packet_alloc();
-    init_audio_decoder(iformat_ctx, &icodec_ctx);
-    src_rate       = icodec_ctx->sample_rate;
-    src_ch_layout  = (int64_t) icodec_ctx->channel_layout;
-    src_sample_fmt = icodec_ctx->sample_fmt;
-
-    /* create resample context */
-    swr_ctx = swr_alloc();
-    if (!swr_ctx) {
-        ALOGE("Could not allocate resample context...\n");
-        ret = AVERROR(ENOMEM);
-        goto end;
-    }
-
-    /* set options */
-    av_opt_set_int(swr_ctx, "in_channel_layout",    src_ch_layout, 0);
-    av_opt_set_int(swr_ctx, "in_sample_rate",       src_rate, 0);
-    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
-
-    av_opt_set_int(swr_ctx, "out_channel_layout",    dst_ch_layout, 0);
-    av_opt_set_int(swr_ctx, "out_sample_rate",       dst_rate, 0);
-    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
-
-    /* initialize the resampling context */
-    if ((ret = swr_init(swr_ctx)) < 0) {
-        log_error("Failed to initialize the resampling context", ret);
-        goto end;
-    }
-
-    dst_nb_samples = (int) av_rescale_rnd(src_rate, dst_rate, src_rate, AV_ROUND_UP);
-    max_dst_nb_samples = dst_nb_samples;
-    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
-
-    ret = init_audio_muxer(iformat_ctx, &oformat_ctx, dst_filename, dst_rate, dst_nb_channels, dst_ch_layout, dst_sample_fmt);
-    if (ret < 0) {
-        goto end;
-    }
-    init_audio_encoder(oformat_ctx, &ocodec_ctx);
-
-    while (av_read_frame(iformat_ctx, &packet) >= 0) {
-
-        ret = avcodec_decode_audio4(icodec_ctx, frame, &got_frame_ptr, &packet);
-        if (ret < 0) {
-            ALOGE("decode audio error:%d\n", ret);
-            continue;
-        }
-        ALOGE("decode succ, pts=%ld\n", frame->pts);
-
-        /* compute destination number of samples */
-        dst_nb_samples = (int) av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
-                                        frame->nb_samples, dst_rate, src_rate, AV_ROUND_UP);
-        if (dst_nb_samples > max_dst_nb_samples) {
-            init_out_frame(&frame, ocodec_ctx, dst_nb_samples);
-            max_dst_nb_samples = dst_nb_samples;
+    /* Packet used for temporary storage. */
+    AVPacket input_packet;
+    int error;
+    init_packet(&input_packet);
+
+    /* Read one audio frame from the input file into a temporary packet. */
+    if ((error = av_read_frame(input_format_context, &input_packet)) < 0) {
+        /* If we are at the end of the file, flush the decoder below. */
+        if (error == AVERROR_EOF)
+            *finished = 1;
+        else {
+            fprintf(stderr, "Could not read frame (error '%s')\n",
+                    av_err2str(error));
+            return error;
         }
+    }
 
-        /* convert to destination format */
-        int samples_result = swr_convert(swr_ctx, frame->data, dst_nb_samples, (const uint8_t **)frame->data, frame->nb_samples);
-        if (samples_result < 0) {
-            ALOGE("Error while converting...");
-            goto end;
-        }
-        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
-                                                 samples_result, dst_sample_fmt, 1);
-        if (dst_bufsize < 0) {
-            ALOGE("Could not get sample buffer size...");
-            goto end;
-        }
-        ALOGE("resample size=%d", dst_bufsize);
+    /* Send the audio frame stored in the temporary packet to the decoder.
+     * The input audio stream decoder is used to do this. */
+    if ((error = avcodec_send_packet(input_codec_context, &input_packet)) < 0) {
+        fprintf(stderr, "Could not send packet for decoding (error '%s')\n",
+                av_err2str(error));
+        return error;
+    }
 
-        ret = avcodec_encode_audio2(ocodec_ctx, opacket, frame, &got_packet_ptr);
-        if (ret < 0) {
-            log_error("encode audio error", ret);
-            continue;
-        }
+    /* Receive one frame from the decoder. */
+    error = avcodec_receive_frame(input_codec_context, frame);
+    /* If the decoder asks for more data to be able to decode a frame,
+     * return indicating that no data is present. */
+    if (error == AVERROR(EAGAIN)) {
+        error = 0;
+        goto cleanup;
+        /* If the end of the input file is reached, stop decoding. */
+    } else if (error == AVERROR_EOF) {
+        *finished = 1;
+        error = 0;
+        goto cleanup;
+    } else if (error < 0) {
+        fprintf(stderr, "Could not decode frame (error '%s')\n",
+                av_err2str(error));
+        goto cleanup;
+        /* Default case: Return decoded data. */
+    } else {
+        *data_present = 1;
+        goto cleanup;
+    }
+
+cleanup:
+    av_packet_unref(&input_packet);
+    return error;
+}
+
+/**
+ * Allocate temporary storage for frame_size converted audio samples.
+ * @param[out] converted_input_samples Receives the per-channel pointer array;
+ *                                     reset to null when allocation fails
+ * @return 0 on success, otherwise a negative AVERROR code
+ */
+static int init_converted_samples(uint8_t ***converted_input_samples,
+                                  AVCodecContext *output_codec_context,
+                                  int frame_size)
+{
+    int error;
+
+    /* Allocate one pointer per audio channel; each will later point to the
+     * samples of the corresponding channel. */
+    if (!(*converted_input_samples = (uint8_t **) calloc(output_codec_context->channels,
+                                            sizeof(**converted_input_samples)))) {
+        fprintf(stderr, "Could not allocate converted input sample pointers\n");
+        return AVERROR(ENOMEM);
+    }
+
+    /* Allocate memory for the samples of all channels in one consecutive
+     * block for convenience. */
+    if ((error = av_samples_alloc(*converted_input_samples, nullptr,
+                                  output_codec_context->channels,
+                                  frame_size,
+                                  output_codec_context->sample_fmt, 0)) < 0) {
+        fprintf(stderr, "Could not allocate converted input samples (error '%s')\n",
+                av_err2str(error));
+        av_freep(&(*converted_input_samples)[0]);
+        free(*converted_input_samples);
+        /* Clear the caller's pointer so its cleanup path does not free again. */
+        *converted_input_samples = nullptr;
+        return error;
+    }
+    return 0;
+}
+
+/**
+ * Run one frame of input samples through the resampler.
+ * Both the input and the output buffer are treated as holding frame_size
+ * samples per channel.
+ * @return 0 on success, otherwise the negative error from swr_convert()
+ */
+static int convert_samples(const uint8_t **input_data,
+                           uint8_t **converted_data, const int frame_size,
+                           SwrContext *resample_context)
+{
+    /* A negative return value from swr_convert() is treated as an error. */
+    const int result = swr_convert(resample_context,
+                                   converted_data, frame_size,
+                                   input_data, frame_size);
+
+    if (result >= 0)
+        return 0;
+
+    /* Report the failure and hand the error code back to the caller. */
+    fprintf(stderr, "Could not convert input samples (error '%s')\n",
+            av_err2str(result));
+    return result;
+}
+
+/**
+ * Append frame_size converted samples per channel to the FIFO buffer.
+ * @return 0 on success, the av_audio_fifo_realloc() error or AVERROR_EXIT
+ */
+static int add_samples_to_fifo(AVAudioFifo *fifo,
+                               uint8_t **converted_input_samples,
+                               const int frame_size)
+{
+    /* Grow the FIFO so it can hold the queued samples plus the new ones. */
+    const int required = av_audio_fifo_size(fifo) + frame_size;
+    const int error = av_audio_fifo_realloc(fifo, required);
+    if (error < 0) {
+        fprintf(stderr, "Could not reallocate FIFO\n");
+        return error;
+    }
+
+    /* A short write (fewer than frame_size samples stored) is a failure. */
+    const int written = av_audio_fifo_write(fifo,
+                                            (void **)converted_input_samples,
+                                            frame_size);
+    if (written >= frame_size)
+        return 0;
+    fprintf(stderr, "Could not write data to FIFO\n");
+    return AVERROR_EXIT;
+}
+
+/**
+ * Read one packet from the input file, decode it, convert the decoded
+ * samples to the output sample format and queue them in the FIFO buffer.
+ * @param      fifo                  FIFO that receives the converted samples
+ * @param      input_format_context  Demuxer context passed to decode_audio_frame()
+ * @param      input_codec_context   Decoder context passed to decode_audio_frame()
+ * @param      output_codec_context  Encoder context describing the converted samples
+ * @param      resampler_context     Resampler used for the conversion
+ * @param[out] finished              Set by decode_audio_frame() at end of input
+ * @return 0 on success (also when nothing was stored), else AVERROR_EXIT
+ */
+static int read_decode_convert_and_store(AVAudioFifo *fifo,
+                                         AVFormatContext *input_format_context,
+                                         AVCodecContext *input_codec_context,
+                                         AVCodecContext *output_codec_context,
+                                         SwrContext *resampler_context,
+                                         int *finished)
+{
+    /* Frame that receives the decoded input samples. */
+    AVFrame *decoded_frame = nullptr;
+    /* Per-channel buffers that receive the converted samples. */
+    uint8_t **converted_samples = nullptr;
+    int data_present = 0;
+    int ret = AVERROR_EXIT;
+
+    if (init_input_frame(&decoded_frame) != 0)
+        goto cleanup;
+
+    if (decode_audio_frame(decoded_frame, input_format_context,
+                           input_codec_context, &data_present, finished) != 0)
+        goto cleanup;
+
+    /* Reaching the end of the input, or getting no frame out of the decoder
+     * this time, is not an error: there is simply nothing to store. */
+    if (*finished || !data_present) {
+        ret = 0;
+        goto cleanup;
+    }
+
+    /* Allocate room for as many converted samples as were decoded. */
+    if (init_converted_samples(&converted_samples, output_codec_context,
+                               decoded_frame->nb_samples) != 0)
+        goto cleanup;
+
+    if (convert_samples((const uint8_t **)decoded_frame->extended_data,
+                        converted_samples, decoded_frame->nb_samples,
+                        resampler_context) != 0)
+        goto cleanup;
+
+    if (add_samples_to_fifo(fifo, converted_samples,
+                            decoded_frame->nb_samples) != 0)
+        goto cleanup;
+    ret = 0;
+
+cleanup:
+    if (converted_samples) {
+        av_freep(&converted_samples[0]);
+        free(converted_samples);
+    }
+    av_frame_free(&decoded_frame);
+
+    return ret;
+}
 
-        AVStream *in_stream = iformat_ctx->streams[0];
-        AVStream *out_stream = oformat_ctx->streams[0];
-        opacket->pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base,
-                                    static_cast<AVRounding>(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
-        opacket->dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base,
-                                    static_cast<AVRounding>(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
-        opacket->duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);
-        opacket->pos = -1;
+/**
+ * Initialize one output frame for writing to the output file.
+ * The frame will be exactly frame_size samples large.
+ *
+ */
+static int init_output_frame(AVFrame **frame,
+                             AVCodecContext *output_codec_context,
+                             int frame_size)
+{
+    int error;
 
-        av_interleaved_write_frame(oformat_ctx, opacket);
+    /* Create a new frame to store the audio samples. */
+    if (!(*frame = av_frame_alloc())) {
+        fprintf(stderr, "Could not allocate output frame\n");
+        return AVERROR_EXIT;
+    }
 
-        av_packet_unref(opacket);
-        av_packet_unref(&packet);
+    /* Set the frame's parameters, especially its size and format.
+     * av_frame_get_buffer needs this to allocate memory for the
+     * audio samples of the frame.
+     * Default channel layouts based on the number of channels
+     * are assumed for simplicity. */
+    (*frame)->nb_samples     = frame_size;
+    (*frame)->channel_layout = output_codec_context->channel_layout;
+    (*frame)->format         = output_codec_context->sample_fmt;
+    (*frame)->sample_rate    = output_codec_context->sample_rate;
+
+    /* Allocate the samples of the created frame. This call will make
+     * sure that the audio frame can hold as many samples as specified. */
+    if ((error = av_frame_get_buffer(*frame, 0)) < 0) {
+        fprintf(stderr, "Could not allocate output frame samples (error '%s')\n",
+                av_err2str(error));
+        av_frame_free(frame);
+        return error;
     }
 
-    if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
-        goto end;
-    ALOGE("Resampling succeeded. Play the output file with the command:\n"
-                    "ffplay -f %s -channel_layout %" PRId64 " -channels %d -ar %d %s\n",
-            fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);
+    return 0;
+}
 
-end:
+/**
+ * Encode one frame worth of audio to the output file.
+ * @param      frame                 Samples to be encoded
+ * @param      output_format_context Format context of the output file
+ * @param      output_codec_context  Codec context of the output file
+ * @param[out] data_present          Indicates whether data has been
+ *                                   encoded
+ * @return Error code (0 if successful)
+ */
+static int encode_audio_frame(AVFrame *frame,
+                              AVFormatContext *output_format_context,
+                              AVCodecContext *output_codec_context,
+                              int *data_present)
+{
+    /* Packet used for temporary storage. */
+    AVPacket output_packet;
+    int error;
+    init_packet(&output_packet);
+
+    /* Set a timestamp based on the sample rate for the container. */
+    if (frame) {
+        frame->pts = pts;
+        pts += frame->nb_samples;
+    }
 
-    av_packet_free(&opacket);
-    av_frame_free(&frame);
+    /* Send the raw audio frame to the encoder.
+     * The output audio stream encoder is used to do this. */
+    error = avcodec_send_frame(output_codec_context, frame);
+    /* The encoder signals that it has nothing more to encode. */
+    if (error == AVERROR_EOF) {
+        error = 0;
+        goto cleanup;
+    } else if (error < 0) {
+        fprintf(stderr, "Could not send packet for encoding (error '%s')\n",
+                av_err2str(error));
+        return error;
+    }
 
-    swr_free(&swr_ctx);
-    avformat_close_input(&iformat_ctx);
+    /* Receive one encoded frame from the encoder. */
+    error = avcodec_receive_packet(output_codec_context, &output_packet);
+    /* If the encoder asks for more data to be able to provide an
+     * encoded frame, return indicating that no data is present. */
+    if (error == AVERROR(EAGAIN)) {
+        error = 0;
+        goto cleanup;
+        /* If the last frame has been encoded, stop encoding. */
+    } else if (error == AVERROR_EOF) {
+        error = 0;
+        goto cleanup;
+    } else if (error < 0) {
+        fprintf(stderr, "Could not encode frame (error '%s')\n",
+                av_err2str(error));
+        goto cleanup;
+        /* Default case: Return encoded data. */
+    } else {
+        *data_present = 1;
+    }
+
+    /* Write one audio frame from the temporary packet to the output file. */
+    if (*data_present &&
+        (error = av_write_frame(output_format_context, &output_packet)) < 0) {
+        fprintf(stderr, "Could not write frame (error '%s')\n",
+                av_err2str(error));
+        goto cleanup;
+    }
+
+    cleanup:
+    av_packet_unref(&output_packet);
+    return error;
+}
+
+/**
+ * Load one audio frame from the FIFO buffer, encode and write it to the
+ * output file.
+ * @return 0 on success, AVERROR_EXIT if any step fails
+ */
+static int load_encode_and_write(AVAudioFifo *fifo,
+                                 AVFormatContext *output_format_context,
+                                 AVCodecContext *output_codec_context)
+{
+    /* Temporary storage of the output samples of the frame written to the file. */
+    AVFrame *output_frame;
+    /* Use the maximum number of possible samples per frame.
+     * If there is less than the maximum possible frame size in the FIFO
+     * buffer use this number. Otherwise, use the maximum possible frame size. */
+    const int frame_size = FFMIN(av_audio_fifo_size(fifo),
+                                 output_codec_context->frame_size);
+    int data_written; /* required by encode_audio_frame(); not inspected here */
+
+    /* Initialize temporary storage for one output frame. */
+    if (init_output_frame(&output_frame, output_codec_context, frame_size))
+        return AVERROR_EXIT;
+
+    /* Read frame_size samples from the FIFO buffer into the frame's data
+     * pointers; reading fewer than requested is treated as an error. */
+    if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) {
+        fprintf(stderr, "Could not read data from FIFO\n");
+        av_frame_free(&output_frame);
+        return AVERROR_EXIT;
+    }
 
-    if (oformat_ctx) {
-        av_write_trailer(oformat_ctx);
-        if (!(oformat_ctx->oformat->flags & AVFMT_NOFILE)) {
-            avio_close(oformat_ctx->pb);
+    /* Encode one frame worth of audio samples. */
+    if (encode_audio_frame(output_frame, output_format_context,
+                           output_codec_context, &data_written)) {
+        av_frame_free(&output_frame);
+        return AVERROR_EXIT;
+    }
+    av_frame_free(&output_frame);
+    return 0;
+}
+
+/**
+ * Finalize the output container by writing its trailer.
+ * @return 0 on success, otherwise the negative av_write_trailer() result
+ */
+static int write_output_file_trailer(AVFormatContext *output_format_context)
+{
+    const int status = av_write_trailer(output_format_context);
+    if (status >= 0)
+        return 0;
+
+    fprintf(stderr, "Could not write output file trailer (error '%s')\n",
+            av_err2str(status));
+    return status;
+}
+
+/**
+ * Transcode the audio of src_file into dst_file; sampleRate goes to open_output_file().
+ * @return 0 on success, AVERROR_EXIT if any step fails
+ */
+int resampling(const char *src_file, const char *dst_file, int sampleRate)
+{
+    AVFormatContext *input_format_context = nullptr, *output_format_context = nullptr;
+    AVCodecContext *input_codec_context = nullptr, *output_codec_context = nullptr;
+    SwrContext *resample_context = nullptr;
+    AVAudioFifo *fifo = nullptr;
+    int ret = AVERROR_EXIT;
+
+    /* Restart the file-level timestamp counter so each call stamps from zero. */
+    pts = 0;
+    /* Open the input file for reading. */
+    if (open_input_file(src_file, &input_format_context,
+                        &input_codec_context))
+        goto cleanup;
+    /* Open the output file for writing. */
+    if (open_output_file(dst_file, sampleRate, input_codec_context,
+                         &output_format_context, &output_codec_context))
+        goto cleanup;
+    /* Initialize the re-sampler to be able to convert audio sample formats. */
+    if (init_resampler(input_codec_context, output_codec_context,
+                       &resample_context))
+        goto cleanup;
+    /* Initialize the FIFO buffer to store audio samples to be encoded. */
+    if (init_fifo(&fifo, output_codec_context))
+        goto cleanup;
+    /* Write the header of the output file container. */
+    if (write_output_file_header(output_format_context))
+        goto cleanup;
+
+    /* Loop while there are input samples to read or output samples to write. */
+    while (1) {
+        /* Use the encoder's desired frame size for processing. */
+        const int output_frame_size = output_codec_context->frame_size;
+        int finished                = 0;
+
+        /* Make sure the FIFO buffer holds one encoder frame worth of samples.
+         * Decoder and encoder frame sizes may differ, so several input frames
+         * may have to be stored before one output frame can be encoded. */
+        while (av_audio_fifo_size(fifo) < output_frame_size) {
+            /* Decode one frame worth of audio samples, convert it to the
+             * output sample format and put it into the FIFO buffer. */
+            if (read_decode_convert_and_store(fifo, input_format_context,
+                                              input_codec_context,
+                                              output_codec_context,
+                                              resample_context, &finished))
+                goto cleanup;
+
+            /* At the end of the input, go on to encode what is left. */
+            if (finished)
+                break;
+        }
+
+        /* Encode while a full frame is buffered; once the input has ended,
+         * also pass the remaining (shorter) samples to the encoder. */
+        while (av_audio_fifo_size(fifo) >= output_frame_size ||
+               (finished && av_audio_fifo_size(fifo) > 0))
+            /* Encode one frame worth of FIFO samples and write it out. */
+            if (load_encode_and_write(fifo, output_format_context,
+                                      output_codec_context))
+                goto cleanup;
+
+        /* If we are at the end of the input file and have encoded
+         * all remaining samples, we can exit this loop and finish. */
+        if (finished) {
+            int data_written;
+            /* Flush the encoder as it may have delayed frames. */
+            do {
+                data_written = 0;
+                if (encode_audio_frame(nullptr, output_format_context,
+                                       output_codec_context, &data_written))
+                    goto cleanup;
+            } while (data_written);
+            break;
         }
-        avformat_free_context(oformat_ctx);
     }
+
+    /* Write the trailer of the output file container. */
+    if (write_output_file_trailer(output_format_context))
+        goto cleanup;
+    ret = 0;
+
+cleanup:
+    if (fifo)
+        av_audio_fifo_free(fifo);
+    swr_free(&resample_context);
+    if (output_codec_context)
+        avcodec_free_context(&output_codec_context);
+    if (output_format_context) {
+        avio_closep(&output_format_context->pb);
+        avformat_free_context(output_format_context);
+    }
+    if (input_codec_context)
+        avcodec_free_context(&input_codec_context);
+    if (input_format_context)
+        avformat_close_input(&input_format_context);
+
     return ret;
 }