diff --git a/library/src/main/java/com/mux/video/upload/internal/Resample.kt b/library/src/main/java/com/mux/video/upload/internal/Resample.kt
new file mode 100644
index 00000000..f69f5aa9
--- /dev/null
+++ b/library/src/main/java/com/mux/video/upload/internal/Resample.kt
@@ -0,0 +1,176 @@
+package com.mux.video.upload.internal
+
+class Resample {
+    fun resample(
+        data: ByteArray,
+        length: Int,
+        stereo: Boolean,
+        inFrequency: Int,
+        outFrequency: Int
+    ): ByteArray? {
+        if (inFrequency < outFrequency) return upsample(
+            data,
+            length,
+            stereo,
+            inFrequency,
+            outFrequency
+        )
+        return if (inFrequency > outFrequency) downsample(
+            data,
+            length,
+            stereo,
+            inFrequency,
+            outFrequency
+        ) else trimArray(data, length)
+    }
+
+    /**
+     * Basic upsampling algorithm. Uses linear approximation to fill in the
+     * missing data.
+     *
+     * @param data          Input data
+     * @param length        The current size of the input array (usually, data.length)
+     * @param inputIsStereo True if input is inputIsStereo
+     * @param inFrequency   Frequency of input
+     * @param outFrequency  Frequency of output
+     *
+     * @return Upsampled audio data
+     */
+    private fun upsample(
+        data: ByteArray,
+        length: Int,
+        inputIsStereo: Boolean,
+        inFrequency: Int,
+        outFrequency: Int
+    ): ByteArray? {
+
+        // Special case for no action
+        if (inFrequency == outFrequency) return trimArray(data, length)
+        val scale = inFrequency.toDouble() / outFrequency.toDouble()
+        var pos = 0.0
+        val output: ByteArray
+        if (!inputIsStereo) {
+            output = ByteArray((length / scale).toInt())
+            for (i in output.indices) {
+                var inPos = pos.toInt()
+                var proportion = pos - inPos
+                if (inPos >= length - 1) {
+                    inPos = length - 2
+                    proportion = 1.0
+                }
+                output[i] =
+                    Math.round(data[inPos] * (1 - proportion) + data[inPos + 1] * proportion)
+                        .toByte()
+                pos += scale
+            }
+        } else {
+            output = ByteArray(2 * (length / 2 / scale).toInt())
+            for (i in 0 until output.size / 2) {
+                val inPos = pos.toInt()
+                var proportion = pos - inPos
+                var inRealPos = inPos * 2
+                if (inRealPos >= length - 3) {
+                    inRealPos = length - 4
+                    proportion = 1.0
+                }
+                output[i * 2] =
+                    Math.round(data[inRealPos] * (1 - proportion) + data[inRealPos + 2] * proportion)
+                        .toByte()
+                output[i * 2 + 1] =
+                    Math.round(data[inRealPos + 1] * (1 - proportion) + data[inRealPos + 3] * proportion)
+                        .toByte()
+                pos += scale
+            }
+        }
+        return output
+    }
+
+    /**
+     * Basic downsampling algorithm. Uses linear approximation to reduce data.
+     *
+     * @param data          Input data
+     * @param length        The current size of the input array (usually, data.length)
+     * @param inputIsStereo True if input is inputIsStereo
+     * @param inFrequency   Frequency of input
+     * @param outFrequency  Frequency of output
+     *
+     * @return Downsampled audio data
+     */
+    private fun downsample(
+        data: ByteArray,
+        length: Int,
+        inputIsStereo: Boolean,
+        inFrequency: Int,
+        outFrequency: Int
+    ): ByteArray? {
+
+        // Special case for no action
+        if (inFrequency == outFrequency) return trimArray(data, length)
+        val scale = outFrequency.toDouble() / inFrequency.toDouble()
+        val output: ByteArray
+        var pos = 0.0
+        var outPos = 0
+        if (!inputIsStereo) {
+            var sum = 0.0
+            output = ByteArray((length * scale).toInt())
+            var inPos = 0
+            while (outPos < output.size) {
+                val firstVal = data[inPos++].toDouble()
+                var nextPos = pos + scale
+                if (nextPos >= 1) {
+                    sum += firstVal * (1 - pos)
+                    output[outPos++] = Math.round(sum).toByte()
+                    nextPos -= 1.0
+                    sum = nextPos * firstVal
+                } else {
+                    sum += scale * firstVal
+                }
+                pos = nextPos
+                if (inPos >= length && outPos < output.size) {
+                    output[outPos++] = Math.round(sum / pos).toByte()
+                }
+            }
+        } else {
+            var sum1 = 0.0
+            var sum2 = 0.0
+            output = ByteArray(2 * (length / 2 * scale).toInt())
+            var inPos = 0
+            while (outPos < output.size) {
+                val firstVal = data[inPos++].toDouble()
+                val nextVal = data[inPos++].toDouble()
+                var nextPos = pos + scale
+                if (nextPos >= 1) {
+                    sum1 += firstVal * (1 - pos)
+                    sum2 += nextVal * (1 - pos)
+                    output[outPos++] = Math.round(sum1).toByte()
+                    output[outPos++] = Math.round(sum2).toByte()
+                    nextPos -= 1.0
+                    sum1 = nextPos * firstVal
+                    sum2 = nextPos * nextVal
+                } else {
+                    sum1 += scale * firstVal
+                    sum2 += scale * nextVal
+                }
+                pos = nextPos
+                if (inPos >= length && outPos < output.size) {
+                    output[outPos++] = Math.round(sum1 / pos).toByte()
+                    output[outPos++] = Math.round(sum2 / pos).toByte()
+                }
+            }
+        }
+        return output
+    }
+
+    /**
+     * @param data   Data
+     * @param length Length of valid data
+     *
+     * @return Array trimmed to length (or same array if it already is)
+     */
+    fun trimArray(data: ByteArray, length: Int): ByteArray? {
+        if (data.size == length) return data
+        val output = ByteArray(length)
+        System.arraycopy(output, 0, data, 0, length)
+        return output
+    }
+}
\ No newline at end of file
diff --git a/library/src/main/java/com/mux/video/upload/internal/TranscoderContext.kt b/library/src/main/java/com/mux/video/upload/internal/TranscoderContext.kt
index a94defb9..f6297dcb 100644
--- a/library/src/main/java/com/mux/video/upload/internal/TranscoderContext.kt
+++ b/library/src/main/java/com/mux/video/upload/internal/TranscoderContext.kt
@@ -1,15 +1,19 @@
 package com.mux.video.upload.internal
 
+import android.R.id.input
 import android.content.Context
 import android.media.*
 import android.media.MediaCodec.BufferInfo
 import android.media.MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420SemiPlanar
 import android.os.Build
+import android.util.Log
 import androidx.annotation.RequiresApi
 import com.mux.video.upload.MuxUploadSdk
 import io.github.crow_misia.libyuv.FilterMode
 import io.github.crow_misia.libyuv.Nv12Buffer
 import java.io.File
+import java.io.OutputStream
+import java.nio.BufferOverflowException
 import java.nio.ByteBuffer
 import java.util.*
 import kotlin.experimental.and
@@ -28,6 +32,9 @@ internal class TranscoderContext private constructor(
     val MAX_ALLOWED_HEIGTH = 1080
     val OPTIMAL_FRAMERATE = 30
     val I_FRAME_INTERVAL = 5 // in seconds
+    val OUTPUT_SAMPLERATE = 48000
+    val OUTPUT_NUMBER_OF_CHANNELS = 2
+    val OUTPUT_AUDIO_BITRATE = 96000
 
     private val extractor: MediaExtractor = MediaExtractor()
     private var muxer: MediaMuxer? = null
@@ -45,6 +52,7 @@ internal class TranscoderContext private constructor(
 
     // This is what decoder actually provide as an output, bit different then what we used to configure it
     private var videoDecoderOutputFormat: MediaFormat? = null
+    private var audioDecoderOutputFormat: MediaFormat? = null
     private var decodedFrameWidth: Int = -1;
     private var decodedFrameHeight: Int = -1;
     private var targetedWidth = -1
@@ -52,13 +60,17 @@ internal class TranscoderContext private constructor(
     private var targetedFramerate = -1
     private var targetedBitrate = -1
     private var scaledSizeYuv: Nv12Buffer? = null
-    val audioFrames = ArrayList<AVFrame>()
+    private var resampleCreated = false
+    private var resample: Resample = Resample()
+    private val audioFrames = ArrayList<AVFrame>()
 
     // Input parameters
     private var inputWidth = -1
     private var inputHeighth = -1
     private var inputBitrate = -1
     private var inputFramerate = -1
+    private var inputChannelCount = -1
+    private var inputSamplerate = -1
 
     // Wait indefinetly for negative value, exit imidetly on 0, or timeout after a given us+
     private var dequeueTimeout:Long = 0;
@@ -67,31 +79,42 @@ internal class TranscoderContext private constructor(
     private var muxerConfigured = false;
     private var numberOfDecodedFrames = 0;
     private var numberOfEncodedFrames = 0;
+    private var numberOfDecodedSamples = 0;
+    private var numberOfEncodedSamples = 0;
     private var numberOfInputFrames = -1;
+    private var numberOfLostAudioFrames = 0;
 
     private var videoDecoder:MediaCodec? = null
     private var audioDecoder:MediaCodec? = null
     private var videoEncoder:MediaCodec? = null
     private var audioEncoder:MediaCodec? = null
+    private var videoOutputStream:OutputStream? = null;
+    private var audioOutputStream:OutputStream? = null;
+    private var rawAudioOutputStream:OutputStream? = null;
+    private var resampledAudioOutputStream:OutputStream? = null;
     var fileTranscoded = false
     private var configured = false
 
     companion object {
-      const val LOG_TAG = "TranscoderContext"
+        const val LOG_TAG = "TranscoderContext"
 
-      @JvmSynthetic
-      internal fun create(uploadInfo: UploadInfo, appContext: Context): TranscoderContext {
-        return TranscoderContext(uploadInfo, appContext)
-      }
+        @JvmSynthetic
+        internal fun create(uploadInfo: UploadInfo, appContext: Context): TranscoderContext {
+            return TranscoderContext(uploadInfo, appContext)
+        }
     }
 
-    private fun getHWCapableEncoders(mimeType: String): ArrayList<MediaCodecInfo> {
+    private fun getEncoders(mimeType: String, hwCapableOnly:Boolean): ArrayList<MediaCodecInfo> {
         val list = MediaCodecList(MediaCodecList.REGULAR_CODECS);
         var result:ArrayList<MediaCodecInfo> = ArrayList<MediaCodecInfo>();
         for(codecInfo in list.codecInfos) {
             logger.v("CodecInfo", codecInfo.name)
-            if(codecInfo.name.contains(mimeType) && codecInfo.isEncoder && codecInfo.isHardwareAcceleratedCompat) {
-                result.add(codecInfo);
+            if(codecInfo.name.contains(mimeType) && codecInfo.isEncoder) {
+                if (!hwCapableOnly) {
+                    result.add(codecInfo);
+                } else if (codecInfo.isHardwareAcceleratedCompat) {
+                    result.add(codecInfo);
+                }
             }
         }
         return result;
@@ -99,19 +122,35 @@ internal class TranscoderContext private constructor(
 
     private fun configure() {
       val cacheDir = File(appContext.cacheDir, "mux-upload")
+//        val cacheDir = File(appContext.externalCacheDir, "mux-upload")
       cacheDir.mkdirs()
-      val destFile = File(cacheDir, UUID.randomUUID().toString() + ".mp4")
-      destFile.createNewFile()
-
-      muxer = MediaMuxer(destFile.absolutePath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4)
-      uploadInfo = uploadInfo.update(standardizedFile = destFile)
-
-      try {
-        configureDecoders()
-        configured = true
-      } catch (e:Exception) {
-        logger.e(LOG_TAG, "Failed to initialize.", e)
-      }
+
+//        val videoOutput = File(cacheDir,  "video.h264")
+//        videoOutputStream = videoOutput.outputStream()
+//
+//        val audioOutput = File(cacheDir,  "audio.aac")
+//        audioOutputStream = audioOutput.outputStream()
+//
+//        val rawAudioOutput = File(cacheDir,  "audio_original.raw")
+//        rawAudioOutputStream = rawAudioOutput.outputStream()
+//
+//        val resampledAudioOutput = File(cacheDir,  "audio_resampled.raw")
+//        resampledAudioOutputStream = resampledAudioOutput.outputStream()
+
+        val destFile = File(cacheDir, UUID.randomUUID().toString() + ".mp4")
+//        val destFile = File(cacheDir,  "output.mp4")
+        destFile.createNewFile()
+
+        muxer = MediaMuxer(destFile.absolutePath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4)
+        uploadInfo = uploadInfo.update(standardizedFile = destFile)
+
+        try {
+            configureDecoders()
+            configureAudioEncoder()
+            configured = true
+        } catch (e:Exception) {
+            logger.e(LOG_TAG, "Failed to initialize.", e)
+        }
     }
 
     private fun checkIfTranscodingIsNeeded(): Boolean {
@@ -162,8 +201,11 @@ internal class TranscoderContext private constructor(
                         )
                         shouldStandardize = true
                         targetedBitrate = MAX_ALLOWED_BITRATE
+                    } else {
+                        targetedBitrate = inputBitrate
                     }
                     inputFramerate = format.getIntegerCompat(MediaFormat.KEY_FRAME_RATE, -1)
+                    targetedFramerate = OPTIMAL_FRAMERATE
                     if (inputFramerate > MAX_ALLOWED_FRAMERATE) {
                         logger.v(
                             LOG_TAG,
@@ -179,14 +221,31 @@ internal class TranscoderContext private constructor(
                     extractor.selectTrack(i)
                 }
                 if (mime?.lowercase()?.contains("audio") == true) {
-                    // TODO check if audio need to be standardized
+                    //  check if audio need to be standardized
                     audioTrackIndex = i;
                     inputAudioFormat = format;
                     extractor.selectTrack(i)
+                    inputChannelCount = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
+                    inputSamplerate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
+                    if (inputChannelCount > 2) {
+                        // We do not support this
+                        transcodeAudio = false
+                    } else {
+                        if (!mime.equals(MediaFormat.MIMETYPE_AUDIO_AAC)) {
+                            transcodeAudio = true;
+                        }
+//                        if (format.getInteger(MediaFormat.KEY_SAMPLE_RATE) != 48000) {
+//                            transcodeAudio = true;
+//                        }
+                    }
                 }
             }
         } catch (ex:Exception) {
-            ex.printStackTrace()
+          logger.e(
+            LOG_TAG,
+            "Couldn't completely inspect input. Will standardize? $shouldStandardize",
+            ex
+          )
         }
         return shouldStandardize
     }
@@ -206,7 +265,34 @@ internal class TranscoderContext private constructor(
         }
     }
 
-    private fun configureEncoders() {
+    private fun configureAudioEncoder() {
+        if (transcodeAudio) {
+            outputAudioFormat = MediaFormat.createAudioFormat(MediaFormat.MIMETYPE_AUDIO_AAC,
+                inputSamplerate, inputChannelCount)
+            outputAudioFormat!!.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC)
+            outputAudioFormat!!.setInteger(MediaFormat.KEY_PROFILE, 2)
+            outputAudioFormat!!.setInteger(MediaFormat.KEY_BIT_RATE, OUTPUT_AUDIO_BITRATE)
+            outputAudioFormat!!.setInteger("max-bitrate", OUTPUT_AUDIO_BITRATE)
+            outputAudioFormat!!.setInteger("aac-format-adif", 0)
+            val audioEncoders = getEncoders("aac", false)
+            for (encoder in audioEncoders) {
+                try {
+                    // TODO see the codec capabileties
+                    val codecCap = encoder.getCapabilitiesForType(MediaFormat.MIMETYPE_AUDIO_AAC)
+                    audioEncoder = MediaCodec.createByCodecName(encoder.name)
+                    audioEncoder!!.configure(outputAudioFormat,null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
+                    audioEncoder!!.start()
+                    break
+                } catch (err:java.lang.Exception) {
+                    logger.w(LOG_TAG, "Couldn't evaluate audio encoder ${encoder.name}. Skipping it", err)
+                }
+            }
+        } else {
+            outputAudioFormat = inputAudioFormat
+        }
+    }
+
+    private fun configureVideoEncoder() {
         // We will need this when we apply the image resize
         decodedFrameWidth = videoDecoderOutputFormat!!.getInteger(MediaFormat.KEY_WIDTH)
         decodedFrameHeight = videoDecoderOutputFormat!!.getInteger(MediaFormat.KEY_HEIGHT)
@@ -223,20 +309,15 @@ internal class TranscoderContext private constructor(
         )
         outputVideoFormat!!.setInteger("slice-height", targetedHeight + targetedHeight/2);
         outputVideoFormat!!.setInteger("stride", targetedWidth);
-        outputVideoFormat!!.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, I_FRAME_INTERVAL);
-        outputVideoFormat!!.setInteger(MediaFormat.KEY_BITRATE_MODE, MediaCodecInfo.EncoderCapabilities.BITRATE_MODE_VBR)
+        outputVideoFormat!!.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, I_FRAME_INTERVAL)
+        outputVideoFormat!!.setInteger(
+            MediaFormat.KEY_BITRATE_MODE,
+            MediaCodecInfo.EncoderCapabilities.BITRATE_MODE_VBR
+        )
         outputVideoFormat!!.setInteger(MediaFormat.KEY_BIT_RATE, targetedBitrate)
-        // configure output audio format, if input format is already AAC, then just do copy
-        transcodeAudio = !inputAudioFormat!!.getString(MediaFormat.KEY_MIME)?.contains(MediaFormat.MIMETYPE_AUDIO_AAC)!!
-        if (transcodeAudio) {
-            outputAudioFormat = MediaFormat.createAudioFormat(MediaFormat.MIMETYPE_AUDIO_AAC, 48000, 2)
-            outputAudioFormat!!.setString(MediaFormat.KEY_AAC_PROFILE, "2")
-            outputAudioFormat!!.setString(MediaFormat.KEY_PROFILE, "2")
-        } else {
-            outputAudioFormat = inputAudioFormat
-        }
 
-        val encoders = getHWCapableEncoders("avc")
+
+        val encoders = getEncoders("avc", true)
         for (encoder in encoders) {
             try {
                 val codecCap = encoder.getCapabilitiesForType("video/avc")
@@ -262,16 +343,10 @@ internal class TranscoderContext private constructor(
                 videoEncoder!!.configure(outputVideoFormat,null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
                 break;
             } catch (err:java.lang.Exception) {
-              logger.w(LOG_TAG, "Couldn't evaluate encoder ${encoder.name}. Skipping it", err)
+                logger.w(LOG_TAG, "Couldn't evaluate video encoder ${encoder.name}. Skipping it", err)
             }
         }
         videoEncoder!!.start()
-        if (transcodeAudio) {
-            audioEncoder =
-                MediaCodec.createEncoderByType(outputAudioFormat!!.getString(MediaFormat.KEY_MIME)!!)
-            audioEncoder!!.configure(outputAudioFormat, null, null, 0)
-            audioEncoder!!.start()
-        }
     }
 
     private fun releaseCodecs() {
@@ -287,9 +362,15 @@ internal class TranscoderContext private constructor(
     private fun configureMuxer() {
         outputVideoTrackIndex = muxer!!.addTrack(videoEncoder!!.outputFormat)
         muxer!!.setOrientationHint(inputVideoFormat!!.getInteger(MediaFormat.KEY_ROTATION))
-        if (inputAudioFormat != null) {
-            outputAudioTrackIndex = muxer!!.addTrack(outputAudioFormat!!)
+        if (transcodeAudio) {
+            outputAudioTrackIndex = muxer!!.addTrack(audioEncoder!!.outputFormat)
+        } else {
+            // Audio copy if present
+            if (inputAudioFormat != null ){
+                outputAudioTrackIndex = muxer!!.addTrack(inputAudioFormat!!)
+            }
         }
+
         muxer!!.start()
     }
 
@@ -297,23 +378,22 @@ internal class TranscoderContext private constructor(
     internal fun process(): UploadInfo {
         logger.v(LOG_TAG, "process() starting")
         if (!checkIfTranscodingIsNeeded()) {
-          logger.i(LOG_TAG, "Standardization was not required. Skipping")
-          return uploadInfo
+            logger.i(LOG_TAG, "Standardization was not required. Skipping")
+            return uploadInfo
         }
 
         logger.i(LOG_TAG, "Standardizing input")
         configure()
         if (!configured) {
             logger.e(
-              LOG_TAG,
-              "Skipped: Components could not be configured. Check the logs for errors"
+                LOG_TAG,
+                "Skipped: Components could not be configured. Check the logs for errors"
             )
             return uploadInfo;
         }
 
         val started = System.currentTimeMillis()
         try {
-            extractor.selectTrack(videoTrackIndex)
             while (!eofReached) {
                 if (extractor.sampleTrackIndex == audioTrackIndex) {
                     muxAudioFrame()
@@ -336,7 +416,6 @@ internal class TranscoderContext private constructor(
         logger.i(LOG_TAG, "Transcoding duration time: $duration")
         logger.i(LOG_TAG, "Original file size: ${uploadInfo.inputFile.length()}")
         logger.i(LOG_TAG, "Transcoded file size: ${uploadInfo.standardizedFile?.length()}")
-      
         return uploadInfo
     }
 
@@ -357,26 +436,33 @@ internal class TranscoderContext private constructor(
     }
 
     private fun muxAudioFrame() {
-        val audioFrame = getNextAudioFrame()
-        // This is an audio frame, for now just copy, in the future, transcode maybe
-        if (outputAudioTrackIndex == -1) {
-            // Muxer not initialized yet, store these and mux later
-//            Log.i(
-//                "Muxer", "Not ready, save audio frame for later muxing, pts: "
-//                        + audioFrame!!.info.presentationTimeUs
-//            )
-            audioFrames.add(audioFrame!!)
-        } else {
-            // if we have some accumulated audio samples write them first
-            for (audioFrame in audioFrames) {
-//                Log.i(
-//                    "Muxer", "Muxing accumulated audio frame, pts: "
-//                            + audioFrame.info.presentationTimeUs
-//                )
-                muxAudioFrame(audioFrame)
+        if (transcodeAudio) {
+            if (!eofReached) {
+                // This will advance the extractor
+                feedAudioDecoder()
             }
-            audioFrames.clear()
-            muxAudioFrame(audioFrame!!)
+            val decodedFrames = getDecodedAudioFrame()
+            for (decoded in decodedFrames ) {
+                feedAudioEncoder(decoded)
+                decoded.release()
+            }
+            // iterate encoded audio frames and mux them
+            val encodedAudioFrames = getEncodedAudioFrames()
+            for(frame:AVFrame in encodedAudioFrames) {
+                if (outputAudioTrackIndex == -1) {
+                    // Muxer not initialized yet, store these and mux later
+                    audioFrames.add(frame)
+                } else {
+                    // if we have some accumulated audio samples write them first
+                    for (queuedFrame in audioFrames) {
+                        muxAudioFrame(queuedFrame)
+                    }
+                    audioFrames.clear()
+                    muxAudioFrame(frame)
+                }
+            }
+        } else {
+            copyAudioFrame();
         }
     }
 
@@ -393,7 +479,6 @@ internal class TranscoderContext private constructor(
     }
 
     private fun getVideoFrames() : ArrayList<AVFrame> {
-        // TODO if EOF is reached maybe call flush on decoder and encoder some frames may still be in there
         if (!eofReached) {
             // This will advance the extractor
             feedVideoDecoder()
@@ -406,22 +491,6 @@ internal class TranscoderContext private constructor(
         return getEncodedVideoFrames()
     }
 
-    private fun getNextAudioFrame(): AVFrame? {
-        val extractorBuffer:ByteBuffer = ByteBuffer.allocate(1024)
-        val extractedFrame = AVFrame(-1, extractorBuffer, BufferInfo(), isRaw = false)
-        val sampleSize = extractor.readSampleData(extractorBuffer, 0);
-        if (sampleSize == -1) {
-            eofReached = true;
-            // TODO fuls encoders / decoders
-            return null;
-        } else {
-            extractedFrame.info.size = sampleSize
-            extractedFrame.info.presentationTimeUs = extractor.sampleTime
-            extractor.advance()
-        }
-        return extractedFrame;
-    }
-
     private fun feedVideoDecoder() {
         val inIndex: Int = videoDecoder!!.dequeueInputBuffer(dequeueTimeout)
         if (inIndex >= 0) {
@@ -434,7 +503,6 @@ internal class TranscoderContext private constructor(
             } else {
                 videoDecoder!!.queueInputBuffer(inIndex, 0, sampleSize, extractor.sampleTime, 0)
                 extractor.advance()
-                numberOfInputFrames++
             }
         }
     }
@@ -446,7 +514,6 @@ internal class TranscoderContext private constructor(
         var outIndex = videoDecoder!!.dequeueOutputBuffer(info, dequeueTimeout);
         while(outIndex > 0) {
             outputBuffer = videoDecoder!!.getOutputBuffer(outIndex);
-            numberOfDecodedFrames++;
             result.add(AVFrame(
                 outIndex, outputBuffer!!, info, decodedFrameWidth, decodedFrameHeight,
                 videoDecoder!!, true
@@ -458,7 +525,7 @@ internal class TranscoderContext private constructor(
             MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> {
                 // This give us real image height, to avoid corruptions in video
                 videoDecoderOutputFormat = videoDecoder!!.outputFormat;
-                configureEncoders()
+                configureVideoEncoder()
             }
             MediaCodec.INFO_TRY_AGAIN_LATER -> {
                 // Timedout also not good
@@ -467,37 +534,6 @@ internal class TranscoderContext private constructor(
         return result
     }
 
-    private fun findAnnexBPosition(buff:ByteBuffer, startSearchAt:Int, buffSize:Int): Int {
-        // We are assuming integer is 4 bytes on every device, we also assume anexB is 4 bytes long
-        // instead of 3 which is also possible sometimes
-        for(i in startSearchAt..buffSize - 4) {
-            if (buff.getInt(i) == 1) {
-                return i;
-            }
-        }
-        return -1
-    }
-
-    private fun convertAnnexBtoAvcc(buff:ByteBuffer, buffSize:Int) {
-        val positions = ArrayList<Int>()
-        var annexBPos = findAnnexBPosition(buff, 0, buffSize)
-        while (annexBPos != -1) {
-            positions.add(annexBPos)
-            annexBPos = findAnnexBPosition(buff, annexBPos + 4, buffSize)
-        }
-        for (i in 0..positions.size -1) {
-            var naluLength = 0
-            if (i == positions.size -1) {
-                // This is the last position
-                naluLength =  buffSize - positions.get(i) - 4
-            } else {
-                naluLength = positions.get(i + 1) - positions.get(i) -4;
-            }
-            buff.position(positions.get(i))
-            buff.putInt(naluLength)
-        }
-    }
-
     private fun feedVideoEncoder(rawInput:AVFrame) {
         val inIndex: Int = videoEncoder!!.dequeueInputBuffer(dequeueTimeout)
         if (inIndex >= 0) {
@@ -537,55 +573,185 @@ internal class TranscoderContext private constructor(
         return result;
     }
 
-    private fun encodeVideoFrame(rawInput:AVFrame): AVFrame? {
-        val result:AVFrame? = null;
-        val inIndex: Int = videoEncoder!!.dequeueInputBuffer(dequeueTimeout)
+    /////////////////////////////////////////////////////////////////////////////////////////////
+    ///////////// Audio ////////////////////////////////////////////////////////////////////////
+
+    private fun copyAudioFrame() {
+        val audioFrame = getNextAudioFrame()
+        if (outputAudioTrackIndex == -1) {
+            // Muxer not initialized yet, store these and mux later
+            audioFrames.add(audioFrame!!)
+        } else {
+            // if we have some accumulated audio samples write them first
+            for (audioFrame in audioFrames) {
+                muxAudioFrame(audioFrame)
+            }
+            audioFrames.clear()
+            muxAudioFrame(audioFrame!!)
+        }
+    }
+
+    private fun getNextAudioFrame(): AVFrame? {
+        val extractorBuffer:ByteBuffer = ByteBuffer.allocate(1024)
+        val extractedFrame = AVFrame(-1, extractorBuffer, BufferInfo(), isRaw = false)
+        val sampleSize = extractor.readSampleData(extractorBuffer, 0);
+        if (sampleSize == -1) {
+            eofReached = true;
+            return null;
+        } else {
+            extractedFrame.info.size = sampleSize
+            extractedFrame.info.presentationTimeUs = extractor.sampleTime
+            extractor.advance()
+        }
+        return extractedFrame;
+    }
+
+    private fun feedAudioDecoder() {
+        val inIndex: Int = audioDecoder!!.dequeueInputBuffer(dequeueTimeout)
         if (inIndex >= 0) {
-            // Scale input to match output
-            rawInput.yuvBuffer!!.scale(scaledSizeYuv!!, FilterMode.BILINEAR)
-            val buffer: ByteBuffer = videoEncoder!!.getInputBuffer(inIndex)!!;
-            buffer.clear()
-            scaledSizeYuv!!.write(buffer)
-            videoEncoder!!.queueInputBuffer(inIndex, 0,
-                buffer.capacity(), rawInput.info.presentationTimeUs, 0)
+            val buffer: ByteBuffer = audioDecoder!!.getInputBuffer(inIndex)!!;
+            val sampleSize = extractor.readSampleData(buffer, 0)
+            if (sampleSize < 0) {
+                // We have reached the end of video
+                eofReached = true;
+            } else {
+                audioDecoder!!.queueInputBuffer(inIndex, 0, sampleSize, extractor.sampleTime, 0)
+                extractor.advance()
+            }
         }
+    }
 
+    private fun getDecodedAudioFrame():ArrayList<AVFrame> {
         var info = BufferInfo()
-        var outIndex = videoEncoder!!.dequeueOutputBuffer(info, dequeueTimeout)
+        var outputBuffer:ByteBuffer? = null
+        val result = ArrayList<AVFrame>()
+        var outIndex = audioDecoder!!.dequeueOutputBuffer(info, dequeueTimeout);
+        while(outIndex > 0) {
+            outputBuffer = audioDecoder!!.getOutputBuffer(outIndex);
+            result.add(AVFrame(
+                outIndex, outputBuffer!!, info, 0, 0,
+                audioDecoder!!, true
+            ))
+            numberOfDecodedSamples++
+            outIndex = audioDecoder!!.dequeueOutputBuffer(info, dequeueTimeout);
+        }
+        when (outIndex) {
+            MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> {
+                // Not sure what to do here
+                audioDecoderOutputFormat = audioDecoder!!.outputFormat;
+//                configureEncoders()
+            }
+            MediaCodec.INFO_TRY_AGAIN_LATER -> {
+                // Timedout also not good
+            }
+        }
+        return result
+    }
+
+    private fun feedAudioEncoder(rawInput:AVFrame) {
+        rawInput.buff.rewind()
+        // Resampling not working as expected, we need a better solution
+//        var tmp:ByteBuffer = ByteBuffer.allocate(rawInput.info.size)
+//        tmp.put(rawInput.buff)
+//        rawInput.buff.rewind()
+//        tmp.rewind()
+//        rawAudioOutputStream!!.write(tmp.array(), 0, tmp.remaining())
+//        tmp.rewind()
+//        val resampled = resample.resample(tmp.array(), rawInput.info.size, true, inputSamplerate, OUTPUT_SAMPLERATE)
+////            val output_len = resample.resampleEx(rawInput.buff, buffer, rawInput.buff.remaining())
+//        if (resampled == null) {
+//            logger.i(LOG_TAG, "It is a problem :-D")
+//        }
+//        resampledAudioOutputStream!!.write(resampled, 0, resampled!!.size)
+//        var bytesQueued = 0;
+
+//        while (bytesQueued < resampled!!.size) {
+            val inIndex: Int = audioEncoder!!.dequeueInputBuffer(dequeueTimeout)
+            if (inIndex >= 0) {
+                // resample input to match output
+                val buffer: ByteBuffer = audioEncoder!!.getInputBuffer(inIndex)!!
+                buffer.rewind()
+//                val remaining = resampled!!.size - bytesQueued
+//                var toWrite =  buffer.capacity()
+//                if (remaining < buffer.capacity()) {
+//                    toWrite = remaining
+//                }
+                buffer.put(rawInput.buff)
+                buffer.rewind()
+                audioEncoder!!.queueInputBuffer(
+                    inIndex, 0,
+                    buffer.remaining(), rawInput.info.presentationTimeUs, 0
+                )
+//                bytesQueued+=toWrite
+            } else {
+                logger.e(LOG_TAG, "We lost audio frame :-D")
+                numberOfLostAudioFrames++;
+            }
+//        }
+    }
+
+    private fun getEncodedAudioFrames():ArrayList<AVFrame> {
+        val result = ArrayList<AVFrame>()
+        if (audioEncoder == null) {
+            return result;
+        }
+        var info = BufferInfo()
+        var outIndex = audioEncoder!!.dequeueOutputBuffer(info, dequeueTimeout)
         var outputBuffer:ByteBuffer?
-        val encodedBuffers:ArrayList<AVFrame> = ArrayList<AVFrame>()
-        var totalBufferSize:Int = 0
         while (outIndex >= 0) {
-            outputBuffer = videoEncoder!!.getOutputBuffer(outIndex)
-            totalBufferSize += info.size
-            encodedBuffers.add(AVFrame(outIndex, outputBuffer!!, info, 0, 0,
-                videoEncoder, true, false))
-            numberOfEncodedFrames++
+            if (!muxerConfigured) {
+                // TODO maybe note that audio is ready to be configured in a muxer
+            }
+            outputBuffer = audioEncoder!!.getOutputBuffer(outIndex)
+            val buff = ByteBuffer.allocate(info.size)
+            outputBuffer!!.get(buff.array(), 0, info.size)
+            result.add(AVFrame(outIndex, buff, info, 0, 0,
+                audioEncoder, true, false))
+            numberOfEncodedSamples++
+            audioEncoder!!.releaseOutputBuffer(outIndex, false)
             info = BufferInfo()
-            outIndex = videoEncoder!!.dequeueOutputBuffer(info, dequeueTimeout)
+            outIndex = audioEncoder!!.dequeueOutputBuffer(info, dequeueTimeout)
         }
-        if (encodedBuffers.size > 0) {
-            val outputBuffer = ByteBuffer.allocate(totalBufferSize)
-            var offset = 0
-            val info = BufferInfo()
-            for (frame in encodedBuffers) {
-                // TODO maybe convert annexB to avcc, pay attention, sps and pps are in single buffer
-//                frame.buff.position(4)
-                frame.buff.get(outputBuffer.array(), offset, frame.info.size)
-                offset += frame.info.size
-                info.flags = info.flags or frame.info.flags
-                info.presentationTimeUs = frame.info.presentationTimeUs
-                frame.release()
-            }
+        return result;
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////// Helpers //////////////////////////////////////////////////////////
 
-            info.offset = 0
-            info.size = totalBufferSize
-            return AVFrame(outIndex, outputBuffer, info, targetedWidth,
-                targetedHeight, videoEncoder!!, false, false)
+    private fun findAnnexBPosition(buff:ByteBuffer, startSearchAt:Int, buffSize:Int): Int {
+        // We are assuming integer is 4 bytes on every device, we also assume anexB is 4 bytes long
+        // instead of 3 which is also possible sometimes
+        for(i in startSearchAt..buffSize - 4) {
+            if (buff.getInt(i) == 1) {
+                return i;
+            }
+        }
+        return -1
+    }
+    private fun convertAnnexBtoAvcc(buff:ByteBuffer, buffSize:Int) {
+        val positions = ArrayList<Int>()
+        var annexBPos = findAnnexBPosition(buff, 0, buffSize)
+        while (annexBPos != -1) {
+            positions.add(annexBPos)
+            annexBPos = findAnnexBPosition(buff, annexBPos + 4, buffSize)
+        }
+        for (i in 0..positions.size -1) {
+            var naluLength = 0
+            if (i == positions.size -1) {
+                // This is the last position
+                naluLength =  buffSize - positions.get(i) - 4
+            } else {
+                naluLength = positions.get(i + 1) - positions.get(i) -4;
+            }
+            buff.position(positions.get(i))
+            buff.putInt(naluLength)
         }
-        return result;
     }
 
+    ////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////
+
+
     class AVFrame constructor(val index:Int, val buff:ByteBuffer, val info:BufferInfo,
                               val width:Int = 0, val heigth:Int = 0,
                               val codec:MediaCodec? = null, val shouldRelease:Boolean = true, val isRaw:Boolean = true){