services/audioflinger/AudioResamplerDyn.cpp - android_frameworks_av - Gitiles

 /*
  * Copyright (C) 2013 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #define LOG_TAG "AudioResamplerDyn"
 //#define LOG_NDEBUG 0

 #include <malloc.h>
 #include <string.h>
 #include <stdlib.h>
 #include <dlfcn.h>
 #include <math.h>

 #include <cutils/compiler.h>
 #include <cutils/properties.h>
 #include <utils/Log.h>

 #include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here
 #include "AudioResamplerFirProcess.h"
 #include "AudioResamplerFirProcessNeon.h"
 #include "AudioResamplerFirGen.h" // requires math.h
 #include "AudioResamplerDyn.h"

 //#define DEBUG_RESAMPLER

 namespace android {

 // generate a unique resample type compile-time constant (constexpr)
 //
 // Bit layout of the result:
 //   bit 0    : (CHANNELS - 1) & 1   (0 for 1 channel, 1 for 2 channels)
 //   bit 1    : LOCKED, normalized to 0 or 1
 //   bit 2    : COEFTYPE (this file passes 0 with the S16 coefficients, 1 with S32)
 //   bits 3-4 : stride code: 1 for STRIDE == 8, 2 for STRIDE == 16, 0 otherwise
 // COEFTYPE is not masked, so a value above 1 would spill into the stride bits.
 #define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE, COEFTYPE) \
     ((((CHANNELS)-1)&1) | !!(LOCKED)<<1 | (COEFTYPE)<<2 \
     | ((STRIDE)==8 ? 1 : (STRIDE)==16 ? 2 : 0)<<3)

 /*
  * InBuffer is a type agnostic input buffer.
  *
  * Layout of the state buffer for halfNumCoefs=8.
  *
  * [rrrrrrppppppppnnnnnnnnrrrrrrrrrrrrrrrrrrr.... rrrrrrr]
  *  S            I                                R
  *
  * S = mState
  * I = mImpulse
  * R = mRingFull
  * p = past samples, convolved with the (p)ositive side of sinc()
  * n = future samples, convolved with the (n)egative side of sinc()
  * r = extra space for implementing the ring buffer
  */

 // Creates an empty InBuffer; all pointers are NULL and the recorded size is 0.
 template<typename TI>
 AudioResamplerDyn::InBuffer<TI>::InBuffer()
     : mState(NULL),
       mImpulse(NULL),
       mRingFull(NULL),
       mStateSize(0)
 {
 }

 // Releases the state buffer by delegating to init().
 template<typename TI>
 AudioResamplerDyn::InBuffer<TI>::~InBuffer()
 {
     this->init();
 }

 // Frees the state buffer and resets every member to its empty value.
 // Safe to call on an already empty buffer (free(NULL) is a no-op).
 template<typename TI>
 void AudioResamplerDyn::InBuffer<TI>::init() {
     free(mState);
     mStateSize = 0;
     mState = mImpulse = mRingFull = NULL;
 }

 // Resizes the state buffer to accommodate the appropriate filter length.
 //
 // CHANNELS     - number of interleaved channels stored per frame
 // halfNumCoefs - half of the filter length, in frames
 //
 // The buffer holds halfNumCoefs * CHANNELS * 2 * kStateSizeMultipleOfFilterLength
 // samples. Nothing happens when the current buffer already has that size and the same
 // ring layout. Otherwise a zero-filled, 32-byte aligned buffer is allocated and the
 // samples within halfNumCoefs frames on either side of the old impulse position are
 // copied so that they sit around the new impulse position.
 //
 // NOTE(review): the memalign() result is used without a NULL check.
 template<typename TI>
 void AudioResamplerDyn::InBuffer<TI>::resize(int CHANNELS, int halfNumCoefs) {
     // calculate desired state size
     int stateSize = halfNumCoefs * CHANNELS * 2
             * kStateSizeMultipleOfFilterLength;

     // check if buffer needs resizing
     if (mState
             && stateSize == mStateSize
             && mRingFull-mState == mStateSize-halfNumCoefs*CHANNELS) {
         return;
     }

     // create new buffer; the element type follows TI rather than a fixed int16_t
     TI* state = static_cast<TI*>(memalign(32, stateSize*sizeof(*state)));
     memset(state, 0, stateSize*sizeof(*state));

     // attempt to preserve state
     if (mState) {
         TI* srcLo = mImpulse - halfNumCoefs*CHANNELS;
         TI* srcHi = mImpulse + halfNumCoefs*CHANNELS;
         TI* dst = state;

         // clamp the copied window to the old buffer; the part of the new window
         // that had no source below mState keeps its zero fill
         if (srcLo < mState) {
             dst += mState-srcLo;
             srcLo = mState;
         }
         if (srcHi > mState + mStateSize) {
             srcHi = mState + mStateSize;
         }
         memcpy(dst, srcLo, (srcHi - srcLo) * sizeof(*srcLo));
         free(mState);
     }

     // set class member vars
     mState = state;
     mStateSize = stateSize;
     mImpulse = mState + halfNumCoefs*CHANNELS; // actually one sample greater than needed
     mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS;
 }

 // Copies one input frame (the CHANNELS samples of frame inputIndex in the interleaved
 // buffer `in`) into the head of the state buffer, i.e. at impulse + halfNumCoefs*CHANNELS.
 // The impulse pointer itself is not moved.
 template<typename TI>
 template<int CHANNELS>
 void AudioResamplerDyn::InBuffer<TI>::readAgain(TI*& impulse, const int halfNumCoefs,
         const TI* const in, const size_t inputIndex) {
     // TI, not a fixed int16_t, so the template is valid for every sample type
     TI* head = impulse + halfNumCoefs*CHANNELS;
     for (size_t i=0 ; i<CHANNELS ; i++) {
         head[i] = in[inputIndex*CHANNELS + i];
     }
 }

 // Moves the impulse pointer forward by one frame and stores input frame inputIndex at
 // the head (impulse+halfNumCoefs). When the pointer reaches mRingFull, the
 // 2*halfNumCoefs*CHANNELS samples that start halfNumCoefs*CHANNELS below mRingFull are
 // first copied to the start of the state buffer, and the pointer is moved back by the
 // same distance.
 template<typename TI>
 template<int CHANNELS>
 void AudioResamplerDyn::InBuffer<TI>::readAdvance(TI*& impulse, const int halfNumCoefs,
         const TI* const in, const size_t inputIndex) {
     const int halfWindow = halfNumCoefs*CHANNELS;  // samples on one side of the impulse

     impulse += CHANNELS;
     if (CC_UNLIKELY(impulse >= mRingFull)) {
         // ring is full: slide the live window down to mState
         const size_t rewind = mRingFull - mState - halfWindow;
         memcpy(mState, mState+rewind, halfWindow*2*sizeof(TI));
         impulse -= rewind;
     }
     readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
 }

 // Stores the filter geometry (L phases, halfNumCoefs) and derives mShift.
 //
 // lscale is L - 1 when inSampleRate/outSampleRate (integer division) is below 2,
 // otherwise L * inSampleRate / outSampleRate. mShift is kNumPhaseBits minus the number
 // of bits needed to represent lscale.
 void AudioResamplerDyn::Constants::set(
         int L, int halfNumCoefs, int inSampleRate, int outSampleRate)
 {
     int lscale;
     if (inSampleRate/outSampleRate < 2) {
         lscale = L - 1;
     } else {
         lscale = static_cast<int>(static_cast<uint64_t>(L)*inSampleRate/outSampleRate);
     }

     // count the significant bits of lscale
     int bits = 0;
     int remaining = lscale;
     while (remaining) {
         ++bits;
         remaining >>= 1;
     }

     mL = L;
     mShift = kNumPhaseBits - bits;
     mHalfNumCoefs = halfNumCoefs;
 }

 // Constructs the resampler with no filter generated yet and both SIMD volumes at 0.
 AudioResamplerDyn::AudioResamplerDyn(int bitDepth,
         int inChannelCount, int32_t sampleRate, src_quality quality)
     : AudioResampler(bitDepth, inChannelCount, sampleRate, quality),
       mResampleType(0),
       mFilterSampleRate(0),
       mFilterQuality(DEFAULT_QUALITY),
       mCoefBuffer(NULL)
 {
     mVolumeSimd[0] = 0;
     mVolumeSimd[1] = 0;
     // The AudioResampler base class assumes we are always ready for 1:1 resampling.
     // Resetting mInSampleRate to 0 makes the first setSampleRate() call compute a
     // filter even for 1:1. (May be removed if precalculated filters are used.)
     mInSampleRate = 0;
     mConstants.set(128, 8, mSampleRate, mSampleRate); // TODO: set better
 }

 // Frees the filter coefficient buffer (free(NULL) is a no-op if none was created).
 AudioResamplerDyn::~AudioResamplerDyn()
 {
     free(mCoefBuffer);
 }

 // Drops the buffered input state and clears the recorded filter sample rate.
 void AudioResamplerDyn::init()
 {
     mInBuffer.init();
     mFilterSampleRate = 0; // always trigger new filter generation
 }

 // Sets the left/right volume on the base class and caches each value scaled by 2^16
 // (i.e. placed in the upper half of a 32-bit word) in mVolumeSimd.
 void AudioResamplerDyn::setVolume(int16_t left, int16_t right) {
     AudioResampler::setVolume(left, right);
     // Multiply rather than shift: left-shifting a negative signed value is undefined
     // before C++20, while int16_t * 65536 always fits in an int32_t.
     mVolumeSimd[0] = static_cast<int32_t>(left) * (1 << 16);
     mVolumeSimd[1] = static_cast<int32_t>(right) * (1 << 16);
 }

 // Returns a when a > b, otherwise b.
 template <typename T> T max(T a, T b) {
     if (a > b) {
         return a;
     }
     return b;
 }

 // Returns the larger argument minus the smaller one.
 template <typename T> T absdiff(T a, T b) {
     if (a > b) {
         return a - b;
     }
     return b - a;
 }

 // Generates a Kaiser FIR filter bank with coefficient type T and installs it.
 //
 // c             - filter constants; mL and mHalfNumCoefs are read, and the new
 //                 buffer is registered through c.setBuf()
 // stopBandAtten - stop band attenuation passed to the Kaiser design helpers
 // inSampleRate, outSampleRate - pick the upsample or downsample cutoff formula
 // tbwCheat      - factor applied to the nominal 0.5 cutoff
 //
 // A buffer of (mL+1)*mHalfNumCoefs coefficients is allocated; the previous
 // mCoefBuffer is freed once the new one is in place.
 template<typename T>
 void AudioResamplerDyn::createKaiserFir(Constants &c, double stopBandAtten,
         int inSampleRate, int outSampleRate, double tbwCheat) {
     static const double atten = 0.9998;   // to avoid ripple overflow
     T* buf = static_cast<T*>(memalign(32, (c.mL+1)*c.mHalfNumCoefs*sizeof(T)));
     double tbw = firKaiserTbw(c.mHalfNumCoefs, stopBandAtten);

     // cutoff: never below half the transition bandwidth
     double fcr;
     if (inSampleRate >= outSampleRate) { // downsample
         fcr = max(0.5*tbwCheat*outSampleRate/inSampleRate - tbw/2, tbw/2);
     } else { // upsample
         fcr = max(0.5*tbwCheat - tbw/2, tbw/2);
     }

     // create and set filter
     firKaiserGen(buf, c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten);
     c.setBuf(buf);
     free(mCoefBuffer);   // free(NULL) is a no-op
     mCoefBuffer = buf;
 #ifdef DEBUG_RESAMPLER
     // print basic filter stats
     printf("L:%d  hnc:%d  stopBandAtten:%lf  fcr:%lf  atten:%lf  tbw:%lf\n",
             c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten, tbw);
     // test the filter and report results
     double fp = (fcr - tbw/2)/c.mL;
     double fs = (fcr + tbw/2)/c.mL;
     double passMin, passMax, passRipple;
     double stopMax, stopRipple;
     testFir(buf, c.mL, c.mHalfNumCoefs, fp, fs, /*passSteps*/ 1000, /*stopSteps*/ 100000,
             passMin, passMax, passRipple, stopMax, stopRipple);
     printf("passband(%lf, %lf): %.8lf %.8lf %.8lf\n", 0., fp, passMin, passMax, passRipple);
     printf("stopband(%lf, %lf): %.8lf %.3lf\n", fs, 0.5, stopMax, stopRipple);
 #endif
 }

 // Greatest common divisor by the Euclidean algorithm, written as a loop.
 // Returns n unchanged when m is 0.
 static int gcd(int n, int m) {
     while (m != 0) {
         const int remainder = n % m;
         n = m;
         m = remainder;
     }
     return n;
 }

 static bool isClose(int32_t newSampleRate, int32_t prevSampleRate,
         int32_t filterSampleRate, int32_t outSampleRate) {

     // different upsampling ratios do not need a filter change.
     if (filterSampleRate != 0
             && filterSampleRate < outSampleRate
             && newSampleRate < outSampleRate)
         return true;

     // check design criteria again if downsampling is detected.
     int pdiff = absdiff(newSampleRate, prevSampleRate);
     int adiff = absdiff(newSampleRate, filterSampleRate);

     // allow up to 6% relative change increments.
     // allow up to 12% absolute change increments (from filter design)
     return pdiff < prevSampleRate>>4 && adiff < filterSampleRate>>3;
 }

 // Sets the input sample rate and reconfigures the resampler for it.
 //
 // Does nothing when the rate is unchanged. Otherwise:
 //  - generates a new Kaiser filter when the requested quality differs from the quality
 //    the current filter was built with, or when isClose() rejects the new rate;
 //  - resizes the input state buffer for the current filter length;
 //  - rescales mPhaseFraction to the new phase range and recomputes mPhaseIncrement;
 //  - selects the resample() variant through mResampleType.
 void AudioResamplerDyn::setSampleRate(int32_t inSampleRate) {
     if (mInSampleRate == inSampleRate) {
         return;
     }
     int32_t oldSampleRate = mInSampleRate;
     uint32_t oldPhaseWrapLimit = mConstants.mL << mConstants.mShift;
     // Coefficient width of the filter that is currently installed. It must not default
     // to false: when the filter is reused below (no regeneration), mResampleType still
     // has to name the coefficient type the existing buffer was generated with, otherwise
     // an S32 filter bank would be dispatched to the S16 resample variant.
     bool useS32 = (mFilterQuality == DYN_HIGH_QUALITY);

     mInSampleRate = inSampleRate;

     // TODO: Add precalculated Equiripple filters

     if (mFilterQuality != getQuality() ||
             !isClose(inSampleRate, oldSampleRate, mFilterSampleRate, mSampleRate)) {
         mFilterSampleRate = inSampleRate;
         mFilterQuality = getQuality();

         // Begin Kaiser Filter computation
         //
         // The quantization floor for S16 is about 96db - 10*log_10(#length) + 3dB.
         // Keep the stop band attenuation no greater than 84-85dB for 32 length S16 filters
         //
         // For s32 we keep the stop band attenuation at the same as 16b resolution, about
         // 96-98dB
         //

         double stopBandAtten;
         double tbwCheat = 1.; // how much we "cheat" into aliasing
         int halfLength;
         if (mFilterQuality == DYN_HIGH_QUALITY) {
             // 32b coefficients, 64 length
             useS32 = true;
             stopBandAtten = 98.;
             if (inSampleRate >= mSampleRate * 4) {
                 halfLength = 48;
             } else if (inSampleRate >= mSampleRate * 2) {
                 halfLength = 40;
             } else {
                 halfLength = 32;
             }
         } else if (mFilterQuality == DYN_LOW_QUALITY) {
             // 16b coefficients, 16-32 length
             useS32 = false;
             stopBandAtten = 80.;
             if (inSampleRate >= mSampleRate * 4) {
                 halfLength = 24;
             } else if (inSampleRate >= mSampleRate * 2) {
                 halfLength = 16;
             } else {
                 halfLength = 8;
             }
             if (inSampleRate <= mSampleRate) {
                 tbwCheat = 1.05;
             } else {
                 tbwCheat = 1.03;
             }
         } else { // DYN_MED_QUALITY
             // 16b coefficients, 32-64 length
             // note: > 64 length filters with 16b coefs can have quantization noise problems
             useS32 = false;
             stopBandAtten = 84.;
             if (inSampleRate >= mSampleRate * 4) {
                 halfLength = 32;
             } else if (inSampleRate >= mSampleRate * 2) {
                 halfLength = 24;
             } else {
                 halfLength = 16;
             }
             if (inSampleRate <= mSampleRate) {
                 tbwCheat = 1.03;
             } else {
                 tbwCheat = 1.01;
             }
         }

         // determine the number of polyphases in the filterbank.
         // for 16b, it is desirable to have 2^(16/2) = 256 phases.
         // https://ccrma.stanford.edu/~jos/resample/Relation_Interpolation_Error_Quantization.html
         //
         // We are a bit more lax on this.

         int phases = mSampleRate / gcd(mSampleRate, inSampleRate);

         // TODO: Once dynamic sample rate change is an option, the code below
         // should be modified to execute only when dynamic sample rate change is enabled.
         //
         // as above, #phases less than 63 is too few phases for accurate linear interpolation.
         // we increase the phases to compensate, but more phases means more memory per
         // filter and more time to compute the filter.
         //
         // if we know that the filter will be used for dynamic sample rate changes,
         // that would allow us skip this part for fixed sample rate resamplers.
         //
         while (phases<63) {
             phases *= 2; // this code only needed to support dynamic rate changes
         }

         if (phases>=256) {  // too many phases, always interpolate
             phases = 127;
         }

         // create the filter
         mConstants.set(phases, halfLength, inSampleRate, mSampleRate);
         if (useS32) {
             createKaiserFir<int32_t>(mConstants, stopBandAtten,
                     inSampleRate, mSampleRate, tbwCheat);
         } else {
             createKaiserFir<int16_t>(mConstants, stopBandAtten,
                     inSampleRate, mSampleRate, tbwCheat);
         }
     } // End Kaiser filter

     // update phase and state based on the new filter.
     const Constants& c(mConstants);
     mInBuffer.resize(mChannelCount, c.mHalfNumCoefs);
     const uint32_t phaseWrapLimit = c.mL << c.mShift;
     // try to preserve as much of the phase fraction as possible for on-the-fly changes
     mPhaseFraction = static_cast<unsigned long long>(mPhaseFraction)
             * phaseWrapLimit / oldPhaseWrapLimit;
     mPhaseFraction %= phaseWrapLimit; // should not do anything, but just in case.
     mPhaseIncrement = static_cast<uint32_t>(static_cast<double>(phaseWrapLimit)
             * inSampleRate / mSampleRate);

     // determine which resampler to use
     // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits")
     int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0;
     int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2;
     if (locked) {
         mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase
     }

     mResampleType = RESAMPLETYPE(mChannelCount, locked, stride, !!useS32);
 #ifdef DEBUG_RESAMPLER
     printf("channels:%d  %s  stride:%d  %s  coef:%d  shift:%d\n",
             mChannelCount, locked ? "locked" : "interpolated",
             stride, useS32 ? "S32" : "S16", 2*c.mHalfNumCoefs, c.mShift);
 #endif
 }

 // Entry point for resampling: forwards to the resample<CHANNELS, LOCKED, STRIDE>()
 // variant that matches mResampleType, passing the S16 or S32 coefficient pointer.
 // Only the stride-16 variants are compiled in; for any other type value the function
 // returns without writing to `out`.
 void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
             AudioBufferProvider* provider)
 {
     // TODO:
     // 24 cases - this perhaps can be reduced later, as testing might take too long
     switch (mResampleType) {

     // stride 16 (falls back to stride 2 for machines that do not support NEON)
     case RESAMPLETYPE(1, true, 16, 0):
         resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(2, true, 16, 0):
         resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(1, false, 16, 0):
         resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(2, false, 16, 0):
         resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(1, true, 16, 1):
         resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(2, true, 16, 1):
         resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(1, false, 16, 1):
         resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(2, false, 16, 1):
         resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
 #if 0
     // TODO: Remove these?
     // stride 8
     case RESAMPLETYPE(1, true, 8, 0):
         resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(2, true, 8, 0):
         resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(1, false, 8, 0):
         resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(2, false, 8, 0):
         resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(1, true, 8, 1):
         resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(2, true, 8, 1):
         resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(1, false, 8, 1):
         resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(2, false, 8, 1):
         resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     // stride 2 (can handle any filter length)
     case RESAMPLETYPE(1, true, 2, 0):
         resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(2, true, 2, 0):
         resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(1, false, 2, 0):
         resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(2, false, 2, 0):
         resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
         break;
     case RESAMPLETYPE(1, true, 2, 1):
         resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(2, true, 2, 1):
         resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(1, false, 2, 1):
         resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
     case RESAMPLETYPE(2, false, 2, 1):
         resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
         break;
 #endif
     default:
         break; // error: unknown resample type, nothing is produced
     }
 }

 // Core resampling loop, specialized on channel count, locked/interpolated phase,
 // filter stride and coefficient type TC.
 //
 // out           - output buffer; two int32_t values are written per output frame
 // outFrameCount - number of output frames requested
 // coefs         - polyphase filter coefficients handed to fir()
 // provider      - source of input buffers (getNextBuffer / releaseBuffer)
 //
 // The impulse pointer, input index and phase fraction are kept in locals while the
 // loop runs and written back to the members at resample_exit. If the provider returns
 // a buffer whose raw pointer is NULL, the function exits early and the rest of `out`
 // is left unwritten.
 template<int CHANNELS, bool LOCKED, int STRIDE, typename TC>
 void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
         const TC* const coefs,  AudioBufferProvider* provider)
 {
     const Constants& c(mConstants);
     int16_t* impulse = mInBuffer.getImpulse();
     size_t inputIndex = mInputIndex;
     uint32_t phaseFraction = mPhaseFraction;
     const uint32_t phaseIncrement = mPhaseIncrement;
     size_t outputIndex = 0;
     size_t outputSampleCount = outFrameCount * 2;   // stereo output
     size_t inFrameCount = getInFrameCountRequired(outFrameCount);
     // phaseFraction values at or above this limit mean an input frame must be consumed
     const uint32_t phaseWrapLimit = c.mL << c.mShift;

     // NOTE: be very careful when modifying the code here. register
     // pressure is very high and a small change might cause the compiler
     // to generate far less efficient code.
     // Always sanity check the result with objdump or test-resample.

     // the following logic is a bit convoluted to keep the main processing loop
     // as tight as possible with register allocation.
     while (outputIndex < outputSampleCount) {
         // buffer is empty, fetch a new one
         while (mBuffer.frameCount == 0) {
             mBuffer.frameCount = inFrameCount;
             provider->getNextBuffer(&mBuffer,
                     calculateOutputPTS(outputIndex / 2));
             if (mBuffer.raw == NULL) {
                 goto resample_exit;
             }
             // a phase advance left pending when the previous buffer ran out is
             // applied here against the newly fetched buffer
             if (phaseFraction >= phaseWrapLimit) { // read in data
                 mInBuffer.readAdvance<CHANNELS>(
                         impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
                 phaseFraction -= phaseWrapLimit;
                 while (phaseFraction >= phaseWrapLimit) {
                     inputIndex++;
                     if (inputIndex >= mBuffer.frameCount) {
                         // this buffer is used up as well: release it and leave the
                         // inner loop with the remaining advance still pending
                         inputIndex -= mBuffer.frameCount;
                         provider->releaseBuffer(&mBuffer);
                         break;
                     }
                     mInBuffer.readAdvance<CHANNELS>(
                             impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
                     phaseFraction -= phaseWrapLimit;
                 }
             }
         }
         const int16_t* const in = mBuffer.i16;
         const size_t frameCount = mBuffer.frameCount;
         const int coefShift = c.mShift;
         const int halfNumCoefs = c.mHalfNumCoefs;
         const int32_t* const volumeSimd = mVolumeSimd;

         // reread the last input in.
         mInBuffer.readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);

         // main processing loop
         while (CC_LIKELY(outputIndex < outputSampleCount)) {
             // caution: fir() is inlined and may be large.
             // output will be loaded with the appropriate values
             //
             // from the input samples in impulse[-halfNumCoefs+1]... impulse[halfNumCoefs]
             // from the polyphase filter of (phaseFraction / phaseWrapLimit) in coefs.
             //
             fir<CHANNELS, LOCKED, STRIDE>(
                     &out[outputIndex],
                     phaseFraction, phaseWrapLimit,
                     coefShift, halfNumCoefs, coefs,
                     impulse, volumeSimd);
             outputIndex += 2;

             // advance the phase; each wrap consumes one input frame
             phaseFraction += phaseIncrement;
             while (phaseFraction >= phaseWrapLimit) {
                 inputIndex++;
                 if (inputIndex >= frameCount) {
                     goto done;  // need a new buffer
                 }
                 mInBuffer.readAdvance<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
                 phaseFraction -= phaseWrapLimit;
             }
         }
 done:
         // often arrives here when input buffer runs out
         if (inputIndex >= frameCount) {
             inputIndex -= frameCount;
             provider->releaseBuffer(&mBuffer);
             // mBuffer.frameCount MUST be zero here.
         }
     }

 resample_exit:
     // persist the loop state for the next call
     mInBuffer.setImpulse(impulse);
     mInputIndex = inputIndex;
     mPhaseFraction = phaseFraction;
 }

 // ----------------------------------------------------------------------------
 }; // namespace android
	/*
	* Copyright (C) 2013 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#define LOG_TAG "AudioResamplerDyn"
	//#define LOG_NDEBUG 0

	#include <malloc.h>
	#include <string.h>
	#include <stdlib.h>
	#include <dlfcn.h>
	#include <math.h>

	#include <cutils/compiler.h>
	#include <cutils/properties.h>
	#include <utils/Log.h>

	#include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here
	#include "AudioResamplerFirProcess.h"
	#include "AudioResamplerFirProcessNeon.h"
	#include "AudioResamplerFirGen.h" // requires math.h
	#include "AudioResamplerDyn.h"

	//#define DEBUG_RESAMPLER

	namespace android {

	// generate a unique resample type compile-time constant (constexpr)
	//
	// Bit layout of the result:
	//   bit 0    : (CHANNELS - 1) & 1   (0 for 1 channel, 1 for 2 channels)
	//   bit 1    : LOCKED, normalized to 0 or 1
	//   bit 2    : COEFTYPE (not masked; a value above 1 spills into the stride bits)
	//   bits 3-4 : stride code: 1 for STRIDE == 8, 2 for STRIDE == 16, 0 otherwise
	#define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE, COEFTYPE) \
	    ((((CHANNELS)-1)&1) | (!!(LOCKED)<<1) | ((COEFTYPE)<<2) \
	    | (((STRIDE)==8 ? 1 : (STRIDE)==16 ? 2 : 0)<<3))

	/*
	* InBuffer is a type agnostic input buffer.
	*
	* Layout of the state buffer for halfNumCoefs=8.
	*
	* [rrrrrrppppppppnnnnnnnnrrrrrrrrrrrrrrrrrrr.... rrrrrrr]
	*  S            I                                R
	*
	* S = mState
	* I = mImpulse
	* R = mRingFull
	* p = past samples, convolved with the (p)ositive side of sinc()
	* n = future samples, convolved with the (n)egative side of sinc()
	* r = extra space for implementing the ring buffer
	*/

	// Creates an empty InBuffer; all pointers are NULL and the recorded size is 0.
	template<typename TI>
	AudioResamplerDyn::InBuffer<TI>::InBuffer()
	    : mState(NULL),
	      mImpulse(NULL),
	      mRingFull(NULL),
	      mStateSize(0)
	{
	}

	// Releases the state buffer by delegating to init().
	template<typename TI>
	AudioResamplerDyn::InBuffer<TI>::~InBuffer()
	{
	    this->init();
	}

	// Frees the state buffer and resets every member to its empty value.
	// Safe to call on an already empty buffer (free(NULL) is a no-op).
	template<typename TI>
	void AudioResamplerDyn::InBuffer<TI>::init() {
	    free(mState);
	    mStateSize = 0;
	    mState = mImpulse = mRingFull = NULL;
	}

	// Resizes the state buffer to accommodate the appropriate filter length.
	//
	// CHANNELS     - number of interleaved channels stored per frame
	// halfNumCoefs - half of the filter length, in frames
	//
	// The buffer holds halfNumCoefs * CHANNELS * 2 * kStateSizeMultipleOfFilterLength
	// samples. Nothing happens when the current buffer already has that size and the same
	// ring layout. Otherwise a zero-filled, 32-byte aligned buffer is allocated and the
	// samples within halfNumCoefs frames on either side of the old impulse position are
	// copied so that they sit around the new impulse position.
	//
	// NOTE(review): the memalign() result is used without a NULL check.
	template<typename TI>
	void AudioResamplerDyn::InBuffer<TI>::resize(int CHANNELS, int halfNumCoefs) {
	    // calculate desired state size
	    int stateSize = halfNumCoefs * CHANNELS * 2
	            * kStateSizeMultipleOfFilterLength;

	    // check if buffer needs resizing
	    if (mState
	            && stateSize == mStateSize
	            && mRingFull-mState == mStateSize-halfNumCoefs*CHANNELS) {
	        return;
	    }

	    // create new buffer; the element type follows TI rather than a fixed int16_t
	    TI* state = static_cast<TI*>(memalign(32, stateSize*sizeof(*state)));
	    memset(state, 0, stateSize*sizeof(*state));

	    // attempt to preserve state
	    if (mState) {
	        TI* srcLo = mImpulse - halfNumCoefs*CHANNELS;
	        TI* srcHi = mImpulse + halfNumCoefs*CHANNELS;
	        TI* dst = state;

	        // clamp the copied window to the old buffer; the part of the new window
	        // that had no source below mState keeps its zero fill
	        if (srcLo < mState) {
	            dst += mState-srcLo;
	            srcLo = mState;
	        }
	        if (srcHi > mState + mStateSize) {
	            srcHi = mState + mStateSize;
	        }
	        memcpy(dst, srcLo, (srcHi - srcLo) * sizeof(*srcLo));
	        free(mState);
	    }

	    // set class member vars
	    mState = state;
	    mStateSize = stateSize;
	    mImpulse = mState + halfNumCoefs*CHANNELS; // actually one sample greater than needed
	    mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS;
	}

	// Copies one input frame (the CHANNELS samples of frame inputIndex in the interleaved
	// buffer `in`) into the head of the state buffer, i.e. at impulse + halfNumCoefs*CHANNELS.
	// The impulse pointer itself is not moved.
	template<typename TI>
	template<int CHANNELS>
	void AudioResamplerDyn::InBuffer<TI>::readAgain(TI*& impulse, const int halfNumCoefs,
	        const TI* const in, const size_t inputIndex) {
	    // TI, not a fixed int16_t, so the template is valid for every sample type
	    TI* head = impulse + halfNumCoefs*CHANNELS;
	    for (size_t i=0 ; i<CHANNELS ; i++) {
	        head[i] = in[inputIndex*CHANNELS + i];
	    }
	}

	// Moves the impulse pointer forward by one frame and stores input frame inputIndex at
	// the head (impulse+halfNumCoefs). When the pointer reaches mRingFull, the
	// 2*halfNumCoefs*CHANNELS samples that start halfNumCoefs*CHANNELS below mRingFull are
	// first copied to the start of the state buffer, and the pointer is moved back by the
	// same distance.
	template<typename TI>
	template<int CHANNELS>
	void AudioResamplerDyn::InBuffer<TI>::readAdvance(TI*& impulse, const int halfNumCoefs,
	        const TI* const in, const size_t inputIndex) {
	    impulse += CHANNELS;

	    if (CC_UNLIKELY(impulse >= mRingFull)) {
	        // ring is full: slide the live window down to mState
	        const size_t shiftDown = mRingFull - mState - halfNumCoefs*CHANNELS;
	        memcpy(mState, mState+shiftDown, halfNumCoefs*CHANNELS*2*sizeof(TI));
	        impulse -= shiftDown;
	    }
	    readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
	}

	// Stores the filter geometry (L phases, halfNumCoefs) and derives mShift.
	//
	// lscale is L - 1 when inSampleRate/outSampleRate (integer division) is below 2,
	// otherwise L * inSampleRate / outSampleRate. mShift is kNumPhaseBits minus the number
	// of bits needed to represent lscale.
	void AudioResamplerDyn::Constants::set(
	        int L, int halfNumCoefs, int inSampleRate, int outSampleRate)
	{
	    int lscale;
	    if (inSampleRate/outSampleRate < 2) {
	        lscale = L - 1;
	    } else {
	        lscale = static_cast<int>(static_cast<uint64_t>(L)*inSampleRate/outSampleRate);
	    }

	    // count the significant bits of lscale
	    int bits = 0;
	    int remaining = lscale;
	    while (remaining) {
	        ++bits;
	        remaining >>= 1;
	    }

	    mL = L;
	    mShift = kNumPhaseBits - bits;
	    mHalfNumCoefs = halfNumCoefs;
	}

	// Constructs the resampler with no filter generated yet and both SIMD volumes at 0.
	AudioResamplerDyn::AudioResamplerDyn(int bitDepth,
	        int inChannelCount, int32_t sampleRate, src_quality quality)
	    : AudioResampler(bitDepth, inChannelCount, sampleRate, quality),
	      mResampleType(0),
	      mFilterSampleRate(0),
	      mFilterQuality(DEFAULT_QUALITY),
	      mCoefBuffer(NULL)
	{
	    mVolumeSimd[0] = 0;
	    mVolumeSimd[1] = 0;
	    // The AudioResampler base class assumes we are always ready for 1:1 resampling.
	    // Resetting mInSampleRate to 0 makes the first setSampleRate() call compute a
	    // filter even for 1:1. (May be removed if precalculated filters are used.)
	    mInSampleRate = 0;
	    mConstants.set(128, 8, mSampleRate, mSampleRate); // TODO: set better
	}

	// Frees the filter coefficient buffer (free(NULL) is a no-op if none was created).
	AudioResamplerDyn::~AudioResamplerDyn()
	{
	    free(mCoefBuffer);
	}

	// Drops the buffered input state and clears the recorded filter sample rate.
	void AudioResamplerDyn::init()
	{
	    mInBuffer.init();
	    mFilterSampleRate = 0; // always trigger new filter generation
	}

	// Sets the left/right volume on the base class and caches each value scaled by 2^16
	// (i.e. placed in the upper half of a 32-bit word) in mVolumeSimd.
	void AudioResamplerDyn::setVolume(int16_t left, int16_t right) {
	    AudioResampler::setVolume(left, right);
	    // Multiply rather than shift: left-shifting a negative signed value is undefined
	    // before C++20, while int16_t * 65536 always fits in an int32_t.
	    mVolumeSimd[0] = static_cast<int32_t>(left) * (1 << 16);
	    mVolumeSimd[1] = static_cast<int32_t>(right) * (1 << 16);
	}

	// Returns a when a > b, otherwise b.
	template <typename T> T max(T a, T b) {
	    if (a > b) {
	        return a;
	    }
	    return b;
	}

	// Returns the larger argument minus the smaller one.
	template <typename T> T absdiff(T a, T b) {
	    if (a > b) {
	        return a - b;
	    }
	    return b - a;
	}

	// Generates a Kaiser FIR filter bank with coefficient type T and installs it.
	//
	// c             - filter constants; mL and mHalfNumCoefs are read, and the new
	//                 buffer is registered through c.setBuf()
	// stopBandAtten - stop band attenuation passed to the Kaiser design helpers
	// inSampleRate, outSampleRate - pick the upsample or downsample cutoff formula
	// tbwCheat      - factor applied to the nominal 0.5 cutoff
	//
	// A buffer of (mL+1)*mHalfNumCoefs coefficients is allocated; the previous
	// mCoefBuffer is freed once the new one is in place.
	template<typename T>
	void AudioResamplerDyn::createKaiserFir(Constants &c, double stopBandAtten,
	        int inSampleRate, int outSampleRate, double tbwCheat) {
	    T* buf = reinterpret_cast<T*>(memalign(32, (c.mL+1)*c.mHalfNumCoefs*sizeof(T)));
	    static const double atten = 0.9998; // to avoid ripple overflow
	    double fcr;
	    double tbw = firKaiserTbw(c.mHalfNumCoefs, stopBandAtten);

	    // cutoff: never below half the transition bandwidth
	    if (inSampleRate < outSampleRate) { // upsample
	        fcr = max(0.5*tbwCheat - tbw/2, tbw/2);
	    } else { // downsample
	        fcr = max(0.5*tbwCheat*outSampleRate/inSampleRate - tbw/2, tbw/2);
	    }
	    // create and set filter
	    firKaiserGen(buf, c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten);
	    c.setBuf(buf);
	    if (mCoefBuffer) {
	        free(mCoefBuffer);
	    }
	    mCoefBuffer = buf;
	#ifdef DEBUG_RESAMPLER
	    // print basic filter stats
	    printf("L:%d hnc:%d stopBandAtten:%lf fcr:%lf atten:%lf tbw:%lf\n",
	            c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten, tbw);
	    // test the filter and report results
	    double fp = (fcr - tbw/2)/c.mL;
	    double fs = (fcr + tbw/2)/c.mL;
	    double passMin, passMax, passRipple;
	    double stopMax, stopRipple;
	    testFir(buf, c.mL, c.mHalfNumCoefs, fp, fs, /*passSteps*/ 1000, /*stopSteps*/ 100000,
	            passMin, passMax, passRipple, stopMax, stopRipple);
	    printf("passband(%lf, %lf): %.8lf %.8lf %.8lf\n", 0., fp, passMin, passMax, passRipple);
	    printf("stopband(%lf, %lf): %.8lf %.3lf\n", fs, 0.5, stopMax, stopRipple);
	#endif
	}

	// Greatest common divisor by the Euclidean algorithm, written as a loop.
	// Returns n unchanged when m is 0.
	static int gcd(int n, int m) {
	    while (m != 0) {
	        const int remainder = n % m;
	        n = m;
	        m = remainder;
	    }
	    return n;
	}

	static bool isClose(int32_t newSampleRate, int32_t prevSampleRate,
	int32_t filterSampleRate, int32_t outSampleRate) {

	// different upsampling ratios do not need a filter change.
	if (filterSampleRate != 0
	&& filterSampleRate < outSampleRate
	&& newSampleRate < outSampleRate)
	return true;

	// check design criteria again if downsampling is detected.
	int pdiff = absdiff(newSampleRate, prevSampleRate);
	int adiff = absdiff(newSampleRate, filterSampleRate);

	// allow up to 6% relative change increments.
	// allow up to 12% absolute change increments (from filter design)
	return pdiff < prevSampleRate>>4 && adiff < filterSampleRate>>3;
	}

	/*
	 * Switches the resampler to a new input sample rate.
	 *
	 * The Kaiser polyphase filter is redesigned when the quality returned by
	 * getQuality() differs from the quality of the installed filter, or when
	 * isClose() reports that the new rate is too far from the rate the filter
	 * was designed for. In every case the phase fraction is rescaled to the
	 * (possibly new) phase wrap limit, the phase increment is recomputed, and
	 * mResampleType is updated to select the matching resample<>() variant.
	 *
	 * inSampleRate: new input sample rate. The call is a no-op when it equals
	 *               the current mInSampleRate.
	 */
	void AudioResamplerDyn::setSampleRate(int32_t inSampleRate) {
	    if (mInSampleRate == inSampleRate) {
	        return;
	    }
	    const int32_t oldSampleRate = mInSampleRate;
	    const uint32_t oldPhaseWrapLimit = mConstants.mL << mConstants.mShift;

	    // Coefficient width of the filter that is currently installed.
	    // This must survive the "filter not rebuilt" path below: resample()
	    // picks mFirCoefsS16 or mFirCoefsS32 from the coefficient-type bit of
	    // mResampleType, so defaulting to S16 here would make a kept S32
	    // (DYN_HIGH_QUALITY) filter be read through the S16 pointer.
	    bool useS32 = (mFilterQuality == DYN_HIGH_QUALITY);

	    mInSampleRate = inSampleRate;

	    // TODO: Add precalculated Equiripple filters

	    if (mFilterQuality != getQuality() ||
	            !isClose(inSampleRate, oldSampleRate, mFilterSampleRate, mSampleRate)) {
	        mFilterSampleRate = inSampleRate;
	        mFilterQuality = getQuality();

	        // Begin Kaiser Filter computation
	        //
	        // The quantization floor for S16 is about 96db - 10*log_10(#length) + 3dB.
	        // Keep the stop band attenuation no greater than 84-85dB for 32 length S16 filters
	        //
	        // For s32 we keep the stop band attenuation at the same as 16b resolution, about
	        // 96-98dB
	        //

	        double stopBandAtten;
	        double tbwCheat = 1.; // how much we "cheat" into aliasing
	        int halfLength;
	        if (mFilterQuality == DYN_HIGH_QUALITY) {
	            // 32b coefficients, 64 length
	            useS32 = true;
	            stopBandAtten = 98.;
	            if (inSampleRate >= mSampleRate * 4) {
	                halfLength = 48;
	            } else if (inSampleRate >= mSampleRate * 2) {
	                halfLength = 40;
	            } else {
	                halfLength = 32;
	            }
	        } else if (mFilterQuality == DYN_LOW_QUALITY) {
	            // 16b coefficients, 16-32 length
	            useS32 = false;
	            stopBandAtten = 80.;
	            if (inSampleRate >= mSampleRate * 4) {
	                halfLength = 24;
	            } else if (inSampleRate >= mSampleRate * 2) {
	                halfLength = 16;
	            } else {
	                halfLength = 8;
	            }
	            if (inSampleRate <= mSampleRate) {
	                tbwCheat = 1.05;
	            } else {
	                tbwCheat = 1.03;
	            }
	        } else { // DYN_MED_QUALITY
	            // 16b coefficients, 32-64 length
	            // note: > 64 length filters with 16b coefs can have quantization noise problems
	            useS32 = false;
	            stopBandAtten = 84.;
	            if (inSampleRate >= mSampleRate * 4) {
	                halfLength = 32;
	            } else if (inSampleRate >= mSampleRate * 2) {
	                halfLength = 24;
	            } else {
	                halfLength = 16;
	            }
	            if (inSampleRate <= mSampleRate) {
	                tbwCheat = 1.03;
	            } else {
	                tbwCheat = 1.01;
	            }
	        }

	        // determine the number of polyphases in the filterbank.
	        // for 16b, it is desirable to have 2^(16/2) = 256 phases.
	        // https://ccrma.stanford.edu/~jos/resample/Relation_Interpolation_Error_Quantization.html
	        //
	        // We are a bit more lax on this.

	        int phases = mSampleRate / gcd(mSampleRate, inSampleRate);

	        // TODO: Once dynamic sample rate change is an option, the code below
	        // should be modified to execute only when dynamic sample rate change is enabled.
	        //
	        // as above, #phases less than 63 is too few phases for accurate linear interpolation.
	        // we increase the phases to compensate, but more phases means more memory per
	        // filter and more time to compute the filter.
	        //
	        // if we know that the filter will be used for dynamic sample rate changes,
	        // that would allow us skip this part for fixed sample rate resamplers.
	        //
	        while (phases < 63) {
	            phases *= 2; // this code only needed to support dynamic rate changes
	        }

	        if (phases >= 256) { // too many phases, always interpolate
	            phases = 127;
	        }

	        // create the filter
	        mConstants.set(phases, halfLength, inSampleRate, mSampleRate);
	        if (useS32) {
	            createKaiserFir<int32_t>(mConstants, stopBandAtten,
	                    inSampleRate, mSampleRate, tbwCheat);
	        } else {
	            createKaiserFir<int16_t>(mConstants, stopBandAtten,
	                    inSampleRate, mSampleRate, tbwCheat);
	        }
	    } // End Kaiser filter

	    // update phase and state based on the new filter.
	    const Constants& c(mConstants);
	    mInBuffer.resize(mChannelCount, c.mHalfNumCoefs);
	    const uint32_t phaseWrapLimit = c.mL << c.mShift;
	    // try to preserve as much of the phase fraction as possible for on-the-fly changes
	    mPhaseFraction = static_cast<unsigned long long>(mPhaseFraction)
	            * phaseWrapLimit / oldPhaseWrapLimit;
	    mPhaseFraction %= phaseWrapLimit; // should not do anything, but just in case.
	    mPhaseIncrement = static_cast<uint32_t>(static_cast<double>(phaseWrapLimit)
	            * inSampleRate / mSampleRate);

	    // determine which resampler to use
	    // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits")
	    int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0;
	    int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2;
	    if (locked) {
	        mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase
	    }

	    mResampleType = RESAMPLETYPE(mChannelCount, locked, stride, !!useS32);
	#ifdef DEBUG_RESAMPLER
	    printf("channels:%d %s stride:%d %s coef:%d shift:%d\n",
	            mChannelCount, locked ? "locked" : "interpolated",
	            stride, useS32 ? "S32" : "S16", 2*c.mHalfNumCoefs, c.mShift);
	#endif
	}

	/*
	 * Runtime-to-compile-time dispatcher: forwards to the
	 * resample<CHANNELS, LOCKED, STRIDE>() specialization whose RESAMPLETYPE()
	 * code equals mResampleType, handing it the S16 or S32 coefficient table
	 * according to the coefficient-type field of that code.
	 *
	 * out:           output buffer, forwarded unchanged.
	 * outFrameCount: number of output frames requested, forwarded unchanged.
	 * provider:      source of input buffers, forwarded unchanged.
	 *
	 * Only the stride-16 variants are compiled in; any other mResampleType
	 * reaches the default label and the call returns without resampling.
	 */
	void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
	        AudioBufferProvider* provider)
	{
	    // TODO:
	    // 24 cases - this perhaps can be reduced later, as testing might take too long
	    switch (mResampleType) {

	    // stride 16 (falls back to stride 2 for machines that do not support NEON)
	    case RESAMPLETYPE(1, true, 16, 0):
	        resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(2, true, 16, 0):
	        resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(1, false, 16, 0):
	        resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(2, false, 16, 0):
	        resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(1, true, 16, 1):
	        resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(2, true, 16, 1):
	        resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(1, false, 16, 1):
	        resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(2, false, 16, 1):
	        resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	#if 0
	    // TODO: Remove these?
	    // stride 8
	    case RESAMPLETYPE(1, true, 8, 0):
	        resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(2, true, 8, 0):
	        resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(1, false, 8, 0):
	        resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(2, false, 8, 0):
	        resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(1, true, 8, 1):
	        resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(2, true, 8, 1):
	        resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(1, false, 8, 1):
	        resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(2, false, 8, 1):
	        resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    // stride 2 (can handle any filter length)
	    case RESAMPLETYPE(1, true, 2, 0):
	        resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(2, true, 2, 0):
	        resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(1, false, 2, 0):
	        resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(2, false, 2, 0):
	        resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
	        break;
	    case RESAMPLETYPE(1, true, 2, 1):
	        resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(2, true, 2, 1):
	        resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(1, false, 2, 1):
	        resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	    case RESAMPLETYPE(2, false, 2, 1):
	        resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
	        break;
	#endif
	    default:
	        break; // error
	    }
	}

	/*
	 * Resampling worker, specialized at compile time.
	 *
	 * CHANNELS, LOCKED and STRIDE are forwarded as template arguments to fir()
	 * (CHANNELS also to the mInBuffer read helpers); TC is the element type of
	 * the coefficient table.
	 *
	 * out:           output buffer; fir() is handed &out[outputIndex] and the
	 *                index advances by 2 per output frame (stereo output), so
	 *                up to outFrameCount * 2 int32_t slots are produced.
	 * outFrameCount: number of output frames requested.
	 * coefs:         polyphase filter coefficients passed through to fir().
	 * provider:      supplies input via getNextBuffer() and receives each
	 *                buffer back via releaseBuffer() once it is consumed.
	 *
	 * Works on local copies of mInputIndex, mPhaseFraction and the impulse
	 * pointer, and stores them back at resample_exit. If the provider returns a
	 * buffer with raw == NULL, the function stops early at that point.
	 */
	template<int CHANNELS, bool LOCKED, int STRIDE, typename TC>
	void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
	const TC* const coefs, AudioBufferProvider* provider)
	{
	const Constants& c(mConstants);
	int16_t* impulse = mInBuffer.getImpulse();
	size_t inputIndex = mInputIndex;
	uint32_t phaseFraction = mPhaseFraction;
	const uint32_t phaseIncrement = mPhaseIncrement;
	size_t outputIndex = 0;
	size_t outputSampleCount = outFrameCount * 2; // stereo output
	// number of input frames asked of the provider on each fetch
	size_t inFrameCount = getInFrameCountRequired(outFrameCount);
	// phaseFraction reaching this value means one whole input frame has elapsed
	const uint32_t phaseWrapLimit = c.mL << c.mShift;

	// NOTE: be very careful when modifying the code here. register
	// pressure is very high and a small change might cause the compiler
	// to generate far less efficient code.
	// Always sanity check the result with objdump or test-resample.

	// the following logic is a bit convoluted to keep the main processing loop
	// as tight as possible with register allocation.
	while (outputIndex < outputSampleCount) {
	// buffer is empty, fetch a new one
	while (mBuffer.frameCount == 0) {
	mBuffer.frameCount = inFrameCount;
	provider->getNextBuffer(&mBuffer,
	calculateOutputPTS(outputIndex / 2));
	if (mBuffer.raw == NULL) {
	// no input available: save state and return with the output produced so far
	goto resample_exit;
	}
	// the main loop leaves via "goto done" before subtracting phaseWrapLimit,
	// so input advances still owed from the previous buffer are consumed here.
	if (phaseFraction >= phaseWrapLimit) { // read in data
	mInBuffer.readAdvance<CHANNELS>(
	impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
	phaseFraction -= phaseWrapLimit;
	while (phaseFraction >= phaseWrapLimit) {
	inputIndex++;
	if (inputIndex >= mBuffer.frameCount) {
	// this buffer is used up while still catching up: release it and let
	// the enclosing loop fetch another (frameCount is expected to be 0 again)
	inputIndex -= mBuffer.frameCount;
	provider->releaseBuffer(&mBuffer);
	break;
	}
	mInBuffer.readAdvance<CHANNELS>(
	impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
	phaseFraction -= phaseWrapLimit;
	}
	}
	}
	// copy loop-invariant values into const locals for the main processing loop
	const int16_t* const in = mBuffer.i16;
	const size_t frameCount = mBuffer.frameCount;
	const int coefShift = c.mShift;
	const int halfNumCoefs = c.mHalfNumCoefs;
	const int32_t* const volumeSimd = mVolumeSimd;

	// reread the last input in.
	mInBuffer.readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);

	// main processing loop
	while (CC_LIKELY(outputIndex < outputSampleCount)) {
	// caution: fir() is inlined and may be large.
	// output will be loaded with the appropriate values
	//
	// from the input samples in impulse[-halfNumCoefs+1]... impulse[halfNumCoefs]
	// from the polyphase filter of (phaseFraction / phaseWrapLimit) in coefs.
	//
	fir<CHANNELS, LOCKED, STRIDE>(
	&out[outputIndex],
	phaseFraction, phaseWrapLimit,
	coefShift, halfNumCoefs, coefs,
	impulse, volumeSimd);
	outputIndex += 2;

	// step the phase; every wrap past phaseWrapLimit consumes one more input frame
	phaseFraction += phaseIncrement;
	while (phaseFraction >= phaseWrapLimit) {
	inputIndex++;
	if (inputIndex >= frameCount) {
	// phaseFraction is deliberately left >= phaseWrapLimit here; the
	// fetch loop above finishes this advance on the next buffer
	goto done; // need a new buffer
	}
	mInBuffer.readAdvance<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
	phaseFraction -= phaseWrapLimit;
	}
	}
	done:
	// often arrives here when input buffer runs out
	if (inputIndex >= frameCount) {
	inputIndex -= frameCount;
	provider->releaseBuffer(&mBuffer);
	// mBuffer.frameCount MUST be zero here.
	}
	}

	resample_exit:
	// write the working copies back so the next call resumes from this position
	mInBuffer.setImpulse(impulse);
	mInputIndex = inputIndex;
	mPhaseFraction = phaseFraction;
	}

	// ----------------------------------------------------------------------------
	}; // namespace android