Blame - services/audioflinger/AudioResamplerFirGen.h - android_frameworks_av

blob: 1f21c60c84bf2177055fab464af6f44bb0508841 [file] [log] [blame]

Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2013 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
				18	#define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
				19
				20	namespace android {
				21
				22	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	23	* generates a sine wave at equal steps.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	24	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	25	* As most of our functions use sine or cosine at equal steps,
				26	* it is very efficient to compute them that way (single multiply and subtract),
				27	* rather than invoking the math library sin() or cos() each time.
				28	*
				29	* SineGen uses Goertzel's Algorithm (as a generator not a filter)
				30	* to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep)
				31	* by stepping through 0, 1, ... n.
				32	*
				33	* e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep)
				34	*
				35	* or looking at just the imaginary sine term, as the cosine follows identically:
				36	*
				37	* sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep)
				38	*
				39	* Goertzel's algorithm is more efficient than the angle addition formula,
				40	* e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to
				41	* 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and
				42	* cosine generation due to the complex * complex multiply (full rotation).
				43	*
				44	* See: http://en.wikipedia.org/wiki/Goertzel_algorithm
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	45	*
				46	*/
				47
/*
 * Generates successive values of sin() or cos() at equally spaced angles
 * using the two-term recurrence
 *
 *     f(w + wstep) = 2cos(wstep) * f(w) - f(w - wstep)
 *
 * so that each step costs one multiply and one subtract.
 */
class SineGen {
public:
    /*
     * @param wstart is the angle (radians) of the first value returned by value().
     *
     * @param wstep is the angle increment applied by each advance().
     *
     * @param cosine selects cos() generation when true, sin() otherwise.
     */
    SineGen(double wstart, double wstep, bool cosine = false)
        : mCurrent(cosine ? cos(wstart) : sin(wstart)),
          mPrevious(cosine ? cos(wstart - wstep) : sin(wstart - wstep)),
          mTwoCos(2.*cos(wstep)) {
    }

    /*
     * Constructs the generator directly from its recurrence state.
     *
     * @param expNow is the value returned by the first call to value().
     *
     * @param expPrev is the value one step before expNow.
     *
     * @param twoCosStep is the stepping factor, 2*cos(step).
     */
    SineGen(double expNow, double expPrev, double twoCosStep)
        : mCurrent(expNow),
          mPrevious(expPrev),
          mTwoCos(twoCosStep) {
    }

    // returns the current value without stepping.
    inline double value() const {
        return mCurrent;
    }

    // steps the recurrence forward by one increment.
    inline void advance() {
        const double next = mCurrent*mTwoCos - mPrevious;
        mPrevious = mCurrent;
        mCurrent = next;
    }

    // returns the current value, then steps forward by one increment.
    inline double valueAdvance() {
        const double now = mCurrent;
        advance();
        return now;
    }

private:
    double mCurrent;  // current value of sine/cosine
    double mPrevious; // previous value of sine/cosine
    double mTwoCos;   // stepping factor
};
				85
				86	/*
				87	* generates a series of sine generators, phase offset by fixed steps.
				88	*
				89	* This is used to generate polyphase sine generators, one per polyphase
				90	* in the filter code below.
				91	*
				92	* The SineGen returned by value() starts at innerStart = outerStart + n*outerStep;
				93	* increments by innerStep.
				94	*
				95	*/
				96
				97	class SineGenGen {
				98	public:
				99	SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false)
				100	: mSineInnerCur(outerStart, outerStep, cosine),
				101	mSineInnerPrev(outerStart-innerStep, outerStep, cosine)
				102	{
				103	mTwoCos = 2.*cos(innerStep);
				104	}
				105	inline SineGen value() {
				106	return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos);
				107	}
				108	inline void advance() {
				109	mSineInnerCur.advance();
				110	mSineInnerPrev.advance();
				111	}
				112	inline SineGen valueAdvance() {
				113	return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos);
				114	}
				115
				116	private:
				117	SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep).
				118	SineGen mSineInnerPrev; // generate the inner sine previous values
				119	// (behind by innerStep, stepped by outerStep).
				120	double mTwoCos; // the inner stepping factor for the returned SineGen.
				121	};
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	122
				123	static inline double sqr(double x) {
				124	return x * x;
				125	}
				126
				127	/*
				128	* rounds a double to the nearest integer for FIR coefficients.
				129	*
				130	* One variant uses noise shaping, which must keep error history
				131	* to work (the err parameter, initialized to 0).
				132	* The other variant is a non-noise shaped version for
				133	* S32 coefficients (noise shaping doesn't gain much).
				134	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	135	* Caution: No bounds saturation is applied, but isn't needed in this case.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	136	*
				137	* @param x is the value to round.
				138	*
				139	* @param maxval is the maximum integer scale factor expressed as an int64 (for headroom).
				140	* Typically this may be the maximum positive integer+1 (using the fact that double precision
				141	* FIR coefficients generated here are never that close to 1.0 to pose an overflow condition).
				142	*
				143	* @param err is the previous error (actual - rounded) for the previous rounding op.
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	144	* For 16b coefficients this can improve stopband dB performance by up to 2dB.
				145	*
				146	* Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	147	*
				148	*/
				149
				150	static inline int64_t toint(double x, int64_t maxval, double& err) {
				151	double val = x * maxval;
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	152	double ival = floor(val + 0.5 + err*0.2);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	153	err = val - ival;
				154	return static_cast<int64_t>(ival);
				155	}
				156
				157	static inline int64_t toint(double x, int64_t maxval) {
				158	return static_cast<int64_t>(floor(x * maxval + 0.5));
				159	}
				160
				161	/*
				162	* Modified Bessel function of the first kind
				163	* http://en.wikipedia.org/wiki/Bessel_function
				164	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	165	* The formulas are taken from Abramowitz and Stegun,
				166	* _Handbook of Mathematical Functions_ (links below):
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	167	*
				168	* http://people.math.sfu.ca/~cbm/aands/page_375.htm
				169	* http://people.math.sfu.ca/~cbm/aands/page_378.htm
				170	*
				171	* http://dlmf.nist.gov/10.25
				172	* http://dlmf.nist.gov/10.40
				173	*
				174	* Note we assume x is nonnegative (the function is symmetric,
				175	* pass in the absolute value as needed).
				176	*
				177	* Constants are compile time derived with templates I0Term<> and
				178	* I0ATerm<> to the precision of the compiler. The series can be expanded
				179	* to any precision needed, but currently set around 24b precision.
				180	*
				181	* We use a bit of template math here, constexpr would probably be
				182	* more appropriate for a C++11 compiler.
				183	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	184	* For the intermediate range 3.75 < x < 15, we use minimax polynomial fit.
				185	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	186	*/
				187
				188	template <int N>
				189	struct I0Term {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	190	static const double value = I0Term<N-1>::value / (4. * N * N);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	191	};
				192
				193	template <>
				194	struct I0Term<0> {
				195	static const double value = 1.;
				196	};
				197
				198	template <int N>
				199	struct I0ATerm {
				200	static const double value = I0ATerm<N-1>::value * (2.N-1.) (2.N-1.) / (8. N);
				201	};
				202
				203	template <>
				204	struct I0ATerm<0> { // 1/sqrt(2*PI);
				205	static const double value = 0.398942280401432677939946059934381868475858631164934657665925;
				206	};
				207
#if USE_HORNERS_METHOD
/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
 * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method
 *
 * This has fewer multiplications than Estrin's method below, but has back to back
 * floating point dependencies.
 *
 * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled.
 */

inline double Poly2(double A, double B, double x) {
    return A + x * B;
}

// Provided so that both evaluation methods expose the same set of helpers.
inline double Poly3(double A, double B, double C, double x) {
    return A + x * (B + x * (C));
}

inline double Poly4(double A, double B, double C, double D, double x) {
    return A + x * (B + x * (C + x * (D)));
}

inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
        double x) {
    return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G))))));
}

inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
        double H, double I, double x) {
    return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I))))))));
}

#else
/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
 * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme
 *
 * This is typically faster, perhaps gains about 5-10% overall on ARM processors
 * over Horner's method above.
 *
 * Overloads taking x2 (and x4) expect the caller to pass x*x (and x*x*x*x)
 * so that the powers are computed only once.
 */

inline double Poly2(double A, double B, double x) {
    return A + B * x;
}

inline double Poly3(double A, double B, double C, double x, double x2) {
    return Poly2(A, B, x) + C * x2;
}

inline double Poly3(double A, double B, double C, double x) {
    return Poly2(A, B, x) + C * x * x;
}

inline double Poly4(double A, double B, double C, double D, double x, double x2) {
    return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2);
}

inline double Poly4(double A, double B, double C, double D, double x) {
    return Poly4(A, B, C, D, x, x * x);
}

inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
        double x) {
    double x2 = x * x;
    return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2);
}

inline double Poly8(double A, double B, double C, double D, double E, double F, double G,
        double H, double x, double x2, double x4) {
    return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4;
}

inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
        double H, double I, double x) {
    double x2 = x * x;
#if 1
    // It does not seem faster to explicitly decompose Poly8 into Poly4, but
    // could depend on compiler floating point scheduling.
    double x4 = x2 * x2;
    return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4);
#else
    double val = Poly4(A, B, C, D, x, x2);
    double x4 = x2 * x2;
    return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4);
#endif
}
#endif
				290
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	291	static inline double I0(double x) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	292	if (x < 3.75) {
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	293	x *= x;
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	294	return Poly7(I0Term<0>::value, I0Term<1>::value,
				295	I0Term<2>::value, I0Term<3>::value,
				296	I0Term<4>::value, I0Term<5>::value,
				297	I0Term<6>::value, x); // e < 1.6e-7
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	298	}
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	299	if (1) {
				300	/*
				301	* Series expansion coefs are easy to calculate, but are expanded around 0,
				302	* so error is unequal over the interval 0 < x < 3.75, the error being
				303	* significantly better near 0.
				304	*
				305	* A better solution is to use precise minimax polynomial fits.
				306	*
				307	* We use a slightly more complicated solution for 3.75 < x < 15, based on
				308	* the tables in Blair and Edwards, "Stable Rational Minimax Approximations
				309	* to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory,
				310	* AECL-4928.
				311	*
				312	* http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf
				313	*
				314	* See Table 11 for 0 < x < 15; e < 10^(-7.13).
				315	*
				316	* Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b).
				317	*
				318	* This speeds up overall computation by about 40% over using the else clause below,
				319	* which requires sqrt and exp.
				320	*
				321	*/
				322
				323	x *= x;
				324	double num = Poly9(-0.13544938430e9, -0.33153754512e8,
				325	-0.19406631946e7, -0.48058318783e5,
				326	-0.63269783360e3, -0.49520779070e1,
				327	-0.24970910370e-1, -0.74741159550e-4,
				328	-0.18257612460e-6, x);
				329	double y = x - 225.; // reflection around 15 (squared)
				330	double den = Poly4(-0.34598737196e8, 0.23852643181e6,
				331	-0.70699387620e3, 0.10000000000e1, y);
				332	return num / den;
				333
				334	#if IO_EXTENDED_BETA
				335	/* Table 42 for x > 15; e < 10^(-8.11).
				336	* This is used for Beta>15, but is disabled here as
				337	* we never use Beta that high.
				338	*
				339	* NOTE: This should be enabled only for x > 15.
				340	*/
				341
				342	double y = 1./x;
				343	double z = y - (1./15);
				344	double num = Poly2(0.415079861746e1, -0.5149092496e1, z);
				345	double den = Poly3(0.103150763823e2, -0.14181687413e2,
				346	0.1000000000e1, z);
				347	return exp(x) * sqrt(y) * num / den;
				348	#endif
				349	} else {
				350	/*
				351	* NOT USED, but reference for large Beta.
				352	*
				353	* Abramowitz and Stegun asymptotic formula.
				354	* works for x > 3.75.
				355	*/
				356	double y = 1./x;
				357	return exp(x) * sqrt(y) *
				358	// note: reciprocal squareroot may be easier!
				359	// http://en.wikipedia.org/wiki/Fast_inverse_square_root
				360	Poly9(I0ATerm<0>::value, I0ATerm<1>::value,
				361	I0ATerm<2>::value, I0ATerm<3>::value,
				362	I0ATerm<4>::value, I0ATerm<5>::value,
				363	I0ATerm<6>::value, I0ATerm<7>::value,
				364	I0ATerm<8>::value, y); // (... e) < 1.9e-7
				365	}
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	366	}
				367
Andy Hung	bafa561	2014-04-02 13:52:10 -0700	[diff] [blame^]	368	/* A speed optimized version of the Modified Bessel I0() which incorporates
				369	* the sqrt and numerator multiply and denominator divide into the computation.
				370	* This speeds up filter computation by about 10-15%.
				371	*/
				372	static inline double I0SqrRat(double x2, double num, double den) {
				373	if (x2 < (3.75 * 3.75)) {
				374	return Poly7(I0Term<0>::value, I0Term<1>::value,
				375	I0Term<2>::value, I0Term<3>::value,
				376	I0Term<4>::value, I0Term<5>::value,
				377	I0Term<6>::value, x2) * num / den; // e < 1.6e-7
				378	}
				379	num *= Poly9(-0.13544938430e9, -0.33153754512e8,
				380	-0.19406631946e7, -0.48058318783e5,
				381	-0.63269783360e3, -0.49520779070e1,
				382	-0.24970910370e-1, -0.74741159550e-4,
				383	-0.18257612460e-6, x2); // e < 10^(-7.13).
				384	double y = x2 - 225.; // reflection around 15 (squared)
				385	den *= Poly4(-0.34598737196e8, 0.23852643181e6,
				386	-0.70699387620e3, 0.10000000000e1, y);
				387	return num / den;
				388	}
				389
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	390	/*
				391	* calculates the transition bandwidth for a Kaiser filter
				392	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	393	* Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
				394	* Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	395	*
				396	* @param halfNumCoef is half the number of coefficients per filter phase.
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	397	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	398	* @param stopBandAtten is the stop band attenuation desired.
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	399	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	400	* @return the transition bandwidth in normalized frequency (0 <= f <= 0.5)
				401	*/
				402	static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	403	return (stopBandAtten - 7.95)/((2.14.36)halfNumCoef);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	404	}
				405
				406	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	407	* calculates the fir transfer response of the overall polyphase filter at w.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	408	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	409	* Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the
				410	* fact that h[n] is symmetric (cosines only, no complex arithmetic).
				411	*
				412	* We use Goertzel's algorithm to accelerate the computation to essentially
				413	* a single multiply and 2 adds per filter coefficient h[].
				414	*
				415	* Be careful to consider that h[n] is the overall polyphase filter,
				416	* with L phases, so rescaling H(w)/L is probably what you expect for "unity gain",
				417	* as you only use one of the polyphases at a time.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	418	*/
				419	template <typename T>
				420	static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	421	double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank
				422	coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank).
				423	#if SLOW_FIRTRANSFER
				424	/* Original code for reference. This is equivalent to the code below, but slower. */
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	425	for (int i=1 ; i<=L ; ++i) {
				426	for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
				427	accum += cos(ixw)static_cast<double>(*coef++);
				428	}
				429	}
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	430	#else
				431	/*
				432	* Our overall filter is stored striped by polyphases, not a contiguous h[n].
				433	* We could fetch coefficients in a non-contiguous fashion
				434	* but that will not scale to vector processing.
				435	*
				436	* We apply Goertzel's algorithm directly to each polyphase filter bank instead of
				437	* using cosine generation/multiplication, thereby saving one multiply per inner loop.
				438	*
				439	* See: http://en.wikipedia.org/wiki/Goertzel_algorithm
				440	* Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720.
				441	*
				442	* We use the basic recursion to incorporate the cosine steps into real sequence x[n]:
				443	* s[n] = x[n] + (2cosw)*s[n-1] + s[n-2]
				444	*
				445	* y[n] = s[n] - e^(iw)s[n-1]
				446	* = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k))
				447	* = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk)
				448	*
				449	* The summation contains the frequency steps we want multiplied by the source
				450	* (similar to a DTFT).
				451	*
				452	* Using symmetry, and just the real part (be careful, this must happen
				453	* after any internal complex multiplications), the polyphase filterbank
				454	* transfer function is:
				455	*
				456	* Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0)
				457	* = Re{ e^(iwn + iw_0) y[n]}
				458	* = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1]
				459	*
				460	* using the fact that s[n] of real x[n] is real.
				461	*
				462	*/
				463	double dcos = 2. * cos(L*w);
				464	int start = ((halfNumCoef)*L + 1);
				465	SineGen cc((start - L) * w, w, true); // cosine
				466	SineGen cp(start * w, w, true); // cosine
				467	for (int i=1 ; i<=L ; ++i) {
				468	double sc = 0;
				469	double sp = 0;
				470	for (int j=0 ; j<halfNumCoef ; ++j) {
				471	double tmp = sc;
				472	sc = static_cast<double>(coef++) + dcossc - sp;
				473	sp = tmp;
				474	}
				475	// If we are awfully clever, we can apply Goertzel's algorithm
				476	// again on the sc and sp sequences returned here.
				477	accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp;
				478	}
				479	#endif
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	480	return accum*2.;
				481	}
				482
				483	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	484	* evaluates the minimum and maximum |H(f)| bound in a band region.
				485	*
				486	* This is usually done with equally spaced increments in the target band in question.
				487	* The passband is often very small, and sampled that way. The stopband is often much
				488	* larger.
				489	*
				490	* We use the fact that the overall polyphase filter has an additional bank at the end
				491	* for interpolation; hence it is overspecified for the H(f) computation. Thus the
				492	* first polyphase is never actually checked, excepting its first term.
				493	*
				494	* In this code we use the firTransfer() evaluator above, which uses Goertzel's
				495	* algorithm to calculate the transfer function at each point.
				496	*
				497	* TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal
				498	* spacing is a chirp transform.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	499	*
				500	* @param coef is the designed polyphase filter banks
				501	*
				502	* @param L is the number of phases (for interpolation)
				503	*
				504	* @param halfNumCoef should be half the number of coefficients for a single
				505	* polyphase.
				506	*
				507	* @param fstart is the normalized frequency start.
				508	*
				509	* @param fend is the normalized frequency end.
				510	*
				511	* @param steps is the number of steps to take (sampling) between frequency start and end
				512	*
				513	* @param firMin returns the minimum transfer |H(f)| found
				514	*
				515	* @param firMax returns the maximum transfer |H(f)| found
				516	*
				517	* 0 <= f <= 0.5.
				518	* This is used to test passband and stopband performance.
				519	*/
				520	template <typename T>
				521	static void testFir(const T* coef, int L, int halfNumCoef,
				522	double fstart, double fend, int steps, double &firMin, double &firMax) {
				523	double wstart = fstart(2.M_PI);
				524	double wend = fend(2.M_PI);
				525	double wstep = (wend - wstart)/steps;
				526	double fmax, fmin;
				527	double trf = firTransfer(coef, L, halfNumCoef, wstart);
				528	if (trf<0) {
				529	trf = -trf;
				530	}
				531	fmin = fmax = trf;
				532	wstart += wstep;
				533	for (int i=1; i<steps; ++i) {
				534	trf = firTransfer(coef, L, halfNumCoef, wstart);
				535	if (trf<0) {
				536	trf = -trf;
				537	}
				538	if (trf>fmax) {
				539	fmax = trf;
				540	}
				541	else if (trf<fmin) {
				542	fmin = trf;
				543	}
				544	wstart += wstep;
				545	}
				546	// renormalize - this is only needed for integer filter types
				547	double norm = 1./((1ULL<<(sizeof(T)8-1))L);
				548
				549	firMin = fmin * norm;
				550	firMax = fmax * norm;
				551	}
				552
				553	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	554	* evaluates the |H(f)| lowpass band characteristics.
				555	*
				556	* This function tests the lowpass characteristics for the overall polyphase filter,
				557	* and is used to verify the design. For this case, fp should be set to the
				558	* passband normalized frequency from 0 to 0.5 for the overall filter (thus it
				559	* is the designed polyphase bank value / L). Likewise for fs.
				560	*
				561	* @param coef is the designed polyphase filter banks
				562	*
				563	* @param L is the number of phases (for interpolation)
				564	*
				565	* @param halfNumCoef should be half the number of coefficients for a single
				566	* polyphase.
				567	*
				568	* @param fp is the passband normalized frequency, 0 < fp < fs < 0.5.
				569	*
				570	* @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5.
				571	*
				572	* @param passSteps is the number of passband sampling steps.
				573	*
				574	* @param stopSteps is the number of stopband sampling steps.
				575	*
				576	* @param passMin is the minimum value in the passband
				577	*
				578	* @param passMax is the maximum value in the passband (useful for scaling). This should
				579	* be less than 1., to avoid sine wave test overflow.
				580	*
				581	* @param passRipple is the passband ripple. Typically this should be less than 0.1 for
				582	* an audio filter. Generally speaker/headphone device characteristics will dominate
				583	* the passband term.
				584	*
				585	* @param stopMax is the maximum value in the stopband.
				586	*
				587	* @param stopRipple is the stopband ripple, also known as stopband attenuation.
				588	* Typically this should be greater than ~80dB for low quality, and greater than
				589	* ~100dB for full 16b quality, otherwise aliasing may become noticeable.
				590	*
				591	*/
				592	template <typename T>
				593	static void testFir(const T* coef, int L, int halfNumCoef,
				594	double fp, double fs, int passSteps, int stopSteps,
				595	double &passMin, double &passMax, double &passRipple,
				596	double &stopMax, double &stopRipple) {
				597	double fmin, fmax;
				598	testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax);
				599	double d1 = (fmax - fmin)/2.;
				600	passMin = fmin;
				601	passMax = fmax;
				602	passRipple = -20.*log10(1. - d1); // passband ripple
				603	testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax);
				604	// fmin is really not important for the stopband.
				605	stopMax = fmax;
				606	stopRipple = -20.*log10(fmax); // stopband ripple/attenuation
				607	}
				608
				609	/*
				610	* Calculates the overall polyphase filter based on a windowed sinc function.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	611	*
				612	* The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1
				613	* taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks.
				614	* The last filterbank is used for interpolation purposes (and is mostly composed
				615	* of the first bank shifted by one sample), and is unnecessary if one does
				616	* not do interpolation.
				617	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	618	* We use the last filterbank for some transfer function calculation purposes,
				619	* so it needs to be generated anyways.
				620	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	621	* @param coef is the caller allocated space for coefficients. This should be
				622	* exactly (L+1)*halfNumCoef in size.
				623	*
				624	* @param L is the number of phases (for interpolation)
				625	*
				626	* @param halfNumCoef should be half the number of coefficients for a single
				627	* polyphase.
				628	*
				629	* @param stopBandAtten is the stopband value, should be >50dB.
				630	*
				631	* @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy
				632	* should be 6dB less. (fcr is where the amplitude drops by half). Use the
				633	* firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint
				634	* between the stop band and the pass band (fstop+fpass)/2.
				635	*
				636	* @param atten is the attenuation (generally slightly less than 1).
				637	*/
				638
				639	template <typename T>
				640	static inline void firKaiserGen(T* coef, int L, int halfNumCoef,
				641	double stopBandAtten, double fcr, double atten) {
				642	//
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	643	// Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
				644	// Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	645	//
				646	// See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf
				647	//
				648	// Kaiser window and beta parameter
				649	//
				650	// \| 0.1102*(A - 8.7) A > 50
				651	// beta = \| 0.5842(A - 21)^0.4 + 0.07886(A - 21) 21 <= A <= 50
				652	// \| 0. A < 21
				653	//
				654	// with A is the desired stop-band attenuation in dBFS
				655	//
				656	// 30 dB 2.210
				657	// 40 dB 3.384
				658	// 50 dB 4.538
				659	// 60 dB 5.658
				660	// 70 dB 6.764
				661	// 80 dB 7.865
				662	// 90 dB 8.960
				663	// 100 dB 10.056
				664
				665	const int N = L * halfNumCoef; // non-negative half
				666	const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	667	const double xstep = (2. * M_PI) * fcr / L;
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	668	const double xfrac = 1. / N;
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	669	const double yscale = atten * L / (I0(beta) * M_PI);
Andy Hung	bafa561	2014-04-02 13:52:10 -0700	[diff] [blame^]	670	const double sqrbeta = sqr(beta);
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	671
				672	// We use sine generators, which computes sines on regular step intervals.
				673	// This speeds up overall computation about 40% from computing the sine directly.
				674
				675	SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase)
				676
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	677	for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	678
				679	// computation for a single polyphase of the overall filter.
				680	SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop.
				681	double err = 0; // for noise shaping on int16_t coefficients (over each polyphase)
				682
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	683	for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	684	double y;
				685	if (CC_LIKELY(ix)) {
				686	double x = static_cast<double>(ix);
				687
				688	// sine generator: sg.valueAdvance() returns sin(ix*xstep);
Andy Hung	bafa561	2014-04-02 13:52:10 -0700	[diff] [blame^]	689	// y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
				690	y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x);
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	691	} else {
				692	y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
				693	sg.advance();
				694	}
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	695
				696	// (caution!) float version does not need rounding
				697	if (is_same<T, int16_t>::value) { // int16_t needs noise shaping
				698	coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)8-1), err));
				699	} else {
				700	coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)8-1)));
				701	}
				702	}
				703	}
				704	}
				705
				706	}; // namespace android
				707
				708	#endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/