Blame - services/audioflinger/AudioResamplerFirGen.h - android_frameworks_av

blob: ad18965506287cdf2dad40f9c973ee94aaeb2e59 [file] [log] [blame]

Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2013 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
				18	#define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
				19
Dan Albert	36802bd	2014-11-20 11:31:17 -0800	[diff] [blame]	20	#include "utils/Compat.h"
				21
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	22	namespace android {
				23
				24	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	25	* generates a sine wave at equal steps.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	26	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	27	* As most of our functions use sine or cosine at equal steps,
				28	* it is very efficient to compute them that way (single multiply and subtract),
				29	* rather than invoking the math library sin() or cos() each time.
				30	*
				31	* SineGen uses Goertzel's Algorithm (as a generator not a filter)
				32	* to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep)
				33	* by stepping through 0, 1, ... n.
				34	*
				35	* e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep)
				36	*
				37	* or looking at just the imaginary sine term, as the cosine follows identically:
				38	*
				39	* sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep)
				40	*
				41	* Goertzel's algorithm is more efficient than the angle addition formula,
				42	* e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to
				43	* 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and
				44	* cosine generation due to the complex * complex multiply (full rotation).
				45	*
				46	* See: http://en.wikipedia.org/wiki/Goertzel_algorithm
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	47	*
				48	*/
				49
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	50	class SineGen {
				51	public:
				52	SineGen(double wstart, double wstep, bool cosine = false) {
				53	if (cosine) {
				54	mCurrent = cos(wstart);
				55	mPrevious = cos(wstart - wstep);
				56	} else {
				57	mCurrent = sin(wstart);
				58	mPrevious = sin(wstart - wstep);
				59	}
				60	mTwoCos = 2.*cos(wstep);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	61	}
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	62	SineGen(double expNow, double expPrev, double twoCosStep) {
				63	mCurrent = expNow;
				64	mPrevious = expPrev;
				65	mTwoCos = twoCosStep;
				66	}
				67	inline double value() const {
				68	return mCurrent;
				69	}
				70	inline void advance() {
				71	double tmp = mCurrent;
				72	mCurrent = mCurrent*mTwoCos - mPrevious;
				73	mPrevious = tmp;
				74	}
				75	inline double valueAdvance() {
				76	double tmp = mCurrent;
				77	mCurrent = mCurrent*mTwoCos - mPrevious;
				78	mPrevious = tmp;
				79	return tmp;
				80	}
				81
				82	private:
				83	double mCurrent; // current value of sine/cosine
				84	double mPrevious; // previous value of sine/cosine
				85	double mTwoCos; // stepping factor
				86	};
				87
				88	/*
				89	* generates a series of sine generators, phase offset by fixed steps.
				90	*
				91	* This is used to generate polyphase sine generators, one per polyphase
				92	* in the filter code below.
				93	*
				94	* The SineGen returned by value() starts at innerStart = outerStart + n*outerStep;
				95	* increments by innerStep.
				96	*
				97	*/
				98
				99	class SineGenGen {
				100	public:
				101	SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false)
				102	: mSineInnerCur(outerStart, outerStep, cosine),
				103	mSineInnerPrev(outerStart-innerStep, outerStep, cosine)
				104	{
				105	mTwoCos = 2.*cos(innerStep);
				106	}
				107	inline SineGen value() {
				108	return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos);
				109	}
				110	inline void advance() {
				111	mSineInnerCur.advance();
				112	mSineInnerPrev.advance();
				113	}
				114	inline SineGen valueAdvance() {
				115	return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos);
				116	}
				117
				118	private:
				119	SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep).
				120	SineGen mSineInnerPrev; // generate the inner sine previous values
				121	// (behind by innerStep, stepped by outerStep).
				122	double mTwoCos; // the inner stepping factor for the returned SineGen.
				123	};
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	124
				125	static inline double sqr(double x) {
				126	return x * x;
				127	}
				128
				129	/*
				130	* rounds a double to the nearest integer for FIR coefficients.
				131	*
				132	* One variant uses noise shaping, which must keep error history
				133	* to work (the err parameter, initialized to 0).
				134	* The other variant is a non-noise shaped version for
				135	* S32 coefficients (noise shaping doesn't gain much).
				136	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	137	* Caution: No bounds saturation is applied, but isn't needed in this case.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	138	*
				139	* @param x is the value to round.
				140	*
				141	* @param maxval is the maximum integer scale factor expressed as an int64 (for headroom).
				142	* Typically this may be the maximum positive integer+1 (using the fact that double precision
				143	* FIR coefficients generated here are never that close to 1.0 to pose an overflow condition).
				144	*
				145	* @param err is the previous error (actual - rounded) for the previous rounding op.
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	146	* For 16b coefficients this can improve stopband dB performance by up to 2dB.
				147	*
				148	* Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	149	*
				150	*/
				151
				152	static inline int64_t toint(double x, int64_t maxval, double& err) {
				153	double val = x * maxval;
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	154	double ival = floor(val + 0.5 + err*0.2);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	155	err = val - ival;
				156	return static_cast<int64_t>(ival);
				157	}
				158
				159	static inline int64_t toint(double x, int64_t maxval) {
				160	return static_cast<int64_t>(floor(x * maxval + 0.5));
				161	}
				162
				163	/*
				164	* Modified Bessel function of the first kind
				165	* http://en.wikipedia.org/wiki/Bessel_function
				166	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	167	* The formulas are taken from Abramowitz and Stegun,
				168	* _Handbook of Mathematical Functions_ (links below):
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	169	*
				170	* http://people.math.sfu.ca/~cbm/aands/page_375.htm
				171	* http://people.math.sfu.ca/~cbm/aands/page_378.htm
				172	*
				173	* http://dlmf.nist.gov/10.25
				174	* http://dlmf.nist.gov/10.40
				175	*
				176	* Note we assume x is nonnegative (the function is symmetric,
				177	* pass in the absolute value as needed).
				178	*
				179	* Constants are compile time derived with templates I0Term<> and
				180	* I0ATerm<> to the precision of the compiler. The series can be expanded
				181	* to any precision needed, but currently set around 24b precision.
				182	*
				183	* We use a bit of template math here, constexpr would probably be
				184	* more appropriate for a C++11 compiler.
				185	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	186	* For the intermediate range 3.75 < x < 15, we use minimax polynomial fit.
				187	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	188	*/
				189
				190	template <int N>
				191	struct I0Term {
Dan Albert	36802bd	2014-11-20 11:31:17 -0800	[diff] [blame]	192	static const CONSTEXPR double value = I0Term<N-1>::value / (4. * N * N);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	193	};
				194
				195	template <>
				196	struct I0Term<0> {
Dan Albert	36802bd	2014-11-20 11:31:17 -0800	[diff] [blame]	197	static const CONSTEXPR double value = 1.;
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	198	};
				199
				200	template <int N>
				201	struct I0ATerm {
Dan Albert	36802bd	2014-11-20 11:31:17 -0800	[diff] [blame]	202	static const CONSTEXPR double value = I0ATerm<N-1>::value * (2.N-1.) (2.N-1.) / (8. N);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	203	};
				204
				205	template <>
				206	struct I0ATerm<0> { // 1/sqrt(2*PI);
Glenn Kasten	b187de1	2014-12-30 08:18:15 -0800	[diff] [blame]	207	static const CONSTEXPR double value =
				208	0.398942280401432677939946059934381868475858631164934657665925;
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	209	};
				210
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	211	#if USE_HORNERS_METHOD
				212	/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
				213	* using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method
				214	*
				215	* This has fewer multiplications than Estrin's method below, but has back to back
				216	* floating point dependencies.
				217	*
				218	* On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled.
				219	*/
				220
				221	inline double Poly2(double A, double B, double x) {
				222	return A + x * B;
				223	}
				224
				225	inline double Poly4(double A, double B, double C, double D, double x) {
				226	return A + x * (B + x * (C + x * (D)));
				227	}
				228
				229	inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
				230	double x) {
				231	return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G))))));
				232	}
				233
				234	inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
				235	double H, double I, double x) {
				236	return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I))))))));
				237	}
				238
				239	#else
				240	/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
				241	* using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme
				242	*
				243	* This is typically faster, perhaps gains about 5-10% overall on ARM processors
				244	* over Horner's method above.
				245	*/
				246
				247	inline double Poly2(double A, double B, double x) {
				248	return A + B * x;
				249	}
				250
				251	inline double Poly3(double A, double B, double C, double x, double x2) {
				252	return Poly2(A, B, x) + C * x2;
				253	}
				254
				255	inline double Poly3(double A, double B, double C, double x) {
				256	return Poly2(A, B, x) + C * x * x;
				257	}
				258
				259	inline double Poly4(double A, double B, double C, double D, double x, double x2) {
				260	return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2);
				261	}
				262
				263	inline double Poly4(double A, double B, double C, double D, double x) {
				264	return Poly4(A, B, C, D, x, x * x);
				265	}
				266
				267	inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
				268	double x) {
				269	double x2 = x * x;
				270	return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2);
				271	}
				272
				273	inline double Poly8(double A, double B, double C, double D, double E, double F, double G,
				274	double H, double x, double x2, double x4) {
				275	return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4;
				276	}
				277
				278	inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
				279	double H, double I, double x) {
				280	double x2 = x * x;
				281	#if 1
				282	// It does not seem faster to explicitly decompose Poly8 into Poly4, but
				283	// could depend on compiler floating point scheduling.
				284	double x4 = x2 * x2;
				285	return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4);
				286	#else
				287	double val = Poly4(A, B, C, D, x, x2);
				288	double x4 = x2 * x2;
				289	return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4);
				290	#endif
				291	}
				292	#endif
				293
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	294	static inline double I0(double x) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	295	if (x < 3.75) {
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	296	x *= x;
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	297	return Poly7(I0Term<0>::value, I0Term<1>::value,
				298	I0Term<2>::value, I0Term<3>::value,
				299	I0Term<4>::value, I0Term<5>::value,
				300	I0Term<6>::value, x); // e < 1.6e-7
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	301	}
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	302	if (1) {
				303	/*
				304	* Series expansion coefs are easy to calculate, but are expanded around 0,
				305	* so error is unequal over the interval 0 < x < 3.75, the error being
				306	* significantly better near 0.
				307	*
				308	* A better solution is to use precise minimax polynomial fits.
				309	*
				310	* We use a slightly more complicated solution for 3.75 < x < 15, based on
				311	* the tables in Blair and Edwards, "Stable Rational Minimax Approximations
				312	* to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory,
				313	* AECL-4928.
				314	*
				315	* http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf
				316	*
				317	* See Table 11 for 0 < x < 15; e < 10^(-7.13).
				318	*
				319	* Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b).
				320	*
				321	* This speeds up overall computation by about 40% over using the else clause below,
				322	* which requires sqrt and exp.
				323	*
				324	*/
				325
				326	x *= x;
				327	double num = Poly9(-0.13544938430e9, -0.33153754512e8,
				328	-0.19406631946e7, -0.48058318783e5,
				329	-0.63269783360e3, -0.49520779070e1,
				330	-0.24970910370e-1, -0.74741159550e-4,
				331	-0.18257612460e-6, x);
				332	double y = x - 225.; // reflection around 15 (squared)
				333	double den = Poly4(-0.34598737196e8, 0.23852643181e6,
				334	-0.70699387620e3, 0.10000000000e1, y);
				335	return num / den;
				336
				337	#if IO_EXTENDED_BETA
				338	/* Table 42 for x > 15; e < 10^(-8.11).
				339	* This is used for Beta>15, but is disabled here as
				340	* we never use Beta that high.
				341	*
				342	* NOTE: This should be enabled only for x > 15.
				343	*/
				344
				345	double y = 1./x;
				346	double z = y - (1./15);
				347	double num = Poly2(0.415079861746e1, -0.5149092496e1, z);
				348	double den = Poly3(0.103150763823e2, -0.14181687413e2,
				349	0.1000000000e1, z);
				350	return exp(x) * sqrt(y) * num / den;
				351	#endif
				352	} else {
				353	/*
				354	* NOT USED, but reference for large Beta.
				355	*
				356	* Abramowitz and Stegun asymptotic formula.
				357	* works for x > 3.75.
				358	*/
				359	double y = 1./x;
				360	return exp(x) * sqrt(y) *
				361	// note: reciprocal squareroot may be easier!
				362	// http://en.wikipedia.org/wiki/Fast_inverse_square_root
				363	Poly9(I0ATerm<0>::value, I0ATerm<1>::value,
				364	I0ATerm<2>::value, I0ATerm<3>::value,
				365	I0ATerm<4>::value, I0ATerm<5>::value,
				366	I0ATerm<6>::value, I0ATerm<7>::value,
				367	I0ATerm<8>::value, y); // (... e) < 1.9e-7
				368	}
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	369	}
				370
Andy Hung	bafa561	2014-04-02 13:52:10 -0700	[diff] [blame]	371	/* A speed optimized version of the Modified Bessel I0() which incorporates
				372	* the sqrt and numerator multiply and denominator divide into the computation.
				373	* This speeds up filter computation by about 10-15%.
				374	*/
				375	static inline double I0SqrRat(double x2, double num, double den) {
				376	if (x2 < (3.75 * 3.75)) {
				377	return Poly7(I0Term<0>::value, I0Term<1>::value,
				378	I0Term<2>::value, I0Term<3>::value,
				379	I0Term<4>::value, I0Term<5>::value,
				380	I0Term<6>::value, x2) * num / den; // e < 1.6e-7
				381	}
				382	num *= Poly9(-0.13544938430e9, -0.33153754512e8,
				383	-0.19406631946e7, -0.48058318783e5,
				384	-0.63269783360e3, -0.49520779070e1,
				385	-0.24970910370e-1, -0.74741159550e-4,
				386	-0.18257612460e-6, x2); // e < 10^(-7.13).
				387	double y = x2 - 225.; // reflection around 15 (squared)
				388	den *= Poly4(-0.34598737196e8, 0.23852643181e6,
				389	-0.70699387620e3, 0.10000000000e1, y);
				390	return num / den;
				391	}
				392
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	393	/*
				394	* calculates the transition bandwidth for a Kaiser filter
				395	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	396	* Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
				397	* Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	398	*
				399	* @param halfNumCoef is half the number of coefficients per filter phase.
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	400	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	401	* @param stopBandAtten is the stop band attenuation desired.
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	402	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	403	* @return the transition bandwidth in normalized frequency (0 <= f <= 0.5)
				404	*/
				405	static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	406	return (stopBandAtten - 7.95)/((2.14.36)halfNumCoef);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	407	}
				408
				409	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	410	* calculates the fir transfer response of the overall polyphase filter at w.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	411	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	412	* Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the
				413	* fact that h[n] is symmetric (cosines only, no complex arithmetic).
				414	*
				415	* We use Goertzel's algorithm to accelerate the computation to essentially
				416	* a single multiply and 2 adds per filter coefficient h[].
				417	*
				418	* Be careful be careful to consider that h[n] is the overall polyphase filter,
				419	* with L phases, so rescaling H(w)/L is probably what you expect for "unity gain",
				420	* as you only use one of the polyphases at a time.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	421	*/
				422	template <typename T>
				423	static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	424	double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank
				425	coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank).
				426	#if SLOW_FIRTRANSFER
				427	/* Original code for reference. This is equivalent to the code below, but slower. */
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	428	for (int i=1 ; i<=L ; ++i) {
				429	for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
				430	accum += cos(ixw)static_cast<double>(*coef++);
				431	}
				432	}
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	433	#else
				434	/*
				435	* Our overall filter is stored striped by polyphases, not a contiguous h[n].
				436	* We could fetch coefficients in a non-contiguous fashion
				437	* but that will not scale to vector processing.
				438	*
				439	* We apply Goertzel's algorithm directly to each polyphase filter bank instead of
				440	* using cosine generation/multiplication, thereby saving one multiply per inner loop.
				441	*
				442	* See: http://en.wikipedia.org/wiki/Goertzel_algorithm
				443	* Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720.
				444	*
				445	* We use the basic recursion to incorporate the cosine steps into real sequence x[n]:
				446	* s[n] = x[n] + (2cosw)*s[n-1] + s[n-2]
				447	*
				448	* y[n] = s[n] - e^(iw)s[n-1]
				449	* = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k))
				450	* = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk)
				451	*
				452	* The summation contains the frequency steps we want multiplied by the source
				453	* (similar to a DTFT).
				454	*
				455	* Using symmetry, and just the real part (be careful, this must happen
				456	* after any internal complex multiplications), the polyphase filterbank
				457	* transfer function is:
				458	*
				459	* Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0)
				460	* = Re{ e^(iwn + iw_0) y[n]}
				461	* = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1]
				462	*
				463	* using the fact that s[n] of real x[n] is real.
				464	*
				465	*/
				466	double dcos = 2. * cos(L*w);
				467	int start = ((halfNumCoef)*L + 1);
				468	SineGen cc((start - L) * w, w, true); // cosine
				469	SineGen cp(start * w, w, true); // cosine
				470	for (int i=1 ; i<=L ; ++i) {
				471	double sc = 0;
				472	double sp = 0;
				473	for (int j=0 ; j<halfNumCoef ; ++j) {
				474	double tmp = sc;
				475	sc = static_cast<double>(coef++) + dcossc - sp;
				476	sp = tmp;
				477	}
				478	// If we are awfully clever, we can apply Goertzel's algorithm
				479	// again on the sc and sp sequences returned here.
				480	accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp;
				481	}
				482	#endif
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	483	return accum*2.;
				484	}
				485
				486	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	487	* evaluates the minimum and maximum \|H(f)\| bound in a band region.
				488	*
				489	* This is usually done with equally spaced increments in the target band in question.
				490	* The passband is often very small, and sampled that way. The stopband is often much
				491	* larger.
				492	*
				493	* We use the fact that the overall polyphase filter has an additional bank at the end
				494	* for interpolation; hence it is overspecified for the H(f) computation. Thus the
				495	* first polyphase is never actually checked, excepting its first term.
				496	*
				497	* In this code we use the firTransfer() evaluator above, which uses Goertzel's
				498	* algorithm to calculate the transfer function at each point.
				499	*
				500	* TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal
				501	* spacing is a chirp transform.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	502	*
				503	* @param coef is the designed polyphase filter banks
				504	*
				505	* @param L is the number of phases (for interpolation)
				506	*
				507	* @param halfNumCoef should be half the number of coefficients for a single
				508	* polyphase.
				509	*
				510	* @param fstart is the normalized frequency start.
				511	*
				512	* @param fend is the normalized frequency end.
				513	*
				514	* @param steps is the number of steps to take (sampling) between frequency start and end
				515	*
				516	* @param firMin returns the minimum transfer \|H(f)\| found
				517	*
				518	* @param firMax returns the maximum transfer \|H(f)\| found
				519	*
				520	* 0 <= f <= 0.5.
				521	* This is used to test passband and stopband performance.
				522	*/
				523	template <typename T>
				524	static void testFir(const T* coef, int L, int halfNumCoef,
				525	double fstart, double fend, int steps, double &firMin, double &firMax) {
				526	double wstart = fstart(2.M_PI);
				527	double wend = fend(2.M_PI);
				528	double wstep = (wend - wstart)/steps;
				529	double fmax, fmin;
				530	double trf = firTransfer(coef, L, halfNumCoef, wstart);
				531	if (trf<0) {
				532	trf = -trf;
				533	}
				534	fmin = fmax = trf;
				535	wstart += wstep;
				536	for (int i=1; i<steps; ++i) {
				537	trf = firTransfer(coef, L, halfNumCoef, wstart);
				538	if (trf<0) {
				539	trf = -trf;
				540	}
				541	if (trf>fmax) {
				542	fmax = trf;
				543	}
				544	else if (trf<fmin) {
				545	fmin = trf;
				546	}
				547	wstart += wstep;
				548	}
				549	// renormalize - this is only needed for integer filter types
				550	double norm = 1./((1ULL<<(sizeof(T)8-1))L);
				551
				552	firMin = fmin * norm;
				553	firMax = fmax * norm;
				554	}
				555
				556	/*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	557	* evaluates the \|H(f)\| lowpass band characteristics.
				558	*
				559	* This function tests the lowpass characteristics for the overall polyphase filter,
				560	* and is used to verify the design. For this case, fp should be set to the
				561	* passband normalized frequency from 0 to 0.5 for the overall filter (thus it
				562	* is the designed polyphase bank value / L). Likewise for fs.
				563	*
				564	* @param coef is the designed polyphase filter banks
				565	*
				566	* @param L is the number of phases (for interpolation)
				567	*
				568	* @param halfNumCoef should be half the number of coefficients for a single
				569	* polyphase.
				570	*
				571	* @param fp is the passband normalized frequency, 0 < fp < fs < 0.5.
				572	*
				573	* @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5.
				574	*
				575	* @param passSteps is the number of passband sampling steps.
				576	*
				577	* @param stopSteps is the number of stopband sampling steps.
				578	*
				579	* @param passMin is the minimum value in the passband
				580	*
				581	* @param passMax is the maximum value in the passband (useful for scaling). This should
				582	* be less than 1., to avoid sine wave test overflow.
				583	*
				584	* @param passRipple is the passband ripple. Typically this should be less than 0.1 for
				585	* an audio filter. Generally speaker/headphone device characteristics will dominate
				586	* the passband term.
				587	*
				588	* @param stopMax is the maximum value in the stopband.
				589	*
				590	* @param stopRipple is the stopband ripple, also known as stopband attenuation.
				591	* Typically this should be greater than ~80dB for low quality, and greater than
				592	* ~100dB for full 16b quality, otherwise aliasing may become noticeable.
				593	*
				594	*/
				595	template <typename T>
				596	static void testFir(const T* coef, int L, int halfNumCoef,
				597	double fp, double fs, int passSteps, int stopSteps,
				598	double &passMin, double &passMax, double &passRipple,
				599	double &stopMax, double &stopRipple) {
				600	double fmin, fmax;
				601	testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax);
				602	double d1 = (fmax - fmin)/2.;
				603	passMin = fmin;
				604	passMax = fmax;
				605	passRipple = -20.*log10(1. - d1); // passband ripple
				606	testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax);
				607	// fmin is really not important for the stopband.
				608	stopMax = fmax;
				609	stopRipple = -20.*log10(fmax); // stopband ripple/attenuation
				610	}
				611
				612	/*
				613	* Calculates the overall polyphase filter based on a windowed sinc function.
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	614	*
				615	* The windowed sinc is an odd length symmetric filter of exactly LhalfNumCoef2+1
				616	* taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks.
				617	* The last filterbank is used for interpolation purposes (and is mostly composed
				618	* of the first bank shifted by one sample), and is unnecessary if one does
				619	* not do interpolation.
				620	*
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	621	* We use the last filterbank for some transfer function calculation purposes,
				622	* so it needs to be generated anyways.
				623	*
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	624	* @param coef is the caller allocated space for coefficients. This should be
				625	* exactly (L+1)*halfNumCoef in size.
				626	*
				627	* @param L is the number of phases (for interpolation)
				628	*
				629	* @param halfNumCoef should be half the number of coefficients for a single
				630	* polyphase.
				631	*
				632	* @param stopBandAtten is the stopband value, should be >50dB.
				633	*
				634	* @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy
				635	* should be 6dB less. (fcr is where the amplitude drops by half). Use the
				636	* firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint
				637	* between the stop band and the pass band (fstop+fpass)/2.
				638	*
				639	* @param atten is the attenuation (generally slightly less than 1).
				640	*/
				641
				642	template <typename T>
				643	static inline void firKaiserGen(T* coef, int L, int halfNumCoef,
				644	double stopBandAtten, double fcr, double atten) {
				645	//
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	646	// Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
				647	// Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	648	//
				649	// See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf
				650	//
				651	// Kaiser window and beta parameter
				652	//
				653	// \| 0.1102*(A - 8.7) A > 50
				654	// beta = \| 0.5842(A - 21)^0.4 + 0.07886(A - 21) 21 <= A <= 50
				655	// \| 0. A < 21
				656	//
				657	// with A is the desired stop-band attenuation in dBFS
				658	//
				659	// 30 dB 2.210
				660	// 40 dB 3.384
				661	// 50 dB 4.538
				662	// 60 dB 5.658
				663	// 70 dB 6.764
				664	// 80 dB 7.865
				665	// 90 dB 8.960
				666	// 100 dB 10.056
				667
				668	const int N = L * halfNumCoef; // non-negative half
				669	const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	670	const double xstep = (2. * M_PI) * fcr / L;
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	671	const double xfrac = 1. / N;
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	672	const double yscale = atten * L / (I0(beta) * M_PI);
Andy Hung	bafa561	2014-04-02 13:52:10 -0700	[diff] [blame]	673	const double sqrbeta = sqr(beta);
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	674
				675	// We use sine generators, which computes sines on regular step intervals.
				676	// This speeds up overall computation about 40% from computing the sine directly.
				677
				678	SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase)
				679
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	680	for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	681
				682	// computation for a single polyphase of the overall filter.
				683	SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop.
				684	double err = 0; // for noise shaping on int16_t coefficients (over each polyphase)
				685
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	686	for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	687	double y;
				688	if (CC_LIKELY(ix)) {
				689	double x = static_cast<double>(ix);
				690
				691	// sine generator: sg.valueAdvance() returns sin(ix*xstep);
Andy Hung	bafa561	2014-04-02 13:52:10 -0700	[diff] [blame]	692	// y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
				693	y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x);
Andy Hung	6582f2b	2014-01-03 12:30:41 -0800	[diff] [blame]	694	} else {
				695	y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
				696	sg.advance();
				697	}
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	698
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	699	if (is_same<T, int16_t>::value) { // int16_t needs noise shaping
				700	coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)8-1), err));
Andy Hung	430b61c	2014-04-08 18:10:02 -0700	[diff] [blame]	701	} else if (is_same<T, int32_t>::value) {
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	702	coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)8-1)));
Andy Hung	430b61c	2014-04-08 18:10:02 -0700	[diff] [blame]	703	} else { // assumed float or double
				704	*coef++ = static_cast<T>(y);
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	705	}
				706	}
				707	}
				708	}
				709
Glenn Kasten	63238ef	2015-03-02 15:50:29 -0800	[diff] [blame^]	710	} // namespace android
Andy Hung	86eae0e	2013-12-09 12:12:46 -0800	[diff] [blame]	711
				712	#endif /ANDROID_AUDIO_RESAMPLER_FIR_GEN_H/