blob: ad18965506287cdf2dad40f9c973ee94aaeb2e59 [file] [log] [blame]
Andy Hung86eae0e2013-12-09 12:12:46 -08001/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
18#define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
19
Dan Albert36802bd2014-11-20 11:31:17 -080020#include "utils/Compat.h"
21
Andy Hung86eae0e2013-12-09 12:12:46 -080022namespace android {
23
24/*
Andy Hung6582f2b2014-01-03 12:30:41 -080025 * generates a sine wave at equal steps.
Andy Hung86eae0e2013-12-09 12:12:46 -080026 *
Andy Hung6582f2b2014-01-03 12:30:41 -080027 * As most of our functions use sine or cosine at equal steps,
28 * it is very efficient to compute them that way (single multiply and subtract),
29 * rather than invoking the math library sin() or cos() each time.
30 *
31 * SineGen uses Goertzel's Algorithm (as a generator not a filter)
32 * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep)
33 * by stepping through 0, 1, ... n.
34 *
35 * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep)
36 *
37 * or looking at just the imaginary sine term, as the cosine follows identically:
38 *
39 * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep)
40 *
41 * Goertzel's algorithm is more efficient than the angle addition formula,
42 * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to
43 * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and
44 * cosine generation due to the complex * complex multiply (full rotation).
45 *
46 * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
Andy Hung86eae0e2013-12-09 12:12:46 -080047 *
48 */
49
Andy Hung6582f2b2014-01-03 12:30:41 -080050class SineGen {
51public:
52 SineGen(double wstart, double wstep, bool cosine = false) {
53 if (cosine) {
54 mCurrent = cos(wstart);
55 mPrevious = cos(wstart - wstep);
56 } else {
57 mCurrent = sin(wstart);
58 mPrevious = sin(wstart - wstep);
59 }
60 mTwoCos = 2.*cos(wstep);
Andy Hung86eae0e2013-12-09 12:12:46 -080061 }
Andy Hung6582f2b2014-01-03 12:30:41 -080062 SineGen(double expNow, double expPrev, double twoCosStep) {
63 mCurrent = expNow;
64 mPrevious = expPrev;
65 mTwoCos = twoCosStep;
66 }
67 inline double value() const {
68 return mCurrent;
69 }
70 inline void advance() {
71 double tmp = mCurrent;
72 mCurrent = mCurrent*mTwoCos - mPrevious;
73 mPrevious = tmp;
74 }
75 inline double valueAdvance() {
76 double tmp = mCurrent;
77 mCurrent = mCurrent*mTwoCos - mPrevious;
78 mPrevious = tmp;
79 return tmp;
80 }
81
82private:
83 double mCurrent; // current value of sine/cosine
84 double mPrevious; // previous value of sine/cosine
85 double mTwoCos; // stepping factor
86};
87
88/*
89 * generates a series of sine generators, phase offset by fixed steps.
90 *
91 * This is used to generate polyphase sine generators, one per polyphase
92 * in the filter code below.
93 *
94 * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep;
95 * increments by innerStep.
96 *
97 */
98
99class SineGenGen {
100public:
101 SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false)
102 : mSineInnerCur(outerStart, outerStep, cosine),
103 mSineInnerPrev(outerStart-innerStep, outerStep, cosine)
104 {
105 mTwoCos = 2.*cos(innerStep);
106 }
107 inline SineGen value() {
108 return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos);
109 }
110 inline void advance() {
111 mSineInnerCur.advance();
112 mSineInnerPrev.advance();
113 }
114 inline SineGen valueAdvance() {
115 return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos);
116 }
117
118private:
119 SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep).
120 SineGen mSineInnerPrev; // generate the inner sine previous values
121 // (behind by innerStep, stepped by outerStep).
122 double mTwoCos; // the inner stepping factor for the returned SineGen.
123};
Andy Hung86eae0e2013-12-09 12:12:46 -0800124
125static inline double sqr(double x) {
126 return x * x;
127}
128
129/*
130 * rounds a double to the nearest integer for FIR coefficients.
131 *
132 * One variant uses noise shaping, which must keep error history
133 * to work (the err parameter, initialized to 0).
134 * The other variant is a non-noise shaped version for
135 * S32 coefficients (noise shaping doesn't gain much).
136 *
Andy Hung6582f2b2014-01-03 12:30:41 -0800137 * Caution: No bounds saturation is applied, but isn't needed in this case.
Andy Hung86eae0e2013-12-09 12:12:46 -0800138 *
139 * @param x is the value to round.
140 *
141 * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom).
142 * Typically this may be the maximum positive integer+1 (using the fact that double precision
143 * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition).
144 *
145 * @param err is the previous error (actual - rounded) for the previous rounding op.
Andy Hung6582f2b2014-01-03 12:30:41 -0800146 * For 16b coefficients this can improve stopband dB performance by up to 2dB.
147 *
148 * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping
Andy Hung86eae0e2013-12-09 12:12:46 -0800149 *
150 */
151
152static inline int64_t toint(double x, int64_t maxval, double& err) {
153 double val = x * maxval;
Andy Hung6582f2b2014-01-03 12:30:41 -0800154 double ival = floor(val + 0.5 + err*0.2);
Andy Hung86eae0e2013-12-09 12:12:46 -0800155 err = val - ival;
156 return static_cast<int64_t>(ival);
157}
158
159static inline int64_t toint(double x, int64_t maxval) {
160 return static_cast<int64_t>(floor(x * maxval + 0.5));
161}
162
163/*
164 * Modified Bessel function of the first kind
165 * http://en.wikipedia.org/wiki/Bessel_function
166 *
Andy Hung6582f2b2014-01-03 12:30:41 -0800167 * The formulas are taken from Abramowitz and Stegun,
168 * _Handbook of Mathematical Functions_ (links below):
Andy Hung86eae0e2013-12-09 12:12:46 -0800169 *
170 * http://people.math.sfu.ca/~cbm/aands/page_375.htm
171 * http://people.math.sfu.ca/~cbm/aands/page_378.htm
172 *
173 * http://dlmf.nist.gov/10.25
174 * http://dlmf.nist.gov/10.40
175 *
176 * Note we assume x is nonnegative (the function is symmetric,
177 * pass in the absolute value as needed).
178 *
179 * Constants are compile time derived with templates I0Term<> and
180 * I0ATerm<> to the precision of the compiler. The series can be expanded
181 * to any precision needed, but currently set around 24b precision.
182 *
183 * We use a bit of template math here, constexpr would probably be
184 * more appropriate for a C++11 compiler.
185 *
Andy Hung6582f2b2014-01-03 12:30:41 -0800186 * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit.
187 *
Andy Hung86eae0e2013-12-09 12:12:46 -0800188 */
189
190template <int N>
191struct I0Term {
Dan Albert36802bd2014-11-20 11:31:17 -0800192 static const CONSTEXPR double value = I0Term<N-1>::value / (4. * N * N);
Andy Hung86eae0e2013-12-09 12:12:46 -0800193};
194
195template <>
196struct I0Term<0> {
Dan Albert36802bd2014-11-20 11:31:17 -0800197 static const CONSTEXPR double value = 1.;
Andy Hung86eae0e2013-12-09 12:12:46 -0800198};
199
200template <int N>
201struct I0ATerm {
Dan Albert36802bd2014-11-20 11:31:17 -0800202 static const CONSTEXPR double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N);
Andy Hung86eae0e2013-12-09 12:12:46 -0800203};
204
205template <>
206struct I0ATerm<0> { // 1/sqrt(2*PI);
Glenn Kastenb187de12014-12-30 08:18:15 -0800207 static const CONSTEXPR double value =
208 0.398942280401432677939946059934381868475858631164934657665925;
Andy Hung86eae0e2013-12-09 12:12:46 -0800209};
210
Andy Hung6582f2b2014-01-03 12:30:41 -0800211#if USE_HORNERS_METHOD
212/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
213 * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method
214 *
215 * This has fewer multiplications than Estrin's method below, but has back to back
216 * floating point dependencies.
217 *
218 * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled.
219 */
220
221inline double Poly2(double A, double B, double x) {
222 return A + x * B;
223}
224
225inline double Poly4(double A, double B, double C, double D, double x) {
226 return A + x * (B + x * (C + x * (D)));
227}
228
229inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
230 double x) {
231 return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G))))));
232}
233
234inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
235 double H, double I, double x) {
236 return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I))))))));
237}
238
239#else
240/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
241 * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme
242 *
243 * This is typically faster, perhaps gains about 5-10% overall on ARM processors
244 * over Horner's method above.
245 */
246
247inline double Poly2(double A, double B, double x) {
248 return A + B * x;
249}
250
251inline double Poly3(double A, double B, double C, double x, double x2) {
252 return Poly2(A, B, x) + C * x2;
253}
254
255inline double Poly3(double A, double B, double C, double x) {
256 return Poly2(A, B, x) + C * x * x;
257}
258
259inline double Poly4(double A, double B, double C, double D, double x, double x2) {
260 return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2);
261}
262
263inline double Poly4(double A, double B, double C, double D, double x) {
264 return Poly4(A, B, C, D, x, x * x);
265}
266
267inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
268 double x) {
269 double x2 = x * x;
270 return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2);
271}
272
273inline double Poly8(double A, double B, double C, double D, double E, double F, double G,
274 double H, double x, double x2, double x4) {
275 return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4;
276}
277
278inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
279 double H, double I, double x) {
280 double x2 = x * x;
281#if 1
282 // It does not seem faster to explicitly decompose Poly8 into Poly4, but
283 // could depend on compiler floating point scheduling.
284 double x4 = x2 * x2;
285 return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4);
286#else
287 double val = Poly4(A, B, C, D, x, x2);
288 double x4 = x2 * x2;
289 return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4);
290#endif
291}
292#endif
293
Andy Hung86eae0e2013-12-09 12:12:46 -0800294static inline double I0(double x) {
Andy Hung6582f2b2014-01-03 12:30:41 -0800295 if (x < 3.75) {
Andy Hung86eae0e2013-12-09 12:12:46 -0800296 x *= x;
Andy Hung6582f2b2014-01-03 12:30:41 -0800297 return Poly7(I0Term<0>::value, I0Term<1>::value,
298 I0Term<2>::value, I0Term<3>::value,
299 I0Term<4>::value, I0Term<5>::value,
300 I0Term<6>::value, x); // e < 1.6e-7
Andy Hung86eae0e2013-12-09 12:12:46 -0800301 }
Andy Hung6582f2b2014-01-03 12:30:41 -0800302 if (1) {
303 /*
304 * Series expansion coefs are easy to calculate, but are expanded around 0,
305 * so error is unequal over the interval 0 < x < 3.75, the error being
306 * significantly better near 0.
307 *
308 * A better solution is to use precise minimax polynomial fits.
309 *
310 * We use a slightly more complicated solution for 3.75 < x < 15, based on
311 * the tables in Blair and Edwards, "Stable Rational Minimax Approximations
312 * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory,
313 * AECL-4928.
314 *
315 * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf
316 *
317 * See Table 11 for 0 < x < 15; e < 10^(-7.13).
318 *
319 * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b).
320 *
321 * This speeds up overall computation by about 40% over using the else clause below,
322 * which requires sqrt and exp.
323 *
324 */
325
326 x *= x;
327 double num = Poly9(-0.13544938430e9, -0.33153754512e8,
328 -0.19406631946e7, -0.48058318783e5,
329 -0.63269783360e3, -0.49520779070e1,
330 -0.24970910370e-1, -0.74741159550e-4,
331 -0.18257612460e-6, x);
332 double y = x - 225.; // reflection around 15 (squared)
333 double den = Poly4(-0.34598737196e8, 0.23852643181e6,
334 -0.70699387620e3, 0.10000000000e1, y);
335 return num / den;
336
337#if IO_EXTENDED_BETA
338 /* Table 42 for x > 15; e < 10^(-8.11).
339 * This is used for Beta>15, but is disabled here as
340 * we never use Beta that high.
341 *
342 * NOTE: This should be enabled only for x > 15.
343 */
344
345 double y = 1./x;
346 double z = y - (1./15);
347 double num = Poly2(0.415079861746e1, -0.5149092496e1, z);
348 double den = Poly3(0.103150763823e2, -0.14181687413e2,
349 0.1000000000e1, z);
350 return exp(x) * sqrt(y) * num / den;
351#endif
352 } else {
353 /*
354 * NOT USED, but reference for large Beta.
355 *
356 * Abramowitz and Stegun asymptotic formula.
357 * works for x > 3.75.
358 */
359 double y = 1./x;
360 return exp(x) * sqrt(y) *
361 // note: reciprocal squareroot may be easier!
362 // http://en.wikipedia.org/wiki/Fast_inverse_square_root
363 Poly9(I0ATerm<0>::value, I0ATerm<1>::value,
364 I0ATerm<2>::value, I0ATerm<3>::value,
365 I0ATerm<4>::value, I0ATerm<5>::value,
366 I0ATerm<6>::value, I0ATerm<7>::value,
367 I0ATerm<8>::value, y); // (... e) < 1.9e-7
368 }
Andy Hung86eae0e2013-12-09 12:12:46 -0800369}
370
Andy Hungbafa5612014-04-02 13:52:10 -0700371/* A speed optimized version of the Modified Bessel I0() which incorporates
372 * the sqrt and numerator multiply and denominator divide into the computation.
373 * This speeds up filter computation by about 10-15%.
374 */
375static inline double I0SqrRat(double x2, double num, double den) {
376 if (x2 < (3.75 * 3.75)) {
377 return Poly7(I0Term<0>::value, I0Term<1>::value,
378 I0Term<2>::value, I0Term<3>::value,
379 I0Term<4>::value, I0Term<5>::value,
380 I0Term<6>::value, x2) * num / den; // e < 1.6e-7
381 }
382 num *= Poly9(-0.13544938430e9, -0.33153754512e8,
383 -0.19406631946e7, -0.48058318783e5,
384 -0.63269783360e3, -0.49520779070e1,
385 -0.24970910370e-1, -0.74741159550e-4,
386 -0.18257612460e-6, x2); // e < 10^(-7.13).
387 double y = x2 - 225.; // reflection around 15 (squared)
388 den *= Poly4(-0.34598737196e8, 0.23852643181e6,
389 -0.70699387620e3, 0.10000000000e1, y);
390 return num / den;
391}
392
Andy Hung86eae0e2013-12-09 12:12:46 -0800393/*
394 * calculates the transition bandwidth for a Kaiser filter
395 *
Andy Hung6582f2b2014-01-03 12:30:41 -0800396 * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
397 * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
Andy Hung86eae0e2013-12-09 12:12:46 -0800398 *
399 * @param halfNumCoef is half the number of coefficients per filter phase.
Andy Hung6582f2b2014-01-03 12:30:41 -0800400 *
Andy Hung86eae0e2013-12-09 12:12:46 -0800401 * @param stopBandAtten is the stop band attenuation desired.
Andy Hung6582f2b2014-01-03 12:30:41 -0800402 *
Andy Hung86eae0e2013-12-09 12:12:46 -0800403 * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5)
404 */
405static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) {
Andy Hung6582f2b2014-01-03 12:30:41 -0800406 return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef);
Andy Hung86eae0e2013-12-09 12:12:46 -0800407}
408
409/*
Andy Hung6582f2b2014-01-03 12:30:41 -0800410 * calculates the fir transfer response of the overall polyphase filter at w.
Andy Hung86eae0e2013-12-09 12:12:46 -0800411 *
Andy Hung6582f2b2014-01-03 12:30:41 -0800412 * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the
413 * fact that h[n] is symmetric (cosines only, no complex arithmetic).
414 *
415 * We use Goertzel's algorithm to accelerate the computation to essentially
416 * a single multiply and 2 adds per filter coefficient h[].
417 *
418 * Be careful be careful to consider that h[n] is the overall polyphase filter,
419 * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain",
420 * as you only use one of the polyphases at a time.
Andy Hung86eae0e2013-12-09 12:12:46 -0800421 */
422template <typename T>
423static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) {
Andy Hung6582f2b2014-01-03 12:30:41 -0800424 double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank
425 coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank).
426#if SLOW_FIRTRANSFER
427 /* Original code for reference. This is equivalent to the code below, but slower. */
Andy Hung86eae0e2013-12-09 12:12:46 -0800428 for (int i=1 ; i<=L ; ++i) {
429 for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
430 accum += cos(ix*w)*static_cast<double>(*coef++);
431 }
432 }
Andy Hung6582f2b2014-01-03 12:30:41 -0800433#else
434 /*
435 * Our overall filter is stored striped by polyphases, not a contiguous h[n].
436 * We could fetch coefficients in a non-contiguous fashion
437 * but that will not scale to vector processing.
438 *
439 * We apply Goertzel's algorithm directly to each polyphase filter bank instead of
440 * using cosine generation/multiplication, thereby saving one multiply per inner loop.
441 *
442 * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
443 * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720.
444 *
445 * We use the basic recursion to incorporate the cosine steps into real sequence x[n]:
446 * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2]
447 *
448 * y[n] = s[n] - e^(iw)s[n-1]
449 * = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k))
450 * = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk)
451 *
452 * The summation contains the frequency steps we want multiplied by the source
453 * (similar to a DTFT).
454 *
455 * Using symmetry, and just the real part (be careful, this must happen
456 * after any internal complex multiplications), the polyphase filterbank
457 * transfer function is:
458 *
459 * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0)
460 * = Re{ e^(iwn + iw_0) y[n]}
461 * = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1]
462 *
463 * using the fact that s[n] of real x[n] is real.
464 *
465 */
466 double dcos = 2. * cos(L*w);
467 int start = ((halfNumCoef)*L + 1);
468 SineGen cc((start - L) * w, w, true); // cosine
469 SineGen cp(start * w, w, true); // cosine
470 for (int i=1 ; i<=L ; ++i) {
471 double sc = 0;
472 double sp = 0;
473 for (int j=0 ; j<halfNumCoef ; ++j) {
474 double tmp = sc;
475 sc = static_cast<double>(*coef++) + dcos*sc - sp;
476 sp = tmp;
477 }
478 // If we are awfully clever, we can apply Goertzel's algorithm
479 // again on the sc and sp sequences returned here.
480 accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp;
481 }
482#endif
Andy Hung86eae0e2013-12-09 12:12:46 -0800483 return accum*2.;
484}
485
486/*
Andy Hung6582f2b2014-01-03 12:30:41 -0800487 * evaluates the minimum and maximum |H(f)| bound in a band region.
488 *
489 * This is usually done with equally spaced increments in the target band in question.
490 * The passband is often very small, and sampled that way. The stopband is often much
491 * larger.
492 *
493 * We use the fact that the overall polyphase filter has an additional bank at the end
494 * for interpolation; hence it is overspecified for the H(f) computation. Thus the
495 * first polyphase is never actually checked, excepting its first term.
496 *
497 * In this code we use the firTransfer() evaluator above, which uses Goertzel's
498 * algorithm to calculate the transfer function at each point.
499 *
500 * TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal
501 * spacing is a chirp transform.
Andy Hung86eae0e2013-12-09 12:12:46 -0800502 *
503 * @param coef is the designed polyphase filter banks
504 *
505 * @param L is the number of phases (for interpolation)
506 *
507 * @param halfNumCoef should be half the number of coefficients for a single
508 * polyphase.
509 *
510 * @param fstart is the normalized frequency start.
511 *
512 * @param fend is the normalized frequency end.
513 *
514 * @param steps is the number of steps to take (sampling) between frequency start and end
515 *
516 * @param firMin returns the minimum transfer |H(f)| found
517 *
518 * @param firMax returns the maximum transfer |H(f)| found
519 *
520 * 0 <= f <= 0.5.
521 * This is used to test passband and stopband performance.
522 */
523template <typename T>
524static void testFir(const T* coef, int L, int halfNumCoef,
525 double fstart, double fend, int steps, double &firMin, double &firMax) {
526 double wstart = fstart*(2.*M_PI);
527 double wend = fend*(2.*M_PI);
528 double wstep = (wend - wstart)/steps;
529 double fmax, fmin;
530 double trf = firTransfer(coef, L, halfNumCoef, wstart);
531 if (trf<0) {
532 trf = -trf;
533 }
534 fmin = fmax = trf;
535 wstart += wstep;
536 for (int i=1; i<steps; ++i) {
537 trf = firTransfer(coef, L, halfNumCoef, wstart);
538 if (trf<0) {
539 trf = -trf;
540 }
541 if (trf>fmax) {
542 fmax = trf;
543 }
544 else if (trf<fmin) {
545 fmin = trf;
546 }
547 wstart += wstep;
548 }
549 // renormalize - this is only needed for integer filter types
550 double norm = 1./((1ULL<<(sizeof(T)*8-1))*L);
551
552 firMin = fmin * norm;
553 firMax = fmax * norm;
554}
555
556/*
Andy Hung6582f2b2014-01-03 12:30:41 -0800557 * evaluates the |H(f)| lowpass band characteristics.
558 *
559 * This function tests the lowpass characteristics for the overall polyphase filter,
560 * and is used to verify the design. For this case, fp should be set to the
561 * passband normalized frequency from 0 to 0.5 for the overall filter (thus it
562 * is the designed polyphase bank value / L). Likewise for fs.
563 *
564 * @param coef is the designed polyphase filter banks
565 *
566 * @param L is the number of phases (for interpolation)
567 *
568 * @param halfNumCoef should be half the number of coefficients for a single
569 * polyphase.
570 *
571 * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5.
572 *
573 * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5.
574 *
575 * @param passSteps is the number of passband sampling steps.
576 *
577 * @param stopSteps is the number of stopband sampling steps.
578 *
579 * @param passMin is the minimum value in the passband
580 *
581 * @param passMax is the maximum value in the passband (useful for scaling). This should
582 * be less than 1., to avoid sine wave test overflow.
583 *
584 * @param passRipple is the passband ripple. Typically this should be less than 0.1 for
585 * an audio filter. Generally speaker/headphone device characteristics will dominate
586 * the passband term.
587 *
588 * @param stopMax is the maximum value in the stopband.
589 *
590 * @param stopRipple is the stopband ripple, also known as stopband attenuation.
591 * Typically this should be greater than ~80dB for low quality, and greater than
592 * ~100dB for full 16b quality, otherwise aliasing may become noticeable.
593 *
594 */
595template <typename T>
596static void testFir(const T* coef, int L, int halfNumCoef,
597 double fp, double fs, int passSteps, int stopSteps,
598 double &passMin, double &passMax, double &passRipple,
599 double &stopMax, double &stopRipple) {
600 double fmin, fmax;
601 testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax);
602 double d1 = (fmax - fmin)/2.;
603 passMin = fmin;
604 passMax = fmax;
605 passRipple = -20.*log10(1. - d1); // passband ripple
606 testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax);
607 // fmin is really not important for the stopband.
608 stopMax = fmax;
609 stopRipple = -20.*log10(fmax); // stopband ripple/attenuation
610}
611
612/*
613 * Calculates the overall polyphase filter based on a windowed sinc function.
Andy Hung86eae0e2013-12-09 12:12:46 -0800614 *
615 * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1
616 * taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks.
617 * The last filterbank is used for interpolation purposes (and is mostly composed
618 * of the first bank shifted by one sample), and is unnecessary if one does
619 * not do interpolation.
620 *
Andy Hung6582f2b2014-01-03 12:30:41 -0800621 * We use the last filterbank for some transfer function calculation purposes,
622 * so it needs to be generated anyways.
623 *
Andy Hung86eae0e2013-12-09 12:12:46 -0800624 * @param coef is the caller allocated space for coefficients. This should be
625 * exactly (L+1)*halfNumCoef in size.
626 *
627 * @param L is the number of phases (for interpolation)
628 *
629 * @param halfNumCoef should be half the number of coefficients for a single
630 * polyphase.
631 *
632 * @param stopBandAtten is the stopband value, should be >50dB.
633 *
634 * @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy
635 * should be 6dB less. (fcr is where the amplitude drops by half). Use the
636 * firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint
637 * between the stop band and the pass band (fstop+fpass)/2.
638 *
639 * @param atten is the attenuation (generally slightly less than 1).
640 */
641
642template <typename T>
643static inline void firKaiserGen(T* coef, int L, int halfNumCoef,
644 double stopBandAtten, double fcr, double atten) {
645 //
Andy Hung6582f2b2014-01-03 12:30:41 -0800646 // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
647 // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
Andy Hung86eae0e2013-12-09 12:12:46 -0800648 //
649 // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf
650 //
651 // Kaiser window and beta parameter
652 //
653 // | 0.1102*(A - 8.7) A > 50
654 // beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50
655 // | 0. A < 21
656 //
657 // with A is the desired stop-band attenuation in dBFS
658 //
659 // 30 dB 2.210
660 // 40 dB 3.384
661 // 50 dB 4.538
662 // 60 dB 5.658
663 // 70 dB 6.764
664 // 80 dB 7.865
665 // 90 dB 8.960
666 // 100 dB 10.056
667
668 const int N = L * halfNumCoef; // non-negative half
669 const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always
Andy Hung6582f2b2014-01-03 12:30:41 -0800670 const double xstep = (2. * M_PI) * fcr / L;
Andy Hung86eae0e2013-12-09 12:12:46 -0800671 const double xfrac = 1. / N;
Andy Hung6582f2b2014-01-03 12:30:41 -0800672 const double yscale = atten * L / (I0(beta) * M_PI);
Andy Hungbafa5612014-04-02 13:52:10 -0700673 const double sqrbeta = sqr(beta);
Andy Hung6582f2b2014-01-03 12:30:41 -0800674
675 // We use sine generators, which computes sines on regular step intervals.
676 // This speeds up overall computation about 40% from computing the sine directly.
677
678 SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase)
679
Andy Hung86eae0e2013-12-09 12:12:46 -0800680 for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation
Andy Hung6582f2b2014-01-03 12:30:41 -0800681
682 // computation for a single polyphase of the overall filter.
683 SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop.
684 double err = 0; // for noise shaping on int16_t coefficients (over each polyphase)
685
Andy Hung86eae0e2013-12-09 12:12:46 -0800686 for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
Andy Hung6582f2b2014-01-03 12:30:41 -0800687 double y;
688 if (CC_LIKELY(ix)) {
689 double x = static_cast<double>(ix);
690
691 // sine generator: sg.valueAdvance() returns sin(ix*xstep);
Andy Hungbafa5612014-04-02 13:52:10 -0700692 // y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
693 y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x);
Andy Hung6582f2b2014-01-03 12:30:41 -0800694 } else {
695 y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
696 sg.advance();
697 }
Andy Hung86eae0e2013-12-09 12:12:46 -0800698
Andy Hung86eae0e2013-12-09 12:12:46 -0800699 if (is_same<T, int16_t>::value) { // int16_t needs noise shaping
700 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err));
Andy Hung430b61c2014-04-08 18:10:02 -0700701 } else if (is_same<T, int32_t>::value) {
Andy Hung86eae0e2013-12-09 12:12:46 -0800702 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1)));
Andy Hung430b61c2014-04-08 18:10:02 -0700703 } else { // assumed float or double
704 *coef++ = static_cast<T>(y);
Andy Hung86eae0e2013-12-09 12:12:46 -0800705 }
706 }
707 }
708}
709
Glenn Kasten63238ef2015-03-02 15:50:29 -0800710} // namespace android
Andy Hung86eae0e2013-12-09 12:12:46 -0800711
712#endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/