Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2013 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H |
| 18 | #define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H |
| 19 | |
Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 20 | #include "utils/Compat.h" |
| 21 | |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 22 | namespace android { |
| 23 | |
| 24 | /* |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 25 | * generates a sine wave at equal steps. |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 26 | * |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 27 | * As most of our functions use sine or cosine at equal steps, |
| 28 | * it is very efficient to compute them that way (single multiply and subtract), |
| 29 | * rather than invoking the math library sin() or cos() each time. |
| 30 | * |
| 31 | * SineGen uses Goertzel's Algorithm (as a generator not a filter) |
| 32 | * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep) |
| 33 | * by stepping through 0, 1, ... n. |
| 34 | * |
| 35 | * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep) |
| 36 | * |
| 37 | * or looking at just the imaginary sine term, as the cosine follows identically: |
| 38 | * |
| 39 | * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep) |
| 40 | * |
| 41 | * Goertzel's algorithm is more efficient than the angle addition formula, |
| 42 | * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to |
| 43 | * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and |
| 44 | * cosine generation due to the complex * complex multiply (full rotation). |
| 45 | * |
| 46 | * See: http://en.wikipedia.org/wiki/Goertzel_algorithm |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 47 | * |
| 48 | */ |
| 49 | |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 50 | class SineGen { |
| 51 | public: |
| 52 | SineGen(double wstart, double wstep, bool cosine = false) { |
| 53 | if (cosine) { |
| 54 | mCurrent = cos(wstart); |
| 55 | mPrevious = cos(wstart - wstep); |
| 56 | } else { |
| 57 | mCurrent = sin(wstart); |
| 58 | mPrevious = sin(wstart - wstep); |
| 59 | } |
| 60 | mTwoCos = 2.*cos(wstep); |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 61 | } |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 62 | SineGen(double expNow, double expPrev, double twoCosStep) { |
| 63 | mCurrent = expNow; |
| 64 | mPrevious = expPrev; |
| 65 | mTwoCos = twoCosStep; |
| 66 | } |
| 67 | inline double value() const { |
| 68 | return mCurrent; |
| 69 | } |
| 70 | inline void advance() { |
| 71 | double tmp = mCurrent; |
| 72 | mCurrent = mCurrent*mTwoCos - mPrevious; |
| 73 | mPrevious = tmp; |
| 74 | } |
| 75 | inline double valueAdvance() { |
| 76 | double tmp = mCurrent; |
| 77 | mCurrent = mCurrent*mTwoCos - mPrevious; |
| 78 | mPrevious = tmp; |
| 79 | return tmp; |
| 80 | } |
| 81 | |
| 82 | private: |
| 83 | double mCurrent; // current value of sine/cosine |
| 84 | double mPrevious; // previous value of sine/cosine |
| 85 | double mTwoCos; // stepping factor |
| 86 | }; |
| 87 | |
| 88 | /* |
| 89 | * generates a series of sine generators, phase offset by fixed steps. |
| 90 | * |
| 91 | * This is used to generate polyphase sine generators, one per polyphase |
| 92 | * in the filter code below. |
| 93 | * |
| 94 | * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep; |
| 95 | * increments by innerStep. |
| 96 | * |
| 97 | */ |
| 98 | |
| 99 | class SineGenGen { |
| 100 | public: |
| 101 | SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false) |
| 102 | : mSineInnerCur(outerStart, outerStep, cosine), |
| 103 | mSineInnerPrev(outerStart-innerStep, outerStep, cosine) |
| 104 | { |
| 105 | mTwoCos = 2.*cos(innerStep); |
| 106 | } |
| 107 | inline SineGen value() { |
| 108 | return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos); |
| 109 | } |
| 110 | inline void advance() { |
| 111 | mSineInnerCur.advance(); |
| 112 | mSineInnerPrev.advance(); |
| 113 | } |
| 114 | inline SineGen valueAdvance() { |
| 115 | return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos); |
| 116 | } |
| 117 | |
| 118 | private: |
| 119 | SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep). |
| 120 | SineGen mSineInnerPrev; // generate the inner sine previous values |
| 121 | // (behind by innerStep, stepped by outerStep). |
| 122 | double mTwoCos; // the inner stepping factor for the returned SineGen. |
| 123 | }; |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 124 | |
| 125 | static inline double sqr(double x) { |
| 126 | return x * x; |
| 127 | } |
| 128 | |
| 129 | /* |
| 130 | * rounds a double to the nearest integer for FIR coefficients. |
| 131 | * |
| 132 | * One variant uses noise shaping, which must keep error history |
| 133 | * to work (the err parameter, initialized to 0). |
| 134 | * The other variant is a non-noise shaped version for |
| 135 | * S32 coefficients (noise shaping doesn't gain much). |
| 136 | * |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 137 | * Caution: No bounds saturation is applied, but isn't needed in this case. |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 138 | * |
| 139 | * @param x is the value to round. |
| 140 | * |
| 141 | * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom). |
| 142 | * Typically this may be the maximum positive integer+1 (using the fact that double precision |
| 143 | * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition). |
| 144 | * |
| 145 | * @param err is the previous error (actual - rounded) for the previous rounding op. |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 146 | * For 16b coefficients this can improve stopband dB performance by up to 2dB. |
| 147 | * |
| 148 | * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 149 | * |
| 150 | */ |
| 151 | |
| 152 | static inline int64_t toint(double x, int64_t maxval, double& err) { |
| 153 | double val = x * maxval; |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 154 | double ival = floor(val + 0.5 + err*0.2); |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 155 | err = val - ival; |
| 156 | return static_cast<int64_t>(ival); |
| 157 | } |
| 158 | |
| 159 | static inline int64_t toint(double x, int64_t maxval) { |
| 160 | return static_cast<int64_t>(floor(x * maxval + 0.5)); |
| 161 | } |
| 162 | |
| 163 | /* |
| 164 | * Modified Bessel function of the first kind |
| 165 | * http://en.wikipedia.org/wiki/Bessel_function |
| 166 | * |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 167 | * The formulas are taken from Abramowitz and Stegun, |
| 168 | * _Handbook of Mathematical Functions_ (links below): |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 169 | * |
| 170 | * http://people.math.sfu.ca/~cbm/aands/page_375.htm |
| 171 | * http://people.math.sfu.ca/~cbm/aands/page_378.htm |
| 172 | * |
| 173 | * http://dlmf.nist.gov/10.25 |
| 174 | * http://dlmf.nist.gov/10.40 |
| 175 | * |
| 176 | * Note we assume x is nonnegative (the function is symmetric, |
| 177 | * pass in the absolute value as needed). |
| 178 | * |
| 179 | * Constants are compile time derived with templates I0Term<> and |
| 180 | * I0ATerm<> to the precision of the compiler. The series can be expanded |
| 181 | * to any precision needed, but currently set around 24b precision. |
| 182 | * |
| 183 | * We use a bit of template math here, constexpr would probably be |
| 184 | * more appropriate for a C++11 compiler. |
| 185 | * |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 186 | * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit. |
| 187 | * |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 188 | */ |
| 189 | |
| 190 | template <int N> |
| 191 | struct I0Term { |
Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 192 | static const CONSTEXPR double value = I0Term<N-1>::value / (4. * N * N); |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 193 | }; |
| 194 | |
| 195 | template <> |
| 196 | struct I0Term<0> { |
Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 197 | static const CONSTEXPR double value = 1.; |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 198 | }; |
| 199 | |
| 200 | template <int N> |
| 201 | struct I0ATerm { |
Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 202 | static const CONSTEXPR double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N); |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 203 | }; |
| 204 | |
| 205 | template <> |
| 206 | struct I0ATerm<0> { // 1/sqrt(2*PI); |
Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 207 | static const CONSTEXPR double value = 0.398942280401432677939946059934381868475858631164934657665925; |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 208 | }; |
| 209 | |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 210 | #if USE_HORNERS_METHOD |
| 211 | /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... |
| 212 | * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method |
| 213 | * |
| 214 | * This has fewer multiplications than Estrin's method below, but has back to back |
| 215 | * floating point dependencies. |
| 216 | * |
| 217 | * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled. |
| 218 | */ |
| 219 | |
| 220 | inline double Poly2(double A, double B, double x) { |
| 221 | return A + x * B; |
| 222 | } |
| 223 | |
| 224 | inline double Poly4(double A, double B, double C, double D, double x) { |
| 225 | return A + x * (B + x * (C + x * (D))); |
| 226 | } |
| 227 | |
| 228 | inline double Poly7(double A, double B, double C, double D, double E, double F, double G, |
| 229 | double x) { |
| 230 | return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G)))))); |
| 231 | } |
| 232 | |
| 233 | inline double Poly9(double A, double B, double C, double D, double E, double F, double G, |
| 234 | double H, double I, double x) { |
| 235 | return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I)))))))); |
| 236 | } |
| 237 | |
| 238 | #else |
| 239 | /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... |
| 240 | * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme |
| 241 | * |
| 242 | * This is typically faster, perhaps gains about 5-10% overall on ARM processors |
| 243 | * over Horner's method above. |
| 244 | */ |
| 245 | |
| 246 | inline double Poly2(double A, double B, double x) { |
| 247 | return A + B * x; |
| 248 | } |
| 249 | |
| 250 | inline double Poly3(double A, double B, double C, double x, double x2) { |
| 251 | return Poly2(A, B, x) + C * x2; |
| 252 | } |
| 253 | |
| 254 | inline double Poly3(double A, double B, double C, double x) { |
| 255 | return Poly2(A, B, x) + C * x * x; |
| 256 | } |
| 257 | |
| 258 | inline double Poly4(double A, double B, double C, double D, double x, double x2) { |
| 259 | return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2); |
| 260 | } |
| 261 | |
| 262 | inline double Poly4(double A, double B, double C, double D, double x) { |
| 263 | return Poly4(A, B, C, D, x, x * x); |
| 264 | } |
| 265 | |
| 266 | inline double Poly7(double A, double B, double C, double D, double E, double F, double G, |
| 267 | double x) { |
| 268 | double x2 = x * x; |
| 269 | return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2); |
| 270 | } |
| 271 | |
| 272 | inline double Poly8(double A, double B, double C, double D, double E, double F, double G, |
| 273 | double H, double x, double x2, double x4) { |
| 274 | return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4; |
| 275 | } |
| 276 | |
| 277 | inline double Poly9(double A, double B, double C, double D, double E, double F, double G, |
| 278 | double H, double I, double x) { |
| 279 | double x2 = x * x; |
| 280 | #if 1 |
| 281 | // It does not seem faster to explicitly decompose Poly8 into Poly4, but |
| 282 | // could depend on compiler floating point scheduling. |
| 283 | double x4 = x2 * x2; |
| 284 | return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4); |
| 285 | #else |
| 286 | double val = Poly4(A, B, C, D, x, x2); |
| 287 | double x4 = x2 * x2; |
| 288 | return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4); |
| 289 | #endif |
| 290 | } |
| 291 | #endif |
| 292 | |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 293 | static inline double I0(double x) { |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 294 | if (x < 3.75) { |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 295 | x *= x; |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 296 | return Poly7(I0Term<0>::value, I0Term<1>::value, |
| 297 | I0Term<2>::value, I0Term<3>::value, |
| 298 | I0Term<4>::value, I0Term<5>::value, |
| 299 | I0Term<6>::value, x); // e < 1.6e-7 |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 300 | } |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 301 | if (1) { |
| 302 | /* |
| 303 | * Series expansion coefs are easy to calculate, but are expanded around 0, |
| 304 | * so error is unequal over the interval 0 < x < 3.75, the error being |
| 305 | * significantly better near 0. |
| 306 | * |
| 307 | * A better solution is to use precise minimax polynomial fits. |
| 308 | * |
| 309 | * We use a slightly more complicated solution for 3.75 < x < 15, based on |
| 310 | * the tables in Blair and Edwards, "Stable Rational Minimax Approximations |
| 311 | * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory, |
| 312 | * AECL-4928. |
| 313 | * |
| 314 | * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf |
| 315 | * |
| 316 | * See Table 11 for 0 < x < 15; e < 10^(-7.13). |
| 317 | * |
| 318 | * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b). |
| 319 | * |
| 320 | * This speeds up overall computation by about 40% over using the else clause below, |
| 321 | * which requires sqrt and exp. |
| 322 | * |
| 323 | */ |
| 324 | |
| 325 | x *= x; |
| 326 | double num = Poly9(-0.13544938430e9, -0.33153754512e8, |
| 327 | -0.19406631946e7, -0.48058318783e5, |
| 328 | -0.63269783360e3, -0.49520779070e1, |
| 329 | -0.24970910370e-1, -0.74741159550e-4, |
| 330 | -0.18257612460e-6, x); |
| 331 | double y = x - 225.; // reflection around 15 (squared) |
| 332 | double den = Poly4(-0.34598737196e8, 0.23852643181e6, |
| 333 | -0.70699387620e3, 0.10000000000e1, y); |
| 334 | return num / den; |
| 335 | |
| 336 | #if IO_EXTENDED_BETA |
| 337 | /* Table 42 for x > 15; e < 10^(-8.11). |
| 338 | * This is used for Beta>15, but is disabled here as |
| 339 | * we never use Beta that high. |
| 340 | * |
| 341 | * NOTE: This should be enabled only for x > 15. |
| 342 | */ |
| 343 | |
| 344 | double y = 1./x; |
| 345 | double z = y - (1./15); |
| 346 | double num = Poly2(0.415079861746e1, -0.5149092496e1, z); |
| 347 | double den = Poly3(0.103150763823e2, -0.14181687413e2, |
| 348 | 0.1000000000e1, z); |
| 349 | return exp(x) * sqrt(y) * num / den; |
| 350 | #endif |
| 351 | } else { |
| 352 | /* |
| 353 | * NOT USED, but reference for large Beta. |
| 354 | * |
| 355 | * Abramowitz and Stegun asymptotic formula. |
| 356 | * works for x > 3.75. |
| 357 | */ |
| 358 | double y = 1./x; |
| 359 | return exp(x) * sqrt(y) * |
| 360 | // note: reciprocal squareroot may be easier! |
| 361 | // http://en.wikipedia.org/wiki/Fast_inverse_square_root |
| 362 | Poly9(I0ATerm<0>::value, I0ATerm<1>::value, |
| 363 | I0ATerm<2>::value, I0ATerm<3>::value, |
| 364 | I0ATerm<4>::value, I0ATerm<5>::value, |
| 365 | I0ATerm<6>::value, I0ATerm<7>::value, |
| 366 | I0ATerm<8>::value, y); // (... e) < 1.9e-7 |
| 367 | } |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 368 | } |
| 369 | |
Andy Hung | bafa561 | 2014-04-02 13:52:10 -0700 | [diff] [blame] | 370 | /* A speed optimized version of the Modified Bessel I0() which incorporates |
| 371 | * the sqrt and numerator multiply and denominator divide into the computation. |
| 372 | * This speeds up filter computation by about 10-15%. |
| 373 | */ |
| 374 | static inline double I0SqrRat(double x2, double num, double den) { |
| 375 | if (x2 < (3.75 * 3.75)) { |
| 376 | return Poly7(I0Term<0>::value, I0Term<1>::value, |
| 377 | I0Term<2>::value, I0Term<3>::value, |
| 378 | I0Term<4>::value, I0Term<5>::value, |
| 379 | I0Term<6>::value, x2) * num / den; // e < 1.6e-7 |
| 380 | } |
| 381 | num *= Poly9(-0.13544938430e9, -0.33153754512e8, |
| 382 | -0.19406631946e7, -0.48058318783e5, |
| 383 | -0.63269783360e3, -0.49520779070e1, |
| 384 | -0.24970910370e-1, -0.74741159550e-4, |
| 385 | -0.18257612460e-6, x2); // e < 10^(-7.13). |
| 386 | double y = x2 - 225.; // reflection around 15 (squared) |
| 387 | den *= Poly4(-0.34598737196e8, 0.23852643181e6, |
| 388 | -0.70699387620e3, 0.10000000000e1, y); |
| 389 | return num / den; |
| 390 | } |
| 391 | |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 392 | /* |
| 393 | * calculates the transition bandwidth for a Kaiser filter |
| 394 | * |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 395 | * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 |
| 396 | * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 397 | * |
| 398 | * @param halfNumCoef is half the number of coefficients per filter phase. |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 399 | * |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 400 | * @param stopBandAtten is the stop band attenuation desired. |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 401 | * |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 402 | * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5) |
| 403 | */ |
| 404 | static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) { |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 405 | return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef); |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 406 | } |
| 407 | |
| 408 | /* |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 409 | * calculates the fir transfer response of the overall polyphase filter at w. |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 410 | * |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 411 | * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the |
| 412 | * fact that h[n] is symmetric (cosines only, no complex arithmetic). |
| 413 | * |
| 414 | * We use Goertzel's algorithm to accelerate the computation to essentially |
| 415 | * a single multiply and 2 adds per filter coefficient h[]. |
| 416 | * |
| 417 | * Be careful be careful to consider that h[n] is the overall polyphase filter, |
| 418 | * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain", |
| 419 | * as you only use one of the polyphases at a time. |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 420 | */ |
| 421 | template <typename T> |
| 422 | static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) { |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 423 | double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank |
| 424 | coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank). |
| 425 | #if SLOW_FIRTRANSFER |
| 426 | /* Original code for reference. This is equivalent to the code below, but slower. */ |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 427 | for (int i=1 ; i<=L ; ++i) { |
| 428 | for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { |
| 429 | accum += cos(ix*w)*static_cast<double>(*coef++); |
| 430 | } |
| 431 | } |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 432 | #else |
| 433 | /* |
| 434 | * Our overall filter is stored striped by polyphases, not a contiguous h[n]. |
| 435 | * We could fetch coefficients in a non-contiguous fashion |
| 436 | * but that will not scale to vector processing. |
| 437 | * |
| 438 | * We apply Goertzel's algorithm directly to each polyphase filter bank instead of |
| 439 | * using cosine generation/multiplication, thereby saving one multiply per inner loop. |
| 440 | * |
| 441 | * See: http://en.wikipedia.org/wiki/Goertzel_algorithm |
| 442 | * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720. |
| 443 | * |
| 444 | * We use the basic recursion to incorporate the cosine steps into real sequence x[n]: |
| 445 | * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2] |
| 446 | * |
| 447 | * y[n] = s[n] - e^(iw)s[n-1] |
| 448 | * = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k)) |
| 449 | * = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk) |
| 450 | * |
| 451 | * The summation contains the frequency steps we want multiplied by the source |
| 452 | * (similar to a DTFT). |
| 453 | * |
| 454 | * Using symmetry, and just the real part (be careful, this must happen |
| 455 | * after any internal complex multiplications), the polyphase filterbank |
| 456 | * transfer function is: |
| 457 | * |
| 458 | * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0) |
| 459 | * = Re{ e^(iwn + iw_0) y[n]} |
| 460 | * = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1] |
| 461 | * |
| 462 | * using the fact that s[n] of real x[n] is real. |
| 463 | * |
| 464 | */ |
| 465 | double dcos = 2. * cos(L*w); |
| 466 | int start = ((halfNumCoef)*L + 1); |
| 467 | SineGen cc((start - L) * w, w, true); // cosine |
| 468 | SineGen cp(start * w, w, true); // cosine |
| 469 | for (int i=1 ; i<=L ; ++i) { |
| 470 | double sc = 0; |
| 471 | double sp = 0; |
| 472 | for (int j=0 ; j<halfNumCoef ; ++j) { |
| 473 | double tmp = sc; |
| 474 | sc = static_cast<double>(*coef++) + dcos*sc - sp; |
| 475 | sp = tmp; |
| 476 | } |
| 477 | // If we are awfully clever, we can apply Goertzel's algorithm |
| 478 | // again on the sc and sp sequences returned here. |
| 479 | accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp; |
| 480 | } |
| 481 | #endif |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 482 | return accum*2.; |
| 483 | } |
| 484 | |
| 485 | /* |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 486 | * evaluates the minimum and maximum |H(f)| bound in a band region. |
| 487 | * |
| 488 | * This is usually done with equally spaced increments in the target band in question. |
| 489 | * The passband is often very small, and sampled that way. The stopband is often much |
| 490 | * larger. |
| 491 | * |
| 492 | * We use the fact that the overall polyphase filter has an additional bank at the end |
| 493 | * for interpolation; hence it is overspecified for the H(f) computation. Thus the |
| 494 | * first polyphase is never actually checked, excepting its first term. |
| 495 | * |
| 496 | * In this code we use the firTransfer() evaluator above, which uses Goertzel's |
| 497 | * algorithm to calculate the transfer function at each point. |
| 498 | * |
| 499 | * TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal |
| 500 | * spacing is a chirp transform. |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 501 | * |
| 502 | * @param coef is the designed polyphase filter banks |
| 503 | * |
| 504 | * @param L is the number of phases (for interpolation) |
| 505 | * |
| 506 | * @param halfNumCoef should be half the number of coefficients for a single |
| 507 | * polyphase. |
| 508 | * |
| 509 | * @param fstart is the normalized frequency start. |
| 510 | * |
| 511 | * @param fend is the normalized frequency end. |
| 512 | * |
| 513 | * @param steps is the number of steps to take (sampling) between frequency start and end |
| 514 | * |
| 515 | * @param firMin returns the minimum transfer |H(f)| found |
| 516 | * |
| 517 | * @param firMax returns the maximum transfer |H(f)| found |
| 518 | * |
| 519 | * 0 <= f <= 0.5. |
| 520 | * This is used to test passband and stopband performance. |
| 521 | */ |
| 522 | template <typename T> |
| 523 | static void testFir(const T* coef, int L, int halfNumCoef, |
| 524 | double fstart, double fend, int steps, double &firMin, double &firMax) { |
| 525 | double wstart = fstart*(2.*M_PI); |
| 526 | double wend = fend*(2.*M_PI); |
| 527 | double wstep = (wend - wstart)/steps; |
| 528 | double fmax, fmin; |
| 529 | double trf = firTransfer(coef, L, halfNumCoef, wstart); |
| 530 | if (trf<0) { |
| 531 | trf = -trf; |
| 532 | } |
| 533 | fmin = fmax = trf; |
| 534 | wstart += wstep; |
| 535 | for (int i=1; i<steps; ++i) { |
| 536 | trf = firTransfer(coef, L, halfNumCoef, wstart); |
| 537 | if (trf<0) { |
| 538 | trf = -trf; |
| 539 | } |
| 540 | if (trf>fmax) { |
| 541 | fmax = trf; |
| 542 | } |
| 543 | else if (trf<fmin) { |
| 544 | fmin = trf; |
| 545 | } |
| 546 | wstart += wstep; |
| 547 | } |
| 548 | // renormalize - this is only needed for integer filter types |
| 549 | double norm = 1./((1ULL<<(sizeof(T)*8-1))*L); |
| 550 | |
| 551 | firMin = fmin * norm; |
| 552 | firMax = fmax * norm; |
| 553 | } |
| 554 | |
| 555 | /* |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 556 | * evaluates the |H(f)| lowpass band characteristics. |
| 557 | * |
| 558 | * This function tests the lowpass characteristics for the overall polyphase filter, |
| 559 | * and is used to verify the design. For this case, fp should be set to the |
| 560 | * passband normalized frequency from 0 to 0.5 for the overall filter (thus it |
| 561 | * is the designed polyphase bank value / L). Likewise for fs. |
| 562 | * |
| 563 | * @param coef is the designed polyphase filter banks |
| 564 | * |
| 565 | * @param L is the number of phases (for interpolation) |
| 566 | * |
| 567 | * @param halfNumCoef should be half the number of coefficients for a single |
| 568 | * polyphase. |
| 569 | * |
| 570 | * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5. |
| 571 | * |
| 572 | * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5. |
| 573 | * |
| 574 | * @param passSteps is the number of passband sampling steps. |
| 575 | * |
| 576 | * @param stopSteps is the number of stopband sampling steps. |
| 577 | * |
| 578 | * @param passMin is the minimum value in the passband |
| 579 | * |
| 580 | * @param passMax is the maximum value in the passband (useful for scaling). This should |
| 581 | * be less than 1., to avoid sine wave test overflow. |
| 582 | * |
| 583 | * @param passRipple is the passband ripple. Typically this should be less than 0.1 for |
| 584 | * an audio filter. Generally speaker/headphone device characteristics will dominate |
| 585 | * the passband term. |
| 586 | * |
| 587 | * @param stopMax is the maximum value in the stopband. |
| 588 | * |
| 589 | * @param stopRipple is the stopband ripple, also known as stopband attenuation. |
| 590 | * Typically this should be greater than ~80dB for low quality, and greater than |
| 591 | * ~100dB for full 16b quality, otherwise aliasing may become noticeable. |
| 592 | * |
| 593 | */ |
| 594 | template <typename T> |
| 595 | static void testFir(const T* coef, int L, int halfNumCoef, |
| 596 | double fp, double fs, int passSteps, int stopSteps, |
| 597 | double &passMin, double &passMax, double &passRipple, |
| 598 | double &stopMax, double &stopRipple) { |
| 599 | double fmin, fmax; |
| 600 | testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax); |
| 601 | double d1 = (fmax - fmin)/2.; |
| 602 | passMin = fmin; |
| 603 | passMax = fmax; |
| 604 | passRipple = -20.*log10(1. - d1); // passband ripple |
| 605 | testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax); |
| 606 | // fmin is really not important for the stopband. |
| 607 | stopMax = fmax; |
| 608 | stopRipple = -20.*log10(fmax); // stopband ripple/attenuation |
| 609 | } |
| 610 | |
| 611 | /* |
| 612 | * Calculates the overall polyphase filter based on a windowed sinc function. |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 613 | * |
| 614 | * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1 |
| 615 | * taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks. |
| 616 | * The last filterbank is used for interpolation purposes (and is mostly composed |
| 617 | * of the first bank shifted by one sample), and is unnecessary if one does |
| 618 | * not do interpolation. |
| 619 | * |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 620 | * We use the last filterbank for some transfer function calculation purposes, |
| 621 | * so it needs to be generated anyways. |
| 622 | * |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 623 | * @param coef is the caller allocated space for coefficients. This should be |
| 624 | * exactly (L+1)*halfNumCoef in size. |
| 625 | * |
| 626 | * @param L is the number of phases (for interpolation) |
| 627 | * |
| 628 | * @param halfNumCoef should be half the number of coefficients for a single |
| 629 | * polyphase. |
| 630 | * |
| 631 | * @param stopBandAtten is the stopband value, should be >50dB. |
| 632 | * |
| 633 | * @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy |
| 634 | * should be 6dB less. (fcr is where the amplitude drops by half). Use the |
| 635 | * firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint |
| 636 | * between the stop band and the pass band (fstop+fpass)/2. |
| 637 | * |
| 638 | * @param atten is the attenuation (generally slightly less than 1). |
| 639 | */ |
| 640 | |
| 641 | template <typename T> |
| 642 | static inline void firKaiserGen(T* coef, int L, int halfNumCoef, |
| 643 | double stopBandAtten, double fcr, double atten) { |
| 644 | // |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 645 | // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 |
| 646 | // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 647 | // |
| 648 | // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf |
| 649 | // |
| 650 | // Kaiser window and beta parameter |
| 651 | // |
| 652 | // | 0.1102*(A - 8.7) A > 50 |
| 653 | // beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50 |
| 654 | // | 0. A < 21 |
| 655 | // |
| 656 | // with A is the desired stop-band attenuation in dBFS |
| 657 | // |
| 658 | // 30 dB 2.210 |
| 659 | // 40 dB 3.384 |
| 660 | // 50 dB 4.538 |
| 661 | // 60 dB 5.658 |
| 662 | // 70 dB 6.764 |
| 663 | // 80 dB 7.865 |
| 664 | // 90 dB 8.960 |
| 665 | // 100 dB 10.056 |
| 666 | |
| 667 | const int N = L * halfNumCoef; // non-negative half |
| 668 | const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 669 | const double xstep = (2. * M_PI) * fcr / L; |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 670 | const double xfrac = 1. / N; |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 671 | const double yscale = atten * L / (I0(beta) * M_PI); |
Andy Hung | bafa561 | 2014-04-02 13:52:10 -0700 | [diff] [blame] | 672 | const double sqrbeta = sqr(beta); |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 673 | |
| 674 | // We use sine generators, which computes sines on regular step intervals. |
| 675 | // This speeds up overall computation about 40% from computing the sine directly. |
| 676 | |
| 677 | SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase) |
| 678 | |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 679 | for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 680 | |
| 681 | // computation for a single polyphase of the overall filter. |
| 682 | SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop. |
| 683 | double err = 0; // for noise shaping on int16_t coefficients (over each polyphase) |
| 684 | |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 685 | for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 686 | double y; |
| 687 | if (CC_LIKELY(ix)) { |
| 688 | double x = static_cast<double>(ix); |
| 689 | |
| 690 | // sine generator: sg.valueAdvance() returns sin(ix*xstep); |
Andy Hung | bafa561 | 2014-04-02 13:52:10 -0700 | [diff] [blame] | 691 | // y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x; |
| 692 | y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x); |
Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 693 | } else { |
| 694 | y = 2. * atten * fcr; // center of filter, sinc(0) = 1. |
| 695 | sg.advance(); |
| 696 | } |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 697 | |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 698 | if (is_same<T, int16_t>::value) { // int16_t needs noise shaping |
| 699 | *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err)); |
Andy Hung | 430b61c | 2014-04-08 18:10:02 -0700 | [diff] [blame] | 700 | } else if (is_same<T, int32_t>::value) { |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 701 | *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1))); |
Andy Hung | 430b61c | 2014-04-08 18:10:02 -0700 | [diff] [blame] | 702 | } else { // assumed float or double |
| 703 | *coef++ = static_cast<T>(y); |
Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 704 | } |
| 705 | } |
| 706 | } |
| 707 | } |
| 708 | |
| 709 | }; // namespace android |
| 710 | |
| 711 | #endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/ |