| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2013 The Android Open Source Project | 
|  | 3 | * | 
|  | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | * you may not use this file except in compliance with the License. | 
|  | 6 | * You may obtain a copy of the License at | 
|  | 7 | * | 
|  | 8 | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | * | 
|  | 10 | * Unless required by applicable law or agreed to in writing, software | 
|  | 11 | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | * See the License for the specific language governing permissions and | 
|  | 14 | * limitations under the License. | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | #ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H | 
|  | 18 | #define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H | 
|  | 19 |  | 
| Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 20 | #include "utils/Compat.h" | 
|  | 21 |  | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 22 | namespace android { | 
|  | 23 |  | 
|  | 24 | /* | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 25 | * generates a sine wave at equal steps. | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 26 | * | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 27 | * As most of our functions use sine or cosine at equal steps, | 
|  | 28 | * it is very efficient to compute them that way (single multiply and subtract), | 
|  | 29 | * rather than invoking the math library sin() or cos() each time. | 
|  | 30 | * | 
|  | 31 | * SineGen uses Goertzel's Algorithm (as a generator not a filter) | 
|  | 32 | * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep) | 
|  | 33 | * by stepping through 0, 1, ... n. | 
|  | 34 | * | 
|  | 35 | * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep) | 
|  | 36 | * | 
|  | 37 | * or looking at just the imaginary sine term, as the cosine follows identically: | 
|  | 38 | * | 
|  | 39 | * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep) | 
|  | 40 | * | 
|  | 41 | * Goertzel's algorithm is more efficient than the angle addition formula, | 
|  | 42 | * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to | 
|  | 43 | * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and | 
|  | 44 | * cosine generation due to the complex * complex multiply (full rotation). | 
|  | 45 | * | 
|  | 46 | * See: http://en.wikipedia.org/wiki/Goertzel_algorithm | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 47 | * | 
|  | 48 | */ | 
|  | 49 |  | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 50 | class SineGen { | 
|  | 51 | public: | 
|  | 52 | SineGen(double wstart, double wstep, bool cosine = false) { | 
|  | 53 | if (cosine) { | 
|  | 54 | mCurrent = cos(wstart); | 
|  | 55 | mPrevious = cos(wstart - wstep); | 
|  | 56 | } else { | 
|  | 57 | mCurrent = sin(wstart); | 
|  | 58 | mPrevious = sin(wstart - wstep); | 
|  | 59 | } | 
|  | 60 | mTwoCos = 2.*cos(wstep); | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 61 | } | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 62 | SineGen(double expNow, double expPrev, double twoCosStep) { | 
|  | 63 | mCurrent = expNow; | 
|  | 64 | mPrevious = expPrev; | 
|  | 65 | mTwoCos = twoCosStep; | 
|  | 66 | } | 
|  | 67 | inline double value() const { | 
|  | 68 | return mCurrent; | 
|  | 69 | } | 
|  | 70 | inline void advance() { | 
|  | 71 | double tmp = mCurrent; | 
|  | 72 | mCurrent = mCurrent*mTwoCos - mPrevious; | 
|  | 73 | mPrevious = tmp; | 
|  | 74 | } | 
|  | 75 | inline double valueAdvance() { | 
|  | 76 | double tmp = mCurrent; | 
|  | 77 | mCurrent = mCurrent*mTwoCos - mPrevious; | 
|  | 78 | mPrevious = tmp; | 
|  | 79 | return tmp; | 
|  | 80 | } | 
|  | 81 |  | 
|  | 82 | private: | 
|  | 83 | double mCurrent; // current value of sine/cosine | 
|  | 84 | double mPrevious; // previous value of sine/cosine | 
|  | 85 | double mTwoCos; // stepping factor | 
|  | 86 | }; | 
|  | 87 |  | 
|  | 88 | /* | 
|  | 89 | * generates a series of sine generators, phase offset by fixed steps. | 
|  | 90 | * | 
|  | 91 | * This is used to generate polyphase sine generators, one per polyphase | 
|  | 92 | * in the filter code below. | 
|  | 93 | * | 
|  | 94 | * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep; | 
|  | 95 | * increments by innerStep. | 
|  | 96 | * | 
|  | 97 | */ | 
|  | 98 |  | 
|  | 99 | class SineGenGen { | 
|  | 100 | public: | 
|  | 101 | SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false) | 
|  | 102 | : mSineInnerCur(outerStart, outerStep, cosine), | 
|  | 103 | mSineInnerPrev(outerStart-innerStep, outerStep, cosine) | 
|  | 104 | { | 
|  | 105 | mTwoCos = 2.*cos(innerStep); | 
|  | 106 | } | 
|  | 107 | inline SineGen value() { | 
|  | 108 | return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos); | 
|  | 109 | } | 
|  | 110 | inline void advance() { | 
|  | 111 | mSineInnerCur.advance(); | 
|  | 112 | mSineInnerPrev.advance(); | 
|  | 113 | } | 
|  | 114 | inline SineGen valueAdvance() { | 
|  | 115 | return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos); | 
|  | 116 | } | 
|  | 117 |  | 
|  | 118 | private: | 
|  | 119 | SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep). | 
|  | 120 | SineGen mSineInnerPrev; // generate the inner sine previous values | 
|  | 121 | // (behind by innerStep, stepped by outerStep). | 
|  | 122 | double mTwoCos; // the inner stepping factor for the returned SineGen. | 
|  | 123 | }; | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 124 |  | 
|  | 125 | static inline double sqr(double x) { | 
|  | 126 | return x * x; | 
|  | 127 | } | 
|  | 128 |  | 
|  | 129 | /* | 
|  | 130 | * rounds a double to the nearest integer for FIR coefficients. | 
|  | 131 | * | 
|  | 132 | * One variant uses noise shaping, which must keep error history | 
|  | 133 | * to work (the err parameter, initialized to 0). | 
|  | 134 | * The other variant is a non-noise shaped version for | 
|  | 135 | * S32 coefficients (noise shaping doesn't gain much). | 
|  | 136 | * | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 137 | * Caution: No bounds saturation is applied, but isn't needed in this case. | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 138 | * | 
|  | 139 | * @param x is the value to round. | 
|  | 140 | * | 
|  | 141 | * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom). | 
|  | 142 | * Typically this may be the maximum positive integer+1 (using the fact that double precision | 
|  | 143 | * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition). | 
|  | 144 | * | 
|  | 145 | * @param err is the previous error (actual - rounded) for the previous rounding op. | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 146 | * For 16b coefficients this can improve stopband dB performance by up to 2dB. | 
|  | 147 | * | 
|  | 148 | * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 149 | * | 
|  | 150 | */ | 
|  | 151 |  | 
|  | 152 | static inline int64_t toint(double x, int64_t maxval, double& err) { | 
|  | 153 | double val = x * maxval; | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 154 | double ival = floor(val + 0.5 + err*0.2); | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 155 | err = val - ival; | 
|  | 156 | return static_cast<int64_t>(ival); | 
|  | 157 | } | 
|  | 158 |  | 
|  | 159 | static inline int64_t toint(double x, int64_t maxval) { | 
|  | 160 | return static_cast<int64_t>(floor(x * maxval + 0.5)); | 
|  | 161 | } | 
|  | 162 |  | 
|  | 163 | /* | 
|  | 164 | * Modified Bessel function of the first kind | 
|  | 165 | * http://en.wikipedia.org/wiki/Bessel_function | 
|  | 166 | * | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 167 | * The formulas are taken from Abramowitz and Stegun, | 
|  | 168 | * _Handbook of Mathematical Functions_ (links below): | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 169 | * | 
|  | 170 | * http://people.math.sfu.ca/~cbm/aands/page_375.htm | 
|  | 171 | * http://people.math.sfu.ca/~cbm/aands/page_378.htm | 
|  | 172 | * | 
|  | 173 | * http://dlmf.nist.gov/10.25 | 
|  | 174 | * http://dlmf.nist.gov/10.40 | 
|  | 175 | * | 
|  | 176 | * Note we assume x is nonnegative (the function is symmetric, | 
|  | 177 | * pass in the absolute value as needed). | 
|  | 178 | * | 
|  | 179 | * Constants are compile time derived with templates I0Term<> and | 
|  | 180 | * I0ATerm<> to the precision of the compiler.  The series can be expanded | 
|  | 181 | * to any precision needed, but currently set around 24b precision. | 
|  | 182 | * | 
|  | 183 | * We use a bit of template math here, constexpr would probably be | 
|  | 184 | * more appropriate for a C++11 compiler. | 
|  | 185 | * | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 186 | * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit. | 
|  | 187 | * | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 188 | */ | 
|  | 189 |  | 
|  | 190 | template <int N> | 
|  | 191 | struct I0Term { | 
| Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 192 | static const CONSTEXPR double value = I0Term<N-1>::value / (4. * N * N); | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 193 | }; | 
|  | 194 |  | 
|  | 195 | template <> | 
|  | 196 | struct I0Term<0> { | 
| Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 197 | static const CONSTEXPR double value = 1.; | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 198 | }; | 
|  | 199 |  | 
|  | 200 | template <int N> | 
|  | 201 | struct I0ATerm { | 
| Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 202 | static const CONSTEXPR double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N); | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 203 | }; | 
|  | 204 |  | 
|  | 205 | template <> | 
|  | 206 | struct I0ATerm<0> { // 1/sqrt(2*PI); | 
| Dan Albert | 36802bd | 2014-11-20 11:31:17 -0800 | [diff] [blame^] | 207 | static const CONSTEXPR double value = 0.398942280401432677939946059934381868475858631164934657665925; | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 208 | }; | 
|  | 209 |  | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 210 | #if USE_HORNERS_METHOD | 
|  | 211 | /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... | 
|  | 212 | * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method | 
|  | 213 | * | 
|  | 214 | * This has fewer multiplications than Estrin's method below, but has back to back | 
|  | 215 | * floating point dependencies. | 
|  | 216 | * | 
|  | 217 | * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled. | 
|  | 218 | */ | 
|  | 219 |  | 
|  | 220 | inline double Poly2(double A, double B, double x) { | 
|  | 221 | return A + x * B; | 
|  | 222 | } | 
|  | 223 |  | 
|  | 224 | inline double Poly4(double A, double B, double C, double D, double x) { | 
|  | 225 | return A + x * (B + x * (C + x * (D))); | 
|  | 226 | } | 
|  | 227 |  | 
|  | 228 | inline double Poly7(double A, double B, double C, double D, double E, double F, double G, | 
|  | 229 | double x) { | 
|  | 230 | return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G)))))); | 
|  | 231 | } | 
|  | 232 |  | 
|  | 233 | inline double Poly9(double A, double B, double C, double D, double E, double F, double G, | 
|  | 234 | double H, double I, double x) { | 
|  | 235 | return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I)))))))); | 
|  | 236 | } | 
|  | 237 |  | 
|  | 238 | #else | 
|  | 239 | /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... | 
|  | 240 | * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme | 
|  | 241 | * | 
|  | 242 | * This is typically faster, perhaps gains about 5-10% overall on ARM processors | 
|  | 243 | * over Horner's method above. | 
|  | 244 | */ | 
|  | 245 |  | 
|  | 246 | inline double Poly2(double A, double B, double x) { | 
|  | 247 | return A + B * x; | 
|  | 248 | } | 
|  | 249 |  | 
|  | 250 | inline double Poly3(double A, double B, double C, double x, double x2) { | 
|  | 251 | return Poly2(A, B, x) + C * x2; | 
|  | 252 | } | 
|  | 253 |  | 
|  | 254 | inline double Poly3(double A, double B, double C, double x) { | 
|  | 255 | return Poly2(A, B, x) + C * x * x; | 
|  | 256 | } | 
|  | 257 |  | 
|  | 258 | inline double Poly4(double A, double B, double C, double D, double x, double x2) { | 
|  | 259 | return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2); | 
|  | 260 | } | 
|  | 261 |  | 
|  | 262 | inline double Poly4(double A, double B, double C, double D, double x) { | 
|  | 263 | return Poly4(A, B, C, D, x, x * x); | 
|  | 264 | } | 
|  | 265 |  | 
|  | 266 | inline double Poly7(double A, double B, double C, double D, double E, double F, double G, | 
|  | 267 | double x) { | 
|  | 268 | double x2 = x * x; | 
|  | 269 | return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2); | 
|  | 270 | } | 
|  | 271 |  | 
|  | 272 | inline double Poly8(double A, double B, double C, double D, double E, double F, double G, | 
|  | 273 | double H, double x, double x2, double x4) { | 
|  | 274 | return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4; | 
|  | 275 | } | 
|  | 276 |  | 
|  | 277 | inline double Poly9(double A, double B, double C, double D, double E, double F, double G, | 
|  | 278 | double H, double I, double x) { | 
|  | 279 | double x2 = x * x; | 
|  | 280 | #if 1 | 
|  | 281 | // It does not seem faster to explicitly decompose Poly8 into Poly4, but | 
|  | 282 | // could depend on compiler floating point scheduling. | 
|  | 283 | double x4 = x2 * x2; | 
|  | 284 | return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4); | 
|  | 285 | #else | 
|  | 286 | double val = Poly4(A, B, C, D, x, x2); | 
|  | 287 | double x4 = x2 * x2; | 
|  | 288 | return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4); | 
|  | 289 | #endif | 
|  | 290 | } | 
|  | 291 | #endif | 
|  | 292 |  | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 293 | static inline double I0(double x) { | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 294 | if (x < 3.75) { | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 295 | x *= x; | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 296 | return Poly7(I0Term<0>::value, I0Term<1>::value, | 
|  | 297 | I0Term<2>::value, I0Term<3>::value, | 
|  | 298 | I0Term<4>::value, I0Term<5>::value, | 
|  | 299 | I0Term<6>::value, x); // e < 1.6e-7 | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 300 | } | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 301 | if (1) { | 
|  | 302 | /* | 
|  | 303 | * Series expansion coefs are easy to calculate, but are expanded around 0, | 
|  | 304 | * so error is unequal over the interval 0 < x < 3.75, the error being | 
|  | 305 | * significantly better near 0. | 
|  | 306 | * | 
|  | 307 | * A better solution is to use precise minimax polynomial fits. | 
|  | 308 | * | 
|  | 309 | * We use a slightly more complicated solution for 3.75 < x < 15, based on | 
|  | 310 | * the tables in Blair and Edwards, "Stable Rational Minimax Approximations | 
|  | 311 | * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory, | 
|  | 312 | * AECL-4928. | 
|  | 313 | * | 
|  | 314 | * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf | 
|  | 315 | * | 
|  | 316 | * See Table 11 for 0 < x < 15; e < 10^(-7.13). | 
|  | 317 | * | 
|  | 318 | * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b). | 
|  | 319 | * | 
|  | 320 | * This speeds up overall computation by about 40% over using the else clause below, | 
|  | 321 | * which requires sqrt and exp. | 
|  | 322 | * | 
|  | 323 | */ | 
|  | 324 |  | 
|  | 325 | x *= x; | 
|  | 326 | double num = Poly9(-0.13544938430e9, -0.33153754512e8, | 
|  | 327 | -0.19406631946e7, -0.48058318783e5, | 
|  | 328 | -0.63269783360e3, -0.49520779070e1, | 
|  | 329 | -0.24970910370e-1, -0.74741159550e-4, | 
|  | 330 | -0.18257612460e-6, x); | 
|  | 331 | double y = x - 225.; // reflection around 15 (squared) | 
|  | 332 | double den = Poly4(-0.34598737196e8, 0.23852643181e6, | 
|  | 333 | -0.70699387620e3, 0.10000000000e1, y); | 
|  | 334 | return num / den; | 
|  | 335 |  | 
|  | 336 | #if IO_EXTENDED_BETA | 
|  | 337 | /* Table 42 for x > 15; e < 10^(-8.11). | 
|  | 338 | * This is used for Beta>15, but is disabled here as | 
|  | 339 | * we never use Beta that high. | 
|  | 340 | * | 
|  | 341 | * NOTE: This should be enabled only for x > 15. | 
|  | 342 | */ | 
|  | 343 |  | 
|  | 344 | double y = 1./x; | 
|  | 345 | double z = y - (1./15); | 
|  | 346 | double num = Poly2(0.415079861746e1, -0.5149092496e1, z); | 
|  | 347 | double den = Poly3(0.103150763823e2, -0.14181687413e2, | 
|  | 348 | 0.1000000000e1, z); | 
|  | 349 | return exp(x) * sqrt(y) * num / den; | 
|  | 350 | #endif | 
|  | 351 | } else { | 
|  | 352 | /* | 
|  | 353 | * NOT USED, but reference for large Beta. | 
|  | 354 | * | 
|  | 355 | * Abramowitz and Stegun asymptotic formula. | 
|  | 356 | * works for x > 3.75. | 
|  | 357 | */ | 
|  | 358 | double y = 1./x; | 
|  | 359 | return exp(x) * sqrt(y) * | 
|  | 360 | // note: reciprocal squareroot may be easier! | 
|  | 361 | // http://en.wikipedia.org/wiki/Fast_inverse_square_root | 
|  | 362 | Poly9(I0ATerm<0>::value, I0ATerm<1>::value, | 
|  | 363 | I0ATerm<2>::value, I0ATerm<3>::value, | 
|  | 364 | I0ATerm<4>::value, I0ATerm<5>::value, | 
|  | 365 | I0ATerm<6>::value, I0ATerm<7>::value, | 
|  | 366 | I0ATerm<8>::value, y); // (... e) < 1.9e-7 | 
|  | 367 | } | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 368 | } | 
|  | 369 |  | 
| Andy Hung | bafa561 | 2014-04-02 13:52:10 -0700 | [diff] [blame] | 370 | /* A speed optimized version of the Modified Bessel I0() which incorporates | 
|  | 371 | * the sqrt and numerator multiply and denominator divide into the computation. | 
|  | 372 | * This speeds up filter computation by about 10-15%. | 
|  | 373 | */ | 
|  | 374 | static inline double I0SqrRat(double x2, double num, double den) { | 
|  | 375 | if (x2 < (3.75 * 3.75)) { | 
|  | 376 | return Poly7(I0Term<0>::value, I0Term<1>::value, | 
|  | 377 | I0Term<2>::value, I0Term<3>::value, | 
|  | 378 | I0Term<4>::value, I0Term<5>::value, | 
|  | 379 | I0Term<6>::value, x2) * num / den; // e < 1.6e-7 | 
|  | 380 | } | 
|  | 381 | num *= Poly9(-0.13544938430e9, -0.33153754512e8, | 
|  | 382 | -0.19406631946e7, -0.48058318783e5, | 
|  | 383 | -0.63269783360e3, -0.49520779070e1, | 
|  | 384 | -0.24970910370e-1, -0.74741159550e-4, | 
|  | 385 | -0.18257612460e-6, x2); // e < 10^(-7.13). | 
|  | 386 | double y = x2 - 225.; // reflection around 15 (squared) | 
|  | 387 | den *= Poly4(-0.34598737196e8, 0.23852643181e6, | 
|  | 388 | -0.70699387620e3, 0.10000000000e1, y); | 
|  | 389 | return num / den; | 
|  | 390 | } | 
|  | 391 |  | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 392 | /* | 
|  | 393 | * calculates the transition bandwidth for a Kaiser filter | 
|  | 394 | * | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 395 | * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 | 
|  | 396 | * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 397 | * | 
|  | 398 | * @param halfNumCoef is half the number of coefficients per filter phase. | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 399 | * | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 400 | * @param stopBandAtten is the stop band attenuation desired. | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 401 | * | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 402 | * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5) | 
|  | 403 | */ | 
|  | 404 | static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) { | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 405 | return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef); | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 406 | } | 
|  | 407 |  | 
|  | 408 | /* | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 409 | * calculates the fir transfer response of the overall polyphase filter at w. | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 410 | * | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 411 | * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the | 
|  | 412 | * fact that h[n] is symmetric (cosines only, no complex arithmetic). | 
|  | 413 | * | 
|  | 414 | * We use Goertzel's algorithm to accelerate the computation to essentially | 
|  | 415 | * a single multiply and 2 adds per filter coefficient h[]. | 
|  | 416 | * | 
|  | 417 | * Be careful be careful to consider that h[n] is the overall polyphase filter, | 
|  | 418 | * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain", | 
|  | 419 | * as you only use one of the polyphases at a time. | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 420 | */ | 
|  | 421 | template <typename T> | 
|  | 422 | static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) { | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 423 | double accum = static_cast<double>(coef[0])*0.5;  // "center coefficient" from first bank | 
|  | 424 | coef += halfNumCoef;    // skip first filterbank (picked up by the last filterbank). | 
|  | 425 | #if SLOW_FIRTRANSFER | 
|  | 426 | /* Original code for reference.  This is equivalent to the code below, but slower. */ | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 427 | for (int i=1 ; i<=L ; ++i) { | 
|  | 428 | for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { | 
|  | 429 | accum += cos(ix*w)*static_cast<double>(*coef++); | 
|  | 430 | } | 
|  | 431 | } | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 432 | #else | 
|  | 433 | /* | 
|  | 434 | * Our overall filter is stored striped by polyphases, not a contiguous h[n]. | 
|  | 435 | * We could fetch coefficients in a non-contiguous fashion | 
|  | 436 | * but that will not scale to vector processing. | 
|  | 437 | * | 
|  | 438 | * We apply Goertzel's algorithm directly to each polyphase filter bank instead of | 
|  | 439 | * using cosine generation/multiplication, thereby saving one multiply per inner loop. | 
|  | 440 | * | 
|  | 441 | * See: http://en.wikipedia.org/wiki/Goertzel_algorithm | 
|  | 442 | * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720. | 
|  | 443 | * | 
|  | 444 | * We use the basic recursion to incorporate the cosine steps into real sequence x[n]: | 
|  | 445 | * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2] | 
|  | 446 | * | 
|  | 447 | * y[n] = s[n] - e^(iw)s[n-1] | 
|  | 448 | *      = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k)) | 
|  | 449 | *      = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk) | 
|  | 450 | * | 
|  | 451 | * The summation contains the frequency steps we want multiplied by the source | 
|  | 452 | * (similar to a DTFT). | 
|  | 453 | * | 
|  | 454 | * Using symmetry, and just the real part (be careful, this must happen | 
|  | 455 | * after any internal complex multiplications), the polyphase filterbank | 
|  | 456 | * transfer function is: | 
|  | 457 | * | 
|  | 458 | * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0) | 
|  | 459 | *                = Re{ e^(iwn + iw_0) y[n]} | 
|  | 460 | *                = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1] | 
|  | 461 | * | 
|  | 462 | * using the fact that s[n] of real x[n] is real. | 
|  | 463 | * | 
|  | 464 | */ | 
|  | 465 | double dcos = 2. * cos(L*w); | 
|  | 466 | int start = ((halfNumCoef)*L + 1); | 
|  | 467 | SineGen cc((start - L) * w, w, true); // cosine | 
|  | 468 | SineGen cp(start * w, w, true); // cosine | 
|  | 469 | for (int i=1 ; i<=L ; ++i) { | 
|  | 470 | double sc = 0; | 
|  | 471 | double sp = 0; | 
|  | 472 | for (int j=0 ; j<halfNumCoef ; ++j) { | 
|  | 473 | double tmp = sc; | 
|  | 474 | sc  = static_cast<double>(*coef++) + dcos*sc - sp; | 
|  | 475 | sp = tmp; | 
|  | 476 | } | 
|  | 477 | // If we are awfully clever, we can apply Goertzel's algorithm | 
|  | 478 | // again on the sc and sp sequences returned here. | 
|  | 479 | accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp; | 
|  | 480 | } | 
|  | 481 | #endif | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 482 | return accum*2.; | 
|  | 483 | } | 
|  | 484 |  | 
|  | 485 | /* | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 486 | * evaluates the minimum and maximum |H(f)| bound in a band region. | 
|  | 487 | * | 
|  | 488 | * This is usually done with equally spaced increments in the target band in question. | 
|  | 489 | * The passband is often very small, and sampled that way. The stopband is often much | 
|  | 490 | * larger. | 
|  | 491 | * | 
|  | 492 | * We use the fact that the overall polyphase filter has an additional bank at the end | 
|  | 493 | * for interpolation; hence it is overspecified for the H(f) computation.  Thus the | 
|  | 494 | * first polyphase is never actually checked, excepting its first term. | 
|  | 495 | * | 
|  | 496 | * In this code we use the firTransfer() evaluator above, which uses Goertzel's | 
|  | 497 | * algorithm to calculate the transfer function at each point. | 
|  | 498 | * | 
|  | 499 | * TODO: An alternative with equal spacing is the FFT/DFT.  An alternative with unequal | 
|  | 500 | * spacing is a chirp transform. | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 501 | * | 
|  | 502 | * @param coef is the designed polyphase filter banks | 
|  | 503 | * | 
|  | 504 | * @param L is the number of phases (for interpolation) | 
|  | 505 | * | 
|  | 506 | * @param halfNumCoef should be half the number of coefficients for a single | 
|  | 507 | * polyphase. | 
|  | 508 | * | 
|  | 509 | * @param fstart is the normalized frequency start. | 
|  | 510 | * | 
|  | 511 | * @param fend is the normalized frequency end. | 
|  | 512 | * | 
|  | 513 | * @param steps is the number of steps to take (sampling) between frequency start and end | 
|  | 514 | * | 
|  | 515 | * @param firMin returns the minimum transfer |H(f)| found | 
|  | 516 | * | 
|  | 517 | * @param firMax returns the maximum transfer |H(f)| found | 
|  | 518 | * | 
|  | 519 | * 0 <= f <= 0.5. | 
|  | 520 | * This is used to test passband and stopband performance. | 
|  | 521 | */ | 
|  | 522 | template <typename T> | 
|  | 523 | static void testFir(const T* coef, int L, int halfNumCoef, | 
|  | 524 | double fstart, double fend, int steps, double &firMin, double &firMax) { | 
|  | 525 | double wstart = fstart*(2.*M_PI); | 
|  | 526 | double wend = fend*(2.*M_PI); | 
|  | 527 | double wstep = (wend - wstart)/steps; | 
|  | 528 | double fmax, fmin; | 
|  | 529 | double trf = firTransfer(coef, L, halfNumCoef, wstart); | 
|  | 530 | if (trf<0) { | 
|  | 531 | trf = -trf; | 
|  | 532 | } | 
|  | 533 | fmin = fmax = trf; | 
|  | 534 | wstart += wstep; | 
|  | 535 | for (int i=1; i<steps; ++i) { | 
|  | 536 | trf = firTransfer(coef, L, halfNumCoef, wstart); | 
|  | 537 | if (trf<0) { | 
|  | 538 | trf = -trf; | 
|  | 539 | } | 
|  | 540 | if (trf>fmax) { | 
|  | 541 | fmax = trf; | 
|  | 542 | } | 
|  | 543 | else if (trf<fmin) { | 
|  | 544 | fmin = trf; | 
|  | 545 | } | 
|  | 546 | wstart += wstep; | 
|  | 547 | } | 
|  | 548 | // renormalize - this is only needed for integer filter types | 
|  | 549 | double norm = 1./((1ULL<<(sizeof(T)*8-1))*L); | 
|  | 550 |  | 
|  | 551 | firMin = fmin * norm; | 
|  | 552 | firMax = fmax * norm; | 
|  | 553 | } | 
|  | 554 |  | 
|  | 555 | /* | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 556 | * evaluates the |H(f)| lowpass band characteristics. | 
|  | 557 | * | 
|  | 558 | * This function tests the lowpass characteristics for the overall polyphase filter, | 
|  | 559 | * and is used to verify the design.  For this case, fp should be set to the | 
|  | 560 | * passband normalized frequency from 0 to 0.5 for the overall filter (thus it | 
|  | 561 | * is the designed polyphase bank value / L).  Likewise for fs. | 
|  | 562 | * | 
|  | 563 | * @param coef is the designed polyphase filter banks | 
|  | 564 | * | 
|  | 565 | * @param L is the number of phases (for interpolation) | 
|  | 566 | * | 
|  | 567 | * @param halfNumCoef should be half the number of coefficients for a single | 
|  | 568 | * polyphase. | 
|  | 569 | * | 
|  | 570 | * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5. | 
|  | 571 | * | 
|  | 572 | * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5. | 
|  | 573 | * | 
|  | 574 | * @param passSteps is the number of passband sampling steps. | 
|  | 575 | * | 
|  | 576 | * @param stopSteps is the number of stopband sampling steps. | 
|  | 577 | * | 
|  | 578 | * @param passMin is the minimum value in the passband | 
|  | 579 | * | 
|  | 580 | * @param passMax is the maximum value in the passband (useful for scaling).  This should | 
|  | 581 | * be less than 1., to avoid sine wave test overflow. | 
|  | 582 | * | 
|  | 583 | * @param passRipple is the passband ripple.  Typically this should be less than 0.1 for | 
|  | 584 | * an audio filter.  Generally speaker/headphone device characteristics will dominate | 
|  | 585 | * the passband term. | 
|  | 586 | * | 
|  | 587 | * @param stopMax is the maximum value in the stopband. | 
|  | 588 | * | 
|  | 589 | * @param stopRipple is the stopband ripple, also known as stopband attenuation. | 
|  | 590 | * Typically this should be greater than ~80dB for low quality, and greater than | 
|  | 591 | * ~100dB for full 16b quality, otherwise aliasing may become noticeable. | 
|  | 592 | * | 
|  | 593 | */ | 
|  | 594 | template <typename T> | 
|  | 595 | static void testFir(const T* coef, int L, int halfNumCoef, | 
|  | 596 | double fp, double fs, int passSteps, int stopSteps, | 
|  | 597 | double &passMin, double &passMax, double &passRipple, | 
|  | 598 | double &stopMax, double &stopRipple) { | 
|  | 599 | double fmin, fmax; | 
|  | 600 | testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax); | 
|  | 601 | double d1 = (fmax - fmin)/2.; | 
|  | 602 | passMin = fmin; | 
|  | 603 | passMax = fmax; | 
|  | 604 | passRipple = -20.*log10(1. - d1); // passband ripple | 
|  | 605 | testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax); | 
|  | 606 | // fmin is really not important for the stopband. | 
|  | 607 | stopMax = fmax; | 
|  | 608 | stopRipple = -20.*log10(fmax); // stopband ripple/attenuation | 
|  | 609 | } | 
|  | 610 |  | 
|  | 611 | /* | 
|  | 612 | * Calculates the overall polyphase filter based on a windowed sinc function. | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 613 | * | 
|  | 614 | * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1 | 
|  | 615 | * taps for the entire kernel.  This is then decomposed into L+1 polyphase filterbanks. | 
|  | 616 | * The last filterbank is used for interpolation purposes (and is mostly composed | 
|  | 617 | * of the first bank shifted by one sample), and is unnecessary if one does | 
|  | 618 | * not do interpolation. | 
|  | 619 | * | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 620 | * We use the last filterbank for some transfer function calculation purposes, | 
|  | 621 | * so it needs to be generated anyways. | 
|  | 622 | * | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 623 | * @param coef is the caller allocated space for coefficients.  This should be | 
|  | 624 | * exactly (L+1)*halfNumCoef in size. | 
|  | 625 | * | 
|  | 626 | * @param L is the number of phases (for interpolation) | 
|  | 627 | * | 
|  | 628 | * @param halfNumCoef should be half the number of coefficients for a single | 
|  | 629 | * polyphase. | 
|  | 630 | * | 
|  | 631 | * @param stopBandAtten is the stopband value, should be >50dB. | 
|  | 632 | * | 
|  | 633 | * @param fcr is cutoff frequency/sampling rate (<0.5).  At this point, the energy | 
|  | 634 | * should be 6dB less. (fcr is where the amplitude drops by half).  Use the | 
|  | 635 | * firKaiserTbw() to calculate the transition bandwidth.  fcr is the midpoint | 
|  | 636 | * between the stop band and the pass band (fstop+fpass)/2. | 
|  | 637 | * | 
|  | 638 | * @param atten is the attenuation (generally slightly less than 1). | 
|  | 639 | */ | 
|  | 640 |  | 
|  | 641 | template <typename T> | 
|  | 642 | static inline void firKaiserGen(T* coef, int L, int halfNumCoef, | 
|  | 643 | double stopBandAtten, double fcr, double atten) { | 
|  | 644 | // | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 645 | // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 | 
|  | 646 | // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 647 | // | 
|  | 648 | // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf | 
|  | 649 | // | 
|  | 650 | // Kaiser window and beta parameter | 
|  | 651 | // | 
|  | 652 | //         | 0.1102*(A - 8.7)                         A > 50 | 
|  | 653 | //  beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21)   21 <= A <= 50 | 
|  | 654 | //         | 0.                                       A < 21 | 
|  | 655 | // | 
|  | 656 | // with A is the desired stop-band attenuation in dBFS | 
|  | 657 | // | 
|  | 658 | //    30 dB    2.210 | 
|  | 659 | //    40 dB    3.384 | 
|  | 660 | //    50 dB    4.538 | 
|  | 661 | //    60 dB    5.658 | 
|  | 662 | //    70 dB    6.764 | 
|  | 663 | //    80 dB    7.865 | 
|  | 664 | //    90 dB    8.960 | 
|  | 665 | //   100 dB   10.056 | 
|  | 666 |  | 
|  | 667 | const int N = L * halfNumCoef; // non-negative half | 
|  | 668 | const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 669 | const double xstep = (2. * M_PI) * fcr / L; | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 670 | const double xfrac = 1. / N; | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 671 | const double yscale = atten * L / (I0(beta) * M_PI); | 
| Andy Hung | bafa561 | 2014-04-02 13:52:10 -0700 | [diff] [blame] | 672 | const double sqrbeta = sqr(beta); | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 673 |  | 
|  | 674 | // We use sine generators, which computes sines on regular step intervals. | 
|  | 675 | // This speeds up overall computation about 40% from computing the sine directly. | 
|  | 676 |  | 
|  | 677 | SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase) | 
|  | 678 |  | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 679 | for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 680 |  | 
|  | 681 | // computation for a single polyphase of the overall filter. | 
|  | 682 | SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop. | 
|  | 683 | double err = 0; // for noise shaping on int16_t coefficients (over each polyphase) | 
|  | 684 |  | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 685 | for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 686 | double y; | 
|  | 687 | if (CC_LIKELY(ix)) { | 
|  | 688 | double x = static_cast<double>(ix); | 
|  | 689 |  | 
|  | 690 | // sine generator: sg.valueAdvance() returns sin(ix*xstep); | 
| Andy Hung | bafa561 | 2014-04-02 13:52:10 -0700 | [diff] [blame] | 691 | // y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x; | 
|  | 692 | y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x); | 
| Andy Hung | 6582f2b | 2014-01-03 12:30:41 -0800 | [diff] [blame] | 693 | } else { | 
|  | 694 | y = 2. * atten * fcr; // center of filter, sinc(0) = 1. | 
|  | 695 | sg.advance(); | 
|  | 696 | } | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 697 |  | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 698 | if (is_same<T, int16_t>::value) { // int16_t needs noise shaping | 
|  | 699 | *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err)); | 
| Andy Hung | 430b61c | 2014-04-08 18:10:02 -0700 | [diff] [blame] | 700 | } else if (is_same<T, int32_t>::value) { | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 701 | *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1))); | 
| Andy Hung | 430b61c | 2014-04-08 18:10:02 -0700 | [diff] [blame] | 702 | } else { // assumed float or double | 
|  | 703 | *coef++ = static_cast<T>(y); | 
| Andy Hung | 86eae0e | 2013-12-09 12:12:46 -0800 | [diff] [blame] | 704 | } | 
|  | 705 | } | 
|  | 706 | } | 
|  | 707 | } | 
|  | 708 |  | 
|  | 709 | }; // namespace android | 
|  | 710 |  | 
|  | 711 | #endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/ |