Improve resampler filter computation speed

Approximate speed improvement is 10-15% for filter computation, which is floating point intensive. This will be important for devices without hw floating point support.

Change-Id: I10b4e778c8d632b52218a777504b092c189e437f
Signed-off-by: Andy Hung <hunga@google.com>

commit: bafa561d0c9363c5307b6b1daa498bd3ae36089d [log] [tgz]
author: Andy Hung <hunga@google.com> Wed Apr 02 13:52:10 2014 -0700
committer: Andy Hung <hunga@google.com> Wed Apr 02 13:52:10 2014 -0700
tree: 725ae0e8b8c8bc532c4c23c2b4a133aec35a9410
parent: 2ade30003882f0c3782d68018d06cb81b99dfeb2 [diff] [blame]
diff --git a/services/audioflinger/AudioResamplerFirGen.h b/services/audioflinger/AudioResamplerFirGen.h
index fac3001..1f21c60 100644
--- a/services/audioflinger/AudioResamplerFirGen.h
+++ b/services/audioflinger/AudioResamplerFirGen.h

@@ -365,6 +365,28 @@
     }
 }
 
+/* A speed optimized version of the Modified Bessel I0(). It takes the squared argument x2, so
+ * the caller skips the sqrt, and returns I0(sqrt(x2)) * num / den with the numerator multiply
+ * and denominator divide folded in. This speeds up filter computation by about 10-15%.
+ */
+static inline double I0SqrRat(double x2, double num, double den) {
+    if (x2 < (3.75 * 3.75)) { // i.e. sqrt(x2) < 3.75: polynomial-only branch
+        return Poly7(I0Term<0>::value, I0Term<1>::value,
+                I0Term<2>::value, I0Term<3>::value,
+                I0Term<4>::value, I0Term<5>::value,
+                I0Term<6>::value, x2) * num / den; // e < 1.6e-7
+    }
+    num *= Poly9(-0.13544938430e9, -0.33153754512e8,
+            -0.19406631946e7, -0.48058318783e5,
+            -0.63269783360e3, -0.49520779070e1,
+            -0.24970910370e-1, -0.74741159550e-4,
+            -0.18257612460e-6, x2); // e < 10^(-7.13).
+    double y = x2 - 225.; // reflection around 15 (squared)
+    den *= Poly4(-0.34598737196e8, 0.23852643181e6,
+            -0.70699387620e3, 0.10000000000e1, y);
+    return num / den; // single divide covers both the rational approximation and the caller's den
+}
+
 /*
  * calculates the transition bandwidth for a Kaiser filter
  *
@@ -645,6 +667,7 @@
     const double xstep = (2. * M_PI) * fcr / L;
     const double xfrac = 1. / N;
     const double yscale = atten * L / (I0(beta) * M_PI);
+    const double sqrbeta = sqr(beta);
 
     // We use sine generators, which computes sines on regular step intervals.
     // This speeds up overall computation about 40% from computing the sine directly.
@@ -663,7 +686,8 @@
                 double x = static_cast<double>(ix);
 
                 // sine generator: sg.valueAdvance() returns sin(ix*xstep);
-                y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
+                // y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
+                y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x);
             } else {
                 y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
                 sg.advance();
commit	bafa561d0c9363c5307b6b1daa498bd3ae36089d	[log] [tgz]
author	Andy Hung <hunga@google.com>	Wed Apr 02 13:52:10 2014 -0700
committer	Andy Hung <hunga@google.com>	Wed Apr 02 13:52:10 2014 -0700
tree	725ae0e8b8c8bc532c4c23c2b4a133aec35a9410
parent	2ade30003882f0c3782d68018d06cb81b99dfeb2 [diff] [blame]