refactor code to improve neon code

we want to make sure we don't transfer data from the
neon unit to the arm register file, as this can be quite
slow. instead we do all the calculation on the neon side
and write the result directly to main memory.

Change-Id: Ibb56664d3ab03098ae2798b75e2b6927ac900187
diff --git a/services/audioflinger/AudioResamplerSinc.h b/services/audioflinger/AudioResamplerSinc.h
index 1b14019..96c31ee 100644
--- a/services/audioflinger/AudioResamplerSinc.h
+++ b/services/audioflinger/AudioResamplerSinc.h
@@ -44,13 +44,15 @@
 private:
     void init();
 
+    virtual void setVolume(int16_t left, int16_t right);
+
     template<int CHANNELS>
     void resample(int32_t* out, size_t outFrameCount,
             AudioBufferProvider* provider);
 
     template<int CHANNELS>
     inline void filterCoefficient(
-            int32_t& l, int32_t& r, uint32_t phase, const int16_t *samples, uint32_t vRL);
+            int32_t* out, uint32_t phase, const int16_t *samples, uint32_t vRL);
 
     template<int CHANNELS>
     inline void interpolate(
@@ -65,6 +67,7 @@
     int16_t *mState;
     int16_t *mImpulse;
     int16_t *mRingFull;
+    int32_t mVolumeSIMD[2];
 
     const int32_t * mFirCoefs;
     static const int32_t mFirCoefsDown[];