Merge "av: native_window_set_buffers_geometry is deprecated."
diff --git a/include/media/AudioRecord.h b/include/media/AudioRecord.h
index 6a68c94..f9c7efd 100644
--- a/include/media/AudioRecord.h
+++ b/include/media/AudioRecord.h
@@ -240,6 +240,11 @@
      */
             uint32_t    getSampleRate() const   { return mSampleRate; }
 
+    /* Return the notification frame count (the current value of mNotificationFramesAct).
+     * This is approximately how often the callback is invoked, for transfer type TRANSFER_CALLBACK.
+     */
+            size_t      notificationFrames() const  { return mNotificationFramesAct; }
+
     /* Sets marker position. When record reaches the number of frames specified,
      * a callback with event type EVENT_MARKER is called. Calling setMarkerPosition
      * with marker == 0 cancels marker notification callback.
diff --git a/include/media/AudioSystem.h b/include/media/AudioSystem.h
index c89ceaa..e1aab41 100644
--- a/include/media/AudioSystem.h
+++ b/include/media/AudioSystem.h
@@ -108,7 +108,7 @@
     static status_t getSamplingRate(audio_io_handle_t output,
                                           uint32_t* samplingRate);
     // returns the number of frames per audio HAL write buffer. Corresponds to
-    // audio_stream->get_buffer_size()/audio_stream_frame_size()
+    // audio_stream->get_buffer_size()/audio_stream_out_frame_size()
     static status_t getFrameCount(audio_io_handle_t output,
                                   size_t* frameCount);
     // returns the audio output stream latency in ms. Corresponds to
diff --git a/include/media/IAudioFlinger.h b/include/media/IAudioFlinger.h
index fc8be20..a8f4605 100644
--- a/include/media/IAudioFlinger.h
+++ b/include/media/IAudioFlinger.h
@@ -89,6 +89,7 @@
                                 track_flags_t *flags,
                                 pid_t tid,  // -1 means unused, otherwise must be valid non-0
                                 int *sessionId,
+                                size_t *notificationFrames,
                                 sp<IMemory>& cblk,
                                 sp<IMemory>& buffers,   // return value 0 means it follows cblk
                                 status_t *status) = 0;
@@ -97,9 +98,6 @@
      * and therefore can be cached.
      */
     virtual     uint32_t    sampleRate(audio_io_handle_t output) const = 0;
-#if 0
-    virtual     int         channelCount(audio_io_handle_t output) const = 0;
-#endif
     virtual     audio_format_t format(audio_io_handle_t output) const = 0;
     virtual     size_t      frameCount(audio_io_handle_t output) const = 0;
 
diff --git a/media/libmedia/AudioRecord.cpp b/media/libmedia/AudioRecord.cpp
index f865d38..3ee5809 100644
--- a/media/libmedia/AudioRecord.cpp
+++ b/media/libmedia/AudioRecord.cpp
@@ -495,6 +495,10 @@
     size_t temp = frameCount;   // temp may be replaced by a revised value of frameCount,
                                 // but we will still need the original value also
     int originalSessionId = mSessionId;
+
+    // The notification frame count is the period between callbacks, as suggested by the server.
+    size_t notificationFrames;
+
     sp<IMemory> iMem;           // for cblk
     sp<IMemory> bufferMem;
     sp<IAudioRecord> record = audioFlinger->openRecord(input,
@@ -504,6 +508,7 @@
                                                        &trackFlags,
                                                        tid,
                                                        &mSessionId,
+                                                       &notificationFrames,
                                                        iMem,
                                                        bufferMem,
                                                        &status);
diff --git a/media/libmedia/IAudioFlinger.cpp b/media/libmedia/IAudioFlinger.cpp
index 687fa76..5cf42f7 100644
--- a/media/libmedia/IAudioFlinger.cpp
+++ b/media/libmedia/IAudioFlinger.cpp
@@ -175,6 +175,7 @@
                                 track_flags_t *flags,
                                 pid_t tid,
                                 int *sessionId,
+                                size_t *notificationFrames,
                                 sp<IMemory>& cblk,
                                 sp<IMemory>& buffers,
                                 status_t *status)
@@ -214,6 +215,10 @@
             if (sessionId != NULL) {
                 *sessionId = lSessionId;
             }
+            size_t lNotificationFrames = (size_t) reply.readInt64();
+            if (notificationFrames != NULL) {
+                *notificationFrames = lNotificationFrames;
+            }
             lStatus = reply.readInt32();
             record = interface_cast<IAudioRecord>(reply.readStrongBinder());
             cblk = interface_cast<IMemory>(reply.readStrongBinder());
@@ -959,16 +964,19 @@
             track_flags_t flags = (track_flags_t) data.readInt32();
             pid_t tid = (pid_t) data.readInt32();
             int sessionId = data.readInt32();
+            size_t notificationFrames = 0;
             sp<IMemory> cblk;
             sp<IMemory> buffers;
             status_t status;
             sp<IAudioRecord> record = openRecord(input,
                     sampleRate, format, channelMask, &frameCount, &flags, tid, &sessionId,
+                    &notificationFrames,
                     cblk, buffers, &status);
             LOG_ALWAYS_FATAL_IF((record != 0) != (status == NO_ERROR));
             reply->writeInt64(frameCount);
             reply->writeInt32(flags);
             reply->writeInt32(sessionId);
+            reply->writeInt64(notificationFrames);
             reply->writeInt32(status);
             reply->writeStrongBinder(record->asBinder());
             reply->writeStrongBinder(cblk->asBinder());
diff --git a/services/audioflinger/AudioFlinger.cpp b/services/audioflinger/AudioFlinger.cpp
index 11a01cc..2124f85 100644
--- a/services/audioflinger/AudioFlinger.cpp
+++ b/services/audioflinger/AudioFlinger.cpp
@@ -668,17 +668,6 @@
     return thread->sampleRate();
 }
 
-int AudioFlinger::channelCount(audio_io_handle_t output) const
-{
-    Mutex::Autolock _l(mLock);
-    PlaybackThread *thread = checkPlaybackThread_l(output);
-    if (thread == NULL) {
-        ALOGW("channelCount() unknown thread %d", output);
-        return 0;
-    }
-    return thread->channelCount();
-}
-
 audio_format_t AudioFlinger::format(audio_io_handle_t output) const
 {
     Mutex::Autolock _l(mLock);
@@ -1344,6 +1333,7 @@
         IAudioFlinger::track_flags_t *flags,
         pid_t tid,
         int *sessionId,
+        size_t *notificationFrames,
         sp<IMemory>& cblk,
         sp<IMemory>& buffers,
         status_t *status)
@@ -1418,7 +1408,7 @@
 
         // TODO: the uid should be passed in as a parameter to openRecord
         recordTrack = thread->createRecordTrack_l(client, sampleRate, format, channelMask,
-                                                  frameCount, lSessionId,
+                                                  frameCount, lSessionId, notificationFrames,
                                                   IPCThreadState::self()->getCallingUid(),
                                                   flags, tid, &lStatus);
         LOG_ALWAYS_FATAL_IF((lStatus == NO_ERROR) && (recordTrack == 0));
diff --git a/services/audioflinger/AudioFlinger.h b/services/audioflinger/AudioFlinger.h
index ddc6afb..4df0921 100644
--- a/services/audioflinger/AudioFlinger.h
+++ b/services/audioflinger/AudioFlinger.h
@@ -120,12 +120,12 @@
                                 IAudioFlinger::track_flags_t *flags,
                                 pid_t tid,
                                 int *sessionId,
+                                size_t *notificationFrames,
                                 sp<IMemory>& cblk,
                                 sp<IMemory>& buffers,
                                 status_t *status /*non-NULL*/);
 
     virtual     uint32_t    sampleRate(audio_io_handle_t output) const;
-    virtual     int         channelCount(audio_io_handle_t output) const;
     virtual     audio_format_t format(audio_io_handle_t output) const;
     virtual     size_t      frameCount(audio_io_handle_t output) const;
     virtual     uint32_t    latency(audio_io_handle_t output) const;
diff --git a/services/audioflinger/AudioResampler.cpp b/services/audioflinger/AudioResampler.cpp
index 562c4ea..b8a0357 100644
--- a/services/audioflinger/AudioResampler.cpp
+++ b/services/audioflinger/AudioResampler.cpp
@@ -259,13 +259,14 @@
             mPhaseFraction(0), mLocalTimeFreq(0),
             mPTS(AudioBufferProvider::kInvalidPTS), mQuality(quality) {
     // sanity check on format
-    if ((bitDepth != 16) ||(inChannelCount < 1) || (inChannelCount > 2)) {
-        ALOGE("Unsupported sample format, %d bits, %d channels", bitDepth,
-                inChannelCount);
-        // ALOG_ASSERT(0);
+    if ((bitDepth != 16 && (quality < DYN_LOW_QUALITY || bitDepth != 32))
+            || inChannelCount < 1
+            || inChannelCount > (quality < DYN_LOW_QUALITY ? 2 : 8)) {
+        LOG_ALWAYS_FATAL("Unsupported sample format %d quality %d bits, %d channels",
+                quality, bitDepth, inChannelCount);
     }
     if (sampleRate <= 0) {
-        ALOGE("Unsupported sample rate %d Hz", sampleRate);
+        LOG_ALWAYS_FATAL("Unsupported sample rate %d Hz", sampleRate);
     }
 
     // initialize common members
diff --git a/services/audioflinger/AudioResamplerDyn.cpp b/services/audioflinger/AudioResamplerDyn.cpp
index 318eb57..7ca10c1 100644
--- a/services/audioflinger/AudioResamplerDyn.cpp
+++ b/services/audioflinger/AudioResamplerDyn.cpp
@@ -38,11 +38,6 @@
 
 namespace android {
 
-// generate a unique resample type compile-time constant (constexpr)
-#define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE) \
-    ((((CHANNELS)-1)&1) | !!(LOCKED)<<1 \
-    | ((STRIDE)==8 ? 1 : (STRIDE)==16 ? 2 : 0)<<2)
-
 /*
  * InBuffer is a type agnostic input buffer.
  *
@@ -403,12 +398,76 @@
     // determine which resampler to use
     // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits")
     int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0;
-    int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2;
     if (locked) {
         mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase
     }
 
-    setResampler(RESAMPLETYPE(mChannelCount, locked, stride));
+    // stride is the minimum number of filter coefficients processed per loop iteration.
+    // We currently only allow a stride of 16 to match with SIMD processing.
+    // This means that the filter length must be a multiple of 16,
+    // or half the filter length (mHalfNumCoefs) must be a multiple of 8.
+    //
+    // Note: A stride of 2 is achieved with non-SIMD processing.
+    int stride = ((c.mHalfNumCoefs & 7) == 0) ? 16 : 2;
+    LOG_ALWAYS_FATAL_IF(stride < 16, "Resampler stride must be 16 or more");
+    LOG_ALWAYS_FATAL_IF(mChannelCount > 8 || mChannelCount < 1,
+            "Resampler channels(%d) must be between 1 to 8", mChannelCount);
+    // stride 16 (falls back to stride 2 for machines that do not support NEON)
+    if (locked) {
+        switch (mChannelCount) {
+        case 1:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<1, true, 16>;
+            break;
+        case 2:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<2, true, 16>;
+            break;
+        case 3:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<3, true, 16>;
+            break;
+        case 4:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<4, true, 16>;
+            break;
+        case 5:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<5, true, 16>;
+            break;
+        case 6:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<6, true, 16>;
+            break;
+        case 7:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<7, true, 16>;
+            break;
+        case 8:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<8, true, 16>;
+            break;
+        }
+    } else {
+        switch (mChannelCount) {
+        case 1:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<1, false, 16>;
+            break;
+        case 2:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<2, false, 16>;
+            break;
+        case 3:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<3, false, 16>;
+            break;
+        case 4:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<4, false, 16>;
+            break;
+        case 5:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<5, false, 16>;
+            break;
+        case 6:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<6, false, 16>;
+            break;
+        case 7:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<7, false, 16>;
+            break;
+        case 8:
+            mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<8, false, 16>;
+            break;
+        }
+    }
 #ifdef DEBUG_RESAMPLER
     printf("channels:%d  %s  stride:%d  %s  coef:%d  shift:%d\n",
             mChannelCount, locked ? "locked" : "interpolated",
@@ -424,34 +483,12 @@
 }
 
 template<typename TC, typename TI, typename TO>
-void AudioResamplerDyn<TC, TI, TO>::setResampler(unsigned resampleType)
-{
-    // stride 16 (falls back to stride 2 for machines that do not support NEON)
-    switch (resampleType) {
-    case RESAMPLETYPE(1, true, 16):
-        mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<1, true, 16>;
-        return;
-    case RESAMPLETYPE(2, true, 16):
-        mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<2, true, 16>;
-        return;
-    case RESAMPLETYPE(1, false, 16):
-        mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<1, false, 16>;
-        return;
-    case RESAMPLETYPE(2, false, 16):
-        mResampleFunc = &AudioResamplerDyn<TC, TI, TO>::resample<2, false, 16>;
-        return;
-    default:
-        LOG_ALWAYS_FATAL("Invalid resampler type: %u", resampleType);
-        mResampleFunc = NULL;
-        return;
-    }
-}
-
-template<typename TC, typename TI, typename TO>
 template<int CHANNELS, bool LOCKED, int STRIDE>
 void AudioResamplerDyn<TC, TI, TO>::resample(TO* out, size_t outFrameCount,
         AudioBufferProvider* provider)
 {
+    // TODO Mono -> Mono is not supported. OUTPUT_CHANNELS reflects minimum of stereo out.
+    const int OUTPUT_CHANNELS = (CHANNELS < 2) ? 2 : CHANNELS;
     const Constants& c(mConstants);
     const TC* const coefs = mConstants.mFirCoefs;
     TI* impulse = mInBuffer.getImpulse();
@@ -459,7 +496,7 @@
     uint32_t phaseFraction = mPhaseFraction;
     const uint32_t phaseIncrement = mPhaseIncrement;
     size_t outputIndex = 0;
-    size_t outputSampleCount = outFrameCount * 2;   // stereo output
+    size_t outputSampleCount = outFrameCount * OUTPUT_CHANNELS;
     const uint32_t phaseWrapLimit = c.mL << c.mShift;
     size_t inFrameCount = (phaseIncrement * (uint64_t)outFrameCount + phaseFraction)
             / phaseWrapLimit;
@@ -490,7 +527,7 @@
         while (mBuffer.frameCount == 0 && inFrameCount > 0) {
             mBuffer.frameCount = inFrameCount;
             provider->getNextBuffer(&mBuffer,
-                    calculateOutputPTS(outputIndex / 2));
+                    calculateOutputPTS(outputIndex / OUTPUT_CHANNELS));
             if (mBuffer.raw == NULL) {
                 goto resample_exit;
             }
@@ -538,7 +575,8 @@
                     phaseFraction, phaseWrapLimit,
                     coefShift, halfNumCoefs, coefs,
                     impulse, volumeSimd);
-            outputIndex += 2;
+
+            outputIndex += OUTPUT_CHANNELS;
 
             phaseFraction += phaseIncrement;
             while (phaseFraction >= phaseWrapLimit) {
diff --git a/services/audioflinger/AudioResamplerDyn.h b/services/audioflinger/AudioResamplerDyn.h
index 8c56319..3dced8a 100644
--- a/services/audioflinger/AudioResamplerDyn.h
+++ b/services/audioflinger/AudioResamplerDyn.h
@@ -110,12 +110,10 @@
     void createKaiserFir(Constants &c, double stopBandAtten,
             int inSampleRate, int outSampleRate, double tbwCheat);
 
-    void setResampler(unsigned resampleType);
-
     template<int CHANNELS, bool LOCKED, int STRIDE>
     void resample(TO* out, size_t outFrameCount, AudioBufferProvider* provider);
 
-    // declare a pointer to member function for resample
+    // define a pointer to member function type for resample
     typedef void (AudioResamplerDyn<TC, TI, TO>::*resample_ABP_t)(TO* out,
             size_t outFrameCount, AudioBufferProvider* provider);
 
diff --git a/services/audioflinger/AudioResamplerFirProcess.h b/services/audioflinger/AudioResamplerFirProcess.h
index 76d2d66..bb0f1c9 100644
--- a/services/audioflinger/AudioResamplerFirProcess.h
+++ b/services/audioflinger/AudioResamplerFirProcess.h
@@ -44,14 +44,14 @@
 void mac(float& l, float& r, TC coef,  const float* samples)
 {
     l += *samples++ * coef;
-    r += *samples++ * coef;
+    r += *samples * coef;
 }
 
 template<typename TC>
 static inline
 void mac(float& l, TC coef,  const float* samples)
 {
-    l += *samples++ * coef;
+    l += *samples * coef;
 }
 
 /* variant for output type TO = int32_t output samples */
@@ -69,62 +69,48 @@
 }
 
 /*
- * Calculates a single output frame (two samples).
+ * Helper template functions for loop unrolling accumulator operations.
  *
- * This function computes both the positive half FIR dot product and
- * the negative half FIR dot product, accumulates, and then applies the volume.
- *
- * This is a locked phase filter (it does not compute the interpolation).
- *
- * Use fir() to compute the proper coefficient pointers for a polyphase
- * filter bank.
+ * Unrolling the loops achieves about 2x gain.
+ * Using a recursive template rather than an array of TO[] for the accumulator
+ * values is an additional 10-20% gain.
  */
 
-template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
-static inline
-void ProcessL(TO* const out,
-        int count,
-        const TC* coefsP,
-        const TC* coefsN,
-        const TI* sP,
-        const TI* sN,
-        const TO* const volumeLR)
+template<int CHANNELS, typename TO>
+class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive: one base-class level per channel
 {
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)
-    if (CHANNELS == 2) {
-        TO l = 0;
-        TO r = 0;
-        do {
-            mac(l, r, *coefsP++, sP);
-            sP -= CHANNELS;
-            mac(l, r, *coefsN++, sN);
-            sN += CHANNELS;
-        } while (--count > 0);
-        out[0] += volumeAdjust(l, volumeLR[0]);
-        out[1] += volumeAdjust(r, volumeLR[1]);
-    } else { /* CHANNELS == 1 */
-        TO l = 0;
-        do {
-            mac(l, *coefsP++, sP);
-            sP -= CHANNELS;
-            mac(l, *coefsN++, sN);
-            sN += CHANNELS;
-        } while (--count > 0);
-        out[0] += volumeAdjust(l, volumeLR[0]);
-        out[1] += volumeAdjust(l, volumeLR[1]);
+public:
+    inline void clear() { // zero this level's sum, then the remaining levels
+        value = 0;
+        Accumulator<CHANNELS-1, TO>::clear();
     }
-}
+    template<typename TC, typename TI>
+    inline void acc(TC coef, const TI*& data) { // one mac() per level; data advances one sample each
+        mac(value, coef, data++);
+        Accumulator<CHANNELS-1, TO>::acc(coef, data);
+    }
+    inline void volume(TO*& out, TO gain) { // apply gain to each level's sum; out advances one sample each
+        *out++ += volumeAdjust(value, gain); // accumulate into out, as the 1- and 2-channel paths do
+        Accumulator<CHANNELS-1, TO>::volume(out, gain);
+    }
+
+    TO value; // one per recursive inherited base class
+};
+
+template<typename TO>
+class Accumulator<0, TO> { // specialization for zero channels: ends the recursive inheritance chain
+public:
+    inline void clear() { // nothing to clear at the base of the chain
+    }
+    template<typename TC, typename TI>
+    inline void acc(TC coef __unused, const TI*& data __unused) { // no-op: no sum at this level
+    }
+    inline void volume(TO*& out __unused, TO gain __unused) { // no-op: writes nothing to out
+    }
+};
 
 /*
- * Calculates a single output frame (two samples) interpolating phase.
- *
- * This function computes both the positive half FIR dot product and
- * the negative half FIR dot product, accumulates, and then applies the volume.
- *
- * This is an interpolated phase filter.
- *
- * Use fir() to compute the proper coefficient pointers for a polyphase
- * filter bank.
+ * Helper template functions for interpolating filter coefficients.
  */
 
 template<typename TC, typename T>
@@ -159,6 +145,131 @@
     return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0);
 }
 
+/* Class scope wrappers so that the coefficient functions can be passed to a template as a type (TFUNC) */
+struct InterpCompute { // both members forward to interpolate(coef_0, coef_1, lerp)
+    template<typename TC, typename TINTERP>
+    static inline
+    TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) { // used by ProcessBase for the coefsP half
+        return interpolate(coef_0, coef_1, lerp);
+    }
+
+    template<typename TC, typename TINTERP>
+    static inline
+    TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) { // used by ProcessBase for the coefsN half
+        return interpolate(coef_0, coef_1, lerp);
+    }
+};
+
+struct InterpNull { // no interpolation: each member returns one of its coefficients unchanged
+    template<typename TC, typename TINTERP>
+    static inline
+    TC interpolatep(TC coef_0, TC coef_1 __unused, TINTERP lerp __unused) { // first argument wins
+        return coef_0;
+    }
+
+    template<typename TC, typename TINTERP>
+    static inline
+    TC interpolaten(TC coef_0 __unused, TC coef_1, TINTERP lerp __unused) { // second argument wins
+        return coef_1;
+    }
+};
+
+/*
+ * Calculates a single output frame (two samples for 1 or 2 input channels, CHANNELS samples otherwise).
+ *
+ * The Process*() functions compute both the positive half FIR dot product and
+ * the negative half FIR dot product, accumulates, and then applies the volume.
+ *
+ * Use fir() to compute the proper coefficient pointers for a polyphase
+ * filter bank.
+ *
+ * ProcessBase() is the fundamental processing template function.
+ *
+ * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase.
+ * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase.
+ */
+
+template <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP>
+static inline
+void ProcessBase(TO* const out,
+        int count, // loop trip count; also the index of the second coefficient handed to TFUNC
+        const TC* coefsP,
+        const TC* coefsN,
+        const TI* sP,
+        const TI* sN,
+        TINTERP lerpP,
+        const TO* const volumeLR)
+{ // each iteration steps sP back and sN forward by CHANNELS samples
+    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0)
+
+    if (CHANNELS > 2) {
+        // TO accum[CHANNELS];
+        Accumulator<CHANNELS, TO> accum;
+
+        // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0;
+        accum.clear();
+        for (size_t i = 0; i < count; ++i) { // NOTE(review): i is size_t, count is int (mixed-sign compare)
+            TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP);
+
+            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j);
+            const TI *tmp_data = sP; // a local copy (tmp_data) seems to work better
+            accum.acc(c, tmp_data);
+
+            coefsP++;
+            sP -= CHANNELS;
+            c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP);
+
+            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j);
+            tmp_data = sN; // the local copy (tmp_data) seems faster than directly using sN
+            accum.acc(c, tmp_data);
+
+            coefsN++;
+            sN += CHANNELS;
+        }
+        // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]);
+        TO *tmp_out = out; // may remove if const out definition changes.
+        accum.volume(tmp_out, volumeLR[0]); // volumeLR[0] is the gain for every channel; volumeLR[1] is unused here
+    } else if (CHANNELS == 2) {
+        TO l = 0;
+        TO r = 0;
+        for (size_t i = 0; i < count; ++i) {
+            mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
+            coefsP++;
+            sP -= CHANNELS;
+            mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
+            coefsN++;
+            sN += CHANNELS;
+        }
+        out[0] += volumeAdjust(l, volumeLR[0]);
+        out[1] += volumeAdjust(r, volumeLR[1]);
+    } else { /* CHANNELS == 1 */
+        TO l = 0;
+        for (size_t i = 0; i < count; ++i) {
+            mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
+            coefsP++;
+            sP -= CHANNELS;
+            mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
+            coefsN++;
+            sN += CHANNELS;
+        }
+        out[0] += volumeAdjust(l, volumeLR[0]); // the single mono sum feeds both output samples,
+        out[1] += volumeAdjust(l, volumeLR[1]); // each with its own gain
+    }
+}
+
+template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
+static inline
+void ProcessL(TO* const out,
+        int count,
+        const TC* coefsP,
+        const TC* coefsN,
+        const TI* sP,
+        const TI* sN,
+        const TO* const volumeLR)
+{ // fixed/locked phase: InterpNull ignores its lerp argument, so 0 is passed as a placeholder
+    ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR);
+}
+
 template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>
 static inline
 void Process(TO* const out,
@@ -172,35 +283,8 @@
         TINTERP lerpP,
         const TO* const volumeLR)
 {
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)
-    adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolation
-
-    if (CHANNELS == 2) {
-        TO l = 0;
-        TO r = 0;
-        for (size_t i = 0; i < count; ++i) {
-            mac(l, r, interpolate(coefsP[0], coefsP[count], lerpP), sP);
-            coefsP++;
-            sP -= CHANNELS;
-            mac(l, r, interpolate(coefsN[count], coefsN[0], lerpP), sN);
-            coefsN++;
-            sN += CHANNELS;
-        }
-        out[0] += volumeAdjust(l, volumeLR[0]);
-        out[1] += volumeAdjust(r, volumeLR[1]);
-    } else { /* CHANNELS == 1 */
-        TO l = 0;
-        for (size_t i = 0; i < count; ++i) {
-            mac(l, interpolate(coefsP[0], coefsP[count], lerpP), sP);
-            coefsP++;
-            sP -= CHANNELS;
-            mac(l, interpolate(coefsN[count], coefsN[0], lerpP), sN);
-            coefsN++;
-            sN += CHANNELS;
-        }
-        out[0] += volumeAdjust(l, volumeLR[0]);
-        out[1] += volumeAdjust(l, volumeLR[1]);
-    }
+    adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolations
+    ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP, volumeLR);
 }
 
 /*
diff --git a/services/audioflinger/Threads.cpp b/services/audioflinger/Threads.cpp
index 67a0119..11d13a8 100755
--- a/services/audioflinger/Threads.cpp
+++ b/services/audioflinger/Threads.cpp
@@ -1815,7 +1815,7 @@
         LOG_FATAL("HAL format %#x not supported for mixed output",
                 mFormat);
     }
-    mFrameSize = audio_stream_frame_size(&mOutput->stream->common);
+    mFrameSize = audio_stream_out_frame_size(mOutput->stream);
     mBufferSize = mOutput->stream->common.get_buffer_size(&mOutput->stream->common);
     mFrameCount = mBufferSize / mFrameSize;
     if (mFrameCount & 15) {
@@ -4466,7 +4466,7 @@
                     size_t audioHALFrames =
                             (mOutput->stream->get_latency(mOutput->stream)*mSampleRate) / 1000;
                     size_t framesWritten =
-                            mBytesWritten / audio_stream_frame_size(&mOutput->stream->common);
+                            mBytesWritten / audio_stream_out_frame_size(mOutput->stream);
                     track->presentationComplete(framesWritten, audioHALFrames);
                     track->reset();
                     tracksToRemove->add(track);
@@ -5443,6 +5443,7 @@
         audio_channel_mask_t channelMask,
         size_t *pFrameCount,
         int sessionId,
+        size_t *notificationFrames,
         int uid,
         IAudioFlinger::track_flags_t *flags,
         pid_t tid,
@@ -5512,6 +5513,7 @@
       }
     }
     *pFrameCount = frameCount;
+    *notificationFrames = 0;    // FIXME implement
 
     lStatus = initCheck();
     if (lStatus != NO_ERROR) {
@@ -6011,7 +6013,7 @@
     if (mFormat != AUDIO_FORMAT_PCM_16_BIT) {
         ALOGE("HAL format %#x not supported; must be AUDIO_FORMAT_PCM_16_BIT", mFormat);
     }
-    mFrameSize = audio_stream_frame_size(&mInput->stream->common);
+    mFrameSize = audio_stream_in_frame_size(mInput->stream);
     mBufferSize = mInput->stream->common.get_buffer_size(&mInput->stream->common);
     mFrameCount = mBufferSize / mFrameSize;
     // This is the formula for calculating the temporary buffer size.
diff --git a/services/audioflinger/Threads.h b/services/audioflinger/Threads.h
index 93d2635..c265833 100644
--- a/services/audioflinger/Threads.h
+++ b/services/audioflinger/Threads.h
@@ -233,7 +233,6 @@
 
                 // dynamic externally-visible
                 uint32_t    sampleRate() const { return mSampleRate; }
-                uint32_t    channelCount() const { return mChannelCount; }
                 audio_channel_mask_t channelMask() const { return mChannelMask; }
                 audio_format_t format() const { return mFormat; }
                 // Called by AudioFlinger::frameCount(audio_io_handle_t output) and effects,
@@ -1073,6 +1072,7 @@
                     audio_channel_mask_t channelMask,
                     size_t *pFrameCount,
                     int sessionId,
+                    size_t *notificationFrames,
                     int uid,
                     IAudioFlinger::track_flags_t *flags,
                     pid_t tid,
diff --git a/services/audioflinger/tests/resampler_tests.cpp b/services/audioflinger/tests/resampler_tests.cpp
index 4a67d0b..d76c376 100644
--- a/services/audioflinger/tests/resampler_tests.cpp
+++ b/services/audioflinger/tests/resampler_tests.cpp
@@ -35,7 +35,8 @@
 #include "AudioResampler.h"
 #include "test_utils.h"
 
-void resample(void *output, size_t outputFrames, const std::vector<size_t> &outputIncr,
+void resample(int channels, void *output,
+        size_t outputFrames, const std::vector<size_t> &outputIncr,
         android::AudioBufferProvider *provider, android::AudioResampler *resampler)
 {
     for (size_t i = 0, j = 0; i < outputFrames; ) {
@@ -46,7 +47,7 @@
         if (thisFrames == 0 || thisFrames > outputFrames - i) {
             thisFrames = outputFrames - i;
         }
-        resampler->resample((int32_t*) output + 2*i, thisFrames, provider);
+        resampler->resample((int32_t*) output + channels*i, thisFrames, provider);
         i += thisFrames;
     }
 }
@@ -64,19 +65,26 @@
     }
 }
 
-void testBufferIncrement(size_t channels, unsigned inputFreq, unsigned outputFreq,
+void testBufferIncrement(size_t channels, bool useFloat,
+        unsigned inputFreq, unsigned outputFreq,
         enum android::AudioResampler::src_quality quality)
 {
+    const int bits = useFloat ? 32 : 16;
     // create the provider
     std::vector<int> inputIncr;
     SignalProvider provider;
-    provider.setChirp<int16_t>(channels,
-            0., outputFreq/2., outputFreq, outputFreq/2000.);
+    if (useFloat) {
+        provider.setChirp<float>(channels,
+                0., outputFreq/2., outputFreq, outputFreq/2000.);
+    } else {
+        provider.setChirp<int16_t>(channels,
+                0., outputFreq/2., outputFreq, outputFreq/2000.);
+    }
     provider.setIncr(inputIncr);
 
     // calculate the output size
     size_t outputFrames = ((int64_t) provider.getNumFrames() * outputFreq) / inputFreq;
-    size_t outputFrameSize = 2 * sizeof(int32_t);
+    size_t outputFrameSize = channels * (useFloat ? sizeof(float) : sizeof(int32_t));
     size_t outputSize = outputFrameSize * outputFrames;
     outputSize &= ~7;
 
@@ -84,7 +92,7 @@
     const int volumePrecision = 12; /* typical unity gain */
     android::AudioResampler* resampler;
 
-    resampler = android::AudioResampler::create(16, channels, outputFreq, quality);
+    resampler = android::AudioResampler::create(bits, channels, outputFreq, quality);
     resampler->setSampleRate(inputFreq);
     resampler->setVolume(1 << volumePrecision, 1 << volumePrecision);
 
@@ -92,7 +100,7 @@
     std::vector<size_t> refIncr;
     refIncr.push_back(outputFrames);
     void* reference = malloc(outputSize);
-    resample(reference, outputFrames, refIncr, &provider, resampler);
+    resample(channels, reference, outputFrames, refIncr, &provider, resampler);
 
     provider.reset();
 
@@ -101,7 +109,7 @@
     resampler->reset();
 #else
     delete resampler;
-    resampler = android::AudioResampler::create(16, channels, outputFreq, quality);
+    resampler = android::AudioResampler::create(bits, channels, outputFreq, quality);
     resampler->setSampleRate(inputFreq);
     resampler->setVolume(1 << volumePrecision, 1 << volumePrecision);
 #endif
@@ -112,7 +120,10 @@
     outIncr.push_back(2);
     outIncr.push_back(3);
     void* test = malloc(outputSize);
-    resample(test, outputFrames, outIncr, &provider, resampler);
+    inputIncr.push_back(1);
+    inputIncr.push_back(3);
+    provider.setIncr(inputIncr);
+    resample(channels, test, outputFrames, outIncr, &provider, resampler);
 
     // check
     buffercmp(reference, test, outputFrameSize, outputFrames);
@@ -155,7 +166,7 @@
 
     // calculate the output size
     size_t outputFrames = ((int64_t) provider.getNumFrames() * outputFreq) / inputFreq;
-    size_t outputFrameSize = 2 * sizeof(int32_t);
+    size_t outputFrameSize = channels * sizeof(int32_t);
     size_t outputSize = outputFrameSize * outputFrames;
     outputSize &= ~7;
 
@@ -171,7 +182,7 @@
     std::vector<size_t> refIncr;
     refIncr.push_back(outputFrames);
     void* reference = malloc(outputSize);
-    resample(reference, outputFrames, refIncr, &provider, resampler);
+    resample(channels, reference, outputFrames, refIncr, &provider, resampler);
 
     int32_t *out = reinterpret_cast<int32_t *>(reference);
 
@@ -226,7 +237,7 @@
     };
 
     for (size_t i = 0; i < ARRAY_SIZE(kQualityArray); ++i) {
-        testBufferIncrement(2, 48000, 32000, kQualityArray[i]);
+        testBufferIncrement(2, false, 48000, 32000, kQualityArray[i]);
     }
 }
 
@@ -243,7 +254,33 @@
     };
 
     for (size_t i = 0; i < ARRAY_SIZE(kQualityArray); ++i) {
-        testBufferIncrement(2, 22050, 48000, kQualityArray[i]);
+        testBufferIncrement(2, false, 22050, 48000, kQualityArray[i]);
+    }
+}
+
+TEST(audioflinger_resampler, bufferincrement_fixedphase_multi) {
+    // only dynamic quality
+    static const enum android::AudioResampler::src_quality kQualityArray[] = {
+            android::AudioResampler::DYN_LOW_QUALITY,
+            android::AudioResampler::DYN_MED_QUALITY,
+            android::AudioResampler::DYN_HIGH_QUALITY,
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(kQualityArray); ++i) {
+        testBufferIncrement(4, false, 48000, 32000, kQualityArray[i]);
+    }
+}
+
+TEST(audioflinger_resampler, bufferincrement_interpolatedphase_multi_float) {
+    // only dynamic quality
+    static const enum android::AudioResampler::src_quality kQualityArray[] = {
+            android::AudioResampler::DYN_LOW_QUALITY,
+            android::AudioResampler::DYN_MED_QUALITY,
+            android::AudioResampler::DYN_HIGH_QUALITY,
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(kQualityArray); ++i) {
+        testBufferIncrement(8, true, 22050, 48000, kQualityArray[i]);
     }
 }
 
diff --git a/services/audiopolicy/AudioPolicyManager.cpp b/services/audiopolicy/AudioPolicyManager.cpp
index 4fcf43b..95179b7 100644
--- a/services/audiopolicy/AudioPolicyManager.cpp
+++ b/services/audiopolicy/AudioPolicyManager.cpp
@@ -516,7 +516,10 @@
             config != AUDIO_POLICY_FORCE_WIRED_ACCESSORY &&
             config != AUDIO_POLICY_FORCE_ANALOG_DOCK &&
             config != AUDIO_POLICY_FORCE_DIGITAL_DOCK && config != AUDIO_POLICY_FORCE_NONE &&
-            config != AUDIO_POLICY_FORCE_NO_BT_A2DP) {
+            config != AUDIO_POLICY_FORCE_NO_BT_A2DP &&
+            config != AUDIO_POLICY_FORCE_SYSTEM_AUDIO_HDMI_ARC &&
+            config != AUDIO_POLICY_FORCE_SYSTEM_AUDIO_SPDIF &&
+            config != AUDIO_POLICY_FORCE_SYSTEM_AUDIO_LINE) {
             ALOGW("setForceUse() invalid config %d for FOR_MEDIA", config);
             return;
         }
@@ -3536,10 +3539,32 @@
         if (device2 == AUDIO_DEVICE_NONE) {
             device2 = availableOutputDeviceTypes & AUDIO_DEVICE_OUT_SPEAKER;
         }
+        int device3 = AUDIO_DEVICE_NONE;
+        if (strategy == STRATEGY_MEDIA) {
+            // Hdmi system audio should use manually configured device type.
+            if (mForceUse[AUDIO_POLICY_FORCE_FOR_MEDIA]
+                    == AUDIO_POLICY_FORCE_SYSTEM_AUDIO_HDMI_ARC) {
+                device3 = availableOutputDeviceTypes & AUDIO_DEVICE_OUT_HDMI_ARC;
+            } else if (mForceUse[AUDIO_POLICY_FORCE_FOR_MEDIA]
+                    == AUDIO_POLICY_FORCE_SYSTEM_AUDIO_SPDIF) {
+                device3 = availableOutputDeviceTypes & AUDIO_DEVICE_OUT_SPDIF;
+            } else if (mForceUse[AUDIO_POLICY_FORCE_FOR_MEDIA]
+                    == AUDIO_POLICY_FORCE_SYSTEM_AUDIO_LINE) {
+                device3 = availableOutputDeviceTypes & AUDIO_DEVICE_OUT_LINE;
+            }
+        }
 
+        // Merge hdmi cec system audio and existing device for media. If system audio is on,
+        // internal speaker will be muted but others are not.
+        device2 |= device3;
         // device is DEVICE_OUT_SPEAKER if we come from case STRATEGY_SONIFICATION or
         // STRATEGY_ENFORCED_AUDIBLE, AUDIO_DEVICE_NONE otherwise
         device |= device2;
+
+        // If system audio mode is on and proper audio out is set, remove speaker from device.
+        if (device3 != AUDIO_DEVICE_NONE) {
+             device &= ~AUDIO_DEVICE_OUT_SPEAKER;
+        }
         if (device) break;
         device = mDefaultOutputDevice->mDeviceType;
         if (device == AUDIO_DEVICE_NONE) {
diff --git a/services/camera/libcameraservice/api1/client2/ZslProcessor3.cpp b/services/camera/libcameraservice/api1/client2/ZslProcessor3.cpp
index 79ea2c3..ae537e2 100644
--- a/services/camera/libcameraservice/api1/client2/ZslProcessor3.cpp
+++ b/services/camera/libcameraservice/api1/client2/ZslProcessor3.cpp
@@ -52,8 +52,31 @@
         mFrameListHead(0),
         mZslQueueHead(0),
         mZslQueueTail(0) {
-    mZslQueue.insertAt(0, kZslBufferDepth);
-    mFrameList.insertAt(0, kFrameListDepth);
+    // Initialize buffer queue and frame list based on pipeline max depth.
+    size_t pipelineMaxDepth = kDefaultMaxPipelineDepth;  // fallback when the tag is absent
+    if (client != 0) {
+        sp<Camera3Device> device =
+                static_cast<Camera3Device*>(client->getCameraDevice().get());
+        if (device != 0) {
+            camera_metadata_ro_entry_t entry =
+                device->info().find(ANDROID_REQUEST_PIPELINE_MAX_DEPTH);
+            if (entry.count == 1) {
+                pipelineMaxDepth = entry.data.u8[0];
+            } else {
+                ALOGW("%s: Unable to find the android.request.pipelineMaxDepth,"
+                        " use default pipeline max depth %d", __FUNCTION__,
+                        kDefaultMaxPipelineDepth);  // int32_t constant, hence %d
+            }
+        }
+    }
+
+    ALOGV("%s: Initialize buffer queue and frame list depth based on max pipeline depth (%zu)",
+          __FUNCTION__, pipelineMaxDepth);
+    mBufferQueueDepth = pipelineMaxDepth + 1;
+    mFrameListDepth = pipelineMaxDepth + 1;
+
+    mZslQueue.insertAt(0, mBufferQueueDepth);
+    mFrameList.insertAt(0, mFrameListDepth);
     sp<CaptureSequencer> captureSequencer = mSequencer.promote();
     if (captureSequencer != 0) captureSequencer->setZslProcessor(this);
 }
@@ -70,13 +93,25 @@
     camera_metadata_ro_entry_t entry;
     entry = result.mMetadata.find(ANDROID_SENSOR_TIMESTAMP);
     nsecs_t timestamp = entry.data.i64[0];
+    if (entry.count == 0) {
+        ALOGE("%s: metadata doesn't have timestamp, skip this result");
+        return;
+    }
     (void)timestamp;
-    ALOGVV("Got preview metadata for timestamp %" PRId64, timestamp);
+
+    entry = result.mMetadata.find(ANDROID_REQUEST_FRAME_COUNT);
+    if (entry.count == 0) {
+        ALOGE("%s: metadata doesn't have frame number, skip this result");
+        return;
+    }
+    int32_t frameNumber = entry.data.i32[0];
+
+    ALOGVV("Got preview metadata for frame %d with timestamp %" PRId64, frameNumber, timestamp);
 
     if (mState != RUNNING) return;
 
     mFrameList.editItemAt(mFrameListHead) = result.mMetadata;
-    mFrameListHead = (mFrameListHead + 1) % kFrameListDepth;
+    mFrameListHead = (mFrameListHead + 1) % mFrameListDepth;
 }
 
 status_t ZslProcessor3::updateStream(const Parameters &params) {
@@ -136,7 +171,7 @@
         // Note that format specified internally in Camera3ZslStream
         res = device->createZslStream(
                 params.fastInfo.arrayWidth, params.fastInfo.arrayHeight,
-                kZslBufferDepth,
+                mBufferQueueDepth,
                 &mZslStreamId,
                 &mZslStream);
         if (res != OK) {
@@ -145,7 +180,11 @@
                     strerror(-res), res);
             return res;
         }
+
+        // Only add the camera3 buffer listener when the stream is created.
+        mZslStream->addBufferListener(this);
     }
+
     client->registerFrameListener(Camera2Client::kPreviewRequestIdStart,
             Camera2Client::kPreviewRequestIdEnd,
             this,
@@ -277,15 +316,6 @@
             return INVALID_OPERATION;
         }
 
-        // Flush device to clear out all in-flight requests pending in HAL.
-        res = client->getCameraDevice()->flush();
-        if (res != OK) {
-            ALOGE("%s: Camera %d: Failed to flush device: "
-                "%s (%d)",
-                __FUNCTION__, client->getCameraId(), strerror(-res), res);
-            return res;
-        }
-
         // Update JPEG settings
         {
             SharedParameters::Lock l(client->getParameters());
@@ -323,11 +353,19 @@
 
 status_t ZslProcessor3::clearZslQueueLocked() {
     if (mZslStream != 0) {
+        // clear result metadata list first.
+        clearZslResultQueueLocked();
         return mZslStream->clearInputRingBuffer();
     }
     return OK;
 }
 
+void ZslProcessor3::clearZslResultQueueLocked() {
+    mFrameList.clear();
+    mFrameListHead = 0;
+    mFrameList.insertAt(0, mFrameListDepth);
+}
+
 void ZslProcessor3::dump(int fd, const Vector<String16>& /*args*/) const {
     Mutex::Autolock l(mInputMutex);
     if (!mLatestCapturedRequest.isEmpty()) {
@@ -481,11 +519,17 @@
     // We need to guarantee that if we do two back-to-back captures,
     // the second won't use a buffer that's older/the same as the first, which
     // is theoretically possible if we don't clear out the queue and the
-    // selection criteria is something like 'newest'. Clearing out the queue
-    // on a completed capture ensures we'll only use new data.
+    // selection criteria is something like 'newest'. Clearing out the result
+    // metadata queue on a completed capture ensures we'll only use new timestamps.
+    // Calling clearZslQueueLocked here is a guaranteed deadlock because this callback
+    // holds the Camera3Stream internal lock (mLock), and clearZslQueueLocked needs
+    // to acquire the same lock.
+    // TODO: figure out a way to clear the ZSL buffer queue properly. Right now
+    // it is safe not to do so, as back-to-back ZSL capture requires stopping and
+    // restarting preview, which flushes the ZSL queue automatically.
     ALOGV("%s: Memory optimization, clearing ZSL queue",
           __FUNCTION__);
-    clearZslQueueLocked();
+    clearZslResultQueueLocked();
 
     // Required so we accept more ZSL requests
     mState = RUNNING;
diff --git a/services/camera/libcameraservice/api1/client2/ZslProcessor3.h b/services/camera/libcameraservice/api1/client2/ZslProcessor3.h
index 4c52a64..dfb1457 100644
--- a/services/camera/libcameraservice/api1/client2/ZslProcessor3.h
+++ b/services/camera/libcameraservice/api1/client2/ZslProcessor3.h
@@ -107,8 +107,9 @@
         CameraMetadata frame;
     };
 
-    static const size_t kZslBufferDepth = 4;
-    static const size_t kFrameListDepth = kZslBufferDepth * 2;
+    static const int32_t kDefaultMaxPipelineDepth = 4;
+    size_t mBufferQueueDepth;
+    size_t mFrameListDepth;
     Vector<CameraMetadata> mFrameList;
     size_t mFrameListHead;
 
@@ -124,6 +125,8 @@
 
     status_t clearZslQueueLocked();
 
+    void clearZslResultQueueLocked();
+
     void dumpZslQueue(int id) const;
 
     nsecs_t getCandidateTimestampLocked(size_t* metadataIdx) const;
diff --git a/services/camera/libcameraservice/device3/Camera3Device.cpp b/services/camera/libcameraservice/device3/Camera3Device.cpp
index 5973625..bbb1e1c 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.cpp
+++ b/services/camera/libcameraservice/device3/Camera3Device.cpp
@@ -1533,12 +1533,12 @@
  */
 
 status_t Camera3Device::registerInFlight(uint32_t frameNumber,
-        int32_t numBuffers, CaptureResultExtras resultExtras) {
+        int32_t numBuffers, CaptureResultExtras resultExtras, bool hasInput) {
     ATRACE_CALL();
     Mutex::Autolock l(mInFlightLock);
 
     ssize_t res;
-    res = mInFlightMap.add(frameNumber, InFlightRequest(numBuffers, resultExtras));
+    res = mInFlightMap.add(frameNumber, InFlightRequest(numBuffers, resultExtras, hasInput));
     if (res < 0) return res;
 
     return OK;
@@ -1720,7 +1720,8 @@
     status_t res;
 
     uint32_t frameNumber = result->frame_number;
-    if (result->result == NULL && result->num_output_buffers == 0) {
+    if (result->result == NULL && result->num_output_buffers == 0 &&
+            result->input_buffer == NULL) {
         SET_ERR("No result data provided by HAL for frame %d",
                 frameNumber);
         return;
@@ -1728,6 +1729,7 @@
     bool partialResultQuirk = false;
     CameraMetadata collectedQuirkResult;
     CaptureResultExtras resultExtras;
+    bool hasInputBufferInRequest = false;
 
     // Get capture timestamp and resultExtras from list of in-flight requests,
     // where it was added by the shutter notification for this frame.
@@ -1776,6 +1778,7 @@
 
         timestamp = request.captureTimestamp;
         resultExtras = request.resultExtras;
+        hasInputBufferInRequest = request.hasInputBuffer;
 
         /**
          * One of the following must happen before it's legal to call process_capture_result,
@@ -1804,8 +1807,17 @@
             request.haveResultMetadata = true;
         }
 
-        request.numBuffersLeft -= result->num_output_buffers;
-
+        uint32_t numBuffersReturned = result->num_output_buffers;
+        if (result->input_buffer != NULL) {
+            if (hasInputBufferInRequest) {
+                numBuffersReturned += 1;
+            } else {
+                ALOGW("%s: Input buffer should be NULL if there is no input"
+                        " buffer sent in the request",
+                        __FUNCTION__);
+            }
+        }
+        request.numBuffersLeft -= numBuffersReturned;
         if (request.numBuffersLeft < 0) {
             SET_ERR("Too many buffers returned for frame %d",
                     frameNumber);
@@ -1906,6 +1918,25 @@
         }
     }
 
+    // Hand the input buffer the HAL gave back in this result to its stream.
+    if (result->input_buffer != NULL) {
+        if (hasInputBufferInRequest) {
+            Camera3Stream *stream = Camera3Stream::cast(result->input_buffer->stream);
+            res = stream->returnInputBuffer(*(result->input_buffer));
+            // Note: stream may be deallocated at this point, if this buffer was the
+            // last reference to it.
+            if (res != OK) {
+                ALOGE("%s: RequestThread: Can't return input buffer for frame %d to"
+                      "  its stream:%s (%d)",  __FUNCTION__,
+                      frameNumber, strerror(-res), res);
+            }
+        } else {
+            ALOGW("%s: Input buffer should be NULL if there is no input"
+                    " buffer sent in the request",
+                    __FUNCTION__);
+        }
+    }
+
     // Finally, signal any waiters for new frames
 
     if (gotResult) {
@@ -2318,6 +2349,7 @@
     }
 
     camera3_stream_buffer_t inputBuffer;
+    uint32_t totalNumBuffers = 0;
 
     // Fill in buffers
 
@@ -2330,6 +2362,7 @@
             cleanUpFailedRequest(request, nextRequest, outputBuffers);
             return true;
         }
+        totalNumBuffers += 1;
     } else {
         request.input_buffer = NULL;
     }
@@ -2348,6 +2381,7 @@
         }
         request.num_output_buffers++;
     }
+    totalNumBuffers += request.num_output_buffers;
 
     // Log request in the in-flight queue
     sp<Camera3Device> parent = mParent.promote();
@@ -2358,7 +2392,8 @@
     }
 
     res = parent->registerInFlight(request.frame_number,
-            request.num_output_buffers, nextRequest->mResultExtras);
+            totalNumBuffers, nextRequest->mResultExtras,
+            /*hasInput*/request.input_buffer != NULL);
     ALOGVV("%s: registered in flight requestId = %" PRId32 ", frameNumber = %" PRId64
            ", burstId = %" PRId32 ".",
             __FUNCTION__,
@@ -2414,21 +2449,6 @@
     }
     mPrevTriggers = triggerCount;
 
-    // Return input buffer back to framework
-    if (request.input_buffer != NULL) {
-        Camera3Stream *stream =
-            Camera3Stream::cast(request.input_buffer->stream);
-        res = stream->returnInputBuffer(*(request.input_buffer));
-        // Note: stream may be deallocated at this point, if this buffer was the
-        // last reference to it.
-        if (res != OK) {
-            ALOGE("%s: RequestThread: Can't return input buffer for frame %d to"
-                    "  its stream:%s (%d)",  __FUNCTION__,
-                    request.frame_number, strerror(-res), res);
-            // TODO: Report error upstream
-        }
-    }
-
     return true;
 }
 
diff --git a/services/camera/libcameraservice/device3/Camera3Device.h b/services/camera/libcameraservice/device3/Camera3Device.h
index 61e6572..ea958b7 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.h
+++ b/services/camera/libcameraservice/device3/Camera3Device.h
@@ -501,9 +501,11 @@
         // Set by process_capture_result call with valid metadata
         bool    haveResultMetadata;
         // Decremented by calls to process_capture_result with valid output
-        // buffers
+        // and input buffers
         int     numBuffersLeft;
         CaptureResultExtras resultExtras;
+        // If this request has any input buffer
+        bool hasInputBuffer;
 
         // Fields used by the partial result quirk only
         struct PartialResultQuirkInFlight {
@@ -522,14 +524,16 @@
                 captureTimestamp(0),
                 requestStatus(OK),
                 haveResultMetadata(false),
-                numBuffersLeft(0) {
+                numBuffersLeft(0),
+                hasInputBuffer(false){
         }
 
         InFlightRequest(int numBuffers) :
                 captureTimestamp(0),
                 requestStatus(OK),
                 haveResultMetadata(false),
-                numBuffersLeft(numBuffers) {
+                numBuffersLeft(numBuffers),
+                hasInputBuffer(false){
         }
 
         InFlightRequest(int numBuffers, CaptureResultExtras extras) :
@@ -537,9 +541,19 @@
                 requestStatus(OK),
                 haveResultMetadata(false),
                 numBuffersLeft(numBuffers),
-                resultExtras(extras) {
+                resultExtras(extras),
+                hasInputBuffer(false){
         }
-    };
+
+        InFlightRequest(int numBuffers, CaptureResultExtras extras, bool hasInput) :
+                captureTimestamp(0),
+                requestStatus(OK),
+                haveResultMetadata(false),
+                numBuffersLeft(numBuffers),
+                resultExtras(extras),
+                hasInputBuffer(hasInput){
+        }
+};
     // Map from frame number to the in-flight request state
     typedef KeyedVector<uint32_t, InFlightRequest> InFlightMap;
 
@@ -547,7 +561,7 @@
     InFlightMap            mInFlightMap;
 
     status_t registerInFlight(uint32_t frameNumber,
-            int32_t numBuffers, CaptureResultExtras resultExtras);
+            int32_t numBuffers, CaptureResultExtras resultExtras, bool hasInput);
 
     /**
      * For the partial result quirk, check if all 3A state fields are available
diff --git a/services/camera/libcameraservice/device3/Camera3Stream.cpp b/services/camera/libcameraservice/device3/Camera3Stream.cpp
index 7645a2a..d7b1871 100644
--- a/services/camera/libcameraservice/device3/Camera3Stream.cpp
+++ b/services/camera/libcameraservice/device3/Camera3Stream.cpp
@@ -485,6 +485,18 @@
 void Camera3Stream::addBufferListener(
         wp<Camera3StreamBufferListener> listener) {
     Mutex::Autolock l(mLock);
+
+    // Scan the registered listeners first: a listener that is already in the
+    // list is reported and not added a second time.
+    typedef List<wp<Camera3StreamBufferListener> >::iterator ListenerIter;
+    ListenerIter last = mBufferListenerList.end();
+    for (ListenerIter cur = mBufferListenerList.begin(); cur != last; ++cur) {
+        if (*cur == listener) {
+            ALOGE("%s: Try to add the same listener twice, ignoring...", __FUNCTION__);
+            return;
+        }
+    }
+
     mBufferListenerList.push_back(listener);
 }
 
diff --git a/services/camera/libcameraservice/device3/Camera3Stream.h b/services/camera/libcameraservice/device3/Camera3Stream.h
index 14f5387..a77f27c 100644
--- a/services/camera/libcameraservice/device3/Camera3Stream.h
+++ b/services/camera/libcameraservice/device3/Camera3Stream.h
@@ -226,8 +226,17 @@
      */
     virtual void     dump(int fd, const Vector<String16> &args) const = 0;
 
+    /**
+     * Add a camera3 buffer listener. Adding the same listener twice has
+     * no effect.
+     */
     void             addBufferListener(
             wp<Camera3StreamBufferListener> listener);
+
+    /**
+     * Remove a camera3 buffer listener. Removing the same listener twice,
+     * or removing a listener that was never added, has no effect.
+     */
     void             removeBufferListener(
             const sp<Camera3StreamBufferListener>& listener);
 
diff --git a/services/camera/libcameraservice/device3/Camera3ZslStream.cpp b/services/camera/libcameraservice/device3/Camera3ZslStream.cpp
index 05b3d1f..6c298f9 100644
--- a/services/camera/libcameraservice/device3/Camera3ZslStream.cpp
+++ b/services/camera/libcameraservice/device3/Camera3ZslStream.cpp
@@ -300,6 +300,7 @@
     nsecs_t actual = pinnedBuffer->getBufferItem().mTimestamp;
 
     if (actual != timestamp) {
+        // TODO: this is problematic; we'll end up using the wrong result for this pinned buffer.
         ALOGW("%s: ZSL buffer candidate search didn't find an exact match --"
               " requested timestamp = %" PRId64 ", actual timestamp = %" PRId64,
               __FUNCTION__, timestamp, actual);