AudioFlinger: offload playback, non-blocking write

- Added specialized playback thread class for offload playback,
derived from directoutput thread.
This thread type handles specific state transitions for offloaded
tracks and offloading commands (pause/resume/drain/flush..) to audio HAL.
As opposed to other threads, does not go to standby if the track is paused.

- Added support for asynchronous write and drain operations at audio HAL.
Use a thread to handle async callback events from HAL: this avoids locking
playback thread mutex when executing the callback and cause deadlocks when
calling audio HAL functions with the playback thread mutex locked.

- Better accouting for track activity: call start/stop and release Output
methods in audio policy manager when tracks are actually added and removed
from the active tracks list.
Added a command thread in audio policy service to handle stop/release commands
asynchronously and avoid deadlocks with playback thread.

- Track terminated status is not a state anymore. This condition is othogonal
to state to permitted state transitions while terminated.

Change-Id: Id157f4b3277620568d8eace7535d9186602564de
diff --git a/include/private/media/AudioTrackShared.h b/include/private/media/AudioTrackShared.h
index 0592683..6129c80 100644
--- a/include/private/media/AudioTrackShared.h
+++ b/include/private/media/AudioTrackShared.h
@@ -291,6 +291,11 @@
     virtual uint32_t    getUnderrunFrames() const {
         return mCblk->u.mStreaming.mUnderrunFrames;
     }
+
+    bool        clearStreamEndDone();   // and return previous value
+
+    bool        getStreamEndDone() const;
+
 };
 
 class StaticAudioTrackClientProxy : public AudioTrackClientProxy {
@@ -405,6 +410,8 @@
     // should avoid doing a state queue poll from within framesReady().
     // FIXME Change AudioFlinger to not call framesReady() from normal mixer thread.
     virtual void        framesReadyIsCalledByMultipleThreads() { }
+
+    bool     setStreamEndDone();    // and return previous value
 };
 
 class StaticAudioTrackServerProxy : public AudioTrackServerProxy {
diff --git a/media/libmedia/AudioTrackShared.cpp b/media/libmedia/AudioTrackShared.cpp
index 55bf175..bd43ad2 100644
--- a/media/libmedia/AudioTrackShared.cpp
+++ b/media/libmedia/AudioTrackShared.cpp
@@ -322,6 +322,14 @@
     mCblk->u.mStreaming.mFlush++;
 }
 
+bool AudioTrackClientProxy::clearStreamEndDone() {
+    return android_atomic_and(~CBLK_STREAM_END_DONE, &mCblk->flags) & CBLK_STREAM_END_DONE;
+}
+
+bool AudioTrackClientProxy::getStreamEndDone() const {
+    return (mCblk->flags & CBLK_STREAM_END_DONE) != 0;
+}
+
 // ---------------------------------------------------------------------------
 
 StaticAudioTrackClientProxy::StaticAudioTrackClientProxy(audio_track_cblk_t* cblk, void *buffers,
@@ -524,6 +532,16 @@
     return filled;
 }
 
+bool  AudioTrackServerProxy::setStreamEndDone() {
+    bool old =
+            (android_atomic_or(CBLK_STREAM_END_DONE, &mCblk->flags) & CBLK_STREAM_END_DONE) != 0;
+    if (!old) {
+        (void) __futex_syscall3(&mCblk->mFutex, mClientInServer ? FUTEX_WAKE_PRIVATE : FUTEX_WAKE,
+                1);
+    }
+    return old;
+}
+
 // ---------------------------------------------------------------------------
 
 StaticAudioTrackServerProxy::StaticAudioTrackServerProxy(audio_track_cblk_t* cblk, void *buffers,
diff --git a/services/audioflinger/AudioFlinger.cpp b/services/audioflinger/AudioFlinger.cpp
index 99e077c..712772b 100644
--- a/services/audioflinger/AudioFlinger.cpp
+++ b/services/audioflinger/AudioFlinger.cpp
@@ -1423,13 +1423,15 @@
     audio_stream_out_t *outStream = NULL;
     AudioHwDevice *outHwDev;
 
-    ALOGV("openOutput(), module %d Device %x, SamplingRate %d, Format %d, Channels %x, flags %x",
+    ALOGV("openOutput(), module %d Device %x, SamplingRate %d, Format %#08x, Channels %x, flags %x",
               module,
               (pDevices != NULL) ? *pDevices : 0,
               config.sample_rate,
               config.format,
               config.channel_mask,
               flags);
+    ALOGV("openOutput(), offloadInfo %p version 0x%04x",
+          offloadInfo, offloadInfo == NULL ? -1 : offloadInfo->version );
 
     if (pDevices == NULL || *pDevices == 0) {
         return 0;
@@ -1454,7 +1456,7 @@
                                           &outStream);
 
     mHardwareStatus = AUDIO_HW_IDLE;
-    ALOGV("openOutput() openOutputStream returned output %p, SamplingRate %d, Format %d, "
+    ALOGV("openOutput() openOutputStream returned output %p, SamplingRate %d, Format %#08x, "
             "Channels %x, status %d",
             outStream,
             config.sample_rate,
@@ -1463,9 +1465,12 @@
             status);
 
     if (status == NO_ERROR && outStream != NULL) {
-        AudioStreamOut *output = new AudioStreamOut(outHwDev, outStream);
+        AudioStreamOut *output = new AudioStreamOut(outHwDev, outStream, flags);
 
-        if ((flags & AUDIO_OUTPUT_FLAG_DIRECT) ||
+        if (flags & AUDIO_OUTPUT_FLAG_COMPRESS_OFFLOAD) {
+            thread = new OffloadThread(this, output, id, *pDevices);
+            ALOGV("openOutput() created offload output: ID %d thread %p", id, thread);
+        } else if ((flags & AUDIO_OUTPUT_FLAG_DIRECT) ||
             (config.format != AUDIO_FORMAT_PCM_16_BIT) ||
             (config.channel_mask != AUDIO_CHANNEL_OUT_STEREO)) {
             thread = new DirectOutputThread(this, output, id, *pDevices);
@@ -1555,11 +1560,28 @@
                     DuplicatingThread *dupThread =
                             (DuplicatingThread *)mPlaybackThreads.valueAt(i).get();
                     dupThread->removeOutputTrack((MixerThread *)thread.get());
+
+                }
+            }
+        }
+
+
+        mPlaybackThreads.removeItem(output);
+        // save all effects to the default thread
+        if (mPlaybackThreads.size()) {
+            PlaybackThread *dstThread = checkPlaybackThread_l(mPlaybackThreads.keyAt(0));
+            if (dstThread != NULL) {
+                // audioflinger lock is held here so the acquisition order of thread locks does not
+                // matter
+                Mutex::Autolock _dl(dstThread->mLock);
+                Mutex::Autolock _sl(thread->mLock);
+                Vector< sp<EffectChain> > effectChains = thread->getEffectChains_l();
+                for (size_t i = 0; i < effectChains.size(); i ++) {
+                    moveEffectChain_l(effectChains[i]->sessionId(), thread.get(), dstThread, true);
                 }
             }
         }
         audioConfigChanged_l(AudioSystem::OUTPUT_CLOSED, output, NULL);
-        mPlaybackThreads.removeItem(output);
     }
     thread->exit();
     // The thread entity (active unit of execution) is no longer running here,
diff --git a/services/audioflinger/AudioFlinger.h b/services/audioflinger/AudioFlinger.h
index f31619b..262d194 100644
--- a/services/audioflinger/AudioFlinger.h
+++ b/services/audioflinger/AudioFlinger.h
@@ -365,7 +365,9 @@
     class PlaybackThread;
     class MixerThread;
     class DirectOutputThread;
+    class OffloadThread;
     class DuplicatingThread;
+    class AsyncCallbackThread;
     class Track;
     class RecordTrack;
     class EffectModule;
@@ -432,6 +434,7 @@
         void                stop_nonvirtual();
     };
 
+
               PlaybackThread *checkPlaybackThread_l(audio_io_handle_t output) const;
               MixerThread *checkMixerThread_l(audio_io_handle_t output) const;
               RecordThread *checkRecordThread_l(audio_io_handle_t input) const;
@@ -498,11 +501,12 @@
     struct AudioStreamOut {
         AudioHwDevice* const audioHwDev;
         audio_stream_out_t* const stream;
+        audio_output_flags_t flags;
 
         audio_hw_device_t* hwDev() const { return audioHwDev->hwDevice(); }
 
-        AudioStreamOut(AudioHwDevice *dev, audio_stream_out_t *out) :
-            audioHwDev(dev), stream(out) {}
+        AudioStreamOut(AudioHwDevice *dev, audio_stream_out_t *out, audio_output_flags_t flags) :
+            audioHwDev(dev), stream(out), flags(flags) {}
     };
 
     struct AudioStreamIn {
diff --git a/services/audioflinger/AudioPolicyService.cpp b/services/audioflinger/AudioPolicyService.cpp
index fa1e405..900b411 100644
--- a/services/audioflinger/AudioPolicyService.cpp
+++ b/services/audioflinger/AudioPolicyService.cpp
@@ -70,10 +70,11 @@
     Mutex::Autolock _l(mLock);
 
     // start tone playback thread
-    mTonePlaybackThread = new AudioCommandThread(String8(""));
+    mTonePlaybackThread = new AudioCommandThread(String8("ApmTone"), this);
     // start audio commands thread
-    mAudioCommandThread = new AudioCommandThread(String8("ApmCommand"));
-
+    mAudioCommandThread = new AudioCommandThread(String8("ApmAudio"), this);
+    // start output activity command thread
+    mOutputCommandThread = new AudioCommandThread(String8("ApmOutput"), this);
     /* instantiate the audio policy manager */
     rc = hw_get_module(AUDIO_POLICY_HARDWARE_MODULE_ID, &module);
     if (rc)
@@ -256,6 +257,15 @@
         return NO_INIT;
     }
     ALOGV("stopOutput()");
+    mOutputCommandThread->stopOutputCommand(output, stream, session);
+    return NO_ERROR;
+}
+
+status_t  AudioPolicyService::doStopOutput(audio_io_handle_t output,
+                                      audio_stream_type_t stream,
+                                      int session)
+{
+    ALOGV("doStopOutput from tid %d", gettid());
     Mutex::Autolock _l(mLock);
     return mpAudioPolicy->stop_output(mpAudioPolicy, output, stream, session);
 }
@@ -266,6 +276,12 @@
         return;
     }
     ALOGV("releaseOutput()");
+    mOutputCommandThread->releaseOutputCommand(output);
+}
+
+void AudioPolicyService::doReleaseOutput(audio_io_handle_t output)
+{
+    ALOGV("doReleaseOutput from tid %d", gettid());
     Mutex::Autolock _l(mLock);
     mpAudioPolicy->release_output(mpAudioPolicy, output);
 }
@@ -641,8 +657,9 @@
 
 // -----------  AudioPolicyService::AudioCommandThread implementation ----------
 
-AudioPolicyService::AudioCommandThread::AudioCommandThread(String8 name)
-    : Thread(false), mName(name)
+AudioPolicyService::AudioCommandThread::AudioCommandThread(String8 name,
+                                                           const wp<AudioPolicyService>& service)
+    : Thread(false), mName(name), mService(service)
 {
     mpToneGenerator = NULL;
 }
@@ -650,7 +667,7 @@
 
 AudioPolicyService::AudioCommandThread::~AudioCommandThread()
 {
-    if (mName != "" && !mAudioCommands.isEmpty()) {
+    if (!mAudioCommands.isEmpty()) {
         release_wake_lock(mName.string());
     }
     mAudioCommands.clear();
@@ -659,11 +676,7 @@
 
 void AudioPolicyService::AudioCommandThread::onFirstRef()
 {
-    if (mName != "") {
-        run(mName.string(), ANDROID_PRIORITY_AUDIO);
-    } else {
-        run("AudioCommand", ANDROID_PRIORITY_AUDIO);
-    }
+    run(mName.string(), ANDROID_PRIORITY_AUDIO);
 }
 
 bool AudioPolicyService::AudioCommandThread::threadLoop()
@@ -738,6 +751,32 @@
                     }
                     delete data;
                     }break;
+                case STOP_OUTPUT: {
+                    StopOutputData *data = (StopOutputData *)command->mParam;
+                    ALOGV("AudioCommandThread() processing stop output %d",
+                            data->mIO);
+                    sp<AudioPolicyService> svc = mService.promote();
+                    if (svc == 0) {
+                        break;
+                    }
+                    mLock.unlock();
+                    svc->doStopOutput(data->mIO, data->mStream, data->mSession);
+                    mLock.lock();
+                    delete data;
+                    }break;
+                case RELEASE_OUTPUT: {
+                    ReleaseOutputData *data = (ReleaseOutputData *)command->mParam;
+                    ALOGV("AudioCommandThread() processing release output %d",
+                            data->mIO);
+                    sp<AudioPolicyService> svc = mService.promote();
+                    if (svc == 0) {
+                        break;
+                    }
+                    mLock.unlock();
+                    svc->doReleaseOutput(data->mIO);
+                    mLock.lock();
+                    delete data;
+                    }break;
                 default:
                     ALOGW("AudioCommandThread() unknown command %d", command->mCommand);
                 }
@@ -749,7 +788,7 @@
             }
         }
         // release delayed commands wake lock
-        if (mName != "" && mAudioCommands.isEmpty()) {
+        if (mAudioCommands.isEmpty()) {
             release_wake_lock(mName.string());
         }
         ALOGV("AudioCommandThread() going to sleep");
@@ -893,17 +932,46 @@
     return status;
 }
 
+void AudioPolicyService::AudioCommandThread::stopOutputCommand(audio_io_handle_t output,
+                                                               audio_stream_type_t stream,
+                                                               int session)
+{
+    AudioCommand *command = new AudioCommand();
+    command->mCommand = STOP_OUTPUT;
+    StopOutputData *data = new StopOutputData();
+    data->mIO = output;
+    data->mStream = stream;
+    data->mSession = session;
+    command->mParam = (void *)data;
+    Mutex::Autolock _l(mLock);
+    insertCommand_l(command);
+    ALOGV("AudioCommandThread() adding stop output %d", output);
+    mWaitWorkCV.signal();
+}
+
+void AudioPolicyService::AudioCommandThread::releaseOutputCommand(audio_io_handle_t output)
+{
+    AudioCommand *command = new AudioCommand();
+    command->mCommand = RELEASE_OUTPUT;
+    ReleaseOutputData *data = new ReleaseOutputData();
+    data->mIO = output;
+    command->mParam = (void *)data;
+    Mutex::Autolock _l(mLock);
+    insertCommand_l(command);
+    ALOGV("AudioCommandThread() adding release output %d", output);
+    mWaitWorkCV.signal();
+}
+
 // insertCommand_l() must be called with mLock held
 void AudioPolicyService::AudioCommandThread::insertCommand_l(AudioCommand *command, int delayMs)
 {
     ssize_t i;  // not size_t because i will count down to -1
     Vector <AudioCommand *> removedCommands;
-
     nsecs_t time = 0;
     command->mTime = systemTime() + milliseconds(delayMs);
 
     // acquire wake lock to make sure delayed commands are processed
-    if (mName != "" && mAudioCommands.isEmpty()) {
+    if (mAudioCommands.isEmpty()) {
         acquire_wake_lock(PARTIAL_WAKE_LOCK, mName.string());
     }
 
@@ -1060,7 +1128,17 @@
 
 bool AudioPolicyService::isOffloadSupported(const audio_offload_info_t& info)
 {
-    return false;   // stub function
+    if (mpAudioPolicy == NULL) {
+        ALOGV("mpAudioPolicy == NULL");
+        return false;
+    }
+
+    if (mpAudioPolicy->is_offload_supported == NULL) {
+        ALOGV("HAL does not implement is_offload_supported");
+        return false;
+    }
+
+    return mpAudioPolicy->is_offload_supported(mpAudioPolicy, &info);
 }
 
 // ----------------------------------------------------------------------------
@@ -1404,7 +1482,7 @@
         return 0;
     }
     return af->openOutput(module, pDevices, pSamplingRate, pFormat, pChannelMask,
-                          pLatencyMs, flags);
+                          pLatencyMs, flags, offloadInfo);
 }
 
 static audio_io_handle_t aps_open_dup_output(void *service,
diff --git a/services/audioflinger/AudioPolicyService.h b/services/audioflinger/AudioPolicyService.h
index e723c47..ae053a9 100644
--- a/services/audioflinger/AudioPolicyService.h
+++ b/services/audioflinger/AudioPolicyService.h
@@ -139,6 +139,11 @@
     virtual status_t setVoiceVolume(float volume, int delayMs = 0);
     virtual bool isOffloadSupported(const audio_offload_info_t &config);
 
+            status_t doStopOutput(audio_io_handle_t output,
+                                  audio_stream_type_t stream,
+                                  int session = 0);
+            void doReleaseOutput(audio_io_handle_t output);
+
 private:
                         AudioPolicyService() ANDROID_API;
     virtual             ~AudioPolicyService();
@@ -161,10 +166,12 @@
             STOP_TONE,
             SET_VOLUME,
             SET_PARAMETERS,
-            SET_VOICE_VOLUME
+            SET_VOICE_VOLUME,
+            STOP_OUTPUT,
+            RELEASE_OUTPUT
         };
 
-        AudioCommandThread (String8 name);
+        AudioCommandThread (String8 name, const wp<AudioPolicyService>& service);
         virtual             ~AudioCommandThread();
 
                     status_t    dump(int fd);
@@ -182,6 +189,11 @@
                     status_t    parametersCommand(audio_io_handle_t ioHandle,
                                             const char *keyValuePairs, int delayMs = 0);
                     status_t    voiceVolumeCommand(float volume, int delayMs = 0);
+                    void        stopOutputCommand(audio_io_handle_t output,
+                                                  audio_stream_type_t stream,
+                                                  int session);
+                    void        releaseOutputCommand(audio_io_handle_t output);
+
                     void        insertCommand_l(AudioCommand *command, int delayMs = 0);
 
     private:
@@ -226,12 +238,25 @@
             float mVolume;
         };
 
+        class StopOutputData {
+        public:
+            audio_io_handle_t mIO;
+            audio_stream_type_t mStream;
+            int mSession;
+        };
+
+        class ReleaseOutputData {
+        public:
+            audio_io_handle_t mIO;
+        };
+
         Mutex   mLock;
         Condition mWaitWorkCV;
         Vector <AudioCommand *> mAudioCommands; // list of pending commands
         ToneGenerator *mpToneGenerator;     // the tone generator
         AudioCommand mLastCommand;          // last processed command (used by dump)
         String8 mName;                      // string used by wake lock fo delayed commands
+        wp<AudioPolicyService> mService;
     };
 
     class EffectDesc {
@@ -316,6 +341,7 @@
                             // device connection state  or routing
     sp<AudioCommandThread> mAudioCommandThread;     // audio commands thread
     sp<AudioCommandThread> mTonePlaybackThread;     // tone playback thread
+    sp<AudioCommandThread> mOutputCommandThread;    // process stop and release output
     struct audio_policy_device *mpAudioPolicyDev;
     struct audio_policy *mpAudioPolicy;
     KeyedVector< audio_source_t, InputSourceDesc* > mInputSources;
diff --git a/services/audioflinger/Effects.cpp b/services/audioflinger/Effects.cpp
index 1c7a64b..d5a21a7 100644
--- a/services/audioflinger/Effects.cpp
+++ b/services/audioflinger/Effects.cpp
@@ -95,16 +95,7 @@
 {
     ALOGV("Destructor %p", this);
     if (mEffectInterface != NULL) {
-        if ((mDescriptor.flags & EFFECT_FLAG_TYPE_MASK) == EFFECT_FLAG_TYPE_PRE_PROC ||
-                (mDescriptor.flags & EFFECT_FLAG_TYPE_MASK) == EFFECT_FLAG_TYPE_POST_PROC) {
-            sp<ThreadBase> thread = mThread.promote();
-            if (thread != 0) {
-                audio_stream_t *stream = thread->stream();
-                if (stream != NULL) {
-                    stream->remove_audio_effect(stream, mEffectInterface);
-                }
-            }
-        }
+        remove_effect_from_hal_l();
         // release effect engine
         EffectRelease(mEffectInterface);
     }
@@ -488,7 +479,7 @@
     if (mStatus != NO_ERROR) {
         return mStatus;
     }
-    status_t cmdStatus;
+    status_t cmdStatus = NO_ERROR;
     uint32_t size = sizeof(status_t);
     status_t status = (*mEffectInterface)->command(mEffectInterface,
                                                    EFFECT_CMD_DISABLE,
@@ -496,12 +487,19 @@
                                                    NULL,
                                                    &size,
                                                    &cmdStatus);
-    if (status == 0) {
+    if (status == NO_ERROR) {
         status = cmdStatus;
     }
-    if (status == 0 &&
-            ((mDescriptor.flags & EFFECT_FLAG_TYPE_MASK) == EFFECT_FLAG_TYPE_PRE_PROC ||
-             (mDescriptor.flags & EFFECT_FLAG_TYPE_MASK) == EFFECT_FLAG_TYPE_POST_PROC)) {
+    if (status == NO_ERROR) {
+        status = remove_effect_from_hal_l();
+    }
+    return status;
+}
+
+status_t AudioFlinger::EffectModule::remove_effect_from_hal_l()
+{
+    if ((mDescriptor.flags & EFFECT_FLAG_TYPE_MASK) == EFFECT_FLAG_TYPE_PRE_PROC ||
+             (mDescriptor.flags & EFFECT_FLAG_TYPE_MASK) == EFFECT_FLAG_TYPE_POST_PROC) {
         sp<ThreadBase> thread = mThread.promote();
         if (thread != 0) {
             audio_stream_t *stream = thread->stream();
@@ -510,7 +508,7 @@
             }
         }
     }
-    return status;
+    return NO_ERROR;
 }
 
 status_t AudioFlinger::EffectModule::command(uint32_t cmdCode,
@@ -595,6 +593,17 @@
                 h->setEnabled(enabled);
             }
         }
+//EL_FIXME not sure why this is needed?
+//        sp<ThreadBase> thread = mThread.promote();
+//        if (thread == 0) {
+//            return NO_ERROR;
+//        }
+//
+//        if ((thread->type() == ThreadBase::OFFLOAD) && (enabled)) {
+//            PlaybackThread *p = (PlaybackThread *)thread.get();
+//            ALOGV("setEnabled: Offload, invalidate tracks");
+//            p->invalidateTracks(AUDIO_STREAM_MUSIC);
+//        }
     }
     return NO_ERROR;
 }
@@ -1218,9 +1227,7 @@
 // Must be called with EffectChain::mLock locked
 void AudioFlinger::EffectChain::clearInputBuffer_l(sp<ThreadBase> thread)
 {
-    size_t numSamples = thread->frameCount() * thread->channelCount();
-    memset(mInBuffer, 0, numSamples * sizeof(int16_t));
-
+    memset(mInBuffer, 0, thread->frameCount() * thread->frameSize());
 }
 
 // Must be called with EffectChain::mLock locked
diff --git a/services/audioflinger/Effects.h b/services/audioflinger/Effects.h
index 91303ee..0b7fb83 100644
--- a/services/audioflinger/Effects.h
+++ b/services/audioflinger/Effects.h
@@ -126,6 +126,7 @@
 
     status_t start_l();
     status_t stop_l();
+    status_t remove_effect_from_hal_l();
 
 mutable Mutex               mLock;      // mutex for process, commands and handles list protection
     wp<ThreadBase>      mThread;    // parent thread
diff --git a/services/audioflinger/PlaybackTracks.h b/services/audioflinger/PlaybackTracks.h
index b1286d3..8b7433c 100644
--- a/services/audioflinger/PlaybackTracks.h
+++ b/services/audioflinger/PlaybackTracks.h
@@ -51,6 +51,8 @@
             audio_stream_type_t streamType() const {
                 return mStreamType;
             }
+            bool        isOffloaded() const { return (mFlags & IAudioFlinger::TRACK_OFFLOAD) != 0; }
+            status_t    setParameters(const String8& keyValuePairs);
             status_t    attachAuxEffect(int EffectId);
             void        setAuxBuffer(int EffectId, int32_t *buffer);
             int32_t     *auxBuffer() const { return mAuxBuffer; }
@@ -68,6 +70,7 @@
     friend class PlaybackThread;
     friend class MixerThread;
     friend class DirectOutputThread;
+    friend class OffloadThread;
 
                         Track(const Track&);
                         Track& operator = (const Track&);
@@ -142,6 +145,7 @@
                                         // barrier, but is read/written atomically
     bool                mIsInvalid; // non-resettable latch, set by invalidate()
     AudioTrackServerProxy*  mAudioTrackServerProxy;
+    bool                mResumeToStopping; // track was paused in stopping state.
 };  // end of Track
 
 class TimedTrack : public Track {
diff --git a/services/audioflinger/Threads.cpp b/services/audioflinger/Threads.cpp
index f27d908..296a485 100644
--- a/services/audioflinger/Threads.cpp
+++ b/services/audioflinger/Threads.cpp
@@ -932,13 +932,18 @@
                                              audio_devices_t device,
                                              type_t type)
     :   ThreadBase(audioFlinger, id, device, AUDIO_DEVICE_NONE, type),
-        mMixBuffer(NULL), mSuspended(0), mBytesWritten(0),
+        mAllocMixBuffer(NULL), mSuspended(0), mBytesWritten(0),
         // mStreamTypes[] initialized in constructor body
         mOutput(output),
         mLastWriteTime(0), mNumWrites(0), mNumDelayedWrites(0), mInWrite(false),
         mMixerStatus(MIXER_IDLE),
         mMixerStatusIgnoringFastTracks(MIXER_IDLE),
         standbyDelay(AudioFlinger::mStandbyTimeInNsecs),
+        mBytesRemaining(0),
+        mCurrentWriteLength(0),
+        mUseAsyncWrite(false),
+        mWriteBlocked(false),
+        mDraining(false),
         mScreenState(AudioFlinger::mScreenState),
         // index 0 is reserved for normal mixer's submix
         mFastTrackAvailMask(((1 << FastMixerState::kMaxFastTracks) - 1) & ~1)
@@ -981,7 +986,7 @@
 AudioFlinger::PlaybackThread::~PlaybackThread()
 {
     mAudioFlinger->unregisterWriter(mNBLogWriter);
-    delete [] mMixBuffer;
+    delete [] mAllocMixBuffer;
 }
 
 void AudioFlinger::PlaybackThread::dump(int fd, const Vector<String16>& args)
@@ -1187,7 +1192,22 @@
                 goto Exit;
             }
         }
+    } else if (mType == OFFLOAD) {
+        if (sampleRate != mSampleRate || format != mFormat || channelMask != mChannelMask) {
+            ALOGE("createTrack_l() Bad parameter: sampleRate %d format %d, channelMask 0x%08x \""
+                    "for output %p with format %d",
+                    sampleRate, format, channelMask, mOutput, mFormat);
+            lStatus = BAD_VALUE;
+            goto Exit;
+        }
     } else {
+        if ((format & AUDIO_FORMAT_MAIN_MASK) != AUDIO_FORMAT_PCM) {
+                ALOGE("createTrack_l() Bad parameter: format %d \""
+                        "for output %p with format %d",
+                        format, mOutput, mFormat);
+                lStatus = BAD_VALUE;
+                goto Exit;
+        }
         // Resampler implementation limits input sampling rate to 2 x output sampling rate.
         if (sampleRate > mSampleRate*2) {
             ALOGE("Sample rate out of range: %u mSampleRate %u", sampleRate, mSampleRate);
@@ -1233,6 +1253,7 @@
             lStatus = NO_MEMORY;
             goto Exit;
         }
+
         mTracks.add(track);
 
         sp<EffectChain> chain = getEffectChain_l(sessionId);
@@ -1307,12 +1328,14 @@
 {
     Mutex::Autolock _l(mLock);
     mStreamTypes[stream].volume = value;
+    signal_l();
 }
 
 void AudioFlinger::PlaybackThread::setStreamMute(audio_stream_type_t stream, bool muted)
 {
     Mutex::Autolock _l(mLock);
     mStreamTypes[stream].mute = muted;
+    signal_l();
 }
 
 float AudioFlinger::PlaybackThread::streamVolume(audio_stream_type_t stream) const
@@ -1332,6 +1355,30 @@
         // the track is newly added, make sure it fills up all its
         // buffers before playing. This is to ensure the client will
         // effectively get the latency it requested.
+        if (!track->isOutputTrack()) {
+            TrackBase::track_state state = track->mState;
+            mLock.unlock();
+            status = AudioSystem::startOutput(mId, track->streamType(), track->sessionId());
+            mLock.lock();
+            // abort track was stopped/paused while we released the lock
+            if (state != track->mState) {
+                if (status == NO_ERROR) {
+                    mLock.unlock();
+                    AudioSystem::stopOutput(mId, track->streamType(), track->sessionId());
+                    mLock.lock();
+                }
+                return INVALID_OPERATION;
+            }
+            // abort if start is rejected by audio policy manager
+            if (status != NO_ERROR) {
+                return PERMISSION_DENIED;
+            }
+#ifdef ADD_BATTERY_DATA
+            // to track the speaker usage
+            addBatteryData(IMediaPlayerService::kBatteryDataAudioFlingerStart);
+#endif
+        }
+
         track->mFillingUpStatus = track->sharedBuffer() != 0 ? Track::FS_FILLED : Track::FS_FILLING;
         track->mResetDone = false;
         track->mPresentationCompleteFrames = 0;
@@ -1352,14 +1399,19 @@
     return status;
 }
 
-// destroyTrack_l() must be called with ThreadBase::mLock held
-void AudioFlinger::PlaybackThread::destroyTrack_l(const sp<Track>& track)
+bool AudioFlinger::PlaybackThread::destroyTrack_l(const sp<Track>& track)
 {
-    track->mState = TrackBase::TERMINATED;
+    track->terminate();
     // active tracks are removed by threadLoop()
-    if (mActiveTracks.indexOf(track) < 0) {
+    bool trackActive = (mActiveTracks.indexOf(track) >= 0);
+    track->mState = TrackBase::STOPPED;
+    if (!trackActive) {
         removeTrack_l(track);
+    } else if (track->isFastTrack() || track->isOffloaded()) {
+        track->mState = TrackBase::STOPPING_1;
     }
+
+    return trackActive;
 }
 
 void AudioFlinger::PlaybackThread::removeTrack_l(const sp<Track>& track)
@@ -1383,6 +1435,16 @@
     }
 }
 
+void AudioFlinger::PlaybackThread::signal_l()
+{
+    // Thread could be blocked waiting for async
+    // so signal it to handle state changes immediately
+    // If threadLoop is currently unlocked a signal of mWaitWorkCV will
+    // be lost so we also flag to prevent it blocking on mWaitWorkCV
+    mSignalPending = true;
+    mWaitWorkCV.signal();
+}
+
 String8 AudioFlinger::PlaybackThread::getParameters(const String8& keys)
 {
     String8 out_s8 = String8("");
@@ -1428,6 +1490,57 @@
     mAudioFlinger->audioConfigChanged_l(event, mId, param2);
 }
 
+void AudioFlinger::PlaybackThread::writeCallback()
+{
+    ALOG_ASSERT(mCallbackThread != 0);
+    mCallbackThread->setWriteBlocked(false);
+}
+
+void AudioFlinger::PlaybackThread::drainCallback()
+{
+    ALOG_ASSERT(mCallbackThread != 0);
+    mCallbackThread->setDraining(false);
+}
+
+void AudioFlinger::PlaybackThread::setWriteBlocked(bool value)
+{
+    Mutex::Autolock _l(mLock);
+    mWriteBlocked = value;
+    if (!value) {
+        mWaitWorkCV.signal();
+    }
+}
+
+void AudioFlinger::PlaybackThread::setDraining(bool value)
+{
+    Mutex::Autolock _l(mLock);
+    mDraining = value;
+    if (!value) {
+        mWaitWorkCV.signal();
+    }
+}
+
+// static
+int AudioFlinger::PlaybackThread::asyncCallback(stream_callback_event_t event,
+                                                void *param,
+                                                void *cookie)
+{
+    AudioFlinger::PlaybackThread *me = (AudioFlinger::PlaybackThread *)cookie;
+    ALOGV("asyncCallback() event %d", event);
+    switch (event) {
+    case STREAM_CBK_EVENT_WRITE_READY:
+        me->writeCallback();
+        break;
+    case STREAM_CBK_EVENT_DRAIN_READY:
+        me->drainCallback();
+        break;
+    default:
+        ALOGW("asyncCallback() unknown event %d", event);
+        break;
+    }
+    return 0;
+}
+
 void AudioFlinger::PlaybackThread::readOutputParameters()
 {
     mSampleRate = mOutput->stream->common.get_sample_rate(&mOutput->stream->common);
@@ -1441,6 +1554,14 @@
                 mFrameCount);
     }
 
+    if ((mOutput->flags & AUDIO_OUTPUT_FLAG_NON_BLOCKING) &&
+            (mOutput->stream->set_callback != NULL)) {
+        if (mOutput->stream->set_callback(mOutput->stream,
+                                      AudioFlinger::PlaybackThread::asyncCallback, this) == 0) {
+            mUseAsyncWrite = true;
+        }
+    }
+
     // Calculate size of normal mix buffer relative to the HAL output buffer size
     double multiplier = 1.0;
     if (mType == MIXER && (kUseFastMixer == FastMixer_Static ||
@@ -1483,9 +1604,11 @@
     ALOGI("HAL output buffer size %u frames, normal mix buffer size %u frames", mFrameCount,
             mNormalFrameCount);
 
-    delete[] mMixBuffer;
-    mMixBuffer = new int16_t[mNormalFrameCount * mChannelCount];
-    memset(mMixBuffer, 0, mNormalFrameCount * mChannelCount * sizeof(int16_t));
+    delete[] mAllocMixBuffer;
+    size_t align = (mFrameSize < sizeof(int16_t)) ? sizeof(int16_t) : mFrameSize;
+    mAllocMixBuffer = new int8_t[mNormalFrameCount * mFrameSize + align - 1];
+    mMixBuffer = (int16_t *) ((((size_t)mAllocMixBuffer + align - 1) / align) * align);
+    memset(mMixBuffer, 0, mNormalFrameCount * mFrameSize);
 
     // force reconfiguration of effect chains and engines to take new buffer size and audio
     // parameters into account
@@ -1622,13 +1745,18 @@
     if (CC_UNLIKELY(count)) {
         for (size_t i = 0 ; i < count ; i++) {
             const sp<Track>& track = tracksToRemove.itemAt(i);
-            if ((track->sharedBuffer() != 0) &&
-                    (track->mState == TrackBase::ACTIVE || track->mState == TrackBase::RESUMING)) {
+            if (!track->isOutputTrack()) {
                 AudioSystem::stopOutput(mId, track->streamType(), track->sessionId());
+#ifdef ADD_BATTERY_DATA
+                // to track the speaker usage
+                addBatteryData(IMediaPlayerService::kBatteryDataAudioFlingerStop);
+#endif
+                if (track->isTerminated()) {
+                    AudioSystem::releaseOutput(mId);
+                }
             }
         }
     }
-
 }
 
 void AudioFlinger::PlaybackThread::checkSilentMode_l()
@@ -1649,17 +1777,18 @@
 }
 
 // shared by MIXER and DIRECT, overridden by DUPLICATING
-void AudioFlinger::PlaybackThread::threadLoop_write()
+ssize_t AudioFlinger::PlaybackThread::threadLoop_write()
 {
     // FIXME rewrite to reduce number of system calls
     mLastWriteTime = systemTime();
     mInWrite = true;
-    int bytesWritten;
+    ssize_t bytesWritten;
 
     // If an NBAIO sink is present, use it to write the normal mixer's submix
     if (mNormalSink != 0) {
 #define mBitShift 2 // FIXME
-        size_t count = mixBufferSize >> mBitShift;
+        size_t count = mBytesRemaining >> mBitShift;
+        size_t offset = (mCurrentWriteLength - mBytesRemaining) >> 1;
         ATRACE_BEGIN("write");
         // update the setpoint when AudioFlinger::mScreenState changes
         uint32_t screenState = AudioFlinger::mScreenState;
@@ -1671,7 +1800,7 @@
                         (pipe->maxFrames() * 7) / 8 : mNormalFrameCount * 2);
             }
         }
-        ssize_t framesWritten = mNormalSink->write(mMixBuffer, count);
+        ssize_t framesWritten = mNormalSink->write(mMixBuffer + offset, count);
         ATRACE_END();
         if (framesWritten > 0) {
             bytesWritten = framesWritten << mBitShift;
@@ -1680,15 +1809,48 @@
         }
     // otherwise use the HAL / AudioStreamOut directly
     } else {
-        // Direct output thread.
-        bytesWritten = (int)mOutput->stream->write(mOutput->stream, mMixBuffer, mixBufferSize);
+        // Direct output and offload threads
+        size_t offset = (mCurrentWriteLength - mBytesRemaining) / sizeof(int16_t);
+        if (mUseAsyncWrite) {
+            mWriteBlocked = true;
+            ALOG_ASSERT(mCallbackThread != 0);
+            mCallbackThread->setWriteBlocked(true);
+        }
+        bytesWritten = mOutput->stream->write(mOutput->stream,
+                                                   mMixBuffer + offset, mBytesRemaining);
+        if (mUseAsyncWrite &&
+                ((bytesWritten < 0) || (bytesWritten == (ssize_t)mBytesRemaining))) {
+            // do not wait for async callback in case of error of full write
+            mWriteBlocked = false;
+            ALOG_ASSERT(mCallbackThread != 0);
+            mCallbackThread->setWriteBlocked(false);
+        }
     }
 
-    if (bytesWritten > 0) {
-        mBytesWritten += mixBufferSize;
-    }
     mNumWrites++;
     mInWrite = false;
+
+    return bytesWritten;
+}
+
+void AudioFlinger::PlaybackThread::threadLoop_drain()
+{
+    if (mOutput->stream->drain) {
+        ALOGV("draining %s", (mMixerStatus == MIXER_DRAIN_TRACK) ? "early" : "full");
+        if (mUseAsyncWrite) {
+            mDraining = true;
+            ALOG_ASSERT(mCallbackThread != 0);
+            mCallbackThread->setDraining(true);
+        }
+        mOutput->stream->drain(mOutput->stream,
+            (mMixerStatus == MIXER_DRAIN_TRACK) ? AUDIO_DRAIN_EARLY_NOTIFY
+                                                : AUDIO_DRAIN_ALL);
+    }
+}
+
+void AudioFlinger::PlaybackThread::threadLoop_exit()
+{
+    // Default implementation has nothing to do
 }
 
 /*
@@ -1929,10 +2091,29 @@
 
             saveOutputTracks();
 
-            // put audio hardware into standby after short delay
-            if (CC_UNLIKELY((!mActiveTracks.size() && systemTime() > standbyTime) ||
-                        isSuspended())) {
-                if (!mStandby) {
+            if (mSignalPending) {
+                // A signal was raised while we were unlocked
+                mSignalPending = false;
+            } else if (waitingAsyncCallback_l()) {
+                if (exitPending()) {
+                    break;
+                }
+                releaseWakeLock_l();
+                ALOGV("wait async completion");
+                mWaitWorkCV.wait(mLock);
+                ALOGV("async completion/wake");
+                acquireWakeLock_l();
+                if (exitPending()) {
+                    break;
+                }
+                if (!mActiveTracks.size() && (systemTime() > standbyTime)) {
+                    continue;
+                }
+                sleepTime = 0;
+            } else if ((!mActiveTracks.size() && systemTime() > standbyTime) ||
+                                   isSuspended()) {
+                // put audio hardware into standby after short delay
+                if (shouldStandby_l()) {
 
                     threadLoop_standby();
 
@@ -1959,7 +2140,7 @@
                     mMixerStatus = MIXER_IDLE;
                     mMixerStatusIgnoringFastTracks = MIXER_IDLE;
                     mBytesWritten = 0;
-
+                    mBytesRemaining = 0;
                     checkSilentMode_l();
 
                     standbyTime = systemTime() + standbyDelay;
@@ -1981,50 +2162,73 @@
             lockEffectChains_l(effectChains);
         }
 
-        if (CC_LIKELY(mMixerStatus == MIXER_TRACKS_READY)) {
-            threadLoop_mix();
-        } else {
-            threadLoop_sleepTime();
-        }
+        if (mBytesRemaining == 0) {
+            mCurrentWriteLength = 0;
+            if (mMixerStatus == MIXER_TRACKS_READY) {
+                // threadLoop_mix() sets mCurrentWriteLength
+                threadLoop_mix();
+            } else if ((mMixerStatus != MIXER_DRAIN_TRACK)
+                        && (mMixerStatus != MIXER_DRAIN_ALL)) {
+                // threadLoop_sleepTime sets sleepTime to 0 if data
+                // must be written to HAL
+                threadLoop_sleepTime();
+                if (sleepTime == 0) {
+                    mCurrentWriteLength = mixBufferSize;
+                }
+            }
+            mBytesRemaining = mCurrentWriteLength;
+            if (isSuspended()) {
+                sleepTime = suspendSleepTimeUs();
+                // simulate write to HAL when suspended
+                mBytesWritten += mixBufferSize;
+                mBytesRemaining = 0;
+            }
 
-        if (isSuspended()) {
-            sleepTime = suspendSleepTimeUs();
-            mBytesWritten += mixBufferSize;
-        }
-
-        // only process effects if we're going to write
-        if (sleepTime == 0) {
-            for (size_t i = 0; i < effectChains.size(); i ++) {
-                effectChains[i]->process_l();
+            // only process effects if we're going to write
+            if (sleepTime == 0) {
+                for (size_t i = 0; i < effectChains.size(); i ++) {
+                    effectChains[i]->process_l();
+                }
             }
         }
 
         // enable changes in effect chain
         unlockEffectChains(effectChains);
 
-        // sleepTime == 0 means we must write to audio hardware
-        if (sleepTime == 0) {
-
-            threadLoop_write();
-
-if (mType == MIXER) {
-            // write blocked detection
-            nsecs_t now = systemTime();
-            nsecs_t delta = now - mLastWriteTime;
-            if (!mStandby && delta > maxPeriod) {
-                mNumDelayedWrites++;
-                if ((now - lastWarning) > kWarningThrottleNs) {
-                    ATRACE_NAME("underrun");
-                    ALOGW("write blocked for %llu msecs, %d delayed writes, thread %p",
-                            ns2ms(delta), mNumDelayedWrites, this);
-                    lastWarning = now;
+        if (!waitingAsyncCallback()) {
+            // sleepTime == 0 means we must write to audio hardware
+            if (sleepTime == 0) {
+                if (mBytesRemaining) {
+                    ssize_t ret = threadLoop_write();
+                    if (ret < 0) {
+                        mBytesRemaining = 0;
+                    } else {
+                        mBytesWritten += ret;
+                        mBytesRemaining -= ret;
+                    }
+                } else if ((mMixerStatus == MIXER_DRAIN_TRACK) ||
+                        (mMixerStatus == MIXER_DRAIN_ALL)) {
+                    threadLoop_drain();
                 }
-            }
+if (mType == MIXER) {
+                // write blocked detection
+                nsecs_t now = systemTime();
+                nsecs_t delta = now - mLastWriteTime;
+                if (!mStandby && delta > maxPeriod) {
+                    mNumDelayedWrites++;
+                    if ((now - lastWarning) > kWarningThrottleNs) {
+                        ATRACE_NAME("underrun");
+                        ALOGW("write blocked for %llu msecs, %d delayed writes, thread %p",
+                                ns2ms(delta), mNumDelayedWrites, this);
+                        lastWarning = now;
+                    }
+                }
 }
 
-            mStandby = false;
-        } else {
-            usleep(sleepTime);
+                mStandby = false;
+            } else {
+                usleep(sleepTime);
+            }
         }
 
         // Finally let go of removed track(s), without the lock held
@@ -2046,8 +2250,10 @@
         // is now local to this block, but will keep it for now (at least until merge done).
     }
 
+    threadLoop_exit();
+
     // for DuplicatingThread, standby mode is handled by the outputTracks, otherwise ...
-    if (mType == MIXER || mType == DIRECT) {
+    if (mType == MIXER || mType == DIRECT || mType == OFFLOAD) {
         // put output stream into standby mode
         if (!mStandby) {
             mOutput->stream->common.standby(&mOutput->stream->common);
@@ -2060,6 +2266,28 @@
     return false;
 }
 
+// removeTracks_l() must be called with ThreadBase::mLock held
+void AudioFlinger::PlaybackThread::removeTracks_l(const Vector< sp<Track> >& tracksToRemove)
+{
+    size_t count = tracksToRemove.size();
+    if (CC_UNLIKELY(count)) {
+        for (size_t i=0 ; i<count ; i++) {
+            const sp<Track>& track = tracksToRemove.itemAt(i);
+            mActiveTracks.remove(track);
+            ALOGV("removeTracks_l removing track on session %d", track->sessionId());
+            sp<EffectChain> chain = getEffectChain_l(track->sessionId());
+            if (chain != 0) {
+                ALOGV("stopping track on chain %p for session Id: %d", chain.get(),
+                        track->sessionId());
+                chain->decActiveTrackCnt();
+            }
+            if (track->isTerminated()) {
+                removeTrack_l(track);
+            }
+        }
+    }
+
+}
 
 // ----------------------------------------------------------------------------
 
@@ -2264,7 +2492,7 @@
     PlaybackThread::threadLoop_removeTracks(tracksToRemove);
 }
 
-void AudioFlinger::MixerThread::threadLoop_write()
+ssize_t AudioFlinger::MixerThread::threadLoop_write()
 {
     // FIXME we should only do one push per cycle; confirm this is true
     // Start the fast mixer if it's not already running
@@ -2296,7 +2524,7 @@
             sq->end(false /*didModify*/);
         }
     }
-    PlaybackThread::threadLoop_write();
+    return PlaybackThread::threadLoop_write();
 }
 
 void AudioFlinger::MixerThread::threadLoop_standby()
@@ -2328,11 +2556,40 @@
     PlaybackThread::threadLoop_standby();
 }
 
+// Empty implementation for standard mixer
+// Overridden for offloaded playback
+void AudioFlinger::PlaybackThread::flushOutput_l()
+{
+}
+
+bool AudioFlinger::PlaybackThread::waitingAsyncCallback_l()
+{
+    return false;
+}
+
+bool AudioFlinger::PlaybackThread::shouldStandby_l()
+{
+    return !mStandby;
+}
+
+bool AudioFlinger::PlaybackThread::waitingAsyncCallback()
+{
+    Mutex::Autolock _l(mLock);
+    return waitingAsyncCallback_l();
+}
+
 // shared by MIXER and DIRECT, overridden by DUPLICATING
 void AudioFlinger::PlaybackThread::threadLoop_standby()
 {
     ALOGV("Audio hardware entering standby, mixer %p, suspend count %d", this, mSuspended);
     mOutput->stream->common.standby(&mOutput->stream->common);
+    if (mUseAsyncWrite != 0) {
+        mWriteBlocked = false;
+        mDraining = false;
+        ALOG_ASSERT(mCallbackThread != 0);
+        mCallbackThread->setWriteBlocked(false);
+        mCallbackThread->setDraining(false);
+    }
 }
 
 void AudioFlinger::MixerThread::threadLoop_mix()
@@ -2353,6 +2610,7 @@
 
     // mix buffers...
     mAudioMixer->process(pts);
+    mCurrentWriteLength = mixBufferSize;
     // increase sleep time progressively when application underrun condition clears.
     // Only increase sleep time if the mixer is ready for two consecutive times to avoid
     // that a steady state of alternating ready/not ready conditions keeps the sleep time
@@ -2480,7 +2738,7 @@
             switch (track->mState) {
             case TrackBase::STOPPING_1:
                 // track stays active in STOPPING_1 state until first underrun
-                if (recentUnderruns > 0) {
+                if (recentUnderruns > 0 || track->isTerminated()) {
                     track->mState = TrackBase::STOPPING_2;
                 }
                 break;
@@ -2522,7 +2780,6 @@
                 // fall through
             case TrackBase::STOPPING_2:
             case TrackBase::PAUSED:
-            case TrackBase::TERMINATED:
             case TrackBase::STOPPED:
             case TrackBase::FLUSHED:   // flush() while active
                 // Check for presentation complete if track is inactive
@@ -2634,8 +2891,7 @@
         if ((framesReady >= minFrames) && track->isReady() &&
                 !track->isPaused() && !track->isTerminated())
         {
-            ALOGVV("track %d u=%08x, s=%08x [OK] on thread %p", name, cblk->user, cblk->server,
-                    this);
+            ALOGVV("track %d s=%08x [OK] on thread %p", name, cblk->server, this);
 
             mixedTracks++;
 
@@ -2709,6 +2965,7 @@
                 }
                 va = (uint32_t)(v * sendLevel);
             }
+
             // Delegate volume control to effect in track effect chain if needed
             if (chain != 0 && chain->setVolume_l(&vl, &vr)) {
                 // Do not ramp volume if volume is controlled by effect
@@ -2800,8 +3057,7 @@
                 chain->clearInputBuffer();
             }
 
-            ALOGVV("track %d u=%08x, s=%08x [NOT READY] on thread %p", name, cblk->user,
-                    cblk->server, this);
+            ALOGVV("track %d s=%08x [NOT READY] on thread %p", name, cblk->server, this);
             if ((track->sharedBuffer() != 0) || track->isTerminated() ||
                     track->isStopped() || track->isPaused()) {
                 // We have consumed all the buffers of this track.
@@ -2887,30 +3143,13 @@
     }
 
     // remove all the tracks that need to be...
-    count = tracksToRemove->size();
-    if (CC_UNLIKELY(count)) {
-        for (size_t i=0 ; i<count ; i++) {
-            const sp<Track>& track = tracksToRemove->itemAt(i);
-            mActiveTracks.remove(track);
-            if (track->mainBuffer() != mMixBuffer) {
-                chain = getEffectChain_l(track->sessionId());
-                if (chain != 0) {
-                    ALOGV("stopping track on chain %p for session Id: %d", chain.get(),
-                            track->sessionId());
-                    chain->decActiveTrackCnt();
-                }
-            }
-            if (track->isTerminated()) {
-                removeTrack_l(track);
-            }
-        }
-    }
+    removeTracks_l(*tracksToRemove);
 
     // mix buffer must be cleared if all tracks are connected to an
     // effect chain as in this case the mixer will not write to
     // mix buffer and track effects will accumulate into it
-    if ((mixedTracks != 0 && mixedTracks == tracksWithEffect) ||
-            (mixedTracks == 0 && fastTracks > 0)) {
+    if ((mBytesRemaining == 0) && ((mixedTracks != 0 && mixedTracks == tracksWithEffect) ||
+            (mixedTracks == 0 && fastTracks > 0))) {
         // FIXME as a performance optimization, should remember previous zero status
         memset(mMixBuffer, 0, mNormalFrameCount * mChannelCount * sizeof(int16_t));
     }
@@ -3142,10 +3381,63 @@
 {
 }
 
+AudioFlinger::DirectOutputThread::DirectOutputThread(const sp<AudioFlinger>& audioFlinger,
+        AudioStreamOut* output, audio_io_handle_t id, uint32_t device,
+        ThreadBase::type_t type)
+    :   PlaybackThread(audioFlinger, output, id, device, type)
+        // mLeftVolFloat, mRightVolFloat
+{
+}
+
 AudioFlinger::DirectOutputThread::~DirectOutputThread()
 {
 }
 
+void AudioFlinger::DirectOutputThread::processVolume_l(Track *track, bool lastTrack)
+{
+    audio_track_cblk_t* cblk = track->cblk();
+    float left, right;
+
+    if (mMasterMute || mStreamTypes[track->streamType()].mute) {
+        left = right = 0;
+    } else {
+        float typeVolume = mStreamTypes[track->streamType()].volume;
+        float v = mMasterVolume * typeVolume;
+        AudioTrackServerProxy *proxy = track->mAudioTrackServerProxy;
+        uint32_t vlr = proxy->getVolumeLR();
+        float v_clamped = v * (vlr & 0xFFFF);
+        if (v_clamped > MAX_GAIN) v_clamped = MAX_GAIN;
+        left = v_clamped/MAX_GAIN;
+        v_clamped = v * (vlr >> 16);
+        if (v_clamped > MAX_GAIN) v_clamped = MAX_GAIN;
+        right = v_clamped/MAX_GAIN;
+    }
+
+    if (lastTrack) {
+        if (left != mLeftVolFloat || right != mRightVolFloat) {
+            mLeftVolFloat = left;
+            mRightVolFloat = right;
+
+            // Convert volumes from float to 8.24
+            uint32_t vl = (uint32_t)(left * (1 << 24));
+            uint32_t vr = (uint32_t)(right * (1 << 24));
+
+            // Delegate volume control to effect in track effect chain if needed
+            // only one effect chain can be present on DirectOutputThread, so if
+            // there is one, the track is connected to it
+            if (!mEffectChains.isEmpty()) {
+                mEffectChains[0]->setVolume_l(&vl, &vr);
+                left = (float)vl / (1 << 24);
+                right = (float)vr / (1 << 24);
+            }
+            if (mOutput->stream->set_volume) {
+                mOutput->stream->set_volume(mOutput->stream, left, right);
+            }
+        }
+    }
+}
+
+
 AudioFlinger::PlaybackThread::mixer_state AudioFlinger::DirectOutputThread::prepareTracks_l(
     Vector< sp<Track> > *tracksToRemove
 )
@@ -3172,6 +3464,12 @@
         } else {
             minFrames = 1;
         }
+        // Only consider last track started for volume and mixer state control.
+        // This is the last entry in mActiveTracks unless a track underruns.
+        // As we only care about the transition phase between two tracks on a
+        // direct output, it is not a problem to ignore the underrun case.
+        bool last = (i == (count - 1));
+
         if ((track->framesReady() >= minFrames) && track->isReady() &&
                 !track->isPaused() && !track->isTerminated())
         {
@@ -3186,52 +3484,8 @@
             }
 
             // compute volume for this track
-            float left, right;
-            if (mMasterMute || track->isPausing() || mStreamTypes[track->streamType()].mute) {
-                left = right = 0;
-                if (track->isPausing()) {
-                    track->setPaused();
-                }
-            } else {
-                float typeVolume = mStreamTypes[track->streamType()].volume;
-                float v = mMasterVolume * typeVolume;
-                uint32_t vlr = track->mAudioTrackServerProxy->getVolumeLR();
-                float v_clamped = v * (vlr & 0xFFFF);
-                if (v_clamped > MAX_GAIN) {
-                    v_clamped = MAX_GAIN;
-                }
-                left = v_clamped/MAX_GAIN;
-                v_clamped = v * (vlr >> 16);
-                if (v_clamped > MAX_GAIN) {
-                    v_clamped = MAX_GAIN;
-                }
-                right = v_clamped/MAX_GAIN;
-            }
-            // Only consider last track started for volume and mixer state control.
-            // This is the last entry in mActiveTracks unless a track underruns.
-            // As we only care about the transition phase between two tracks on a
-            // direct output, it is not a problem to ignore the underrun case.
-            if (i == (count - 1)) {
-                if (left != mLeftVolFloat || right != mRightVolFloat) {
-                    mLeftVolFloat = left;
-                    mRightVolFloat = right;
-
-                    // Convert volumes from float to 8.24
-                    uint32_t vl = (uint32_t)(left * (1 << 24));
-                    uint32_t vr = (uint32_t)(right * (1 << 24));
-
-                    // Delegate volume control to effect in track effect chain if needed
-                    // only one effect chain can be present on DirectOutputThread, so if
-                    // there is one, the track is connected to it
-                    if (!mEffectChains.isEmpty()) {
-                        // Do not ramp volume if volume is controlled by effect
-                        mEffectChains[0]->setVolume_l(&vl, &vr);
-                        left = (float)vl / (1 << 24);
-                        right = (float)vr / (1 << 24);
-                    }
-                    mOutput->stream->set_volume(mOutput->stream, left, right);
-                }
-
+            processVolume_l(track, last);
+            if (last) {
                 // reset retry count
                 track->mRetryCount = kMaxTrackRetriesDirect;
                 mActiveTrack = t;
@@ -3265,7 +3519,7 @@
                 if (--(track->mRetryCount) <= 0) {
                     ALOGV("BUFFER TIMEOUT: remove(%d) from active list", track->name());
                     tracksToRemove->add(track);
-                } else if (i == (count -1)){
+                } else if (last) {
                     mixerStatus = MIXER_TRACKS_ENABLED;
                 }
             }
@@ -3273,21 +3527,7 @@
     }
 
     // remove all the tracks that need to be...
-    count = tracksToRemove->size();
-    if (CC_UNLIKELY(count)) {
-        for (size_t i = 0 ; i < count ; i++) {
-            const sp<Track>& track = tracksToRemove->itemAt(i);
-            mActiveTracks.remove(track);
-            if (!mEffectChains.isEmpty()) {
-                ALOGV("stopping track on chain %p for session Id: %d", mEffectChains[0].get(),
-                      track->sessionId());
-                mEffectChains[0]->decActiveTrackCnt();
-            }
-            if (track->isTerminated()) {
-                removeTrack_l(track);
-            }
-        }
-    }
+    removeTracks_l(*tracksToRemove);
 
     return mixerStatus;
 }
@@ -3310,10 +3550,10 @@
         curBuf += buffer.frameCount * mFrameSize;
         mActiveTrack->releaseBuffer(&buffer);
     }
+    mCurrentWriteLength = curBuf - (int8_t *)mMixBuffer;
     sleepTime = 0;
     standbyTime = systemTime() + standbyDelay;
     mActiveTrack.clear();
-
 }
 
 void AudioFlinger::DirectOutputThread::threadLoop_sleepTime()
@@ -3434,6 +3674,307 @@
 
 // ----------------------------------------------------------------------------
 
+AudioFlinger::AsyncCallbackThread::AsyncCallbackThread(
+        const sp<AudioFlinger::OffloadThread>& offloadThread)
+    :   Thread(false /*canCallJava*/),
+        mOffloadThread(offloadThread),
+        mWriteBlocked(false),
+        mDraining(false)
+{
+}
+
+AudioFlinger::AsyncCallbackThread::~AsyncCallbackThread()
+{
+}
+
+void AudioFlinger::AsyncCallbackThread::onFirstRef()
+{
+    run("Offload Cbk", ANDROID_PRIORITY_URGENT_AUDIO);
+}
+
+bool AudioFlinger::AsyncCallbackThread::threadLoop()
+{
+    while (!exitPending()) {
+        bool writeBlocked;
+        bool draining;
+
+        {
+            Mutex::Autolock _l(mLock);
+            mWaitWorkCV.wait(mLock);
+            if (exitPending()) {
+                break;
+            }
+            writeBlocked = mWriteBlocked;
+            draining = mDraining;
+            ALOGV("AsyncCallbackThread mWriteBlocked %d mDraining %d", mWriteBlocked, mDraining);
+        }
+        {
+            sp<AudioFlinger::OffloadThread> offloadThread = mOffloadThread.promote();
+            if (offloadThread != 0) {
+                if (writeBlocked == false) {
+                    offloadThread->setWriteBlocked(false);
+                }
+                if (draining == false) {
+                    offloadThread->setDraining(false);
+                }
+            }
+        }
+    }
+    return false;
+}
+
+void AudioFlinger::AsyncCallbackThread::exit()
+{
+    ALOGV("AsyncCallbackThread::exit");
+    Mutex::Autolock _l(mLock);
+    requestExit();
+    mWaitWorkCV.broadcast();
+}
+
+void AudioFlinger::AsyncCallbackThread::setWriteBlocked(bool value)
+{
+    Mutex::Autolock _l(mLock);
+    mWriteBlocked = value;
+    if (!value) {
+        mWaitWorkCV.signal();
+    }
+}
+
+void AudioFlinger::AsyncCallbackThread::setDraining(bool value)
+{
+    Mutex::Autolock _l(mLock);
+    mDraining = value;
+    if (!value) {
+        mWaitWorkCV.signal();
+    }
+}
+
+
+// ----------------------------------------------------------------------------
+AudioFlinger::OffloadThread::OffloadThread(const sp<AudioFlinger>& audioFlinger,
+        AudioStreamOut* output, audio_io_handle_t id, uint32_t device)
+    :   DirectOutputThread(audioFlinger, output, id, device, OFFLOAD),
+        mHwPaused(false),
+        mPausedBytesRemaining(0)
+{
+    mCallbackThread = new AudioFlinger::AsyncCallbackThread(this);
+}
+
+AudioFlinger::OffloadThread::~OffloadThread()
+{
+    mPreviousTrack.clear();
+}
+
+void AudioFlinger::OffloadThread::threadLoop_exit()
+{
+    if (mFlushPending || mHwPaused) {
+        // If a flush is pending or track was paused, just discard buffered data
+        flushHw_l();
+    } else {
+        mMixerStatus = MIXER_DRAIN_ALL;
+        threadLoop_drain();
+    }
+    mCallbackThread->exit();
+    PlaybackThread::threadLoop_exit();
+}
+
+AudioFlinger::PlaybackThread::mixer_state AudioFlinger::OffloadThread::prepareTracks_l(
+    Vector< sp<Track> > *tracksToRemove
+)
+{
+    ALOGV("OffloadThread::prepareTracks_l");
+    size_t count = mActiveTracks.size();
+
+    mixer_state mixerStatus = MIXER_IDLE;
+    if (mFlushPending) {
+        flushHw_l();
+        mFlushPending = false;
+    }
+    // find out which tracks need to be processed
+    for (size_t i = 0; i < count; i++) {
+        sp<Track> t = mActiveTracks[i].promote();
+        // The track died recently
+        if (t == 0) {
+            continue;
+        }
+        Track* const track = t.get();
+        audio_track_cblk_t* cblk = track->cblk();
+        if (mPreviousTrack != NULL) {
+            if (t != mPreviousTrack) {
+                // Flush any data still being written from last track
+                mBytesRemaining = 0;
+                if (mPausedBytesRemaining) {
+                    // Last track was paused so we also need to flush saved
+                    // mixbuffer state and invalidate track so that it will
+                    // re-submit that unwritten data when it is next resumed
+                    mPausedBytesRemaining = 0;
+                    // Invalidate is a bit drastic - would be more efficient
+                    // to have a flag to tell client that some of the
+                    // previously written data was lost
+                    mPreviousTrack->invalidate();
+                }
+            }
+        }
+        mPreviousTrack = t;
+        bool last = (i == (count - 1));
+        if (track->isPausing()) {
+            track->setPaused();
+            if (last) {
+                if (!mHwPaused) {
+                    mOutput->stream->pause(mOutput->stream);
+                    mHwPaused = true;
+                }
+                // If we were part way through writing the mixbuffer to
+                // the HAL we must save this until we resume
+                // BUG - this will be wrong if a different track is made active,
+                // in that case we want to discard the pending data in the
+                // mixbuffer and tell the client to present it again when the
+                // track is resumed
+                mPausedWriteLength = mCurrentWriteLength;
+                mPausedBytesRemaining = mBytesRemaining;
+                mBytesRemaining = 0;    // stop writing
+            }
+            tracksToRemove->add(track);
+        } else if (track->framesReady() && track->isReady() &&
+                !track->isPaused() && !track->isTerminated()) {
+            ALOGVV("OffloadThread: track %d s=%08x [OK]", track->name(), cblk->server);
+            if (track->mFillingUpStatus == Track::FS_FILLED) {
+                track->mFillingUpStatus = Track::FS_ACTIVE;
+                mLeftVolFloat = mRightVolFloat = 0;
+                if (track->mState == TrackBase::RESUMING) {
+                    if (CC_UNLIKELY(mPausedBytesRemaining)) {
+                        // Need to continue write that was interrupted
+                        mCurrentWriteLength = mPausedWriteLength;
+                        mBytesRemaining = mPausedBytesRemaining;
+                        mPausedBytesRemaining = 0;
+                    }
+                    track->mState = TrackBase::ACTIVE;
+                }
+            }
+
+            if (last) {
+                if (mHwPaused) {
+                    mOutput->stream->resume(mOutput->stream);
+                    mHwPaused = false;
+                    // threadLoop_mix() will handle the case that we need to
+                    // resume an interrupted write
+                }
+                // reset retry count
+                track->mRetryCount = kMaxTrackRetriesOffload;
+                mActiveTrack = t;
+                mixerStatus = MIXER_TRACKS_READY;
+            }
+        } else {
+            ALOGVV("OffloadThread: track %d s=%08x [NOT READY]", track->name(), cblk->server);
+            if (track->isStopping_1()) {
+                // Hardware buffer can hold a large amount of audio so we must
+                // wait for all current track's data to drain before we say
+                // that the track is stopped.
+                if (mBytesRemaining == 0) {
+                    // Only start draining when all data in mixbuffer
+                    // has been written
+                    ALOGV("OffloadThread: underrun and STOPPING_1 -> draining, STOPPING_2");
+                    track->mState = TrackBase::STOPPING_2; // so presentation completes after drain
+                    sleepTime = 0;
+                    standbyTime = systemTime() + standbyDelay;
+                    if (last) {
+                        mixerStatus = MIXER_DRAIN_TRACK;
+                        if (mHwPaused) {
+                            // It is possible to move from PAUSED to STOPPING_1 without
+                            // a resume so we must ensure hardware is running
+                            mOutput->stream->resume(mOutput->stream);
+                            mHwPaused = false;
+                        }
+                    }
+                }
+            } else if (track->isStopping_2()) {
+                // Drain has completed, signal presentation complete
+                if (!mDraining || !last) {
+                    track->mState = TrackBase::STOPPED;
+                    size_t audioHALFrames =
+                            (mOutput->stream->get_latency(mOutput->stream)*mSampleRate) / 1000;
+                    size_t framesWritten =
+                            mBytesWritten / audio_stream_frame_size(&mOutput->stream->common);
+                    track->presentationComplete(framesWritten, audioHALFrames);
+                    track->reset();
+                    tracksToRemove->add(track);
+                }
+            } else {
+                // No buffers for this track. Give it a few chances to
+                // fill a buffer, then remove it from active list.
+                if (--(track->mRetryCount) <= 0) {
+                    ALOGV("OffloadThread: BUFFER TIMEOUT: remove(%d) from active list",
+                          track->name());
+                    tracksToRemove->add(track);
+                } else if (last){
+                    mixerStatus = MIXER_TRACKS_ENABLED;
+                }
+            }
+        }
+        // compute volume for this track
+        processVolume_l(track, last);
+    }
+    // remove all the tracks that need to be...
+    removeTracks_l(*tracksToRemove);
+
+    return mixerStatus;
+}
+
+void AudioFlinger::OffloadThread::flushOutput_l()
+{
+    mFlushPending = true;
+}
+
+// must be called with thread mutex locked
+bool AudioFlinger::OffloadThread::waitingAsyncCallback_l()
+{
+    ALOGV("waitingAsyncCallback_l mWriteBlocked %d mDraining %d", mWriteBlocked, mDraining);
+    if (mUseAsyncWrite && (mWriteBlocked || mDraining)) {
+        return true;
+    }
+    return false;
+}
+
+// must be called with thread mutex locked
+bool AudioFlinger::OffloadThread::shouldStandby_l()
+{
+    bool TrackPaused = false;
+
+    // do not put the HAL in standby when paused. AwesomePlayer clear the offloaded AudioTrack
+    // after a timeout and we will enter standby then.
+    if (mTracks.size() > 0) {
+        TrackPaused = mTracks[mTracks.size() - 1]->isPaused();
+    }
+
+    return !mStandby && !TrackPaused;
+}
+
+
+bool AudioFlinger::OffloadThread::waitingAsyncCallback()
+{
+    Mutex::Autolock _l(mLock);
+    return waitingAsyncCallback_l();
+}
+
+void AudioFlinger::OffloadThread::flushHw_l()
+{
+    mOutput->stream->flush(mOutput->stream);
+    // Flush anything still waiting in the mixbuffer
+    mCurrentWriteLength = 0;
+    mBytesRemaining = 0;
+    mPausedWriteLength = 0;
+    mPausedBytesRemaining = 0;
+    if (mUseAsyncWrite) {
+        mWriteBlocked = false;
+        mDraining = false;
+        ALOG_ASSERT(mCallbackThread != 0);
+        mCallbackThread->setWriteBlocked(false);
+        mCallbackThread->setDraining(false);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
 AudioFlinger::DuplicatingThread::DuplicatingThread(const sp<AudioFlinger>& audioFlinger,
         AudioFlinger::MixerThread* mainThread, audio_io_handle_t id)
     :   MixerThread(audioFlinger, mainThread->getOutput(), id, mainThread->outDevice(),
@@ -3460,6 +4001,7 @@
     }
     sleepTime = 0;
     writeFrames = mNormalFrameCount;
+    mCurrentWriteLength = mixBufferSize;
     standbyTime = systemTime() + standbyDelay;
 }
 
@@ -3483,12 +4025,12 @@
     }
 }
 
-void AudioFlinger::DuplicatingThread::threadLoop_write()
+ssize_t AudioFlinger::DuplicatingThread::threadLoop_write()
 {
     for (size_t i = 0; i < outputTracks.size(); i++) {
         outputTracks[i]->write(mMixBuffer, writeFrames);
     }
-    mBytesWritten += mixBufferSize;
+    return (ssize_t)mixBufferSize;
 }
 
 void AudioFlinger::DuplicatingThread::threadLoop_standby()
@@ -3682,7 +4224,10 @@
                 continue;
             }
             if (mActiveTrack != 0) {
-                if (mActiveTrack->mState == TrackBase::PAUSING) {
+                if (mActiveTrack->isTerminated()) {
+                    removeTrack_l(mActiveTrack);
+                    mActiveTrack.clear();
+                } else if (mActiveTrack->mState == TrackBase::PAUSING) {
                     standby();
                     mActiveTrack.clear();
                     mStartStopCond.broadcast();
@@ -3701,9 +4246,6 @@
                         mStartStopCond.broadcast();
                     }
                     mStandby = false;
-                } else if (mActiveTrack->mState == TrackBase::TERMINATED) {
-                    removeTrack_l(mActiveTrack);
-                    mActiveTrack.clear();
                 }
             }
             lockEffectChains_l(effectChains);
@@ -4083,7 +4625,8 @@
 // destroyTrack_l() must be called with ThreadBase::mLock held
 void AudioFlinger::RecordThread::destroyTrack_l(const sp<RecordTrack>& track)
 {
-    track->mState = TrackBase::TERMINATED;
+    track->terminate();
+    track->mState = TrackBase::STOPPED;
     // active tracks are removed by threadLoop()
     if (mActiveTrack != track) {
         removeTrack_l(track);
diff --git a/services/audioflinger/Threads.h b/services/audioflinger/Threads.h
index 365c790..7c7c6f0 100644
--- a/services/audioflinger/Threads.h
+++ b/services/audioflinger/Threads.h
@@ -28,7 +28,8 @@
         MIXER,              // Thread class is MixerThread
         DIRECT,             // Thread class is DirectOutputThread
         DUPLICATING,        // Thread class is DuplicatingThread
-        RECORD              // Thread class is RecordThread
+        RECORD,             // Thread class is RecordThread
+        OFFLOAD             // Thread class is OffloadThread
     };
 
     ThreadBase(const sp<AudioFlinger>& audioFlinger, audio_io_handle_t id,
@@ -129,6 +130,7 @@
                 size_t      frameCount() const { return mNormalFrameCount; }
                 // Return's the HAL's frame count i.e. fast mixer buffer size.
                 size_t      frameCountHAL() const { return mFrameCount; }
+                size_t      frameSize() const { return mFrameSize; }
 
     // Should be "virtual status_t requestExitAndWait()" and override same
     // method in Thread, but Thread::requestExitAndWait() is not yet virtual.
@@ -184,6 +186,8 @@
                 void lockEffectChains_l(Vector< sp<EffectChain> >& effectChains);
                 // unlock effect chains after process
                 void unlockEffectChains(const Vector< sp<EffectChain> >& effectChains);
+                // get a copy of mEffectChains vector
+                Vector< sp<EffectChain> > getEffectChains_l() const { return mEffectChains; };
                 // set audio mode to all effect chains
                 void setMode(audio_mode_t mode);
                 // get effect module with corresponding ID on specified audio session
@@ -329,11 +333,19 @@
     enum mixer_state {
         MIXER_IDLE,             // no active tracks
         MIXER_TRACKS_ENABLED,   // at least one active track, but no track has any data ready
-        MIXER_TRACKS_READY      // at least one active track, and at least one track has data
+        MIXER_TRACKS_READY,      // at least one active track, and at least one track has data
+        MIXER_DRAIN_TRACK,      // drain currently playing track
+        MIXER_DRAIN_ALL,        // fully drain the hardware
         // standby mode does not have an enum value
         // suspend by audio policy manager is orthogonal to mixer state
     };
 
+    // retry count before removing active track in case of underrun on offloaded thread:
+    // we need to make sure that AudioTrack client has enough time to send large buffers
+//FIXME may be more appropriate if expressed in time units. Need to revise how underrun is handled
+    // for offloaded tracks
+    static const int8_t kMaxTrackRetriesOffload = 20;
+
     PlaybackThread(const sp<AudioFlinger>& audioFlinger, AudioStreamOut* output,
                    audio_io_handle_t id, audio_devices_t device, type_t type);
     virtual             ~PlaybackThread();
@@ -351,8 +363,10 @@
     // Code snippets that were lifted up out of threadLoop()
     virtual     void        threadLoop_mix() = 0;
     virtual     void        threadLoop_sleepTime() = 0;
-    virtual     void        threadLoop_write();
+    virtual     ssize_t     threadLoop_write();
+    virtual     void        threadLoop_drain();
     virtual     void        threadLoop_standby();
+    virtual     void        threadLoop_exit();
     virtual     void        threadLoop_removeTracks(const Vector< sp<Track> >& tracksToRemove);
 
                 // prepareTracks_l reads and writes mActiveTracks, and returns
@@ -360,6 +374,19 @@
                 // is responsible for clearing or destroying this Vector later on, when it
                 // is safe to do so. That will drop the final ref count and destroy the tracks.
     virtual     mixer_state prepareTracks_l(Vector< sp<Track> > *tracksToRemove) = 0;
+                void        removeTracks_l(const Vector< sp<Track> >& tracksToRemove);
+
+                void        writeCallback();
+                void        setWriteBlocked(bool value);
+                void        drainCallback();
+                void        setDraining(bool value);
+
+    static      int         asyncCallback(stream_callback_event_t event, void *param, void *cookie);
+
+    virtual     bool        waitingAsyncCallback();
+    virtual     bool        waitingAsyncCallback_l();
+    virtual     bool        shouldStandby_l();
+
 
     // ThreadBase virtuals
     virtual     void        preExit();
@@ -436,7 +463,8 @@
 
 
 protected:
-    int16_t*                        mMixBuffer;
+    int16_t*                        mMixBuffer;         // frame size aligned mix buffer
+    int8_t*                         mAllocMixBuffer;    // mixer buffer allocation address
 
     // suspend count, > 0 means suspended.  While suspended, the thread continues to pull from
     // tracks and mix, but doesn't write to HAL.  A2DP and SCO HAL implementations can't handle
@@ -489,8 +517,9 @@
     PlaybackThread& operator = (const PlaybackThread&);
 
     status_t    addTrack_l(const sp<Track>& track);
-    void        destroyTrack_l(const sp<Track>& track);
+    bool        destroyTrack_l(const sp<Track>& track);
     void        removeTrack_l(const sp<Track>& track);
+    void        signal_l();
 
     void        readOutputParameters();
 
@@ -538,6 +567,14 @@
     // DUPLICATING only
     uint32_t                        writeFrames;
 
+    size_t                          mBytesRemaining;
+    size_t                          mCurrentWriteLength;
+    bool                            mUseAsyncWrite;
+    bool                            mWriteBlocked;
+    bool                            mDraining;
+    bool                            mSignalPending;
+    sp<AsyncCallbackThread>         mCallbackThread;
+
 private:
     // The HAL output sink is treated as non-blocking, but current implementation is blocking
     sp<NBAIO_Sink>          mOutputSink;
@@ -561,7 +598,7 @@
 protected:
                 // accessed by both binder threads and within threadLoop(), lock on mutex needed
                 unsigned    mFastTrackAvailMask;    // bit i set if fast track [i] is available
-
+    virtual     void        flushOutput_l();
 };
 
 class MixerThread : public PlaybackThread {
@@ -587,7 +624,7 @@
     virtual     void        cacheParameters_l();
 
     // threadLoop snippets
-    virtual     void        threadLoop_write();
+    virtual     ssize_t     threadLoop_write();
     virtual     void        threadLoop_standby();
     virtual     void        threadLoop_mix();
     virtual     void        threadLoop_sleepTime();
@@ -644,17 +681,73 @@
     virtual     void        threadLoop_mix();
     virtual     void        threadLoop_sleepTime();
 
-private:
     // volumes last sent to audio HAL with stream->set_volume()
     float mLeftVolFloat;
     float mRightVolFloat;
 
+    DirectOutputThread(const sp<AudioFlinger>& audioFlinger, AudioStreamOut* output,
+                        audio_io_handle_t id, uint32_t device, ThreadBase::type_t type);
+    void processVolume_l(Track *track, bool lastTrack);
+
     // prepareTracks_l() tells threadLoop_mix() the name of the single active track
     sp<Track>               mActiveTrack;
 public:
     virtual     bool        hasFastMixer() const { return false; }
 };
 
+class OffloadThread : public DirectOutputThread {
+public:
+
+    OffloadThread(const sp<AudioFlinger>& audioFlinger, AudioStreamOut* output,
+                        audio_io_handle_t id, uint32_t device);
+    virtual                 ~OffloadThread();
+
+protected:
+    // threadLoop snippets
+    virtual     mixer_state prepareTracks_l(Vector< sp<Track> > *tracksToRemove);
+    virtual     void        threadLoop_exit();
+    virtual     void        flushOutput_l();
+
+    virtual     bool        waitingAsyncCallback();
+    virtual     bool        waitingAsyncCallback_l();
+    virtual     bool        shouldStandby_l();
+
+private:
+                void        flushHw_l();
+
+private:
+    bool        mHwPaused;
+    bool        mFlushPending;
+    size_t      mPausedWriteLength;     // length in bytes of write interrupted by pause
+    size_t      mPausedBytesRemaining;  // bytes still waiting in mixbuffer after resume
+    sp<Track>   mPreviousTrack;         // used to detect track switch
+};
+
+class AsyncCallbackThread : public Thread {
+public:
+
+    AsyncCallbackThread(const sp<OffloadThread>& offloadThread);
+
+    virtual             ~AsyncCallbackThread();
+
+    // Thread virtuals
+    virtual bool        threadLoop();
+
+    // RefBase
+    virtual void        onFirstRef();
+
+            void        exit();
+            void        setWriteBlocked(bool value);
+            void        setDraining(bool value);
+
+private:
+    wp<OffloadThread>   mOffloadThread;
+    bool                mWriteBlocked;
+    bool                mDraining;
+    Condition           mWaitWorkCV;
+    Mutex               mLock;
+};
+
 class DuplicatingThread : public MixerThread {
 public:
     DuplicatingThread(const sp<AudioFlinger>& audioFlinger, MixerThread* mainThread,
@@ -674,7 +767,7 @@
     // threadLoop snippets
     virtual     void        threadLoop_mix();
     virtual     void        threadLoop_sleepTime();
-    virtual     void        threadLoop_write();
+    virtual     ssize_t     threadLoop_write();
     virtual     void        threadLoop_standby();
     virtual     void        cacheParameters_l();
 
diff --git a/services/audioflinger/TrackBase.h b/services/audioflinger/TrackBase.h
index 55d96fa..e69d1d7 100644
--- a/services/audioflinger/TrackBase.h
+++ b/services/audioflinger/TrackBase.h
@@ -25,10 +25,10 @@
 public:
     enum track_state {
         IDLE,
-        TERMINATED,
         FLUSHED,
         STOPPED,
-        // next 2 states are currently used for fast tracks only
+        // next 2 states are currently used for fast tracks
+        // and offloaded tracks only
         STOPPING_1,     // waiting for first underrun
         STOPPING_2,     // waiting for presentation complete
         RESUMING,
@@ -89,7 +89,7 @@
         return (mState == STOPPED || mState == FLUSHED);
     }
 
-    // for fast tracks only
+    // for fast tracks and offloaded tracks only
     bool isStopping() const {
         return mState == STOPPING_1 || mState == STOPPING_2;
     }
@@ -101,7 +101,11 @@
     }
 
     bool isTerminated() const {
-        return mState == TERMINATED;
+        return mTerminated;
+    }
+
+    void terminate() {
+        mTerminated = true;
     }
 
     bool step();    // mStepCount is an implicit input
@@ -142,4 +146,5 @@
     const int           mId;
     sp<NBAIO_Sink>      mTeeSink;
     sp<NBAIO_Source>    mTeeSource;
+    bool                mTerminated;
 };
diff --git a/services/audioflinger/Tracks.cpp b/services/audioflinger/Tracks.cpp
index c45daae..e674a50 100644
--- a/services/audioflinger/Tracks.cpp
+++ b/services/audioflinger/Tracks.cpp
@@ -89,7 +89,8 @@
         mSessionId(sessionId),
         mIsOut(isOut),
         mServerProxy(NULL),
-        mId(android_atomic_inc(&nextTrackId))
+        mId(android_atomic_inc(&nextTrackId)),
+        mTerminated(false)
 {
     // client == 0 implies sharedBuffer == 0
     ALOG_ASSERT(!(client == 0 && sharedBuffer != 0));
@@ -252,7 +253,7 @@
 }
 
 status_t AudioFlinger::TrackHandle::setParameters(const String8& keyValuePairs) {
-    return INVALID_OPERATION;   // stub function
+    return mTrack->setParameters(keyValuePairs);
 }
 
 status_t AudioFlinger::TrackHandle::attachAuxEffect(int EffectId)
@@ -328,7 +329,8 @@
     mUnderrunCount(0),
     mCachedVolume(1.0),
     mIsInvalid(false),
-    mAudioTrackServerProxy(NULL)
+    mAudioTrackServerProxy(NULL),
+    mResumeToStopping(false)
 {
     if (mCblk != NULL) {
         if (sharedBuffer == 0) {
@@ -386,27 +388,19 @@
     { // scope for mLock
         sp<ThreadBase> thread = mThread.promote();
         if (thread != 0) {
-            if (!isOutputTrack()) {
-                if (mState == ACTIVE || mState == RESUMING) {
-                    AudioSystem::stopOutput(thread->id(), mStreamType, mSessionId);
-
-#ifdef ADD_BATTERY_DATA
-                    // to track the speaker usage
-                    addBatteryData(IMediaPlayerService::kBatteryDataAudioFlingerStop);
-#endif
-                }
-                AudioSystem::releaseOutput(thread->id());
-            }
             Mutex::Autolock _l(thread->mLock);
             PlaybackThread *playbackThread = (PlaybackThread *)thread.get();
-            playbackThread->destroyTrack_l(this);
+            bool wasActive = playbackThread->destroyTrack_l(this);
+            if (!isOutputTrack() && !wasActive) {
+                AudioSystem::releaseOutput(thread->id());
+            }
         }
     }
 }
 
 /*static*/ void AudioFlinger::PlaybackThread::Track::appendDumpHeader(String8& result)
 {
-    result.append("   Name Client Type Fmt Chn mask   Session StpCnt fCount S F SRate  "
+    result.append("   Name Client Type Fmt        Chn mask   Session StpCnt fCount S F SRate  "
                   "L dB  R dB    Server    Main buf    Aux Buf  Flags Underruns\n");
 }
 
@@ -420,40 +414,41 @@
     }
     track_state state = mState;
     char stateChar;
-    switch (state) {
-    case IDLE:
-        stateChar = 'I';
-        break;
-    case TERMINATED:
+    if (isTerminated()) {
         stateChar = 'T';
-        break;
-    case STOPPING_1:
-        stateChar = 's';
-        break;
-    case STOPPING_2:
-        stateChar = '5';
-        break;
-    case STOPPED:
-        stateChar = 'S';
-        break;
-    case RESUMING:
-        stateChar = 'R';
-        break;
-    case ACTIVE:
-        stateChar = 'A';
-        break;
-    case PAUSING:
-        stateChar = 'p';
-        break;
-    case PAUSED:
-        stateChar = 'P';
-        break;
-    case FLUSHED:
-        stateChar = 'F';
-        break;
-    default:
-        stateChar = '?';
-        break;
+    } else {
+        switch (state) {
+        case IDLE:
+            stateChar = 'I';
+            break;
+        case STOPPING_1:
+            stateChar = 's';
+            break;
+        case STOPPING_2:
+            stateChar = '5';
+            break;
+        case STOPPED:
+            stateChar = 'S';
+            break;
+        case RESUMING:
+            stateChar = 'R';
+            break;
+        case ACTIVE:
+            stateChar = 'A';
+            break;
+        case PAUSING:
+            stateChar = 'p';
+            break;
+        case PAUSED:
+            stateChar = 'P';
+            break;
+        case FLUSHED:
+            stateChar = 'F';
+            break;
+        default:
+            stateChar = '?';
+            break;
+        }
     }
     char nowInUnderrun;
     switch (mObservedUnderruns.mBitFields.mMostRecent) {
@@ -470,7 +465,7 @@
         nowInUnderrun = '?';
         break;
     }
-    snprintf(&buffer[7], size-7, " %6d %4u %3u 0x%08x %7u %6u %6u %1c %1d %5u %5.2g %5.2g  "
+    snprintf(&buffer[7], size-7, " %6d %4u 0x%08x 0x%08x %7u %6u %6u %1c %1d %5u %5.2g %5.2g  "
             "0x%08x 0x%08x 0x%08x %#5x %9u%c\n",
             (mClient == 0) ? getpid_cached : mClient->pid(),
             mStreamType,
@@ -555,32 +550,33 @@
         track_state state = mState;
         // here the track could be either new, or restarted
         // in both cases "unstop" the track
+
         if (state == PAUSED) {
-            mState = TrackBase::RESUMING;
-            ALOGV("PAUSED => RESUMING (%d) on thread %p", mName, this);
+            if (mResumeToStopping) {
+                // happened we need to resume to STOPPING_1
+                mState = TrackBase::STOPPING_1;
+                ALOGV("PAUSED => STOPPING_1 (%d) on thread %p", mName, this);
+            } else {
+                mState = TrackBase::RESUMING;
+                ALOGV("PAUSED => RESUMING (%d) on thread %p", mName, this);
+            }
         } else {
             mState = TrackBase::ACTIVE;
             ALOGV("? => ACTIVE (%d) on thread %p", mName, this);
         }
 
-        if (!isOutputTrack() && state != ACTIVE && state != RESUMING) {
-            thread->mLock.unlock();
-            status = AudioSystem::startOutput(thread->id(), mStreamType, mSessionId);
-            thread->mLock.lock();
-
-#ifdef ADD_BATTERY_DATA
-            // to track the speaker usage
-            if (status == NO_ERROR) {
-                addBatteryData(IMediaPlayerService::kBatteryDataAudioFlingerStart);
-            }
-#endif
-        }
-        if (status == NO_ERROR) {
-            PlaybackThread *playbackThread = (PlaybackThread *)thread.get();
-            playbackThread->addTrack_l(this);
-        } else {
-            mState = state;
+        PlaybackThread *playbackThread = (PlaybackThread *)thread.get();
+        status = playbackThread->addTrack_l(this);
+        if (status == INVALID_OPERATION || status == PERMISSION_DENIED) {
             triggerEvents(AudioSystem::SYNC_EVENT_PRESENTATION_COMPLETE);
+            //  restore previous state if start was rejected by policy manager
+            if (status == PERMISSION_DENIED) {
+                mState = state;
+            }
+        }
+        // track was already in the active list, not a problem
+        if (status == ALREADY_EXISTS) {
+            status = NO_ERROR;
         }
     } else {
         status = BAD_VALUE;
@@ -601,26 +597,18 @@
             if (playbackThread->mActiveTracks.indexOf(this) < 0) {
                 reset();
                 mState = STOPPED;
-            } else if (!isFastTrack()) {
+            } else if (!isFastTrack() && !isOffloaded()) {
                 mState = STOPPED;
             } else {
-                // prepareTracks_l() will set state to STOPPING_2 after next underrun,
-                // and then to STOPPED and reset() when presentation is complete
+                // For fast tracks prepareTracks_l() will set state to STOPPING_2
+                // presentation is complete
+                // For an offloaded track this starts a drain and state will
+                // move to STOPPING_2 when drain completes and then STOPPED
                 mState = STOPPING_1;
             }
             ALOGV("not stopping/stopped => stopping/stopped (%d) on thread %p", mName,
                     playbackThread);
         }
-        if (!isOutputTrack() && (state == ACTIVE || state == RESUMING)) {
-            thread->mLock.unlock();
-            AudioSystem::stopOutput(thread->id(), mStreamType, mSessionId);
-            thread->mLock.lock();
-
-#ifdef ADD_BATTERY_DATA
-            // to track the speaker usage
-            addBatteryData(IMediaPlayerService::kBatteryDataAudioFlingerStop);
-#endif
-        }
     }
 }
 
@@ -630,19 +618,27 @@
     sp<ThreadBase> thread = mThread.promote();
     if (thread != 0) {
         Mutex::Autolock _l(thread->mLock);
-        if (mState == ACTIVE || mState == RESUMING) {
+        PlaybackThread *playbackThread = (PlaybackThread *)thread.get();
+        switch (mState) {
+        case STOPPING_1:
+        case STOPPING_2:
+            if (!isOffloaded()) {
+                /* nothing to do if track is not offloaded */
+                break;
+            }
+
+            // Offloaded track was draining, we need to carry on draining when resumed
+            mResumeToStopping = true;
+            // fall through...
+        case ACTIVE:
+        case RESUMING:
             mState = PAUSING;
             ALOGV("ACTIVE/RESUMING => PAUSING (%d) on thread %p", mName, thread.get());
-            if (!isOutputTrack()) {
-                thread->mLock.unlock();
-                AudioSystem::stopOutput(thread->id(), mStreamType, mSessionId);
-                thread->mLock.lock();
+            playbackThread->signal_l();
+            break;
 
-#ifdef ADD_BATTERY_DATA
-                // to track the speaker usage
-                addBatteryData(IMediaPlayerService::kBatteryDataAudioFlingerStop);
-#endif
-            }
+        default:
+            break;
         }
     }
 }
@@ -653,21 +649,52 @@
     sp<ThreadBase> thread = mThread.promote();
     if (thread != 0) {
         Mutex::Autolock _l(thread->mLock);
-        if (mState != STOPPING_1 && mState != STOPPING_2 && mState != STOPPED && mState != PAUSED &&
-                mState != PAUSING && mState != IDLE && mState != FLUSHED) {
-            return;
-        }
-        // No point remaining in PAUSED state after a flush => go to
-        // FLUSHED state
-        mState = FLUSHED;
-        // do not reset the track if it is still in the process of being stopped or paused.
-        // this will be done by prepareTracks_l() when the track is stopped.
-        // prepareTracks_l() will see mState == FLUSHED, then
-        // remove from active track list, reset(), and trigger presentation complete
         PlaybackThread *playbackThread = (PlaybackThread *)thread.get();
-        if (playbackThread->mActiveTracks.indexOf(this) < 0) {
+
+        if (isOffloaded()) {
+            // If offloaded we allow flush during any state except terminated
+            // and keep the track active to avoid problems if user is seeking
+            // rapidly and underlying hardware has a significant delay handling
+            // a pause
+            if (isTerminated()) {
+                return;
+            }
+
+            ALOGV("flush: offload flush");
             reset();
+
+            if (mState == STOPPING_1 || mState == STOPPING_2) {
+                ALOGV("flushed in STOPPING_1 or 2 state, change state to ACTIVE");
+                mState = ACTIVE;
+            }
+
+            if (mState == ACTIVE) {
+                ALOGV("flush called in active state, resetting buffer time out retry count");
+                mRetryCount = PlaybackThread::kMaxTrackRetriesOffload;
+            }
+
+            mResumeToStopping = false;
+        } else {
+            if (mState != STOPPING_1 && mState != STOPPING_2 && mState != STOPPED &&
+                    mState != PAUSED && mState != PAUSING && mState != IDLE && mState != FLUSHED) {
+                return;
+            }
+            // No point remaining in PAUSED state after a flush => go to
+            // FLUSHED state
+            mState = FLUSHED;
+            // do not reset the track if it is still in the process of being stopped or paused.
+            // this will be done by prepareTracks_l() when the track is stopped.
+            // prepareTracks_l() will see mState == FLUSHED, then
+            // remove from active track list, reset(), and trigger presentation complete
+            if (playbackThread->mActiveTracks.indexOf(this) < 0) {
+                reset();
+            }
         }
+        // Prevent flush being lost if the track is flushed and then resumed
+        // before mixer thread can run. This is important when offloading
+        // because the hardware buffer could hold a large amount of audio
+        playbackThread->flushOutput_l();
+        playbackThread->signal_l();
     }
 }
 
@@ -688,6 +715,20 @@
     }
 }
 
+status_t AudioFlinger::PlaybackThread::Track::setParameters(const String8& keyValuePairs)
+{
+    sp<ThreadBase> thread = mThread.promote();
+    if (thread == 0) {
+        ALOGE("thread is dead");
+        return FAILED_TRANSACTION;
+    } else if ((thread->type() == ThreadBase::DIRECT) ||
+                    (thread->type() == ThreadBase::OFFLOAD)) {
+        return thread->setParameters(keyValuePairs);
+    } else {
+        return PERMISSION_DENIED;
+    }
+}
+
 status_t AudioFlinger::PlaybackThread::Track::attachAuxEffect(int EffectId)
 {
     status_t status = DEAD_OBJECT;
@@ -749,15 +790,23 @@
     // a track is considered presented when the total number of frames written to audio HAL
     // corresponds to the number of frames written when presentationComplete() is called for the
     // first time (mPresentationCompleteFrames == 0) plus the buffer filling status at that time.
+    // For an offloaded track the HAL+h/w delay is variable so a HAL drain() is used
+    // to detect when all frames have been played. In this case framesWritten isn't
+    // useful because it doesn't always reflect whether there is data in the h/w
+    // buffers, particularly if a track has been paused and resumed during draining
+    ALOGV("presentationComplete() mPresentationCompleteFrames %d framesWritten %d",
+                      mPresentationCompleteFrames, framesWritten);
     if (mPresentationCompleteFrames == 0) {
         mPresentationCompleteFrames = framesWritten + audioHalFrames;
         ALOGV("presentationComplete() reset: mPresentationCompleteFrames %d audioHalFrames %d",
                   mPresentationCompleteFrames, audioHalFrames);
     }
-    if (framesWritten >= mPresentationCompleteFrames) {
+
+    if (framesWritten >= mPresentationCompleteFrames || isOffloaded()) {
         ALOGV("presentationComplete() session %d complete: framesWritten %d",
                   mSessionId, framesWritten);
         triggerEvents(AudioSystem::SYNC_EVENT_PRESENTATION_COMPLETE);
+        mAudioTrackServerProxy->setStreamEndDone();
         return true;
     }
     return false;
@@ -803,7 +852,7 @@
 
 status_t AudioFlinger::PlaybackThread::Track::setSyncEvent(const sp<SyncEvent>& event)
 {
-    if (mState == TERMINATED || mState == PAUSED ||
+    if (isTerminated() || mState == PAUSED ||
             ((framesReady() == 0) && ((mSharedBuffer != 0) ||
                                       (mState == STOPPED)))) {
         ALOGW("Track::setSyncEvent() in invalid state %d on session %d %s mode, framesReady %d ",