Fragmented mp4 extractor

Still experimental. Set the system property "media.stagefright.use-fragmp4"
to "true" (or "1") to enable.

Change-Id: I210b9c5b5164b5c5eefc31309845ee881ac7db8e
diff --git a/include/media/stagefright/Utils.h b/include/media/stagefright/Utils.h
index d87902e..8213af9 100644
--- a/include/media/stagefright/Utils.h
+++ b/include/media/stagefright/Utils.h
@@ -42,6 +42,8 @@
 struct AMessage;
 status_t convertMetaDataToMessage(
         const sp<MetaData> &meta, sp<AMessage> *format);
+void convertMessageToMetaData(
+        const sp<AMessage> &format, sp<MetaData> &meta);
 
 }  // namespace android
 
diff --git a/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp b/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp
index c80d13f..ffb3a65 100644
--- a/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp
+++ b/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp
@@ -93,6 +93,10 @@
         return total;
     }
 
+    bool isSeekable() {
+        return false;
+    }
+
 private:
     sp<NuPlayer::NuPlayerStreamListener> mListener;
     off64_t mPosition;
diff --git a/media/libstagefright/Android.mk b/media/libstagefright/Android.mk
index 1522e75..f40982e 100644
--- a/media/libstagefright/Android.mk
+++ b/media/libstagefright/Android.mk
@@ -19,6 +19,7 @@
         ESDS.cpp                          \
         FileSource.cpp                    \
         FLACExtractor.cpp                 \
+        FragmentedMP4Extractor.cpp        \
         HTTPBase.cpp                      \
         JPEGSource.cpp                    \
         MP3Extractor.cpp                  \
diff --git a/media/libstagefright/DRMExtractor.cpp b/media/libstagefright/DRMExtractor.cpp
index 524c3aa..63cb430 100644
--- a/media/libstagefright/DRMExtractor.cpp
+++ b/media/libstagefright/DRMExtractor.cpp
@@ -15,11 +15,6 @@
  */
 
 #include "include/DRMExtractor.h"
-#include "include/AMRExtractor.h"
-#include "include/MP3Extractor.h"
-#include "include/MPEG4Extractor.h"
-#include "include/WAVExtractor.h"
-#include "include/OggExtractor.h"
 
 #include <arpa/inet.h>
 #include <utils/String8.h>
diff --git a/media/libstagefright/DataSource.cpp b/media/libstagefright/DataSource.cpp
index 1de808e..9d0eea2 100644
--- a/media/libstagefright/DataSource.cpp
+++ b/media/libstagefright/DataSource.cpp
@@ -20,17 +20,18 @@
 #include "include/chromium_http_stub.h"
 #endif
 
-#include "include/MP3Extractor.h"
-#include "include/MPEG4Extractor.h"
-#include "include/WAVExtractor.h"
-#include "include/OggExtractor.h"
-#include "include/MPEG2PSExtractor.h"
-#include "include/MPEG2TSExtractor.h"
-#include "include/NuCachedSource2.h"
-#include "include/HTTPBase.h"
+#include "include/AACExtractor.h"
 #include "include/DRMExtractor.h"
 #include "include/FLACExtractor.h"
-#include "include/AACExtractor.h"
+#include "include/FragmentedMP4Extractor.h"
+#include "include/HTTPBase.h"
+#include "include/MP3Extractor.h"
+#include "include/MPEG2PSExtractor.h"
+#include "include/MPEG2TSExtractor.h"
+#include "include/MPEG4Extractor.h"
+#include "include/NuCachedSource2.h"
+#include "include/OggExtractor.h"
+#include "include/WAVExtractor.h"
 #include "include/WVMExtractor.h"
 
 #include "matroska/MatroskaExtractor.h"
@@ -110,6 +111,7 @@
 // static
 void DataSource::RegisterDefaultSniffers() {
     RegisterSniffer(SniffMPEG4);
+    RegisterSniffer(SniffFragmentedMP4);
     RegisterSniffer(SniffMatroska);
     RegisterSniffer(SniffOgg);
     RegisterSniffer(SniffWAV);
diff --git a/media/libstagefright/FragmentedMP4Extractor.cpp b/media/libstagefright/FragmentedMP4Extractor.cpp
new file mode 100644
index 0000000..82712ef
--- /dev/null
+++ b/media/libstagefright/FragmentedMP4Extractor.cpp
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define LOG_NDEBUG 0
+#define LOG_TAG "FragmentedMP4Extractor"
+#include <utils/Log.h>
+
+#include "include/FragmentedMP4Extractor.h"
+#include "include/SampleTable.h"
+#include "include/ESDS.h"
+
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cutils/properties.h> // for property_get
+
+#include <media/stagefright/foundation/ABitReader.h>
+#include <media/stagefright/foundation/ABuffer.h>
+#include <media/stagefright/foundation/ADebug.h>
+#include <media/stagefright/foundation/AMessage.h>
+#include <media/stagefright/DataSource.h>
+#include <media/stagefright/MediaBuffer.h>
+#include <media/stagefright/MediaBufferGroup.h>
+#include <media/stagefright/MediaDefs.h>
+#include <media/stagefright/MediaSource.h>
+#include <media/stagefright/MetaData.h>
+#include <media/stagefright/Utils.h>
+#include <utils/String8.h>
+
+namespace android {
+
+class FragmentedMPEG4Source : public MediaSource {
+public:
+    // Shares ownership of the parser and the extractor with the caller (both are held via sp<>).
+    FragmentedMPEG4Source(bool audio,
+                const sp<MetaData> &format,
+                const sp<FragmentedMP4Parser> &parser,
+                const sp<FragmentedMP4Extractor> &extractor);
+
+    virtual status_t start(MetaData *params = NULL);
+    virtual status_t stop();
+
+    virtual sp<MetaData> getFormat();
+
+    virtual status_t read(
+            MediaBuffer **buffer, const ReadOptions *options = NULL);
+
+protected:
+    virtual ~FragmentedMPEG4Source();
+
+private:
+    Mutex mLock;
+
+    sp<MetaData> mFormat;
+    sp<FragmentedMP4Parser> mParser;
+    sp<FragmentedMP4Extractor> mExtractor;
+    bool mIsAudioTrack;
+    uint32_t mCurrentSampleIndex;
+
+    bool mIsAVC;
+    size_t mNALLengthSize;
+
+    bool mStarted;
+
+    MediaBufferGroup *mGroup;
+
+    bool mWantsNALFragments;
+
+    uint8_t *mSrcBuffer;
+
+    FragmentedMPEG4Source(const FragmentedMPEG4Source &);
+    FragmentedMPEG4Source &operator=(const FragmentedMPEG4Source &);
+};
+
+
+// Starts the parser on its own looper thread and derives the track layout from the formats it reports.
+FragmentedMP4Extractor::FragmentedMP4Extractor(const sp<DataSource> &source)
+    : mLooper(new ALooper),
+      mParser(new FragmentedMP4Parser()),
+      mDataSource(source),
+      mInitCheck(NO_INIT),
+      mFileMetaData(new MetaData) {
+    ALOGV("FragmentedMP4Extractor");
+    mLooper->registerHandler(mParser);
+    mLooper->start(false /* runOnCallingThread */);
+    mParser->start(mDataSource);
+    bool hasVideo = mParser->getFormat(false /* audio */, true /* synchronous */) != NULL;
+    bool hasAudio = mParser->getFormat(true /* audio */, true /* synchronous */) != NULL;
+
+    if (hasVideo) {
+        mFileMetaData->setCString(
+                kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
+    } else if (hasAudio) {
+        mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
+    } else {
+        ALOGE("no audio and no video, no idea what file type this is");
+    }
+    // tracks are numbered such that video track is first, audio track is second
+    if (hasAudio && hasVideo) {
+        mTrackCount = 2;
+        mAudioTrackIndex = 1;
+    } else if (hasAudio) {
+        mTrackCount = 1;
+        mAudioTrackIndex = 0;
+    } else if (hasVideo) {
+        mTrackCount = 1;
+        mAudioTrackIndex = -1;
+    } else {
+        mTrackCount = 0;
+        mAudioTrackIndex = -1;
+    }
+    // Log the track count only now: mTrackCount has no value before the assignments above.
+    ALOGV("number of tracks: %d", countTracks());
+}
+
+FragmentedMP4Extractor::~FragmentedMP4Extractor() {
+    ALOGV("~FragmentedMP4Extractor");
+    mLooper->stop();
+}
+
+uint32_t FragmentedMP4Extractor::flags() const {
+    return CAN_PAUSE |
+            (mParser->isSeekable() ? (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
+}
+
+sp<MetaData> FragmentedMP4Extractor::getMetaData() {
+    return mFileMetaData;
+}
+
+size_t FragmentedMP4Extractor::countTracks() {
+    return mTrackCount;
+}
+
+
+sp<MetaData> FragmentedMP4Extractor::getTrackMetaData(
+        size_t index, uint32_t flags) {
+    if (index >= countTracks()) {
+        return NULL;
+    }
+
+    sp<AMessage> msg = mParser->getFormat(index == mAudioTrackIndex, true /* synchronous */);
+
+    if (msg == NULL) {
+        ALOGV("got null format for track %d", index);
+        return NULL;
+    }
+
+    sp<MetaData> meta = new MetaData();
+    convertMessageToMetaData(msg, meta);
+    return meta;
+}
+
+static void MakeFourCCString(uint32_t x, char *s) {
+    s[0] = x >> 24;
+    s[1] = (x >> 16) & 0xff;
+    s[2] = (x >> 8) & 0xff;
+    s[3] = x & 0xff;
+    s[4] = '\0';
+}
+
+sp<MediaSource> FragmentedMP4Extractor::getTrack(size_t index) {
+    if (index >= countTracks()) {
+        return NULL;
+    }
+    return new FragmentedMPEG4Source(index == mAudioTrackIndex, getTrackMetaData(index, 0), mParser, this);
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Wraps one track of |parser|; buffers are only allocated later, in start().
+FragmentedMPEG4Source::FragmentedMPEG4Source(
+        bool audio, const sp<MetaData> &format,
+        const sp<FragmentedMP4Parser> &parser,
+        const sp<FragmentedMP4Extractor> &extractor)
+    : mFormat(format),
+      mParser(parser),
+      mExtractor(extractor),
+      mIsAudioTrack(audio),
+      // Give every scalar member a defined value; initializers follow declaration order.
+      mCurrentSampleIndex(0), mIsAVC(false), mNALLengthSize(0),
+      mStarted(false), mGroup(NULL),
+      mWantsNALFragments(false), mSrcBuffer(NULL) {
+}
+
+FragmentedMPEG4Source::~FragmentedMPEG4Source() {
+    if (mStarted) {
+        stop();
+    }
+}
+
+status_t FragmentedMPEG4Source::start(MetaData *params) {
+    Mutex::Autolock autoLock(mLock);
+
+    CHECK(!mStarted);
+
+    int32_t val;
+    if (params && params->findInt32(kKeyWantsNALFragments, &val)
+        && val != 0) {
+        mWantsNALFragments = true;
+    } else {
+        mWantsNALFragments = false;
+    }
+    ALOGV("caller wants NAL fragments: %s", mWantsNALFragments ? "yes" : "no");
+
+    mGroup = new MediaBufferGroup;
+
+    int32_t max_size = 65536;
+    // XXX CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size));
+
+    mGroup->add_buffer(new MediaBuffer(max_size));
+
+    mSrcBuffer = new uint8_t[max_size];
+
+    mStarted = true;
+
+    return OK;
+}
+
+status_t FragmentedMPEG4Source::stop() {
+    Mutex::Autolock autoLock(mLock);
+
+    CHECK(mStarted);
+
+    delete[] mSrcBuffer;
+    mSrcBuffer = NULL;
+
+    delete mGroup;
+    mGroup = NULL;
+
+    mStarted = false;
+    mCurrentSampleIndex = 0;
+
+    return OK;
+}
+
+sp<MetaData> FragmentedMPEG4Source::getFormat() {
+    Mutex::Autolock autoLock(mLock);
+
+    return mFormat;
+}
+
+
+// Blocks for this track's next access unit and copies it into a buffer taken from mGroup.
+status_t FragmentedMPEG4Source::read(
+        MediaBuffer **out, const ReadOptions *options) {
+    int64_t seekTimeUs;
+    ReadOptions::SeekMode mode;
+    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
+        mParser->seekTo(mIsAudioTrack, seekTimeUs);
+    }
+    MediaBuffer *buffer = NULL;
+    mGroup->acquire_buffer(&buffer);
+    sp<ABuffer> parseBuffer;
+    status_t ret = mParser->dequeueAccessUnit(mIsAudioTrack, &parseBuffer, true /* synchronous */);
+    if (ret == OK && parseBuffer->size() > buffer->size()) ret = NO_MEMORY;  // would overrun the pooled buffer
+    if (ret != OK) {
+        buffer->release();
+        ALOGV("returning %d", ret);
+        return ret;
+    }
+    int64_t timeUs;
+    CHECK(parseBuffer->meta()->findInt64("timeUs", &timeUs));
+    buffer->meta_data()->setInt64(kKeyTime, timeUs);
+    buffer->set_range(0, parseBuffer->size());
+    memcpy(buffer->data(), parseBuffer->data(), parseBuffer->size());
+    *out = buffer;
+    return OK;
+}
+
+
+static bool isCompatibleBrand(uint32_t fourcc) {
+    static const uint32_t kCompatibleBrands[] = {
+        FOURCC('i', 's', 'o', 'm'),
+        FOURCC('i', 's', 'o', '2'),
+        FOURCC('a', 'v', 'c', '1'),
+        FOURCC('3', 'g', 'p', '4'),
+        FOURCC('m', 'p', '4', '1'),
+        FOURCC('m', 'p', '4', '2'),
+
+        // Won't promise that the following file types can be played.
+        // Just give these file types a chance.
+        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
+        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
+
+        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
+        FOURCC('3', 'g', '2', 'b'),
+    };
+
+    for (size_t i = 0;
+         i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
+         ++i) {
+        if (kCompatibleBrands[i] == fourcc) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Attempt to actually parse the 'ftyp' atom and determine if a suitable
+// compatible brand is present.
+// Also try to identify where this file's metadata ends
+// (end of the 'moov' atom) and report it to the caller as part of
+// the metadata.
+static bool Sniff(
+        const sp<DataSource> &source, String8 *mimeType, float *confidence,
+        sp<AMessage> *meta) {
+    // We scan up to 128k bytes to identify this file as an MP4.
+    static const off64_t kMaxScanOffset = 128ll * 1024ll;
+
+    off64_t offset = 0ll;
+    bool foundGoodFileType = false;
+    bool isFragmented = false;
+    off64_t moovAtomEndOffset = -1ll;
+    bool done = false;
+
+    while (!done && offset < kMaxScanOffset) {
+        uint32_t hdr[2];
+        if (source->readAt(offset, hdr, 8) < 8) {
+            return false;
+        }
+
+        uint64_t chunkSize = ntohl(hdr[0]);
+        uint32_t chunkType = ntohl(hdr[1]);
+        off64_t chunkDataOffset = offset + 8;
+
+        if (chunkSize == 1) {
+            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
+                return false;
+            }
+
+            chunkSize = ntoh64(chunkSize);
+            chunkDataOffset += 8;
+
+            if (chunkSize < 16) {
+                // The smallest valid chunk is 16 bytes long in this case.
+                return false;
+            }
+        } else if (chunkSize < 8) {
+            // The smallest valid chunk is 8 bytes long.
+            return false;
+        }
+
+        off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
+
+        char chunkstring[5];
+        MakeFourCCString(chunkType, chunkstring);
+        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
+        switch (chunkType) {
+            case FOURCC('f', 't', 'y', 'p'):
+            {
+                if (chunkDataSize < 8) {
+                    return false;
+                }
+
+                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
+                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
+                    if (i == 1) {
+                        // Skip this index, it refers to the minorVersion,
+                        // not a brand.
+                        continue;
+                    }
+
+                    uint32_t brand;
+                    if (source->readAt(
+                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
+                        return false;
+                    }
+
+                    brand = ntohl(brand);
+                    char brandstring[5];
+                    MakeFourCCString(brand, brandstring);
+                    ALOGV("Brand: %s", brandstring);
+
+                    if (isCompatibleBrand(brand)) {
+                        foundGoodFileType = true;
+                        break;
+                    }
+                }
+
+                if (!foundGoodFileType) {
+                    return false;
+                }
+
+                break;
+            }
+
+            case FOURCC('m', 'o', 'o', 'v'):
+            {
+                moovAtomEndOffset = offset + chunkSize;
+                break;
+            }
+
+            case FOURCC('m', 'o', 'o', 'f'):
+            {
+                // this is kind of broken, since we might not actually find a
+                // moof box in the first 128k.
+                isFragmented = true;
+                done = true;
+                break;
+            }
+
+            default:
+                break;
+        }
+
+        offset += chunkSize;
+    }
+
+    if (!foundGoodFileType || !isFragmented) {
+        return false;
+    }
+
+    *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
+    *confidence = 0.5f; // slightly more than MPEG4Extractor
+
+    if (moovAtomEndOffset >= 0) {
+        *meta = new AMessage;
+        (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
+        (*meta)->setInt32("fragmented", 1); // tell MediaExtractor what to instantiate
+
+        ALOGV("found metadata size: %lld", moovAtomEndOffset);
+    }
+
+    return true;
+}
+
+// used by DataSource::RegisterDefaultSniffers
+bool SniffFragmentedMP4(
+        const sp<DataSource> &source, String8 *mimeType, float *confidence,
+        sp<AMessage> *meta) {
+    ALOGV("SniffFragmentedMP4");
+    char prop[PROPERTY_VALUE_MAX];
+    if (property_get("media.stagefright.use-fragmp4", prop, NULL)
+            && (!strcmp(prop, "1") || !strcasecmp(prop, "true"))) {
+        return Sniff(source, mimeType, confidence, meta);
+    }
+
+    return false;
+}
+
+}  // namespace android
diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp
index a572541..7d49ef0 100644
--- a/media/libstagefright/MPEG4Extractor.cpp
+++ b/media/libstagefright/MPEG4Extractor.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+//#define LOG_NDEBUG 0
 #define LOG_TAG "MPEG4Extractor"
 #include <utils/Log.h>
 
@@ -408,7 +409,7 @@
 }
 
 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
-int32_t readSize(off64_t offset,
+static int32_t readSize(off64_t offset,
         const sp<DataSource> DataSource, uint8_t *numOfBytes) {
     uint32_t size = 0;
     uint8_t data;
diff --git a/media/libstagefright/MediaExtractor.cpp b/media/libstagefright/MediaExtractor.cpp
index 9ab6611..b18c916 100644
--- a/media/libstagefright/MediaExtractor.cpp
+++ b/media/libstagefright/MediaExtractor.cpp
@@ -21,6 +21,7 @@
 #include "include/AMRExtractor.h"
 #include "include/MP3Extractor.h"
 #include "include/MPEG4Extractor.h"
+#include "include/FragmentedMP4Extractor.h"
 #include "include/WAVExtractor.h"
 #include "include/OggExtractor.h"
 #include "include/MPEG2PSExtractor.h"
@@ -93,7 +94,12 @@
     MediaExtractor *ret = NULL;
     if (!strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_MPEG4)
             || !strcasecmp(mime, "audio/mp4")) {
-        ret = new MPEG4Extractor(source);
+        int fragmented = 0;
+        if (meta != NULL && meta->findInt32("fragmented", &fragmented) && fragmented) {
+            ret = new FragmentedMP4Extractor(source);
+        } else {
+            ret = new MPEG4Extractor(source);
+        }
     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEG)) {
         ret = new MP3Extractor(source, meta);
     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AMR_NB)
diff --git a/media/libstagefright/MetaData.cpp b/media/libstagefright/MetaData.cpp
index 755594a..a01ec97 100644
--- a/media/libstagefright/MetaData.cpp
+++ b/media/libstagefright/MetaData.cpp
@@ -22,6 +22,8 @@
 #include <string.h>
 
 #include <media/stagefright/foundation/ADebug.h>
+#include <media/stagefright/foundation/AString.h>
+#include <media/stagefright/foundation/hexdump.h>
 #include <media/stagefright/MetaData.h>
 
 namespace android {
@@ -318,6 +320,12 @@
 
         default:
             out = String8::format("(unknown type %d, size %d)", mType, mSize);
+            if (mSize <= 48) { // if it's less than three lines of hex data, dump it
+                AString foo;
+                hexdump(data, mSize, 0, &foo);
+                out.append("\n");
+                out.append(foo.c_str());
+            }
             break;
     }
     return out;
diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp
index 2a16f66..74e9222 100644
--- a/media/libstagefright/Utils.cpp
+++ b/media/libstagefright/Utils.cpp
@@ -241,5 +241,196 @@
     return OK;
 }
 
+// Appends every parameter set found in |csd| (each introduced by a 00 00 00 01 start code) to
+// |avcc| at |*idx|, as a 16-bit big-endian length followed by the payload bytes.
+// Returns the number of sets appended, or -1 if one does not fit in |capacity| bytes.
+static int appendAVCParamSets(
+        const sp<ABuffer> &csd, char *avcc, size_t capacity, size_t *idx) {
+    const size_t csdSize = csd->size();
+    size_t i = 0;
+    int numparams = 0;
+    int lastparamoffset = 0;
+    do {
+        const bool atEnd = i + 4 >= csdSize;  // not "i >= size - 4", which wraps when size < 4
+        if (atEnd || memcmp(csd->data() + i, "\x00\x00\x00\x01", 4) == 0) {
+            if (atEnd) {
+                // there can't be another param here, so use all the rest
+                i = csdSize;
+            }
+            ALOGV("block at %d, last was %d", i, lastparamoffset);
+            if (lastparamoffset > 0) {
+                const size_t size = i - lastparamoffset;
+                // The length field is 16 bits wide and |avcc| has a fixed capacity.
+                if (size > 0xffff || size + 2 > capacity - *idx) {
+                    return -1;
+                }
+                avcc[(*idx)++] = size >> 8;
+                avcc[(*idx)++] = size & 0xff;
+                memcpy(avcc + *idx, csd->data() + lastparamoffset, size);
+                *idx += size;
+                numparams++;
+            }
+            i += 4;
+            lastparamoffset = i;
+        } else {
+            i++;
+        }
+    } while (i < csdSize);
+    return numparams;
+}
+
+// Rebuilds an avcC-style configuration record in |avcc| from the start-code-delimited
+// parameter sets in |csd0| and |csd1|; profile and level are fixed values, not parsed.
+// Returns the number of bytes written, or 0 if the record does not fit in |avccSize| bytes.
+static size_t reassembleAVCC(
+        const sp<ABuffer> &csd0, const sp<ABuffer> csd1, char *avcc, size_t avccSize = 1024) {
+    if (avccSize < 7) {
+        return 0;
+    }
+    avcc[0] = 1;        // version
+    avcc[1] = 0x64;     // profile
+    avcc[2] = 0;        // unused (?)
+    avcc[3] = 0xd;      // level
+    avcc[4] = 0xff;     // reserved+size
+    size_t avccidx = 6;
+    int numparams = appendAVCParamSets(csd0, avcc, avccSize, &avccidx);
+    // Byte 5 only has five bits for the count, and the csd-1 count byte must still fit.
+    if (numparams < 0 || numparams > 0x1f || avccidx >= avccSize) {
+        return 0;
+    }
+    ALOGV("csd0 contains %d params", numparams);
+    avcc[5] = 0xe0 | numparams;
+    //and now csd-1
+    const size_t numpicparamsoffset = avccidx++;
+    numparams = appendAVCParamSets(csd1, avcc, avccSize, &avccidx);
+    if (numparams < 0 || numparams > 0xff) {
+        return 0;
+    }
+    avcc[numpicparamsoffset] = numparams;
+    return avccidx;
+}
+
+// Writes into |esds| a 31-byte descriptor header followed by the bytes of |csd0|; |esds| must hold csd0->size() + 31 bytes.
+static void reassembleESDS(const sp<ABuffer> &csd0, char *esds) {
+    int csd0size = csd0->size();
+    esds[0] = 3; // kTag_ESDescriptor;
+    int esdescriptorsize = 26 + csd0size; // everything after this 4-byte length field: bytes 5..30 plus csd0
+    CHECK(esdescriptorsize < 268435456); // 7 bits per byte, so max is 2^28-1
+    esds[1] = 0x80 | (esdescriptorsize >> 21); // length as four 7-bit groups, high bit set on all but the last
+    esds[2] = 0x80 | ((esdescriptorsize >> 14) & 0x7f);
+    esds[3] = 0x80 | ((esdescriptorsize >> 7) & 0x7f);
+    esds[4] = (esdescriptorsize & 0x7f);
+    esds[5] = esds[6] = 0; // es id
+    esds[7] = 0; // flags
+    esds[8] = 4; // kTag_DecoderConfigDescriptor
+    int configdescriptorsize = 18 + csd0size; // everything after this length field: bytes 13..30 plus csd0
+    esds[9] = 0x80 | (configdescriptorsize >> 21);
+    esds[10] = 0x80 | ((configdescriptorsize >> 14) & 0x7f);
+    esds[11] = 0x80 | ((configdescriptorsize >> 7) & 0x7f);
+    esds[12] = (configdescriptorsize & 0x7f);
+    esds[13] = 0x40; // objectTypeIndication
+    esds[14] = 0x15; // not sure what 14-25 mean, they are ignored by ESDS.cpp,
+    esds[15] = 0x00; // but the actual values here were taken from a real file.
+    esds[16] = 0x18; // NOTE(review): 14-25 look like streamType, bufferSizeDB and max/avg bitrate (ISO/IEC 14496-1) - confirm
+    esds[17] = 0x00;
+    esds[18] = 0x00;
+    esds[19] = 0x00;
+    esds[20] = 0xfa;
+    esds[21] = 0x00;
+    esds[22] = 0x00;
+    esds[23] = 0x00;
+    esds[24] = 0xfa;
+    esds[25] = 0x00;
+    esds[26] = 5; // kTag_DecoderSpecificInfo;
+    esds[27] = 0x80 | (csd0size >> 21); // same 4-byte length encoding, here for the csd0 payload itself
+    esds[28] = 0x80 | ((csd0size >> 14) & 0x7f);
+    esds[29] = 0x80 | ((csd0size >> 7) & 0x7f);
+    esds[30] = (csd0size & 0x7f);
+    memcpy((void*)&esds[31], csd0->data(), csd0size);
+    // data following this is ignored, so don't bother appending it
+}
+
+// Copies the keys it recognizes from |msg| (mime, duration, video size, audio parameters,
+// max input size) into |meta| and rebuilds kKeyAVCC / kKeyESDS from the "csd-0"/"csd-1" buffers.
+void convertMessageToMetaData(const sp<AMessage> &msg, sp<MetaData> &meta) {
+    AString mime;
+    if (msg->findString("mime", &mime)) {
+        meta->setCString(kKeyMIMEType, mime.c_str());
+    } else {
+        ALOGW("did not find mime type");
+    }
+
+    int64_t durationUs;
+    if (msg->findInt64("durationUs", &durationUs)) {
+        meta->setInt64(kKeyDuration, durationUs);
+    }
+
+    if (mime.startsWith("video/")) {
+        int32_t width;
+        int32_t height;
+        if (msg->findInt32("width", &width) && msg->findInt32("height", &height)) {
+            meta->setInt32(kKeyWidth, width);
+            meta->setInt32(kKeyHeight, height);
+        } else {
+            ALOGW("did not find width and/or height");
+        }
+    } else if (mime.startsWith("audio/")) {
+        int32_t numChannels;
+        if (msg->findInt32("channel-count", &numChannels)) {
+            meta->setInt32(kKeyChannelCount, numChannels);
+        }
+        int32_t sampleRate;
+        if (msg->findInt32("sample-rate", &sampleRate)) {
+            meta->setInt32(kKeySampleRate, sampleRate);
+        }
+        int32_t channelMask;
+        if (msg->findInt32("channel-mask", &channelMask)) {
+            meta->setInt32(kKeyChannelMask, channelMask);
+        }
+        int32_t delay = 0;
+        if (msg->findInt32("encoder-delay", &delay)) {
+            meta->setInt32(kKeyEncoderDelay, delay);
+        }
+        int32_t padding = 0;
+        if (msg->findInt32("encoder-padding", &padding)) {
+            meta->setInt32(kKeyEncoderPadding, padding);
+        }
+
+        int32_t isADTS;
+        if (msg->findInt32("is-adts", &isADTS)) {
+            meta->setInt32(kKeyIsADTS, isADTS);
+        }
+    }
+
+    int32_t maxInputSize;
+    if (msg->findInt32("max-input-size", &maxInputSize)) {
+        meta->setInt32(kKeyMaxInputSize, maxInputSize);
+    }
+
+    // reassemble the csd data into its original form
+    sp<ABuffer> csd0;
+    if (msg->findBuffer("csd-0", &csd0)) {
+        if (mime.startsWith("video/")) { // do we need to be stricter than this?
+            sp<ABuffer> csd1;
+            if (msg->findBuffer("csd-1", &csd1)) {
+                char avcc[1024]; // that oughta be enough, right?
+                size_t outsize = reassembleAVCC(csd0, csd1, avcc);
+                meta->setData(kKeyAVCC, kKeyAVCC, avcc, outsize);
+            }
+        } else if (mime.startsWith("audio/")) {
+            // Heap-allocate: csd-0 comes from the stream, so its size must not drive a stack VLA.
+            sp<ABuffer> esds = new ABuffer(csd0->size() + 31);
+            reassembleESDS(csd0, (char *)esds->data());
+            meta->setData(kKeyESDS, kKeyESDS, esds->data(), esds->size());
+        }
+    }
+    // XXX TODO add whatever other keys there are
+#if 0
+    ALOGI("converted %s to:", msg->debugString(0).c_str());
+    meta->dumpToLog();
+#endif
+}
+
+
 }  // namespace android
 
diff --git a/media/libstagefright/include/FragmentedMP4Extractor.h b/media/libstagefright/include/FragmentedMP4Extractor.h
new file mode 100644
index 0000000..763cd3a
--- /dev/null
+++ b/media/libstagefright/include/FragmentedMP4Extractor.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FRAGMENTED_MP4_EXTRACTOR_H_
+
+#define FRAGMENTED_MP4_EXTRACTOR_H_
+
+#include "include/FragmentedMP4Parser.h"
+
+#include <media/stagefright/MediaExtractor.h>
+#include <utils/Vector.h>
+#include <utils/String8.h>
+
+namespace android {
+
+struct AMessage;
+class DataSource;
+class SampleTable;
+class String8;
+
+class FragmentedMP4Extractor : public MediaExtractor {
+public:
+    // Extractor assumes ownership of "source".
+    FragmentedMP4Extractor(const sp<DataSource> &source);
+
+    virtual size_t countTracks();
+    virtual sp<MediaSource> getTrack(size_t index);
+    virtual sp<MetaData> getTrackMetaData(size_t index, uint32_t flags);
+    virtual sp<MetaData> getMetaData();
+    virtual uint32_t flags() const;
+
+protected:
+    virtual ~FragmentedMP4Extractor();
+
+private:
+    sp<ALooper> mLooper;
+    sp<FragmentedMP4Parser> mParser;
+    sp<DataSource> mDataSource;
+    status_t mInitCheck;
+    size_t mAudioTrackIndex;
+    size_t mTrackCount;
+
+    sp<MetaData> mFileMetaData;
+
+    Vector<uint32_t> mPath;
+
+    FragmentedMP4Extractor(const FragmentedMP4Extractor &);
+    FragmentedMP4Extractor &operator=(const FragmentedMP4Extractor &);
+};
+
+bool SniffFragmentedMP4(
+        const sp<DataSource> &source, String8 *mimeType, float *confidence,
+        sp<AMessage> *);
+
+}  // namespace android
+
+#endif  // FRAGMENTED_MP4_EXTRACTOR_H_
diff --git a/media/libstagefright/include/FragmentedMP4Parser.h b/media/libstagefright/include/FragmentedMP4Parser.h
index bd8fe32..0edafb9 100644
--- a/media/libstagefright/include/FragmentedMP4Parser.h
+++ b/media/libstagefright/include/FragmentedMP4Parser.h
@@ -19,6 +19,7 @@
 #define PARSER_H_
 
 #include <media/stagefright/foundation/AHandler.h>
+#include <media/stagefright/DataSource.h>
 #include <utils/Vector.h>
 
 namespace android {
@@ -30,6 +31,7 @@
         Source() {}
 
         virtual ssize_t readAt(off64_t offset, void *data, size_t size) = 0;
+        virtual bool isSeekable() = 0;
 
         protected:
         virtual ~Source() {}
@@ -42,9 +44,12 @@
 
     void start(const char *filename);
     void start(const sp<Source> &source);
+    void start(sp<DataSource> &source);
 
-    sp<AMessage> getFormat(bool audio);
-    status_t dequeueAccessUnit(bool audio, sp<ABuffer> *accessUnit);
+    sp<AMessage> getFormat(bool audio, bool synchronous = false);
+    status_t dequeueAccessUnit(bool audio, sp<ABuffer> *accessUnit, bool synchronous = false);
+    status_t seekTo(bool audio, int64_t timeUs);
+    bool isSeekable() const;
 
     virtual void onMessageReceived(const sp<AMessage> &msg);
 
@@ -58,6 +63,7 @@
         kWhatReadMore,
         kWhatGetFormat,
         kWhatDequeueAccessUnit,
+        kWhatSeekTo,
     };
 
     struct TrackFragment;
@@ -97,6 +103,11 @@
         off64_t mOffset;
     };
 
+    struct SidxEntry {
+        size_t mSize;
+        uint32_t mDurationUs;
+    };
+
     struct TrackInfo {
         enum Flags {
             kTrackEnabled     = 0x01,
@@ -107,6 +118,7 @@
         uint32_t mTrackID;
         uint32_t mFlags;
         uint32_t mDuration;  // This is the duration in terms of movie timescale!
+        uint64_t mSidxDuration; // usec, from sidx box, which can use a different timescale
 
         uint32_t mMediaTimeScale;
 
@@ -121,6 +133,7 @@
 
         uint32_t mDecodingTime;
 
+        Vector<SidxEntry> mSidx;
         sp<StaticTrackFragment> mStaticFragment;
         List<sp<TrackFragment> > mFragments;
     };
@@ -151,6 +164,8 @@
     sp<Source> mSource;
     off_t mBufferPos;
     bool mSuspended;
+    bool mDoneWithMoov;
+    off_t mFirstMoofOffset; // used as the starting point for offsets calculated from the sidx box
     sp<ABuffer> mBuffer;
     Vector<Container> mStack;
     KeyedVector<uint32_t, TrackInfo> mTracks;  // TrackInfo by trackID
@@ -164,6 +179,7 @@
 
     status_t onProceed();
     status_t onDequeueAccessUnit(size_t trackIndex, sp<ABuffer> *accessUnit);
+    status_t onSeekTo(bool wantAudio, int64_t position);
 
     void enter(off64_t offset, uint32_t type, uint64_t size);
 
@@ -222,6 +238,9 @@
     status_t parseMediaData(
             uint32_t type, size_t offset, uint64_t size);
 
+    status_t parseSegmentIndex(
+            uint32_t type, size_t offset, uint64_t size);
+
     TrackInfo *editTrack(uint32_t trackID, bool createIfNecessary = false);
 
     ssize_t findTrack(bool wantAudio) const;
diff --git a/media/libstagefright/mp4/FragmentedMP4Parser.cpp b/media/libstagefright/mp4/FragmentedMP4Parser.cpp
index e130a80..7fe4e63 100644
--- a/media/libstagefright/mp4/FragmentedMP4Parser.cpp
+++ b/media/libstagefright/mp4/FragmentedMP4Parser.cpp
@@ -18,8 +18,8 @@
 #define LOG_TAG "FragmentedMP4Parser"
 #include <utils/Log.h>
 
-#include "include/FragmentedMP4Parser.h"
 #include "include/ESDS.h"
+#include "include/FragmentedMP4Parser.h"
 #include "TrackFragment.h"
 
 
@@ -31,6 +31,7 @@
 #include <media/stagefright/MediaErrors.h>
 #include <media/stagefright/Utils.h>
 
+
 namespace android {
 
 static const char *Fourcc2String(uint32_t fourcc) {
@@ -121,6 +122,8 @@
     },
 
     { FOURCC('m', 'f', 'r', 'a'), 0, NULL },
+
+    { FOURCC('s', 'i', 'd', 'x'), 0, &FragmentedMP4Parser::parseSegmentIndex },
 };
 
 struct FileSource : public FragmentedMP4Parser::Source {
@@ -134,15 +137,92 @@
         return fread(data, 1, size, mFile);
     }
 
+    virtual bool isSeekable() {  // the FILE-backed source always reports itself as seekable
+        return true;
+    }
+
     private:
     FILE *mFile;
 
     DISALLOW_EVIL_CONSTRUCTORS(FileSource);
 };
 
+struct ReadTracker : public RefBase {  // debug aid: logs which 1 KiB blocks were read
+    ReadTracker(off64_t size) {
+        allocSize = 1 + (size > 0 ? size : 0) / 8192; // 1 bit per kilobyte
+        bitmap = (char*) calloc(1, allocSize);
+    }
+    virtual ~ReadTracker() {
+        dumpToLog();
+        free(bitmap);
+    }
+    void mark(off64_t offset, size_t size) {  // record a read of |size| bytes at |offset|
+        if (bitmap == NULL || offset < 0 || size == 0) return;
+        const uint64_t numbits = 8 * (uint64_t) allocSize;  // reads past the end are clamped
+        const uint64_t lastbit = ((uint64_t) offset + size - 1) / 1024;
+        for (uint64_t i = (uint64_t) offset / 1024; i <= lastbit && i < numbits; i++) {
+            bitmap[i/8] |= (0x80 >> (i & 7));
+        }
+    }
+
+ private:
+    void dumpToLog() {
+        // 96 chars per line, each char represents one kilobyte, 1 kb per bit
+        if (bitmap == NULL) return;  // allocation failed, nothing was recorded
+        char buf[97];
+        for (size_t base = 0; base < allocSize; base += 12) {  // 12 bitmap bytes per line
+            size_t n = 0;
+            for (size_t j = base; j < base + 12 && j < allocSize; j++) {
+                for (int k = 0; k < 8; k++) {
+                    buf[n++] = (bitmap[j] & (0x80 >> k)) ? 'X' : '.';
+                }
+            }
+            buf[n] = '\0';  // the last line may be shorter than 96 chars
+            ALOGI("%5dk: %s", (int) (base * 8), buf);
+        }
+    }
+
+    size_t allocSize;
+    char *bitmap;
+};
+
+// Source implementation that forwards reads to a DataSource (optionally logging them).
+struct DataSourceSource : public FragmentedMP4Parser::Source {
+    DataSourceSource(sp<DataSource> &source)
+        : mDataSource(source) {
+        CHECK(mDataSource != NULL);
+#if 0  // enable to record read coverage in a ReadTracker
+        off64_t size;
+        if (source->getSize(&size) != OK) {
+            ALOGE("couldn't get data source size");
+        } else {
+            mReadTracker = new ReadTracker(size);
+        }
+#endif
+    }
+
+    // Notes the read in the tracker when one exists, then delegates to the DataSource.
+    virtual ssize_t readAt(off64_t offset, void *data, size_t size) {
+        if (mReadTracker != NULL) {
+            mReadTracker->mark(offset, size);
+        }
+        return mDataSource->readAt(offset, data, size);
+    }
+
+    virtual bool isSeekable() {  // always reported as seekable
+        return true;
+    }
+    private:
+    sp<DataSource> mDataSource;
+    sp<ReadTracker> mReadTracker;  // stays NULL unless the #if 0 block above is enabled
+
+    DISALLOW_EVIL_CONSTRUCTORS(DataSourceSource);
+};
+
 FragmentedMP4Parser::FragmentedMP4Parser()
     : mBufferPos(0),
       mSuspended(false),
+      mDoneWithMoov(false),  // becomes true once the 'moov' container has been processed
+      mFirstMoofOffset(0),  // 0 means no 'moof' box has been seen yet
       mFinalResult(OK) {
 }
 
@@ -153,54 +233,142 @@
     sp<AMessage> msg = new AMessage(kWhatStart, id());
     msg->setObject("source", new FileSource(filename));
     msg->post();
+    ALOGV("Parser::start(%s)", filename);
 }
 
 void FragmentedMP4Parser::start(const sp<Source> &source) {
     sp<AMessage> msg = new AMessage(kWhatStart, id());
     msg->setObject("source", source);
     msg->post();
+    ALOGV("Parser::start(Source)");  // trace only; the kWhatStart message has already been posted
 }
 
-sp<AMessage> FragmentedMP4Parser::getFormat(bool audio) {
-    sp<AMessage> msg = new AMessage(kWhatGetFormat, id());
-    msg->setInt32("audio", audio);
+void FragmentedMP4Parser::start(sp<DataSource> &source) {  // start parsing from a DataSource
+    sp<AMessage> startRequest = new AMessage(kWhatStart, id());
+    startRequest->setObject("source", new DataSourceSource(source));  // adapt to Source
+    startRequest->post();
+    ALOGV("Parser::start(DataSource)");
+}
+
+// Asks the parser (via a kWhatGetFormat message) for the format of the audio or video track.
+// Returns NULL on failure. With |synchronous| set, -EWOULDBLOCK is retried every 10ms as long
+// as the moov box had not yet been processed when the request was made.
+sp<AMessage> FragmentedMP4Parser::getFormat(bool audio, bool synchronous) {
+    for (;;) {
+        // Sampled before the request is posted.
+        const bool moovWasDone = mDoneWithMoov;
+        sp<AMessage> request = new AMessage(kWhatGetFormat, id());
+        request->setInt32("audio", audio);
+
+        sp<AMessage> reply;
+        status_t result = request->postAndAwaitResponse(&reply);
+        if (result != OK) {
+            ALOGV("getFormat post failed: %d", result);
+            return NULL;
+        }
+
+        if (!reply->findInt32("err", &result) || result == OK) {
+            sp<AMessage> format;
+            CHECK(reply->findMessage("format", &format));
+            ALOGV("returning format %s", format->debugString().c_str());
+            return format;
+        }
+        if (!synchronous || result != -EWOULDBLOCK || moovWasDone) {
+            ALOGV("getFormat failed: %d", result);
+            return NULL;
+        }
+
+        resumeIfNecessary();
+        ALOGV("@getFormat parser not ready yet, retrying");
+        usleep(10000);
+    }
+}
+
+status_t FragmentedMP4Parser::seekTo(bool wantAudio, int64_t timeUs) {  // seek a track to timeUs (usec)
+    sp<AMessage> msg = new AMessage(kWhatSeekTo, id());
+    msg->setInt32("audio", wantAudio);
+    msg->setInt64("position", timeUs);
 
     sp<AMessage> response;
     status_t err = msg->postAndAwaitResponse(&response);
-
-    if (err != OK) {
-        return NULL;
-    }
-
-    if (response->findInt32("err", &err) && err != OK) {
-        return NULL;
-    }
-
-    sp<AMessage> format;
-    CHECK(response->findMessage("format", &format));
-
-    ALOGV("returning format %s", format->debugString().c_str());
-    return format;
+    return (err != OK) ? err : (response->findInt32("err", &err), err);  // report the handler's status too
 }
 
-status_t FragmentedMP4Parser::dequeueAccessUnit(bool audio, sp<ABuffer> *accessUnit) {
-    sp<AMessage> msg = new AMessage(kWhatDequeueAccessUnit, id());
-    msg->setInt32("audio", audio);
-
-    sp<AMessage> response;
-    status_t err = msg->postAndAwaitResponse(&response);
-
-    if (err != OK) {
-        return err;
+bool FragmentedMP4Parser::isSeekable() const {  // blocks (polling) until seekability can be decided
+    while (mFirstMoofOffset == 0 && mFinalResult == OK) {  // no moof seen yet and no final result
+        usleep(10000);  // 10ms; NOTE(review): these members are polled without any visible locking
     }
-
-    if (response->findInt32("err", &err) && err != OK) {
-        return err;
+    bool seekable = mSource->isSeekable();
+    for (size_t i = 0; seekable && i < mTracks.size(); i++) {
+        const TrackInfo *info = &mTracks.valueAt(i);
+        seekable &= !info->mSidx.empty();  // a track without sidx entries makes the result false
     }
+    return seekable;
+}
 
-    CHECK(response->findBuffer("accessUnit", accessUnit));
+// Repositions the parser at the start of the sidx segment that contains |position| (usec) for
+// the requested track. Returns OK, the findTrack() error, or -EINVAL if no segment matches.
+status_t FragmentedMP4Parser::onSeekTo(bool wantAudio, int64_t position) {
+    ssize_t trackIndex = findTrack(wantAudio);
+    if (trackIndex < 0) {
+        return trackIndex;
+    }
 
-    return OK;
+    TrackInfo *info = &mTracks.editValueAt(trackIndex);
+    int64_t segmentStartUs = 0;
+    off_t segmentOffset = mFirstMoofOffset;  // sidx sizes accumulate from the first moof
+    for (size_t i = 0; i < info->mSidx.size(); i++) {
+        const SidxEntry *se = &info->mSidx[i];
+        if (segmentStartUs + se->mDurationUs > position) {
+            mBuffer->setRange(0,0);  // discard buffered bytes; parsing restarts at the segment
+            mBufferPos = segmentOffset;
+            if (mFinalResult == ERROR_END_OF_STREAM) {
+                mFinalResult = OK;
+                mSuspended = true; // force resume
+                resumeIfNecessary();
+            }
+            info->mFragments.clear();
+            // Parsing resumes at the segment start, so timestamps restart there, not at |position|.
+            info->mDecodingTime = segmentStartUs * info->mMediaTimeScale / 1000000ll;
+            return OK;
+        }
+        segmentStartUs += se->mDurationUs;
+        segmentOffset += se->mSize;
+    }
+    ALOGV("seekTo out of range");
+    return -EINVAL;
+}
+
+// Requests the next access unit of the audio or video track by posting kWhatDequeueAccessUnit
+// and waiting for the reply. With |synchronous| set, -EWOULDBLOCK is retried every 10ms.
+status_t FragmentedMP4Parser::dequeueAccessUnit(bool audio, sp<ABuffer> *accessUnit,
+                                                bool synchronous) {
+    for (;;) {
+        sp<AMessage> request = new AMessage(kWhatDequeueAccessUnit, id());
+        request->setInt32("audio", audio);
+
+        sp<AMessage> reply;
+        status_t result = request->postAndAwaitResponse(&reply);
+        if (result != OK) {
+            ALOGV("dequeue fail 1: %d", result);
+            return result;
+        }
+
+        const bool failed = reply->findInt32("err", &result) && result != OK;
+        if (!failed) {
+            CHECK(reply->findBuffer("accessUnit", accessUnit));
+            return OK;
+        }
+        if (!synchronous || result != -EWOULDBLOCK) {
+            ALOGV("dequeue fail 2: %d, %d", result, synchronous);
+            return result;
+        }
+
+        // Ask the parser to resume, then poll again shortly.
+        resumeIfNecessary();
+        ALOGV("Parser not ready yet, retrying");
+        usleep(10000);
+    }
 }
 
 ssize_t FragmentedMP4Parser::findTrack(bool wantAudio) const {
@@ -272,7 +440,7 @@
             size_t maxBytesToRead = mBuffer->capacity() - mBuffer->size();
 
             if (maxBytesToRead < needed) {
-                ALOGI("resizing buffer.");
+                ALOGV("resizing buffer.");
 
                 sp<ABuffer> newBuffer =
                     new ABuffer((mBuffer->size() + needed + 1023) & ~1023);
@@ -290,7 +458,7 @@
                     mBuffer->data() + mBuffer->size(), needed);
 
             if (n < (ssize_t)needed) {
-                ALOGI("%s", "Reached EOF");
+                ALOGV("Reached EOF when reading %d @ %d + %d", needed, mBufferPos, mBuffer->size());
                 if (n < 0) {
                     mFinalResult = n;
                 } else if (n == 0) {
@@ -321,8 +489,16 @@
             } else {
                 TrackInfo *info = &mTracks.editValueAt(trackIndex);
 
+                sp<AMessage> format = info->mSampleDescs.itemAt(0).mFormat;
+                if (info->mSidxDuration) {
+                    format->setInt64("durationUs", info->mSidxDuration);
+                } else {
+                    // this is probably going to be zero. Oh well...
+                    format->setInt64("durationUs",
+                                     1000000ll * info->mDuration / info->mMediaTimeScale);
+                }
                 response->setMessage(
-                        "format", info->mSampleDescs.itemAt(0).mFormat);
+                        "format", format);
 
                 err = OK;
             }
@@ -366,6 +542,30 @@
             break;
         }
 
+        case kWhatSeekTo:
+        {
+            ALOGV("kWhatSeekTo");
+            int32_t wantAudio;
+            CHECK(msg->findInt32("audio", &wantAudio));
+            int64_t position;
+            CHECK(msg->findInt64("position", &position));
+
+            status_t err = -EWOULDBLOCK;
+            sp<AMessage> response = new AMessage;
+
+            ssize_t trackIndex = findTrack(wantAudio);
+
+            if (trackIndex < 0) {
+                err = trackIndex;
+            } else {
+                err = onSeekTo(wantAudio, position);
+            }
+            response->setInt32("err", err);
+            uint32_t replyID;
+            CHECK(msg->senderAwaitsResponse(&replyID));
+            response->postReply(replyID);
+            break;
+        }
         default:
             TRESPASS();
     }
@@ -429,6 +629,12 @@
     if ((i < kNumDispatchers && kDispatchTable[i].mHandler == 0)
             || isSampleEntryBox || ptype == FOURCC('i', 'l', 's', 't')) {
         // This is a container box.
+        if (type == FOURCC('m', 'o', 'o', 'f')) {
+            if (mFirstMoofOffset == 0) {
+                ALOGV("first moof @ %08x", mBufferPos + offset);
+                mFirstMoofOffset = mBufferPos + offset - 8; // point at the size
+            }
+        }
         if (type == FOURCC('m', 'e', 't', 'a')) {
             if ((err = need(offset + 4)) < OK) {
                 return err;
@@ -589,7 +795,7 @@
         return;
     }
 
-    ALOGI("resuming.");
+    ALOGV("resuming.");
 
     mSuspended = false;
     (new AMessage(kWhatProceed, id()))->post();
@@ -647,7 +853,7 @@
 
         int cmp = CompareSampleLocation(sampleInfo, mdatInfo);
 
-        if (cmp < 0) {
+        if (cmp < 0 && !mSource->isSeekable()) {
             return -EPIPE;
         } else if (cmp == 0) {
             if (i > 0) {
@@ -669,6 +875,8 @@
         size_t numDroppable = 0;
         bool done = false;
 
+        // XXX FIXME: if one of the tracks is not advanced (e.g. if you play an audio+video
+        // file with sf2), then mMediaData will not be pruned and keeps growing
         for (size_t i = 0; !done && i < mMediaData.size(); ++i) {
             const MediaDataInfo &mdatInfo = mMediaData.itemAt(i);
 
@@ -896,6 +1104,8 @@
 
                     static_cast<DynamicTrackFragment *>(
                             fragment.get())->signalCompletion();
+                } else if (container->mType == FOURCC('m', 'o', 'o', 'v')) {
+                    mDoneWithMoov = true;
                 }
 
                 container = NULL;
@@ -953,6 +1163,10 @@
     TrackInfo *info = editTrack(trackID, true /* createIfNecessary */);
     info->mFlags = flags;
     info->mDuration = duration;
+    if (info->mDuration == 0xffffffff) {
+        // ffmpeg sets this to -1, which is incorrect.
+        info->mDuration = 0;
+    }
 
     info->mStaticFragment = new StaticTrackFragment;
 
@@ -1363,13 +1577,100 @@
     info->mOffset = mBufferPos + offset;
 
     if (mMediaData.size() > 10) {
-        ALOGI("suspending for now.");
+        ALOGV("suspending for now.");
         mSuspended = true;
     }
 
     return OK;
 }
 
+// Parses a 'sidx' (segment index) box. Appends the size and duration (usec) of every
+// reference to the current track's mSidx and stores their summed duration in mSidxDuration.
+// Returns -EINVAL if the box is too short, has a zero timescale, or no track is available.
+status_t FragmentedMP4Parser::parseSegmentIndex(
+        uint32_t type, size_t offset, uint64_t size) {
+    ALOGV("sidx box type %d, offset %d, size %d", type, int(offset), int(size));
+    if (offset + 12 > size) {
+        return -EINVAL;
+    }
+
+    const uint32_t version = readU32(offset) >> 24;  // low 24 bits (flags) are not used here
+    ALOGV("sidx version %d", version);
+
+    uint32_t referenceId = readU32(offset + 4);
+    uint32_t timeScale = readU32(offset + 8);
+    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
+    if (timeScale == 0) {  // durations are divided by it below; reject instead of dividing by 0
+        return -EINVAL;
+    }
+
+    uint64_t earliestPresentationTime;
+    uint64_t firstOffset;
+    offset += 12;
+
+    if (version == 0) {
+        if (offset + 8 > size) {
+            return -EINVAL;
+        }
+        earliestPresentationTime = readU32(offset);
+        firstOffset = readU32(offset + 4);
+        offset += 8;
+    } else {
+        if (offset + 16 > size) {
+            return -EINVAL;
+        }
+        earliestPresentationTime = readU64(offset);
+        firstOffset = readU64(offset + 8);
+        offset += 16;
+    }
+    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
+
+    if (offset + 4 > size) {
+        return -EINVAL;
+    }
+    if (readU16(offset) != 0) { // reserved
+        return -EINVAL;
+    }
+    int32_t referenceCount = readU16(offset + 2);
+    offset += 4;
+    ALOGV("refcount: %d", referenceCount);
+
+    if (offset + referenceCount * 12 > size) {
+        return -EINVAL;
+    }
+
+    TrackInfo *info = editTrack(mCurrentTrackID);
+    if (info == NULL) {  // defensive: editTrack() is not asked to create a missing track here
+        return -EINVAL;
+    }
+    uint64_t total_duration = 0;
+    for (int i = 0; i < referenceCount; i++) {
+        uint32_t d1 = readU32(offset);      // top bit: reference is another sidx; rest: size
+        uint32_t d2 = readU32(offset + 4);  // duration in timeScale units
+        uint32_t d3 = readU32(offset + 8);  // top bit: starts with SAP; next 3 bits: SAP type
+
+        if (d1 & 0x80000000) {
+            ALOGW("sub-sidx boxes not supported yet");
+        }
+        bool sap = d3 & 0x80000000;
+        uint32_t saptype = (d3 >> 28) & 0x7;  // 3-bit field; stored as bool it could never be > 2
+        if (!sap || saptype > 2) {
+            ALOGW("not a stream access point, or unsupported type");
+        }
+        total_duration += d2;
+        offset += 12;
+        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
+        SidxEntry se;
+        se.mSize = d1 & 0x7fffffff;
+        se.mDurationUs = 1000000LL * d2 / timeScale;
+        info->mSidx.add(se);
+    }
+
+    info->mSidxDuration = total_duration * 1000000 / timeScale;
+    ALOGV("duration: %lld", info->mSidxDuration);
+    return OK;
+}
+
 status_t FragmentedMP4Parser::parseTrackExtends(
         uint32_t type, size_t offset, uint64_t size) {
     if (offset + 24 > size) {
@@ -1407,6 +1708,7 @@
     info.mTrackID = trackID;
     info.mFlags = 0;
     info.mDuration = 0xffffffff;
+    info.mSidxDuration = 0;
     info.mMediaTimeScale = 0;
     info.mMediaHandlerType = 0;
     info.mDefaultSampleDescriptionIndex = 0;