heif: fixes for image sequences and dual-function files

Adding support for two new sets of APIs on MediaMetadataRetriever:

- getImageAtIndex() and getPrimaryImage()
- getFrameAtIndex() and getFramesAtIndex()

Outline of changes:

- Proper indexing of all displayable still images, so that they
  can be retrieved by getImageAtIndex()

- Exposing still images as "image/x.android.heic" tracks in
  MediaExtractor with necessary descriptive keys (such as "grid-*")

- Support to retrieve video frames by absolute index instead
  of timestamps, as image use cases mostly are interested in
  getting the images and care less about timing.

- Support to retrieve video frames in batches because retrieving
  one frame at a time is inefficient.

- Refactor image / frame decoding code into FrameDecoder, and split
  still image decoding and video sequence decoding into to sub
  classes to facilite future development.

bug: 63633199

test:
cts-tradefed run cts-dev --module CtsMediaTestCases --compatibility:module-arg CtsMediaTestCases:include-annotation:android.platform.test.annotations.RequiresDevice

Change-Id: I2fe8519fb6907f315a8b513921fc1cc7f436e28d
diff --git a/media/extractors/mkv/MatroskaExtractor.cpp b/media/extractors/mkv/MatroskaExtractor.cpp
index 5a8e79d..e199f03 100644
--- a/media/extractors/mkv/MatroskaExtractor.cpp
+++ b/media/extractors/mkv/MatroskaExtractor.cpp
@@ -703,18 +703,22 @@
 
     int64_t seekTimeUs;
     ReadOptions::SeekMode mode;
-    if (options && options->getSeekTo(&seekTimeUs, &mode)
-            && !mExtractor->isLiveStreaming()) {
-        clearPendingFrames();
+    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
+        if (mode == ReadOptions::SEEK_FRAME_INDEX) {
+            return ERROR_UNSUPPORTED;
+        }
 
-        // The audio we want is located by using the Cues to seek the video
-        // stream to find the target Cluster then iterating to finalize for
-        // audio.
-        int64_t actualFrameTimeUs;
-        mBlockIter.seek(seekTimeUs, mIsAudio, &actualFrameTimeUs);
+        if (!mExtractor->isLiveStreaming()) {
+            clearPendingFrames();
 
-        if (mode == ReadOptions::SEEK_CLOSEST) {
-            targetSampleTimeUs = actualFrameTimeUs;
+            // The audio we want is located by using the Cues to seek the video
+            // stream to find the target Cluster then iterating to finalize for
+            // audio.
+            int64_t actualFrameTimeUs;
+            mBlockIter.seek(seekTimeUs, mIsAudio, &actualFrameTimeUs);
+            if (mode == ReadOptions::SEEK_CLOSEST) {
+                targetSampleTimeUs = actualFrameTimeUs;
+            }
         }
     }
 
diff --git a/media/extractors/mp4/ItemTable.cpp b/media/extractors/mp4/ItemTable.cpp
index ed560e1..9a6cb64 100644
--- a/media/extractors/mp4/ItemTable.cpp
+++ b/media/extractors/mp4/ItemTable.cpp
@@ -40,8 +40,9 @@
     friend struct ItemReference;
     friend struct ItemProperty;
 
-    ImageItem() : ImageItem(0) {}
-    ImageItem(uint32_t _type) : type(_type),
+    ImageItem() : ImageItem(0, 0, false) {}
+    ImageItem(uint32_t _type, uint32_t _id, bool _hidden) :
+            type(_type), itemId(_id), hidden(_hidden),
             rows(0), columns(0), width(0), height(0), rotation(0),
             offset(0), size(0), nextTileIndex(0) {}
 
@@ -61,6 +62,8 @@
     }
 
     uint32_t type;
+    uint32_t itemId;
+    bool hidden;
     int32_t rows;
     int32_t columns;
     int32_t width;
@@ -496,7 +499,25 @@
             ALOGW("dimgRefs if not clean!");
         }
         derivedImage.dimgRefs.appendVector(mRefs);
+
+        for (size_t i = 0; i < mRefs.size(); i++) {
+            itemIndex = itemIdToItemMap.indexOfKey(mRefs[i]);
+
+            // ignore non-image items
+            if (itemIndex < 0) {
+                continue;
+            }
+            ImageItem &sourceImage = itemIdToItemMap.editValueAt(itemIndex);
+
+            // mark the source image of the derivation as hidden
+            sourceImage.hidden = true;
+        }
     } else if (type() == FOURCC('t', 'h', 'm', 'b')) {
+        // mark thumbnail image as hidden, these can be retrieved if the client
+        // request thumbnail explicitly, but won't be exposed as displayables.
+        ImageItem &thumbImage = itemIdToItemMap.editValueAt(itemIndex);
+        thumbImage.hidden = true;
+
         for (size_t i = 0; i < mRefs.size(); i++) {
             itemIndex = itemIdToItemMap.indexOfKey(mRefs[i]);
 
@@ -511,6 +532,10 @@
             }
             masterImage.thumbnails.push_back(mItemId);
         }
+    } else if (type() == FOURCC('a', 'u', 'x', 'l')) {
+        // mark auxiliary image as hidden
+        ImageItem &auxImage = itemIdToItemMap.editValueAt(itemIndex);
+        auxImage.hidden = true;
     } else {
         ALOGW("ignoring unsupported ref type 0x%x", type());
     }
@@ -942,6 +967,7 @@
 struct ItemInfo {
     uint32_t itemId;
     uint32_t itemType;
+    bool hidden;
 };
 
 struct InfeBox : public FullBox {
@@ -1012,6 +1038,9 @@
 
         itemInfo->itemId = item_id;
         itemInfo->itemType = item_type;
+        // According to HEIF spec, (flags & 1) indicates the image is hidden
+        // and not supposed to be displayed.
+        itemInfo->hidden = (flags() & 1);
 
         char itemTypeString[5];
         MakeFourCCString(item_type, itemTypeString);
@@ -1295,7 +1324,7 @@
             return ERROR_MALFORMED;
         }
 
-        ImageItem image(info.itemType);
+        ImageItem image(info.itemType, info.itemId, info.hidden);
 
         ALOGV("adding %s: itemId %d", image.isGrid() ? "grid" : "image", info.itemId);
 
@@ -1327,6 +1356,29 @@
         mItemReferences[i]->apply(mItemIdToItemMap);
     }
 
+    bool foundPrimary = false;
+    for (size_t i = 0; i < mItemIdToItemMap.size(); i++) {
+        // add all non-hidden images, also add the primary even if it's marked
+        // hidden, in case the primary is set to a thumbnail
+        bool isPrimary = (mItemIdToItemMap[i].itemId == mPrimaryItemId);
+        if (!mItemIdToItemMap[i].hidden || isPrimary) {
+            mDisplayables.push_back(i);
+        }
+        foundPrimary |= isPrimary;
+    }
+
+    ALOGV("found %zu displayables", mDisplayables.size());
+
+    // fail if no displayables are found
+    if (mDisplayables.empty()) {
+        return ERROR_MALFORMED;
+    }
+
+    // if the primary item id is invalid, set primary to the first displayable
+    if (!foundPrimary) {
+        mPrimaryItemId = mItemIdToItemMap[mDisplayables[0]].itemId;
+    }
+
     mImageItemsValid = true;
     return OK;
 }
@@ -1348,29 +1400,36 @@
     ALOGV("attach property %d to item id %d)",
             propertyIndex, association.itemId);
 
-    mItemProperties[propertyIndex]->attachTo(
-            mItemIdToItemMap.editValueAt(itemIndex));
+    mItemProperties[propertyIndex]->attachTo(mItemIdToItemMap.editValueAt(itemIndex));
 }
 
-sp<MetaData> ItemTable::getImageMeta() {
+uint32_t ItemTable::countImages() const {
+    return mImageItemsValid ? mDisplayables.size() : 0;
+}
+
+sp<MetaData> ItemTable::getImageMeta(const uint32_t imageIndex) {
     if (!mImageItemsValid) {
         return NULL;
     }
 
-    ssize_t itemIndex = mItemIdToItemMap.indexOfKey(mPrimaryItemId);
-    if (itemIndex < 0) {
-        ALOGE("Primary item id %d not found!", mPrimaryItemId);
+    if (imageIndex >= mDisplayables.size()) {
+        ALOGE("%s: invalid image index %u", __FUNCTION__, imageIndex);
         return NULL;
     }
-
-    ALOGV("primary item index %zu", itemIndex);
+    const uint32_t itemIndex = mDisplayables[imageIndex];
+    ALOGV("image[%u]: item index %u", imageIndex, itemIndex);
 
     const ImageItem *image = &mItemIdToItemMap[itemIndex];
 
     sp<MetaData> meta = new MetaData;
-    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_HEVC);
+    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
 
-    ALOGV("setting image size %dx%d", image->width, image->height);
+    if (image->itemId == mPrimaryItemId) {
+        meta->setInt32(kKeyIsPrimaryImage, 1);
+    }
+
+    ALOGV("image[%u]: size %dx%d", imageIndex, image->width, image->height);
+
     meta->setInt32(kKeyWidth, image->width);
     meta->setInt32(kKeyHeight, image->height);
     if (image->rotation != 0) {
@@ -1394,8 +1453,8 @@
             meta->setInt32(kKeyThumbnailHeight, thumbnail.height);
             meta->setData(kKeyThumbnailHVCC, kTypeHVCC,
                     thumbnail.hvcc->data(), thumbnail.hvcc->size());
-            ALOGV("thumbnail meta: %dx%d, item index %zd",
-                    thumbnail.width, thumbnail.height, thumbItemIndex);
+            ALOGV("image[%u]: thumbnail: size %dx%d, item index %zd",
+                    imageIndex, thumbnail.width, thumbnail.height, thumbItemIndex);
         } else {
             ALOGW("%s: Referenced thumbnail does not exist!", __FUNCTION__);
         }
@@ -1406,23 +1465,18 @@
         if (tileItemIndex < 0) {
             return NULL;
         }
-        // when there are tiles, (kKeyWidth, kKeyHeight) is the full tiled area,
-        // and (kKeyDisplayWidth, kKeyDisplayHeight) may be smaller than that.
-        meta->setInt32(kKeyDisplayWidth, image->width);
-        meta->setInt32(kKeyDisplayHeight, image->height);
-        int32_t gridRows = image->rows, gridCols = image->columns;
+        meta->setInt32(kKeyGridRows, image->rows);
+        meta->setInt32(kKeyGridCols, image->columns);
 
         // point image to the first tile for grid size and HVCC
         image = &mItemIdToItemMap.editValueAt(tileItemIndex);
-        meta->setInt32(kKeyWidth, image->width * gridCols);
-        meta->setInt32(kKeyHeight, image->height * gridRows);
         meta->setInt32(kKeyGridWidth, image->width);
         meta->setInt32(kKeyGridHeight, image->height);
         meta->setInt32(kKeyMaxInputSize, image->width * image->height * 1.5);
     }
 
     if (image->hvcc == NULL) {
-        ALOGE("%s: hvcc is missing for item index %zd!", __FUNCTION__, itemIndex);
+        ALOGE("%s: hvcc is missing for image[%u]!", __FUNCTION__, imageIndex);
         return NULL;
     }
     meta->setData(kKeyHVCC, kTypeHVCC, image->hvcc->data(), image->hvcc->size());
@@ -1433,48 +1487,46 @@
     return meta;
 }
 
-uint32_t ItemTable::countImages() const {
-    return mImageItemsValid ? mItemIdToItemMap.size() : 0;
-}
-
-status_t ItemTable::findPrimaryImage(uint32_t *itemIndex) {
+status_t ItemTable::findImageItem(const uint32_t imageIndex, uint32_t *itemIndex) {
     if (!mImageItemsValid) {
         return INVALID_OPERATION;
     }
 
-    ssize_t index = mItemIdToItemMap.indexOfKey(mPrimaryItemId);
-    if (index < 0) {
-        return ERROR_MALFORMED;
+    if (imageIndex >= mDisplayables.size()) {
+        ALOGE("%s: invalid image index %d", __FUNCTION__, imageIndex);
+        return BAD_VALUE;
     }
 
-    *itemIndex = index;
+    *itemIndex = mDisplayables[imageIndex];
+
+    ALOGV("image[%u]: item index %u", imageIndex, *itemIndex);
     return OK;
 }
 
-status_t ItemTable::findThumbnail(uint32_t *itemIndex) {
+status_t ItemTable::findThumbnailItem(const uint32_t imageIndex, uint32_t *itemIndex) {
     if (!mImageItemsValid) {
         return INVALID_OPERATION;
     }
 
-    ssize_t primaryItemIndex = mItemIdToItemMap.indexOfKey(mPrimaryItemId);
-    if (primaryItemIndex < 0) {
-        ALOGE("%s: Primary item id %d not found!", __FUNCTION__, mPrimaryItemId);
-        return ERROR_MALFORMED;
+    if (imageIndex >= mDisplayables.size()) {
+        ALOGE("%s: invalid image index %d", __FUNCTION__, imageIndex);
+        return BAD_VALUE;
     }
 
-    const ImageItem &primaryImage = mItemIdToItemMap[primaryItemIndex];
-    if (primaryImage.thumbnails.empty()) {
-        ALOGW("%s: Using primary in place of thumbnail.", __FUNCTION__);
-        *itemIndex = primaryItemIndex;
+    uint32_t masterItemIndex = mDisplayables[imageIndex];
+
+    const ImageItem &masterImage = mItemIdToItemMap[masterItemIndex];
+    if (masterImage.thumbnails.empty()) {
+        *itemIndex = masterItemIndex;
         return OK;
     }
 
-    ssize_t thumbItemIndex = mItemIdToItemMap.indexOfKey(
-            primaryImage.thumbnails[0]);
+    ssize_t thumbItemIndex = mItemIdToItemMap.indexOfKey(masterImage.thumbnails[0]);
     if (thumbItemIndex < 0) {
-        ALOGE("%s: Thumbnail item id %d not found!",
-                __FUNCTION__, primaryImage.thumbnails[0]);
-        return ERROR_MALFORMED;
+        ALOGW("%s: Thumbnail item id %d not found, use master instead",
+                __FUNCTION__, masterImage.thumbnails[0]);
+        *itemIndex = masterItemIndex;
+        return OK;
     }
 
     *itemIndex = thumbItemIndex;
diff --git a/media/extractors/mp4/ItemTable.h b/media/extractors/mp4/ItemTable.h
index 6591271..3d2e2ae 100644
--- a/media/extractors/mp4/ItemTable.h
+++ b/media/extractors/mp4/ItemTable.h
@@ -49,12 +49,12 @@
     status_t parse(uint32_t type, off64_t offset, size_t size);
 
     bool isValid() { return mImageItemsValid; }
-    sp<MetaData> getImageMeta();
     uint32_t countImages() const;
-    status_t findPrimaryImage(uint32_t *imageIndex);
-    status_t findThumbnail(uint32_t *thumbnailIndex);
+    sp<MetaData> getImageMeta(const uint32_t imageIndex);
+    status_t findImageItem(const uint32_t imageIndex, uint32_t *itemIndex);
+    status_t findThumbnailItem(const uint32_t imageIndex, uint32_t *itemIndex);
     status_t getImageOffsetAndSize(
-            uint32_t *imageIndex, off64_t *offset, size_t *size);
+            uint32_t *itemIndex, off64_t *offset, size_t *size);
 
 protected:
     ~ItemTable();
@@ -78,6 +78,7 @@
     bool mImageItemsValid;
     uint32_t mCurrentItemIndex;
     KeyedVector<uint32_t, ImageItem> mItemIdToItemMap;
+    Vector<uint32_t> mDisplayables;
 
     status_t parseIlocBox(off64_t offset, size_t size);
     status_t parseIinfBox(off64_t offset, size_t size);
diff --git a/media/extractors/mp4/MPEG4Extractor.cpp b/media/extractors/mp4/MPEG4Extractor.cpp
index ede7e84..6671956 100644
--- a/media/extractors/mp4/MPEG4Extractor.cpp
+++ b/media/extractors/mp4/MPEG4Extractor.cpp
@@ -138,7 +138,7 @@
 
     uint8_t *mSrcBuffer;
 
-    bool mIsHEIF;
+    bool mIsHeif;
     sp<ItemTable> mItemTable;
 
     size_t parseNALSize(const uint8_t *data) const;
@@ -338,7 +338,7 @@
     return false;
 }
 
-MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
+MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source, const char *mime)
     : mMoofOffset(0),
       mMoofFound(false),
       mMdatFound(false),
@@ -346,12 +346,15 @@
       mInitCheck(NO_INIT),
       mHeaderTimescale(0),
       mIsQT(false),
-      mIsHEIF(false),
+      mIsHeif(false),
+      mIsHeifSequence(false),
+      mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
       mFirstTrack(NULL),
       mLastTrack(NULL),
       mFileMetaData(new MetaData),
       mFirstSINF(NULL),
       mIsDrm(false) {
+    ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
 }
 
 MPEG4Extractor::~MPEG4Extractor() {
@@ -560,8 +563,9 @@
     status_t err;
     bool sawMoovOrSidx = false;
 
-    while (!((sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
-            (mIsHEIF && (mItemTable != NULL) && mItemTable->isValid()))) {
+    while (!((!mIsHeif && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
+             (mIsHeif && (mPreferHeif || !mIsHeifSequence)
+                     && (mItemTable != NULL) && mItemTable->isValid()))) {
         off64_t orig_offset = offset;
         err = parseChunk(&offset, 0);
 
@@ -578,12 +582,47 @@
         }
     }
 
+    if (mIsHeif) {
+        uint32_t imageCount = mItemTable->countImages();
+        if (imageCount == 0) {
+            ALOGE("found no image in heif!");
+        } else {
+            for (uint32_t imageIndex = 0; imageIndex < imageCount; imageIndex++) {
+                sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
+                if (meta == NULL) {
+                    ALOGE("heif image %u has no meta!", imageIndex);
+                    continue;
+                }
+
+                ALOGV("adding HEIF image track %u", imageIndex);
+                Track *track = new Track;
+                track->next = NULL;
+                if (mLastTrack != NULL) {
+                    mLastTrack->next = track;
+                } else {
+                    mFirstTrack = track;
+                }
+                mLastTrack = track;
+
+                track->meta = meta;
+                track->meta->setInt32(kKeyTrackID, imageIndex);
+                track->includes_expensive_metadata = false;
+                track->skipTrack = false;
+                track->timescale = 0;
+            }
+        }
+    }
+
     if (mInitCheck == OK) {
         if (findTrackByMimePrefix("video/") != NULL) {
             mFileMetaData->setCString(
                     kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
         } else if (findTrackByMimePrefix("audio/") != NULL) {
             mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
+        } else if (findTrackByMimePrefix(
+                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
+            mFileMetaData->setCString(
+                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
         } else {
             mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream");
         }
@@ -614,28 +653,6 @@
         free(buf);
     }
 
-    if (mIsHEIF) {
-        sp<MetaData> meta = mItemTable->getImageMeta();
-        if (meta == NULL) {
-            return ERROR_MALFORMED;
-        }
-
-        Track *track = mLastTrack;
-        if (track != NULL) {
-            ALOGW("track is set before metadata is fully processed");
-        } else {
-            track = new Track;
-            track->next = NULL;
-            mFirstTrack = mLastTrack = track;
-        }
-
-        track->meta = meta;
-        track->meta->setInt32(kKeyTrackID, 0);
-        track->includes_expensive_metadata = false;
-        track->skipTrack = false;
-        track->timescale = 0;
-    }
-
     return mInitCheck;
 }
 
@@ -1037,6 +1054,7 @@
                 }
                 isTrack = true;
 
+                ALOGV("adding new track");
                 Track *track = new Track;
                 track->next = NULL;
                 if (mLastTrack) {
@@ -1084,6 +1102,7 @@
                 }
 
                 if (mLastTrack->skipTrack) {
+                    ALOGV("skipping this track...");
                     Track *cur = mFirstTrack;
 
                     if (cur == mLastTrack) {
@@ -1260,6 +1279,25 @@
             break;
         }
 
+        case FOURCC('t', 'r', 'e', 'f'):
+        {
+            *offset += chunk_size;
+
+            if (mLastTrack == NULL) {
+                return ERROR_MALFORMED;
+            }
+
+            // Skip thumbnail track for now since we don't have an
+            // API to retrieve it yet.
+            // The thumbnail track can't be accessed by negative index or time,
+            // because each timed sample has its own corresponding thumbnail
+            // in the thumbnail track. We'll need a dedicated API to retrieve
+            // thumbnail at time instead.
+            mLastTrack->skipTrack = true;
+
+            break;
+        }
+
         case FOURCC('p', 's', 's', 'h'):
         {
             *offset += chunk_size;
@@ -1758,6 +1796,8 @@
                             mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
                         }
                     }
+                    ALOGV("setting frame count %zu", nSamples);
+                    mLastTrack->meta->setInt32(kKeyFrameCount, nSamples);
                 }
             }
 
@@ -2089,7 +2129,7 @@
         case FOURCC('i', 'r', 'e', 'f'):
         case FOURCC('i', 'p', 'r', 'o'):
         {
-            if (mIsHEIF) {
+            if (mIsHeif) {
                 if (mItemTable == NULL) {
                     mItemTable = new ItemTable(mDataSource);
                 }
@@ -2469,10 +2509,17 @@
 
             if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
                 mIsQT = true;
-            } else if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
-                    && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
-                mIsHEIF = true;
-                ALOGV("identified HEIF image");
+            } else {
+                if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
+                 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
+                    mIsHeif = true;
+                    ALOGV("identified HEIF image");
+                }
+                if (brandSet.count(FOURCC('m', 's', 'f', '1')) > 0
+                 && brandSet.count(FOURCC('h', 'e', 'v', 'c')) > 0) {
+                    mIsHeifSequence = true;
+                    ALOGV("identified HEIF image sequence");
+                }
             }
 
             *offset = stop_offset;
@@ -3391,6 +3438,7 @@
         return NULL;
     }
 
+    sp<ItemTable> itemTable;
     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
         uint32_t type;
         const void *data;
@@ -3404,7 +3452,8 @@
         if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
             return NULL;
         }
-    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
+            || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
         uint32_t type;
         const void *data;
         size_t size;
@@ -3417,11 +3466,14 @@
         if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
             return NULL;
         }
+        if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
+            itemTable = mItemTable;
+        }
     }
 
     sp<MPEG4Source> source =  new MPEG4Source(this,
             track->meta, mDataSource, track->timescale, track->sampleTable,
-            mSidxEntries, trex, mMoofOffset, mItemTable);
+            mSidxEntries, trex, mMoofOffset, itemTable);
     if (source->init() != OK) {
         return NULL;
     }
@@ -3849,7 +3901,7 @@
       mBuffer(NULL),
       mWantsNALFragments(false),
       mSrcBuffer(NULL),
-      mIsHEIF(itemTable != NULL),
+      mIsHeif(itemTable != NULL),
       mItemTable(itemTable) {
 
     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
@@ -3871,7 +3923,8 @@
     CHECK(success);
 
     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
-    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
+    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
+              !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
 
     if (mIsAVC) {
         uint32_t type;
@@ -4625,15 +4678,19 @@
     int64_t seekTimeUs;
     ReadOptions::SeekMode mode;
     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
-        if (mIsHEIF) {
+        if (mIsHeif) {
             CHECK(mSampleTable == NULL);
             CHECK(mItemTable != NULL);
+            int32_t imageIndex;
+            if (!mFormat->findInt32(kKeyTrackID, &imageIndex)) {
+                return ERROR_MALFORMED;
+            }
 
             status_t err;
             if (seekTimeUs >= 0) {
-                err = mItemTable->findPrimaryImage(&mCurrentSampleIndex);
+                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
             } else {
-                err = mItemTable->findThumbnail(&mCurrentSampleIndex);
+                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
             }
             if (err != OK) {
                 return err;
@@ -4651,6 +4708,9 @@
                 case ReadOptions::SEEK_CLOSEST:
                     findFlags = SampleTable::kFlagClosest;
                     break;
+                case ReadOptions::SEEK_FRAME_INDEX:
+                    findFlags = SampleTable::kFlagFrameIndex;
+                    break;
                 default:
                     CHECK(!"Should not be here.");
                     break;
@@ -4661,7 +4721,8 @@
                     seekTimeUs, 1000000, mTimescale,
                     &sampleIndex, findFlags);
 
-            if (mode == ReadOptions::SEEK_CLOSEST) {
+            if (mode == ReadOptions::SEEK_CLOSEST
+                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                 // We found the closest sample already, now we want the sync
                 // sample preceding it (or the sample itself of course), even
                 // if the subsequent sync sample is closer.
@@ -4693,7 +4754,8 @@
                 return err;
             }
 
-            if (mode == ReadOptions::SEEK_CLOSEST) {
+            if (mode == ReadOptions::SEEK_CLOSEST
+                || mode == ReadOptions::SEEK_FRAME_INDEX) {
                 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
             }
 
@@ -4729,7 +4791,7 @@
         newBuffer = true;
 
         status_t err;
-        if (!mIsHEIF) {
+        if (!mIsHeif) {
             err = mSampleTable->getMetaDataForSample(
                     mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
         } else {
@@ -5316,7 +5378,8 @@
         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
-        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)) {
+        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
+        || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
         *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
         *confidence = 0.4;
 
@@ -5347,6 +5410,8 @@
         FOURCC('3', 'g', '2', 'b'),
         FOURCC('m', 'i', 'f', '1'),  // HEIF image
         FOURCC('h', 'e', 'i', 'c'),  // HEIF image
+        FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
+        FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
     };
 
     for (size_t i = 0;
diff --git a/media/extractors/mp4/MPEG4Extractor.h b/media/extractors/mp4/MPEG4Extractor.h
index c634796..d4f17e3 100644
--- a/media/extractors/mp4/MPEG4Extractor.h
+++ b/media/extractors/mp4/MPEG4Extractor.h
@@ -52,7 +52,7 @@
 class MPEG4Extractor : public MediaExtractor {
 public:
     // Extractor assumes ownership of "source".
-    explicit MPEG4Extractor(const sp<DataSource> &source);
+    explicit MPEG4Extractor(const sp<DataSource> &source, const char *mime = NULL);
 
     virtual size_t countTracks();
     virtual sp<MediaSource> getTrack(size_t index);
@@ -103,7 +103,9 @@
     status_t mInitCheck;
     uint32_t mHeaderTimescale;
     bool mIsQT;
-    bool mIsHEIF;
+    bool mIsHeif;
+    bool mIsHeifSequence;
+    bool mPreferHeif;
 
     Track *mFirstTrack, *mLastTrack;
 
diff --git a/media/extractors/mp4/SampleTable.cpp b/media/extractors/mp4/SampleTable.cpp
index fe25e95..378d63a 100644
--- a/media/extractors/mp4/SampleTable.cpp
+++ b/media/extractors/mp4/SampleTable.cpp
@@ -724,6 +724,14 @@
         return ERROR_OUT_OF_RANGE;
     }
 
+    if (flags == kFlagFrameIndex) {
+        if (req_time >= mNumSampleSizes) {
+            return ERROR_OUT_OF_RANGE;
+        }
+        *sample_index = mSampleTimeEntries[req_time].mSampleIndex;
+        return OK;
+    }
+
     uint32_t left = 0;
     uint32_t right_plus_one = mNumSampleSizes;
     while (left < right_plus_one) {
diff --git a/media/extractors/mp4/SampleTable.h b/media/extractors/mp4/SampleTable.h
index eb1a674..466e26b 100644
--- a/media/extractors/mp4/SampleTable.h
+++ b/media/extractors/mp4/SampleTable.h
@@ -72,7 +72,8 @@
     enum {
         kFlagBefore,
         kFlagAfter,
-        kFlagClosest
+        kFlagClosest,
+        kFlagFrameIndex,
     };
     status_t findSampleAtTime(
             uint64_t req_time, uint64_t scale_num, uint64_t scale_den,
diff --git a/media/extractors/mpeg2/MPEG2TSExtractor.cpp b/media/extractors/mpeg2/MPEG2TSExtractor.cpp
index abe2054..4f61e16 100644
--- a/media/extractors/mpeg2/MPEG2TSExtractor.cpp
+++ b/media/extractors/mpeg2/MPEG2TSExtractor.cpp
@@ -512,6 +512,8 @@
                 --index;
             }
             break;
+        default:
+            return ERROR_UNSUPPORTED;
     }
     if (!shouldSeekBeyond || mOffset <= mSeekSyncPoints->valueAt(index)) {
         int64_t actualSeekTimeUs = mSeekSyncPoints->keyAt(index);