Heif decoder API for decoding image sequences

bug: 120414514
bug: 78868457

test: local testing with "OpenGL Renderer Tests"
animation demo and heifs files

Change-Id: If765b0643920316d0454835e99479122aa1fd665
diff --git a/media/libheif/HeifDecoderImpl.cpp b/media/libheif/HeifDecoderImpl.cpp
index a977300..a7c0f84 100644
--- a/media/libheif/HeifDecoderImpl.cpp
+++ b/media/libheif/HeifDecoderImpl.cpp
@@ -31,6 +31,7 @@
 #include <private/media/VideoFrame.h>
 #include <utils/Log.h>
 #include <utils/RefBase.h>
+#include <vector>
 
 HeifDecoder* createHeifDecoder() {
     return new android::HeifDecoderImpl();
@@ -38,6 +39,23 @@
 
 namespace android {
 
+void initFrameInfo(HeifFrameInfo *info, const VideoFrame *videoFrame) {
+    info->mWidth = videoFrame->mWidth;
+    info->mHeight = videoFrame->mHeight;
+    info->mRotationAngle = videoFrame->mRotationAngle;
+    info->mBytesPerPixel = videoFrame->mBytesPerPixel;
+    // TODO: retrieve per-frame duration from extractor/metadataretriever.
+    info->mDurationUs = 33333;
+    if (videoFrame->mIccSize > 0) {
+        info->mIccData.assign(
+                videoFrame->getFlattenedIccData(),
+                videoFrame->getFlattenedIccData() + videoFrame->mIccSize);
+    } else {
+        // clear old Icc data if there is no Icc data.
+        info->mIccData.clear();
+    }
+}
+
 /*
  * HeifDataSource
  *
@@ -293,11 +311,11 @@
     // it's not, default to HAL_PIXEL_FORMAT_RGB_565.
     mOutputColor(HAL_PIXEL_FORMAT_RGB_565),
     mCurScanline(0),
-    mWidth(0),
-    mHeight(0),
+    mTotalScanline(0),
     mFrameDecoded(false),
     mHasImage(false),
     mHasVideo(false),
+    mSequenceLength(0),
     mAvailableLines(0),
     mNumSlices(1),
     mSliceHeight(0),
@@ -336,48 +354,94 @@
 
     mHasImage = hasImage && !strcasecmp(hasImage, "yes");
     mHasVideo = hasVideo && !strcasecmp(hasVideo, "yes");
-    sp<IMemory> sharedMem;
+
+    HeifFrameInfo* defaultInfo = nullptr;
     if (mHasImage) {
         // image index < 0 to retrieve primary image
-        sharedMem = mRetriever->getImageAtIndex(
+        sp<IMemory> sharedMem = mRetriever->getImageAtIndex(
                 -1, mOutputColor, true /*metaOnly*/);
-    } else if (mHasVideo) {
-        sharedMem = mRetriever->getFrameAtTime(0,
-                MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC,
-                mOutputColor, true /*metaOnly*/);
+
+        if (sharedMem == nullptr || sharedMem->pointer() == nullptr) {
+            ALOGE("init: videoFrame is a nullptr");
+            return false;
+        }
+
+        VideoFrame* videoFrame = static_cast<VideoFrame*>(sharedMem->pointer());
+
+        ALOGV("Image dimension %dx%d, display %dx%d, angle %d, iccSize %d",
+                videoFrame->mWidth,
+                videoFrame->mHeight,
+                videoFrame->mDisplayWidth,
+                videoFrame->mDisplayHeight,
+                videoFrame->mRotationAngle,
+                videoFrame->mIccSize);
+
+        initFrameInfo(&mImageInfo, videoFrame);
+
+        if (videoFrame->mTileHeight >= 512) {
+            // Try decoding in slices only if the image has tiles and is big enough.
+            mSliceHeight = videoFrame->mTileHeight;
+            ALOGV("mSliceHeight %u", mSliceHeight);
+        }
+
+        defaultInfo = &mImageInfo;
     }
 
-    if (sharedMem == nullptr || sharedMem->pointer() == nullptr) {
-        ALOGE("getFrameAtTime: videoFrame is a nullptr");
+    if (mHasVideo) {
+        sp<IMemory> sharedMem = mRetriever->getFrameAtTime(0,
+                MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC,
+                mOutputColor, true /*metaOnly*/);
+
+        if (sharedMem == nullptr || sharedMem->pointer() == nullptr) {
+            ALOGE("init: videoFrame is a nullptr");
+            return false;
+        }
+
+        VideoFrame* videoFrame = static_cast<VideoFrame*>(sharedMem->pointer());
+
+        ALOGV("Sequence dimension %dx%d, display %dx%d, angle %d, iccSize %d",
+                videoFrame->mWidth,
+                videoFrame->mHeight,
+                videoFrame->mDisplayWidth,
+                videoFrame->mDisplayHeight,
+                videoFrame->mRotationAngle,
+                videoFrame->mIccSize);
+
+        initFrameInfo(&mSequenceInfo, videoFrame);
+
+        mSequenceLength = atoi(mRetriever->extractMetadata(METADATA_KEY_VIDEO_FRAME_COUNT));
+
+        if (defaultInfo == nullptr) {
+            defaultInfo = &mSequenceInfo;
+        }
+    }
+
+    if (defaultInfo == nullptr) {
+        ALOGD("No valid image or sequence available");
         return false;
     }
 
-    VideoFrame* videoFrame = static_cast<VideoFrame*>(sharedMem->pointer());
-
-    ALOGV("Meta dimension %dx%d, display %dx%d, angle %d, iccSize %d",
-            videoFrame->mWidth,
-            videoFrame->mHeight,
-            videoFrame->mDisplayWidth,
-            videoFrame->mDisplayHeight,
-            videoFrame->mRotationAngle,
-            videoFrame->mIccSize);
-
     if (frameInfo != nullptr) {
-        frameInfo->set(
-                videoFrame->mWidth,
-                videoFrame->mHeight,
-                videoFrame->mRotationAngle,
-                videoFrame->mBytesPerPixel,
-                videoFrame->mIccSize,
-                videoFrame->getFlattenedIccData());
+        *frameInfo = *defaultInfo;
     }
-    mWidth = videoFrame->mWidth;
-    mHeight = videoFrame->mHeight;
-    if (mHasImage && videoFrame->mTileHeight >= 512 && mWidth >= 3000 && mHeight >= 2000 ) {
-        // Try decoding in slices only if the image has tiles and is big enough.
-        mSliceHeight = videoFrame->mTileHeight;
-        mNumSlices = (videoFrame->mHeight + mSliceHeight - 1) / mSliceHeight;
-        ALOGV("mSliceHeight %u, mNumSlices %zu", mSliceHeight, mNumSlices);
+
+    // default total scanline, this might change if decodeSequence() is used
+    mTotalScanline = defaultInfo->mHeight;
+
+    return true;
+}
+
+bool HeifDecoderImpl::getSequenceInfo(
+        HeifFrameInfo* frameInfo, size_t *frameCount) {
+    ALOGV("%s", __FUNCTION__);
+    if (!mHasVideo) {
+        return false;
+    }
+    if (frameInfo != nullptr) {
+        *frameInfo = mSequenceInfo;
+    }
+    if (frameCount != nullptr) {
+        *frameCount = mSequenceLength;
     }
     return true;
 }
@@ -416,11 +480,11 @@
         ALOGV("decodeAsync(): decoding slice %zu", i);
         size_t top = i * mSliceHeight;
         size_t bottom = (i + 1) * mSliceHeight;
-        if (bottom > mHeight) {
-            bottom = mHeight;
+        if (bottom > mImageInfo.mHeight) {
+            bottom = mImageInfo.mHeight;
         }
         sp<IMemory> frameMemory = mRetriever->getImageRectAtIndex(
-                -1, mOutputColor, 0, top, mWidth, bottom);
+                -1, mOutputColor, 0, top, mImageInfo.mWidth, bottom);
         {
             Mutex::Autolock autolock(mLock);
 
@@ -452,42 +516,44 @@
     // See if we want to decode in slices to allow client to start
     // scanline processing in parallel with decode. If this fails
     // we fallback to decoding the full frame.
-    if (mHasImage && mNumSlices > 1) {
-        // get first slice and metadata
-        sp<IMemory> frameMemory = mRetriever->getImageRectAtIndex(
-                -1, mOutputColor, 0, 0, mWidth, mSliceHeight);
-
-        if (frameMemory == nullptr || frameMemory->pointer() == nullptr) {
-            ALOGE("decode: metadata is a nullptr");
-            return false;
+    if (mHasImage) {
+        if (mSliceHeight >= 512 &&
+                mImageInfo.mWidth >= 3000 &&
+                mImageInfo.mHeight >= 2000 ) {
+            // Try decoding in slices only if the image has tiles and is big enough.
+            mNumSlices = (mImageInfo.mHeight + mSliceHeight - 1) / mSliceHeight;
+            ALOGV("mSliceHeight %u, mNumSlices %zu", mSliceHeight, mNumSlices);
         }
 
-        VideoFrame* videoFrame = static_cast<VideoFrame*>(frameMemory->pointer());
+        if (mNumSlices > 1) {
+            // get first slice and metadata
+            sp<IMemory> frameMemory = mRetriever->getImageRectAtIndex(
+                    -1, mOutputColor, 0, 0, mImageInfo.mWidth, mSliceHeight);
 
-        if (frameInfo != nullptr) {
-            frameInfo->set(
-                    videoFrame->mWidth,
-                    videoFrame->mHeight,
-                    videoFrame->mRotationAngle,
-                    videoFrame->mBytesPerPixel,
-                    videoFrame->mIccSize,
-                    videoFrame->getFlattenedIccData());
+            if (frameMemory == nullptr || frameMemory->pointer() == nullptr) {
+                ALOGE("decode: metadata is a nullptr");
+                return false;
+            }
+
+            VideoFrame* videoFrame = static_cast<VideoFrame*>(frameMemory->pointer());
+
+            if (frameInfo != nullptr) {
+                initFrameInfo(frameInfo, videoFrame);
+            }
+            mFrameMemory = frameMemory;
+            mAvailableLines = mSliceHeight;
+            mThread = new DecodeThread(this);
+            if (mThread->run("HeifDecode", ANDROID_PRIORITY_FOREGROUND) == OK) {
+                mFrameDecoded = true;
+                return true;
+            }
+            // Fallback to decode without slicing
+            mThread.clear();
+            mNumSlices = 1;
+            mSliceHeight = 0;
+            mAvailableLines = 0;
+            mFrameMemory.clear();
         }
-
-        mFrameMemory = frameMemory;
-        mAvailableLines = mSliceHeight;
-        mThread = new DecodeThread(this);
-        if (mThread->run("HeifDecode", ANDROID_PRIORITY_FOREGROUND) == OK) {
-            mFrameDecoded = true;
-            return true;
-        }
-
-        // Fallback to decode without slicing
-        mThread.clear();
-        mNumSlices = 1;
-        mSliceHeight = 0;
-        mAvailableLines = 0;
-        mFrameMemory.clear();
     }
 
     if (mHasImage) {
@@ -520,13 +586,8 @@
             videoFrame->mSize);
 
     if (frameInfo != nullptr) {
-        frameInfo->set(
-                videoFrame->mWidth,
-                videoFrame->mHeight,
-                videoFrame->mRotationAngle,
-                videoFrame->mBytesPerPixel,
-                videoFrame->mIccSize,
-                videoFrame->getFlattenedIccData());
+        initFrameInfo(frameInfo, videoFrame);
+
     }
     mFrameDecoded = true;
 
@@ -536,6 +597,50 @@
     return true;
 }
 
+bool HeifDecoderImpl::decodeSequence(int frameIndex, HeifFrameInfo* frameInfo) {
+    ALOGV("%s: frame index %d", __FUNCTION__, frameIndex);
+    if (!mHasVideo) {
+        return false;
+    }
+
+    if (frameIndex < 0 || frameIndex >= mSequenceLength) {
+        ALOGE("invalid frame index: %d, total frames %zu", frameIndex, mSequenceLength);
+        return false;
+    }
+
+    mCurScanline = 0;
+
+    // set total scanline to sequence height now
+    mTotalScanline = mSequenceInfo.mHeight;
+
+    mFrameMemory = mRetriever->getFrameAtIndex(frameIndex, mOutputColor);
+    if (mFrameMemory == nullptr || mFrameMemory->pointer() == nullptr) {
+        ALOGE("decode: videoFrame is a nullptr");
+        return false;
+    }
+
+    VideoFrame* videoFrame = static_cast<VideoFrame*>(mFrameMemory->pointer());
+    if (videoFrame->mSize == 0 ||
+            mFrameMemory->size() < videoFrame->getFlattenedSize()) {
+        ALOGE("decode: videoFrame size is invalid");
+        return false;
+    }
+
+    ALOGV("Decoded dimension %dx%d, display %dx%d, angle %d, rowbytes %d, size %d",
+            videoFrame->mWidth,
+            videoFrame->mHeight,
+            videoFrame->mDisplayWidth,
+            videoFrame->mDisplayHeight,
+            videoFrame->mRotationAngle,
+            videoFrame->mRowBytes,
+            videoFrame->mSize);
+
+    if (frameInfo != nullptr) {
+        initFrameInfo(frameInfo, videoFrame);
+    }
+    return true;
+}
+
 bool HeifDecoderImpl::getScanlineInner(uint8_t* dst) {
     if (mFrameMemory == nullptr || mFrameMemory->pointer() == nullptr) {
         return false;
@@ -547,7 +652,7 @@
 }
 
 bool HeifDecoderImpl::getScanline(uint8_t* dst) {
-    if (mCurScanline >= mHeight) {
+    if (mCurScanline >= mTotalScanline) {
         ALOGE("no more scanline available");
         return false;
     }
@@ -567,8 +672,8 @@
 size_t HeifDecoderImpl::skipScanlines(size_t count) {
     uint32_t oldScanline = mCurScanline;
     mCurScanline += count;
-    if (mCurScanline > mHeight) {
-        mCurScanline = mHeight;
+    if (mCurScanline > mTotalScanline) {
+        mCurScanline = mTotalScanline;
     }
     return (mCurScanline > oldScanline) ? (mCurScanline - oldScanline) : 0;
 }
diff --git a/media/libheif/HeifDecoderImpl.h b/media/libheif/HeifDecoderImpl.h
index 528ee3b..69c74a7 100644
--- a/media/libheif/HeifDecoderImpl.h
+++ b/media/libheif/HeifDecoderImpl.h
@@ -40,12 +40,16 @@
 
     bool init(HeifStream* stream, HeifFrameInfo* frameInfo) override;
 
+    bool getSequenceInfo(HeifFrameInfo* frameInfo, size_t *frameCount) override;
+
     bool getEncodedColor(HeifEncodedColor* outColor) const override;
 
     bool setOutputColor(HeifColorFormat heifColor) override;
 
     bool decode(HeifFrameInfo* frameInfo) override;
 
+    bool decodeSequence(int frameIndex, HeifFrameInfo* frameInfo) override;
+
     bool getScanline(uint8_t* dst) override;
 
     size_t skipScanlines(size_t count) override;
@@ -56,13 +60,15 @@
     sp<IDataSource> mDataSource;
     sp<MediaMetadataRetriever> mRetriever;
     sp<IMemory> mFrameMemory;
+    HeifFrameInfo mImageInfo;
+    HeifFrameInfo mSequenceInfo;
     android_pixel_format_t mOutputColor;
     size_t mCurScanline;
-    uint32_t mWidth;
-    uint32_t mHeight;
+    size_t mTotalScanline;
     bool mFrameDecoded;
     bool mHasImage;
     bool mHasVideo;
+    size_t mSequenceLength;
 
     // Slice decoding only
     Mutex mLock;
diff --git a/media/libheif/include/HeifDecoderAPI.h b/media/libheif/include/HeifDecoderAPI.h
index aa10f33..9073672 100644
--- a/media/libheif/include/HeifDecoderAPI.h
+++ b/media/libheif/include/HeifDecoderAPI.h
@@ -17,7 +17,7 @@
 #ifndef _HEIF_DECODER_API_
 #define _HEIF_DECODER_API_
 
-#include <memory>
+#include <vector>
 
 /*
  * The output color pixel format of heif decoder.
@@ -40,41 +40,13 @@
 /*
  * Represents a color converted (RGB-based) video frame
  */
-struct HeifFrameInfo
-{
-    HeifFrameInfo() :
-        mWidth(0), mHeight(0), mRotationAngle(0), mBytesPerPixel(0),
-        mIccSize(0), mIccData(nullptr) {}
-
-    // update the frame info, will make a copy of |iccData| internally
-    void set(uint32_t width, uint32_t height, int32_t rotation, uint32_t bpp,
-            uint32_t iccSize, uint8_t* iccData) {
-        mWidth = width;
-        mHeight = height;
-        mRotationAngle = rotation;
-        mBytesPerPixel = bpp;
-
-        if (mIccData != nullptr) {
-            mIccData.reset(nullptr);
-        }
-        mIccSize = iccSize;
-        if (iccSize > 0) {
-            mIccData.reset(new uint8_t[iccSize]);
-            if (mIccData.get() != nullptr) {
-                memcpy(mIccData.get(), iccData, iccSize);
-            } else {
-                mIccSize = 0;
-            }
-        }
-    }
-
-    // Intentional public access modifiers:
+struct HeifFrameInfo {
     uint32_t mWidth;
     uint32_t mHeight;
     int32_t  mRotationAngle;           // Rotation angle, clockwise, should be multiple of 90
     uint32_t mBytesPerPixel;           // Number of bytes for one pixel
-    uint32_t mIccSize;                 // Number of bytes in mIccData
-    std::unique_ptr<uint8_t[]> mIccData; // Actual ICC data, memory is owned by this structure
+    int64_t mDurationUs;               // Duration of the frame in us
+    std::vector<uint8_t> mIccData;     // ICC data array
 };
 
 /*
@@ -113,8 +85,8 @@
     virtual size_t getLength() const = 0;
 
 private:
-    HeifStream(const HeifFrameInfo&) = delete;
-    HeifStream& operator=(const HeifFrameInfo&) = delete;
+    HeifStream(const HeifStream&) = delete;
+    HeifStream& operator=(const HeifStream&) = delete;
 };
 
 /*
@@ -146,6 +118,14 @@
     virtual bool init(HeifStream* stream, HeifFrameInfo* frameInfo) = 0;
 
     /*
+     * Returns true if the stream contains an image sequence and false otherwise.
+     * |frameInfo| will be filled with information of pictures in the sequence
+     * and |frameCount| the length of the sequence upon success and unmodified
+     * upon failure.
+     */
+    virtual bool getSequenceInfo(HeifFrameInfo* frameInfo, size_t *frameCount) = 0;
+
+    /*
      * Decode the picture internally, returning whether it succeeded. |frameInfo|
      * will be filled with information of the primary picture upon success and
      * unmodified upon failure.
@@ -156,6 +136,20 @@
     virtual bool decode(HeifFrameInfo* frameInfo) = 0;
 
     /*
+     * Decode the picture from the image sequence at index |frameIndex|.
+     * |frameInfo| will be filled with information of the decoded picture upon
+     * success and unmodified upon failure.
+     *
+     * |frameIndex| is the 0-based index of the video frame to retrieve. The frame
+     * index must be that of a valid frame. The total number of frames available for
+     * retrieval was reported via getSequenceInfo().
+     *
+     * After this succeeded, getScanline can be called to read the scanlines
+     * that were decoded.
+     */
+    virtual bool decodeSequence(int frameIndex, HeifFrameInfo* frameInfo) = 0;
+
+    /*
      * Read the next scanline (in top-down order), returns true upon success
      * and false otherwise.
      */