Camera: HEIC: Use libyuv utilities to speed up memcpy

Use libyuv's optimized CopyRow function to improve performance of YUV
tiling.

Bug: 124781199
Test: Camera CTS
Test: TestingCamera2 smoke test
Change-Id: I6af6678099655b7e35ddaccf7cd9aa817ec64a9c
diff --git a/services/camera/libcameraservice/Android.bp b/services/camera/libcameraservice/Android.bp
index 2ca8356..9ae7fd9 100644
--- a/services/camera/libcameraservice/Android.bp
+++ b/services/camera/libcameraservice/Android.bp
@@ -94,6 +94,7 @@
         "libsensorprivacy",
         "libstagefright",
         "libstagefright_foundation",
+        "libyuv",
         "android.frameworks.cameraservice.common@2.0",
         "android.frameworks.cameraservice.service@2.0",
         "android.frameworks.cameraservice.device@2.0",
diff --git a/services/camera/libcameraservice/api2/HeicCompositeStream.cpp b/services/camera/libcameraservice/api2/HeicCompositeStream.cpp
index a61cdee..9fd0e8b 100644
--- a/services/camera/libcameraservice/api2/HeicCompositeStream.cpp
+++ b/services/camera/libcameraservice/api2/HeicCompositeStream.cpp
@@ -23,6 +23,7 @@
 #include <sys/syscall.h>
 
 #include <android/hardware/camera/device/3.5/types.h>
+#include <libyuv.h>
 #include <gui/Surface.h>
 #include <utils/Log.h>
 #include <utils/Trace.h>
@@ -192,6 +193,7 @@
         return res;
     }
 
+    initCopyRowFunction(width);
     return res;
 }
 
@@ -1373,7 +1375,7 @@
     for (auto row = top; row < top+height; row++) {
         uint8_t *dst = codecBuffer->data() + imageInfo->mPlane[MediaImage2::Y].mOffset +
                 imageInfo->mPlane[MediaImage2::Y].mRowInc * (row - top);
-        memcpy(dst, yuvBuffer.data+row*yuvBuffer.stride+left, width);
+        mFnCopyRow(yuvBuffer.data+row*yuvBuffer.stride+left, dst, width);
     }
 
     // U is Cb, V is Cr
@@ -1406,24 +1408,25 @@
         for (auto row = top/2; row < (top+height)/2; row++) {
             uint8_t *dst = codecBuffer->data() + imageInfo->mPlane[dstPlane].mOffset +
                     imageInfo->mPlane[dstPlane].mRowInc * (row - top/2);
-            memcpy(dst, src+row*yuvBuffer.chromaStride+left, width);
+            mFnCopyRow(src+row*yuvBuffer.chromaStride+left, dst, width);
         }
     } else if (isCodecUvPlannar && yuvBuffer.chromaStep == 1) {
         // U plane
         for (auto row = top/2; row < (top+height)/2; row++) {
             uint8_t *dst = codecBuffer->data() + imageInfo->mPlane[MediaImage2::U].mOffset +
                     imageInfo->mPlane[MediaImage2::U].mRowInc * (row - top/2);
-            memcpy(dst, yuvBuffer.dataCb+row*yuvBuffer.chromaStride+left/2, width/2);
+            mFnCopyRow(yuvBuffer.dataCb+row*yuvBuffer.chromaStride+left/2, dst, width/2);
         }
 
         // V plane
         for (auto row = top/2; row < (top+height)/2; row++) {
             uint8_t *dst = codecBuffer->data() + imageInfo->mPlane[MediaImage2::V].mOffset +
                     imageInfo->mPlane[MediaImage2::V].mRowInc * (row - top/2);
-            memcpy(dst, yuvBuffer.dataCr+row*yuvBuffer.chromaStride+left/2, width/2);
+            mFnCopyRow(yuvBuffer.dataCr+row*yuvBuffer.chromaStride+left/2, dst, width/2);
         }
     } else {
-        // Convert between semiplannar and plannar
+        // Convert between semiplanar and planar layouts, or handle the case
+        // where the UV orders differ.
         uint8_t *dst = codecBuffer->data();
         for (auto row = top/2; row < (top+height)/2; row++) {
             for (auto col = left/2; col < (left+width)/2; col++) {
@@ -1446,6 +1449,48 @@
     return OK;
 }
 
+// Selects the fastest libyuv row-copy routine supported by the CPU and stores
+// it in mFnCopyRow.
+//
+// The tile copy loops invoke mFnCopyRow with both a tile width and half of it
+// (planar chroma rows), so a copy length is not guaranteed to be a multiple of
+// the SIMD block size even when |width| is. The exact-multiple kernels
+// (CopyRow_SSE2/AVX/NEON) are therefore never installed directly; the
+// CopyRow_Any_* wrappers are used because they accept any length. The ERMS
+// and MIPS kernels were already chosen without a width check and are kept.
+// |width| is retained for interface compatibility but no longer affects the
+// selection.
+void HeicCompositeStream::initCopyRowFunction(int32_t /*width*/)
+{
+    using namespace libyuv;
+
+    mFnCopyRow = CopyRow_C;
+#if defined(HAS_COPYROW_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2)) {
+        mFnCopyRow = CopyRow_Any_SSE2;
+    }
+#endif
+#if defined(HAS_COPYROW_AVX)
+    if (TestCpuFlag(kCpuHasAVX)) {
+        mFnCopyRow = CopyRow_Any_AVX;
+    }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+    if (TestCpuFlag(kCpuHasERMS)) {
+        mFnCopyRow = CopyRow_ERMS;
+    }
+#endif
+#if defined(HAS_COPYROW_NEON)
+    if (TestCpuFlag(kCpuHasNEON)) {
+        mFnCopyRow = CopyRow_Any_NEON;
+    }
+#endif
+#if defined(HAS_COPYROW_MIPS)
+    if (TestCpuFlag(kCpuHasMIPS)) {
+        mFnCopyRow = CopyRow_MIPS;
+    }
+#endif
+}
+
 size_t HeicCompositeStream::calcAppSegmentMaxSize(const CameraMetadata& info) {
     camera_metadata_ro_entry_t entry = info.find(ANDROID_HEIC_INFO_MAX_JPEG_APP_SEGMENTS_COUNT);
     size_t maxAppsSegment = 1;
diff --git a/services/camera/libcameraservice/api2/HeicCompositeStream.h b/services/camera/libcameraservice/api2/HeicCompositeStream.h
index 4cd9af0..2aa3c38 100644
--- a/services/camera/libcameraservice/api2/HeicCompositeStream.h
+++ b/services/camera/libcameraservice/api2/HeicCompositeStream.h
@@ -195,6 +195,7 @@
     status_t copyOneYuvTile(sp<MediaCodecBuffer>& codecBuffer,
             const CpuConsumer::LockedBuffer& yuvBuffer,
             size_t top, size_t left, size_t width, size_t height);
+    void initCopyRowFunction(int32_t width);
     static size_t calcAppSegmentMaxSize(const CameraMetadata& info);
 
     static const nsecs_t kWaitDuration = 10000000; // 10 ms
@@ -244,6 +245,9 @@
 
     // In most common use case, entries are accessed in order.
     std::map<int64_t, InputFrame> mPendingInputFrames;
+
+    // Function pointer of libyuv row copy.
+    void (*mFnCopyRow)(const uint8_t* src, uint8_t* dst, int width);
 };
 
 }; // namespace camera3