Better character set encoding detection

Id3 tags are supposed to be ISO-8859-1 or unicode, but often aren't.
To better detect the real encoding we now use ICU to detect possible
encodings for a given byte sequence, then apply additional heuristics
to determine the most likely one.
b/5564857

Change-Id: I53bc83b006433da5c2f2ccfcd770ddb3a26b64d0
diff --git a/include/media/mediascanner.h b/include/media/mediascanner.h
index a73403b..4537679 100644
--- a/include/media/mediascanner.h
+++ b/include/media/mediascanner.h
@@ -21,6 +21,7 @@
 #include <utils/threads.h>
 #include <utils/List.h>
 #include <utils/Errors.h>
+#include <utils/String8.h>
 #include <pthread.h>
 
 struct dirent;
@@ -29,6 +30,7 @@
 
 class MediaScannerClient;
 class StringArray;
+class CharacterEncodingDetector;
 
 enum MediaScanResult {
     // This file or directory was scanned successfully.
@@ -94,15 +96,9 @@
     virtual status_t setMimeType(const char* mimeType) = 0;
 
 protected:
-    void convertValues(uint32_t encoding);
-
-protected:
-    // cached name and value strings, for native encoding support.
-    StringArray*    mNames;
-    StringArray*    mValues;
-
-    // default encoding based on MediaScanner::mLocale string
-    uint32_t        mLocaleEncoding;
+    // default encoding from MediaScanner::mLocale
+    String8 mLocale;
+    CharacterEncodingDetector *mEncodingDetector;
 };
 
 }; // namespace android