Better character set encoding detection
ID3 tags are supposed to be ISO-8859-1 or Unicode, but often aren't.
To better detect the real encoding, we now use ICU to detect possible
encodings for a given byte sequence, then apply additional heuristics
to determine the most likely one.
b/5564857
Change-Id: I53bc83b006433da5c2f2ccfcd770ddb3a26b64d0
diff --git a/include/media/mediascanner.h b/include/media/mediascanner.h
index a73403b..4537679 100644
--- a/include/media/mediascanner.h
+++ b/include/media/mediascanner.h
@@ -21,6 +21,7 @@
#include <utils/threads.h>
#include <utils/List.h>
#include <utils/Errors.h>
+#include <utils/String8.h>
#include <pthread.h>
struct dirent;
@@ -29,6 +30,7 @@
class MediaScannerClient;
class StringArray;
+class CharacterEncodingDetector;
enum MediaScanResult {
// This file or directory was scanned successfully.
@@ -94,15 +96,9 @@
virtual status_t setMimeType(const char* mimeType) = 0;
protected:
- void convertValues(uint32_t encoding);
-
-protected:
- // cached name and value strings, for native encoding support.
- StringArray* mNames;
- StringArray* mValues;
-
- // default encoding based on MediaScanner::mLocale string
- uint32_t mLocaleEncoding;
+ // default encoding from MediaScanner::mLocale
+ String8 mLocale;
+ CharacterEncodingDetector *mEncodingDetector;
};
}; // namespace android