Respect system locale in ID3 tag encoding detection

Tweak the confidence of ID3 tag encoding detection according to
the current system locale.

Bug: 109708246
Test: Check encoding for th-TH

Change-Id: I9e18df0d2e3cd07bf28c1fb3459ecee47ec17d56
diff --git a/media/libmedia/CharacterEncodingDetector.cpp b/media/libmedia/CharacterEncodingDetector.cpp
index 990d260..5c6b981 100644
--- a/media/libmedia/CharacterEncodingDetector.cpp
+++ b/media/libmedia/CharacterEncodingDetector.cpp
@@ -28,6 +28,8 @@
 #include <unicode/ucsdet.h>
 #include <unicode/ustring.h>
 
+#include <cutils/properties.h>
+
 namespace android {
 
 CharacterEncodingDetector::CharacterEncodingDetector() {
@@ -38,6 +40,26 @@
         ALOGE("could not create UConverter for UTF-8");
         mUtf8Conv = NULL;
     }
+
+    // Read system locale setting from system property and map to ICU encoding names.
+    mLocaleEnc = NULL;
+    char locale_value[PROPERTY_VALUE_MAX] = "";
+    if (property_get("persist.sys.locale", locale_value, NULL) > 0) {
+        const size_t len = strnlen(locale_value, sizeof(locale_value));
+
+        if (len == 3 && !strncmp(locale_value, "und", 3)) {
+            // Undetermined
+        } else if (!strncmp(locale_value, "th", 2)) { // Thai
+            mLocaleEnc = "windows-874-2000";
+        }
+        if (mLocaleEnc != NULL) {
+            ALOGV("System locale encoding = %s", mLocaleEnc);
+        } else {
+            ALOGV("Didn't recognize system locale setting, defaulting to en_US");
+        }
+    } else {
+        ALOGV("Couldn't read system locale setting, assuming en_US");
+    }
 }
 
 CharacterEncodingDetector::~CharacterEncodingDetector() {
@@ -157,7 +179,11 @@
                 }
             }
 
-            if (bestCombinedMatch != NULL) {
+            if (mLocaleEnc != NULL && !goodmatch && highest < 50) {
+                combinedenc = mLocaleEnc;
+                ALOGV("confidence is low but we have recognized predefined encoding, "
+                        "so try this (%s) instead", mLocaleEnc);
+            } else if (bestCombinedMatch != NULL) {
                 combinedenc = ucsdet_getName(bestCombinedMatch, &status);
             } else {
                 combinedenc = "ISO-8859-1";