Refactor MediaScanner. Some steps on the way towards being able to build the tree without OpenCore.
diff --git a/media/libmedia/MediaScannerClient.cpp b/media/libmedia/MediaScannerClient.cpp
new file mode 100644
index 0000000..bd3596e
--- /dev/null
+++ b/media/libmedia/MediaScannerClient.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <media/mediascanner.h>
+
+#include <utils/StringArray.h>
+
+#include "autodetect.h"
+#include "unicode/ucnv.h"
+#include "unicode/ustring.h"
+
+namespace android {
+
+// Builds a client that has no tag arrays allocated yet and no locale-specific
+// encoding selected (kEncodingNone).
+MediaScannerClient::MediaScannerClient()
+    : mNames(NULL), mValues(NULL), mLocaleEncoding(kEncodingNone) {}
+
+// Releases whichever tag arrays are still allocated; deleting a NULL pointer
+// is a no-op, so this is safe whether or not a file scan was in progress.
+MediaScannerClient::~MediaScannerClient() {
+    delete mValues;
+    delete mNames;
+}
+
+// Selects the native encoding to try for non-ASCII tags from the language
+// prefix of locale. A NULL locale, or one that is not Japanese, Korean or
+// Chinese, leaves the current setting untouched.
+void MediaScannerClient::setLocale(const char* locale)
+{
+    if (locale == NULL) {
+        return;
+    }
+
+    if (strncmp(locale, "ja", 2) == 0) {
+        mLocaleEncoding = kEncodingShiftJIS;
+        return;
+    }
+
+    if (strncmp(locale, "ko", 2) == 0) {
+        mLocaleEncoding = kEncodingEUCKR;
+        return;
+    }
+
+    if (strncmp(locale, "zh", 2) == 0) {
+        // exactly "zh_CN" means simplified Chinese for mainland China; every other
+        // Chinese locale (Taiwan, Hong Kong, Singapore) is assumed to be traditional
+        const bool mainland = (strcmp(locale, "zh_CN") == 0);
+        if (mainland) {
+            mLocaleEncoding = kEncodingGBK;
+        } else {
+            mLocaleEncoding = kEncodingBig5;
+        }
+    }
+}
+
+// Starts collecting tags for a new file by allocating fresh, empty name and
+// value arrays.
+void MediaScannerClient::beginFile()
+{
+    // Free arrays left behind by an earlier beginFile() that was never paired with
+    // endFile(); otherwise they would leak when the pointers are overwritten below.
+    // Deleting NULL is a no-op, so this is also fine on the first call.
+    delete mNames;
+    delete mValues;
+
+    mNames = new StringArray;
+    mValues = new StringArray;
+}
+
+// Accepts one name/value tag for the current file.
+//
+// With no locale encoding configured, or when value is pure ASCII, the tag is
+// forwarded immediately to handleStringTag() and its result is returned.
+// Otherwise the pair is appended to mNames/mValues so endFile() can run
+// native-encoding detection over all cached values, and true is returned.
+bool MediaScannerClient::addStringTag(const char* name, const char* value)
+{
+    // no locale encoding means no autodetection, so nothing needs to be cached
+    if (mLocaleEncoding == kEncodingNone)
+        return handleStringTag(name, value);
+
+    // value should be utf8; scan it for the first byte with the high bit set
+    for (const char* cursor = value; *cursor != 0; ++cursor) {
+        if (*cursor & 0x80) {
+            // non-ASCII: keep the strings until endFile() decides on an encoding
+            mNames->push_back(name);
+            mValues->push_back(value);
+            return true;
+        }
+    }
+
+    // all ASCII, so pass it directly to the client
+    return handleStringTag(name, value);
+}
+
+// Returns the intersection (bitwise AND, starting from kEncodingAll) of
+// findPossibleEncodings() over every two-byte native character reconstructed
+// from s.
+//
+// If s contains a native encoding, then it was mistakenly encoded in utf8 as if
+// it were latin-1, so the latin-1 -> utf8 conversion is reversed here to get the
+// native bytes back: each high-bit byte and the byte after it are folded into a
+// single byte. ASCII bytes outside a pair are skipped, since they could belong
+// to any encoding.
+//
+// A string that ends in the middle of a sequence is handled by treating the
+// missing bytes as 0; the scan never advances past the NUL terminator.
+static uint32_t possibleEncodings(const char* s)
+{
+    uint32_t result = kEncodingAll;
+    uint8_t ch1, ch2;
+    const uint8_t* chp = (const uint8_t *)s;
+
+    while ((ch1 = *chp++)) {
+        if (ch1 & 0x80) {
+            // only step over the next byte if it is not the terminator
+            ch2 = *chp;
+            if (ch2)
+                chp++;
+            ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F);
+            // ch1 is now the first byte of the potential native char
+
+            ch2 = *chp;
+            if (ch2)
+                chp++;
+            if (ch2 & 0x80) {
+                uint8_t ch3 = *chp;
+                if (ch3)
+                    chp++;
+                ch2 = ((ch2 << 6) & 0xC0) | (ch3 & 0x3F);
+            }
+            // ch2 is now the second byte of the potential native char
+            int ch = (int)ch1 << 8 | (int)ch2;
+            result &= findPossibleEncodings(ch);
+        }
+        // else ASCII character, which could be anything
+    }
+
+    return result;
+}
+
+// Replaces every cached value with its UTF-8 form, assuming the value really
+// holds text in the given native encoding that was mis-encoded as utf8 as if it
+// were latin-1.
+//
+// encoding: one of kEncodingShiftJIS, kEncodingGBK, kEncodingBig5 or
+//           kEncodingEUCKR; any other value leaves the cached values untouched.
+//
+// A value whose conversion fails is replaced with "???". If either converter
+// cannot be opened, an error is logged and nothing is changed.
+void MediaScannerClient::convertValues(uint32_t encoding)
+{
+    const char* enc = NULL;
+    switch (encoding) {
+        case kEncodingShiftJIS:
+            enc = "shift-jis";
+            break;
+        case kEncodingGBK:
+            enc = "gbk";
+            break;
+        case kEncodingBig5:
+            enc = "Big5";
+            break;
+        case kEncodingEUCKR:
+            enc = "EUC-KR";
+            break;
+    }
+
+    if (enc) {
+        UErrorCode status = U_ZERO_ERROR;
+
+        UConverter *conv = ucnv_open(enc, &status);
+        if (U_FAILURE(status)) {
+            LOGE("could not create UConverter for %s\n", enc);
+            return;
+        }
+        UConverter *utf8Conv = ucnv_open("UTF-8", &status);
+        if (U_FAILURE(status)) {
+            LOGE("could not create UConverter for UTF-8\n");
+            ucnv_close(conv);
+            return;
+        }
+
+        // for each value string, convert from native encoding to UTF-8
+        for (int i = 0; i < mNames->size(); i++) {
+            // first we need to untangle the utf8 and convert it back to the original bytes
+            // since we are reducing the length of the string, we can do this in place
+            uint8_t* src = (uint8_t *)mValues->getEntry(i);
+            int len = strlen((char *)src);
+            uint8_t* dest = src;
+
+            uint8_t uch;
+            while ((uch = *src++)) {
+                if (uch & 0x80) {
+                    // if the string ends right after this byte, use 0 for the missing
+                    // bits and do not advance past the NUL terminator
+                    uint8_t next = *src;
+                    if (next)
+                        src++;
+                    *dest++ = ((uch << 6) & 0xC0) | (next & 0x3F);
+                } else {
+                    *dest++ = uch;
+                }
+            }
+            *dest = 0;
+
+            // now convert from native encoding to UTF-8
+            const char* source = mValues->getEntry(i);
+            int targetLength = len * 3 + 1;
+            char* buffer = new char[targetLength];
+            if (!buffer)
+                break;
+            char* target = buffer;
+
+            // start every conversion from a clean status: ICU calls do nothing when
+            // handed a failure code, so one bad value must not poison the rest
+            status = U_ZERO_ERROR;
+
+            // keep the last byte of the buffer out of ICU's reach so the terminator
+            // written below always lands inside the allocation
+            ucnv_convertEx(utf8Conv, conv, &target, buffer + targetLength - 1,
+                    &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
+            if (U_FAILURE(status)) {
+                LOGE("ucnv_convertEx failed: %d\n", status);
+                mValues->setEntry(i, "???");
+            } else {
+                // zero terminate
+                *target = 0;
+                mValues->setEntry(i, buffer);
+            }
+
+            delete[] buffer;
+        }
+
+        ucnv_close(conv);
+        ucnv_close(utf8Conv);
+    }
+}
+
+// Finishes the current file: runs native-encoding detection over the cached
+// tags, converts them if the locale encoding is plausible for all of them,
+// delivers them through handleStringTag(), and frees the tag arrays.
+//
+// Delivery stops at the first tag for which handleStringTag() returns false.
+// Calling this without a preceding beginFile() only resets the (already NULL)
+// array pointers.
+void MediaScannerClient::endFile()
+{
+    // the arrays only exist between beginFile() and endFile(); skip the cached-tag
+    // pass entirely if they were never allocated
+    if (mLocaleEncoding != kEncodingNone && mNames && mValues) {
+        uint32_t encoding = kEncodingAll;
+
+        // compute a bit mask containing all possible encodings
+        for (int i = 0; i < mNames->size(); i++)
+            encoding &= possibleEncodings(mValues->getEntry(i));
+
+        // if the locale encoding matches, then assume we have a native encoding.
+        if (encoding & mLocaleEncoding)
+            convertValues(mLocaleEncoding);
+
+        // finally, push all name/value pairs to the client
+        for (int i = 0; i < mNames->size(); i++) {
+            if (!handleStringTag(mNames->getEntry(i), mValues->getEntry(i)))
+                break;
+        }
+    }
+    // else addStringTag() has done all the work so we have nothing to do
+
+    delete mNames;
+    delete mValues;
+    mNames = NULL;
+    mValues = NULL;
+}
+
+}  // namespace android
+