/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | #include "autodetect.h" |
| 18 | |
// An inclusive range [first, last] of 16-bit code values.
// The typedef names the type explicitly so that `CharRange` is usable as a
// plain type name in both C and C++ (a typedef without a declarator is
// ignored by C++ compilers and declares nothing usable in C).
typedef struct CharRange {
    uint16_t first;  // lowest code value in the range (inclusive)
    uint16_t last;   // highest code value in the range (inclusive)
} CharRange;
| 23 | |
// Number of elements in the array `x`. Only meaningful for true arrays, not
// for pointers. The argument is parenthesized so that any expression passed
// in is indexed as a whole.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
| 25 | |
| 26 | // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT |
| 27 | static const CharRange kShiftJISRanges[] = { |
| 28 | { 0x8140, 0x817E }, |
| 29 | { 0x8180, 0x81AC }, |
| 30 | { 0x81B8, 0x81BF }, |
| 31 | { 0x81C8, 0x81CE }, |
| 32 | { 0x81DA, 0x81E8 }, |
| 33 | { 0x81F0, 0x81F7 }, |
| 34 | { 0x81FC, 0x81FC }, |
| 35 | { 0x824F, 0x8258 }, |
| 36 | { 0x8260, 0x8279 }, |
| 37 | { 0x8281, 0x829A }, |
| 38 | { 0x829F, 0x82F1 }, |
| 39 | { 0x8340, 0x837E }, |
| 40 | { 0x8380, 0x8396 }, |
| 41 | { 0x839F, 0x83B6 }, |
| 42 | { 0x83BF, 0x83D6 }, |
| 43 | { 0x8440, 0x8460 }, |
| 44 | { 0x8470, 0x847E }, |
| 45 | { 0x8480, 0x8491 }, |
| 46 | { 0x849F, 0x84BE }, |
| 47 | { 0x8740, 0x875D }, |
| 48 | { 0x875F, 0x8775 }, |
| 49 | { 0x877E, 0x877E }, |
| 50 | { 0x8780, 0x879C }, |
| 51 | { 0x889F, 0x88FC }, |
| 52 | { 0x8940, 0x897E }, |
| 53 | { 0x8980, 0x89FC }, |
| 54 | { 0x8A40, 0x8A7E }, |
| 55 | { 0x8A80, 0x8AFC }, |
| 56 | { 0x8B40, 0x8B7E }, |
| 57 | { 0x8B80, 0x8BFC }, |
| 58 | { 0x8C40, 0x8C7E }, |
| 59 | { 0x8C80, 0x8CFC }, |
| 60 | { 0x8D40, 0x8D7E }, |
| 61 | { 0x8D80, 0x8DFC }, |
| 62 | { 0x8E40, 0x8E7E }, |
| 63 | { 0x8E80, 0x8EFC }, |
| 64 | { 0x8F40, 0x8F7E }, |
| 65 | { 0x8F80, 0x8FFC }, |
| 66 | { 0x9040, 0x907E }, |
| 67 | { 0x9080, 0x90FC }, |
| 68 | { 0x9140, 0x917E }, |
| 69 | { 0x9180, 0x91FC }, |
| 70 | { 0x9240, 0x927E }, |
| 71 | { 0x9280, 0x92FC }, |
| 72 | { 0x9340, 0x937E }, |
| 73 | { 0x9380, 0x93FC }, |
| 74 | { 0x9440, 0x947E }, |
| 75 | { 0x9480, 0x94FC }, |
| 76 | { 0x9540, 0x957E }, |
| 77 | { 0x9580, 0x95FC }, |
| 78 | { 0x9640, 0x967E }, |
| 79 | { 0x9680, 0x96FC }, |
| 80 | { 0x9740, 0x977E }, |
| 81 | { 0x9780, 0x97FC }, |
| 82 | { 0x9840, 0x9872 }, |
| 83 | { 0x989F, 0x98FC }, |
| 84 | { 0x9940, 0x997E }, |
| 85 | { 0x9980, 0x99FC }, |
| 86 | { 0x9A40, 0x9A7E }, |
| 87 | { 0x9A80, 0x9AFC }, |
| 88 | { 0x9B40, 0x9B7E }, |
| 89 | { 0x9B80, 0x9BFC }, |
| 90 | { 0x9C40, 0x9C7E }, |
| 91 | { 0x9C80, 0x9CFC }, |
| 92 | { 0x9D40, 0x9D7E }, |
| 93 | { 0x9D80, 0x9DFC }, |
| 94 | { 0x9E40, 0x9E7E }, |
| 95 | { 0x9E80, 0x9EFC }, |
| 96 | { 0x9F40, 0x9F7E }, |
| 97 | { 0x9F80, 0x9FFC }, |
| 98 | { 0xE040, 0xE07E }, |
| 99 | { 0xE080, 0xE0FC }, |
| 100 | { 0xE140, 0xE17E }, |
| 101 | { 0xE180, 0xE1FC }, |
| 102 | { 0xE240, 0xE27E }, |
| 103 | { 0xE280, 0xE2FC }, |
| 104 | { 0xE340, 0xE37E }, |
| 105 | { 0xE380, 0xE3FC }, |
| 106 | { 0xE440, 0xE47E }, |
| 107 | { 0xE480, 0xE4FC }, |
| 108 | { 0xE540, 0xE57E }, |
| 109 | { 0xE580, 0xE5FC }, |
| 110 | { 0xE640, 0xE67E }, |
| 111 | { 0xE680, 0xE6FC }, |
| 112 | { 0xE740, 0xE77E }, |
| 113 | { 0xE780, 0xE7FC }, |
| 114 | { 0xE840, 0xE87E }, |
| 115 | { 0xE880, 0xE8FC }, |
| 116 | { 0xE940, 0xE97E }, |
| 117 | { 0xE980, 0xE9FC }, |
| 118 | { 0xEA40, 0xEA7E }, |
| 119 | { 0xEA80, 0xEAA4 }, |
| 120 | { 0xED40, 0xED7E }, |
| 121 | { 0xED80, 0xEDFC }, |
| 122 | { 0xEE40, 0xEE7E }, |
| 123 | { 0xEE80, 0xEEEC }, |
| 124 | { 0xEEEF, 0xEEFC }, |
| 125 | { 0xFA40, 0xFA7E }, |
| 126 | { 0xFA80, 0xFAFC }, |
| 127 | { 0xFB40, 0xFB7E }, |
| 128 | { 0xFB80, 0xFBFC }, |
| 129 | { 0xFC40, 0xFC4B }, |
| 130 | }; |
| 131 | |
| 132 | // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT |
| 133 | static const CharRange kGBKRanges[] = { |
| 134 | { 0x8140, 0x817E }, |
| 135 | { 0x8180, 0x81FE }, |
| 136 | { 0x8240, 0x827E }, |
| 137 | { 0x8280, 0x82FE }, |
| 138 | { 0x8340, 0x837E }, |
| 139 | { 0x8380, 0x83FE }, |
| 140 | { 0x8440, 0x847E }, |
| 141 | { 0x8480, 0x84FE }, |
| 142 | { 0x8540, 0x857E }, |
| 143 | { 0x8580, 0x85FE }, |
| 144 | { 0x8640, 0x867E }, |
| 145 | { 0x8680, 0x86FE }, |
| 146 | { 0x8740, 0x877E }, |
| 147 | { 0x8780, 0x87FE }, |
| 148 | { 0x8840, 0x887E }, |
| 149 | { 0x8880, 0x88FE }, |
| 150 | { 0x8940, 0x897E }, |
| 151 | { 0x8980, 0x89FE }, |
| 152 | { 0x8A40, 0x8A7E }, |
| 153 | { 0x8A80, 0x8AFE }, |
| 154 | { 0x8B40, 0x8B7E }, |
| 155 | { 0x8B80, 0x8BFE }, |
| 156 | { 0x8C40, 0x8C7E }, |
| 157 | { 0x8C80, 0x8CFE }, |
| 158 | { 0x8D40, 0x8D7E }, |
| 159 | { 0x8D80, 0x8DFE }, |
| 160 | { 0x8E40, 0x8E7E }, |
| 161 | { 0x8E80, 0x8EFE }, |
| 162 | { 0x8F40, 0x8F7E }, |
| 163 | { 0x8F80, 0x8FFE }, |
| 164 | { 0x9040, 0x907E }, |
| 165 | { 0x9080, 0x90FE }, |
| 166 | { 0x9140, 0x917E }, |
| 167 | { 0x9180, 0x91FE }, |
| 168 | { 0x9240, 0x927E }, |
| 169 | { 0x9280, 0x92FE }, |
| 170 | { 0x9340, 0x937E }, |
| 171 | { 0x9380, 0x93FE }, |
| 172 | { 0x9440, 0x947E }, |
| 173 | { 0x9480, 0x94FE }, |
| 174 | { 0x9540, 0x957E }, |
| 175 | { 0x9580, 0x95FE }, |
| 176 | { 0x9640, 0x967E }, |
| 177 | { 0x9680, 0x96FE }, |
| 178 | { 0x9740, 0x977E }, |
| 179 | { 0x9780, 0x97FE }, |
| 180 | { 0x9840, 0x987E }, |
| 181 | { 0x9880, 0x98FE }, |
| 182 | { 0x9940, 0x997E }, |
| 183 | { 0x9980, 0x99FE }, |
| 184 | { 0x9A40, 0x9A7E }, |
| 185 | { 0x9A80, 0x9AFE }, |
| 186 | { 0x9B40, 0x9B7E }, |
| 187 | { 0x9B80, 0x9BFE }, |
| 188 | { 0x9C40, 0x9C7E }, |
| 189 | { 0x9C80, 0x9CFE }, |
| 190 | { 0x9D40, 0x9D7E }, |
| 191 | { 0x9D80, 0x9DFE }, |
| 192 | { 0x9E40, 0x9E7E }, |
| 193 | { 0x9E80, 0x9EFE }, |
| 194 | { 0x9F40, 0x9F7E }, |
| 195 | { 0x9F80, 0x9FFE }, |
| 196 | { 0xA040, 0xA07E }, |
| 197 | { 0xA080, 0xA0FE }, |
| 198 | { 0xA1A1, 0xA1FE }, |
| 199 | { 0xA2A1, 0xA2AA }, |
| 200 | { 0xA2B1, 0xA2E2 }, |
| 201 | { 0xA2E5, 0xA2EE }, |
| 202 | { 0xA2F1, 0xA2FC }, |
| 203 | { 0xA3A1, 0xA3FE }, |
| 204 | { 0xA4A1, 0xA4F3 }, |
| 205 | { 0xA5A1, 0xA5F6 }, |
| 206 | { 0xA6A1, 0xA6B8 }, |
| 207 | { 0xA6C1, 0xA6D8 }, |
| 208 | { 0xA6E0, 0xA6EB }, |
| 209 | { 0xA6EE, 0xA6F2 }, |
| 210 | { 0xA6F4, 0xA6F5 }, |
| 211 | { 0xA7A1, 0xA7C1 }, |
| 212 | { 0xA7D1, 0xA7F1 }, |
| 213 | { 0xA840, 0xA87E }, |
| 214 | { 0xA880, 0xA895 }, |
| 215 | { 0xA8A1, 0xA8BB }, |
| 216 | { 0xA8BD, 0xA8BE }, |
| 217 | { 0xA8C0, 0xA8C0 }, |
| 218 | { 0xA8C5, 0xA8E9 }, |
| 219 | { 0xA940, 0xA957 }, |
| 220 | { 0xA959, 0xA95A }, |
| 221 | { 0xA95C, 0xA95C }, |
| 222 | { 0xA960, 0xA97E }, |
| 223 | { 0xA980, 0xA988 }, |
| 224 | { 0xA996, 0xA996 }, |
| 225 | { 0xA9A4, 0xA9EF }, |
| 226 | { 0xAA40, 0xAA7E }, |
| 227 | { 0xAA80, 0xAAA0 }, |
| 228 | { 0xAB40, 0xAB7E }, |
| 229 | { 0xAB80, 0xABA0 }, |
| 230 | { 0xAC40, 0xAC7E }, |
| 231 | { 0xAC80, 0xACA0 }, |
| 232 | { 0xAD40, 0xAD7E }, |
| 233 | { 0xAD80, 0xADA0 }, |
| 234 | { 0xAE40, 0xAE7E }, |
| 235 | { 0xAE80, 0xAEA0 }, |
| 236 | { 0xAF40, 0xAF7E }, |
| 237 | { 0xAF80, 0xAFA0 }, |
| 238 | { 0xB040, 0xB07E }, |
| 239 | { 0xB080, 0xB0FE }, |
| 240 | { 0xB140, 0xB17E }, |
| 241 | { 0xB180, 0xB1FE }, |
| 242 | { 0xB240, 0xB27E }, |
| 243 | { 0xB280, 0xB2FE }, |
| 244 | { 0xB340, 0xB37E }, |
| 245 | { 0xB380, 0xB3FE }, |
| 246 | { 0xB440, 0xB47E }, |
| 247 | { 0xB480, 0xB4FE }, |
| 248 | { 0xB540, 0xB57E }, |
| 249 | { 0xB580, 0xB5FE }, |
| 250 | { 0xB640, 0xB67E }, |
| 251 | { 0xB680, 0xB6FE }, |
| 252 | { 0xB740, 0xB77E }, |
| 253 | { 0xB780, 0xB7FE }, |
| 254 | { 0xB840, 0xB87E }, |
| 255 | { 0xB880, 0xB8FE }, |
| 256 | { 0xB940, 0xB97E }, |
| 257 | { 0xB980, 0xB9FE }, |
| 258 | { 0xBA40, 0xBA7E }, |
| 259 | { 0xBA80, 0xBAFE }, |
| 260 | { 0xBB40, 0xBB7E }, |
| 261 | { 0xBB80, 0xBBFE }, |
| 262 | { 0xBC40, 0xBC7E }, |
| 263 | { 0xBC80, 0xBCFE }, |
| 264 | { 0xBD40, 0xBD7E }, |
| 265 | { 0xBD80, 0xBDFE }, |
| 266 | { 0xBE40, 0xBE7E }, |
| 267 | { 0xBE80, 0xBEFE }, |
| 268 | { 0xBF40, 0xBF7E }, |
| 269 | { 0xBF80, 0xBFFE }, |
| 270 | { 0xC040, 0xC07E }, |
| 271 | { 0xC080, 0xC0FE }, |
| 272 | { 0xC140, 0xC17E }, |
| 273 | { 0xC180, 0xC1FE }, |
| 274 | { 0xC240, 0xC27E }, |
| 275 | { 0xC280, 0xC2FE }, |
| 276 | { 0xC340, 0xC37E }, |
| 277 | { 0xC380, 0xC3FE }, |
| 278 | { 0xC440, 0xC47E }, |
| 279 | { 0xC480, 0xC4FE }, |
| 280 | { 0xC540, 0xC57E }, |
| 281 | { 0xC580, 0xC5FE }, |
| 282 | { 0xC640, 0xC67E }, |
| 283 | { 0xC680, 0xC6FE }, |
| 284 | { 0xC740, 0xC77E }, |
| 285 | { 0xC780, 0xC7FE }, |
| 286 | { 0xC840, 0xC87E }, |
| 287 | { 0xC880, 0xC8FE }, |
| 288 | { 0xC940, 0xC97E }, |
| 289 | { 0xC980, 0xC9FE }, |
| 290 | { 0xCA40, 0xCA7E }, |
| 291 | { 0xCA80, 0xCAFE }, |
| 292 | { 0xCB40, 0xCB7E }, |
| 293 | { 0xCB80, 0xCBFE }, |
| 294 | { 0xCC40, 0xCC7E }, |
| 295 | { 0xCC80, 0xCCFE }, |
| 296 | { 0xCD40, 0xCD7E }, |
| 297 | { 0xCD80, 0xCDFE }, |
| 298 | { 0xCE40, 0xCE7E }, |
| 299 | { 0xCE80, 0xCEFE }, |
| 300 | { 0xCF40, 0xCF7E }, |
| 301 | { 0xCF80, 0xCFFE }, |
| 302 | { 0xD040, 0xD07E }, |
| 303 | { 0xD080, 0xD0FE }, |
| 304 | { 0xD140, 0xD17E }, |
| 305 | { 0xD180, 0xD1FE }, |
| 306 | { 0xD240, 0xD27E }, |
| 307 | { 0xD280, 0xD2FE }, |
| 308 | { 0xD340, 0xD37E }, |
| 309 | { 0xD380, 0xD3FE }, |
| 310 | { 0xD440, 0xD47E }, |
| 311 | { 0xD480, 0xD4FE }, |
| 312 | { 0xD540, 0xD57E }, |
| 313 | { 0xD580, 0xD5FE }, |
| 314 | { 0xD640, 0xD67E }, |
| 315 | { 0xD680, 0xD6FE }, |
| 316 | { 0xD740, 0xD77E }, |
| 317 | { 0xD780, 0xD7F9 }, |
| 318 | { 0xD840, 0xD87E }, |
| 319 | { 0xD880, 0xD8FE }, |
| 320 | { 0xD940, 0xD97E }, |
| 321 | { 0xD980, 0xD9FE }, |
| 322 | { 0xDA40, 0xDA7E }, |
| 323 | { 0xDA80, 0xDAFE }, |
| 324 | { 0xDB40, 0xDB7E }, |
| 325 | { 0xDB80, 0xDBFE }, |
| 326 | { 0xDC40, 0xDC7E }, |
| 327 | { 0xDC80, 0xDCFE }, |
| 328 | { 0xDD40, 0xDD7E }, |
| 329 | { 0xDD80, 0xDDFE }, |
| 330 | { 0xDE40, 0xDE7E }, |
| 331 | { 0xDE80, 0xDEFE }, |
| 332 | { 0xDF40, 0xDF7E }, |
| 333 | { 0xDF80, 0xDFFE }, |
| 334 | { 0xE040, 0xE07E }, |
| 335 | { 0xE080, 0xE0FE }, |
| 336 | { 0xE140, 0xE17E }, |
| 337 | { 0xE180, 0xE1FE }, |
| 338 | { 0xE240, 0xE27E }, |
| 339 | { 0xE280, 0xE2FE }, |
| 340 | { 0xE340, 0xE37E }, |
| 341 | { 0xE380, 0xE3FE }, |
| 342 | { 0xE440, 0xE47E }, |
| 343 | { 0xE480, 0xE4FE }, |
| 344 | { 0xE540, 0xE57E }, |
| 345 | { 0xE580, 0xE5FE }, |
| 346 | { 0xE640, 0xE67E }, |
| 347 | { 0xE680, 0xE6FE }, |
| 348 | { 0xE740, 0xE77E }, |
| 349 | { 0xE780, 0xE7FE }, |
| 350 | { 0xE840, 0xE87E }, |
| 351 | { 0xE880, 0xE8FE }, |
| 352 | { 0xE940, 0xE97E }, |
| 353 | { 0xE980, 0xE9FE }, |
| 354 | { 0xEA40, 0xEA7E }, |
| 355 | { 0xEA80, 0xEAFE }, |
| 356 | { 0xEB40, 0xEB7E }, |
| 357 | { 0xEB80, 0xEBFE }, |
| 358 | { 0xEC40, 0xEC7E }, |
| 359 | { 0xEC80, 0xECFE }, |
| 360 | { 0xED40, 0xED7E }, |
| 361 | { 0xED80, 0xEDFE }, |
| 362 | { 0xEE40, 0xEE7E }, |
| 363 | { 0xEE80, 0xEEFE }, |
| 364 | { 0xEF40, 0xEF7E }, |
| 365 | { 0xEF80, 0xEFFE }, |
| 366 | { 0xF040, 0xF07E }, |
| 367 | { 0xF080, 0xF0FE }, |
| 368 | { 0xF140, 0xF17E }, |
| 369 | { 0xF180, 0xF1FE }, |
| 370 | { 0xF240, 0xF27E }, |
| 371 | { 0xF280, 0xF2FE }, |
| 372 | { 0xF340, 0xF37E }, |
| 373 | { 0xF380, 0xF3FE }, |
| 374 | { 0xF440, 0xF47E }, |
| 375 | { 0xF480, 0xF4FE }, |
| 376 | { 0xF540, 0xF57E }, |
| 377 | { 0xF580, 0xF5FE }, |
| 378 | { 0xF640, 0xF67E }, |
| 379 | { 0xF680, 0xF6FE }, |
| 380 | { 0xF740, 0xF77E }, |
| 381 | { 0xF780, 0xF7FE }, |
| 382 | { 0xF840, 0xF87E }, |
| 383 | { 0xF880, 0xF8A0 }, |
| 384 | { 0xF940, 0xF97E }, |
| 385 | { 0xF980, 0xF9A0 }, |
| 386 | { 0xFA40, 0xFA7E }, |
| 387 | { 0xFA80, 0xFAA0 }, |
| 388 | { 0xFB40, 0xFB7E }, |
| 389 | { 0xFB80, 0xFBA0 }, |
| 390 | { 0xFC40, 0xFC7E }, |
| 391 | { 0xFC80, 0xFCA0 }, |
| 392 | { 0xFD40, 0xFD7E }, |
| 393 | { 0xFD80, 0xFDA0 }, |
| 394 | { 0xFE40, 0xFE4F }, |
| 395 | }; |
| 396 | |
| 397 | // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT |
| 398 | static const CharRange kEUCKRRanges[] = { |
| 399 | { 0x8141, 0x815A }, |
| 400 | { 0x8161, 0x817A }, |
| 401 | { 0x8181, 0x81FE }, |
| 402 | { 0x8241, 0x825A }, |
| 403 | { 0x8261, 0x827A }, |
| 404 | { 0x8281, 0x82FE }, |
| 405 | { 0x8341, 0x835A }, |
| 406 | { 0x8361, 0x837A }, |
| 407 | { 0x8381, 0x83FE }, |
| 408 | { 0x8441, 0x845A }, |
| 409 | { 0x8461, 0x847A }, |
| 410 | { 0x8481, 0x84FE }, |
| 411 | { 0x8541, 0x855A }, |
| 412 | { 0x8561, 0x857A }, |
| 413 | { 0x8581, 0x85FE }, |
| 414 | { 0x8641, 0x865A }, |
| 415 | { 0x8661, 0x867A }, |
| 416 | { 0x8681, 0x86FE }, |
| 417 | { 0x8741, 0x875A }, |
| 418 | { 0x8761, 0x877A }, |
| 419 | { 0x8781, 0x87FE }, |
| 420 | { 0x8841, 0x885A }, |
| 421 | { 0x8861, 0x887A }, |
| 422 | { 0x8881, 0x88FE }, |
| 423 | { 0x8941, 0x895A }, |
| 424 | { 0x8961, 0x897A }, |
| 425 | { 0x8981, 0x89FE }, |
| 426 | { 0x8A41, 0x8A5A }, |
| 427 | { 0x8A61, 0x8A7A }, |
| 428 | { 0x8A81, 0x8AFE }, |
| 429 | { 0x8B41, 0x8B5A }, |
| 430 | { 0x8B61, 0x8B7A }, |
| 431 | { 0x8B81, 0x8BFE }, |
| 432 | { 0x8C41, 0x8C5A }, |
| 433 | { 0x8C61, 0x8C7A }, |
| 434 | { 0x8C81, 0x8CFE }, |
| 435 | { 0x8D41, 0x8D5A }, |
| 436 | { 0x8D61, 0x8D7A }, |
| 437 | { 0x8D81, 0x8DFE }, |
| 438 | { 0x8E41, 0x8E5A }, |
| 439 | { 0x8E61, 0x8E7A }, |
| 440 | { 0x8E81, 0x8EFE }, |
| 441 | { 0x8F41, 0x8F5A }, |
| 442 | { 0x8F61, 0x8F7A }, |
| 443 | { 0x8F81, 0x8FFE }, |
| 444 | { 0x9041, 0x905A }, |
| 445 | { 0x9061, 0x907A }, |
| 446 | { 0x9081, 0x90FE }, |
| 447 | { 0x9141, 0x915A }, |
| 448 | { 0x9161, 0x917A }, |
| 449 | { 0x9181, 0x91FE }, |
| 450 | { 0x9241, 0x925A }, |
| 451 | { 0x9261, 0x927A }, |
| 452 | { 0x9281, 0x92FE }, |
| 453 | { 0x9341, 0x935A }, |
| 454 | { 0x9361, 0x937A }, |
| 455 | { 0x9381, 0x93FE }, |
| 456 | { 0x9441, 0x945A }, |
| 457 | { 0x9461, 0x947A }, |
| 458 | { 0x9481, 0x94FE }, |
| 459 | { 0x9541, 0x955A }, |
| 460 | { 0x9561, 0x957A }, |
| 461 | { 0x9581, 0x95FE }, |
| 462 | { 0x9641, 0x965A }, |
| 463 | { 0x9661, 0x967A }, |
| 464 | { 0x9681, 0x96FE }, |
| 465 | { 0x9741, 0x975A }, |
| 466 | { 0x9761, 0x977A }, |
| 467 | { 0x9781, 0x97FE }, |
| 468 | { 0x9841, 0x985A }, |
| 469 | { 0x9861, 0x987A }, |
| 470 | { 0x9881, 0x98FE }, |
| 471 | { 0x9941, 0x995A }, |
| 472 | { 0x9961, 0x997A }, |
| 473 | { 0x9981, 0x99FE }, |
| 474 | { 0x9A41, 0x9A5A }, |
| 475 | { 0x9A61, 0x9A7A }, |
| 476 | { 0x9A81, 0x9AFE }, |
| 477 | { 0x9B41, 0x9B5A }, |
| 478 | { 0x9B61, 0x9B7A }, |
| 479 | { 0x9B81, 0x9BFE }, |
| 480 | { 0x9C41, 0x9C5A }, |
| 481 | { 0x9C61, 0x9C7A }, |
| 482 | { 0x9C81, 0x9CFE }, |
| 483 | { 0x9D41, 0x9D5A }, |
| 484 | { 0x9D61, 0x9D7A }, |
| 485 | { 0x9D81, 0x9DFE }, |
| 486 | { 0x9E41, 0x9E5A }, |
| 487 | { 0x9E61, 0x9E7A }, |
| 488 | { 0x9E81, 0x9EFE }, |
| 489 | { 0x9F41, 0x9F5A }, |
| 490 | { 0x9F61, 0x9F7A }, |
| 491 | { 0x9F81, 0x9FFE }, |
| 492 | { 0xA041, 0xA05A }, |
| 493 | { 0xA061, 0xA07A }, |
| 494 | { 0xA081, 0xA0FE }, |
| 495 | { 0xA141, 0xA15A }, |
| 496 | { 0xA161, 0xA17A }, |
| 497 | { 0xA181, 0xA1FE }, |
| 498 | { 0xA241, 0xA25A }, |
| 499 | { 0xA261, 0xA27A }, |
| 500 | { 0xA281, 0xA2E7 }, |
| 501 | { 0xA341, 0xA35A }, |
| 502 | { 0xA361, 0xA37A }, |
| 503 | { 0xA381, 0xA3FE }, |
| 504 | { 0xA441, 0xA45A }, |
| 505 | { 0xA461, 0xA47A }, |
| 506 | { 0xA481, 0xA4FE }, |
| 507 | { 0xA541, 0xA55A }, |
| 508 | { 0xA561, 0xA57A }, |
| 509 | { 0xA581, 0xA5AA }, |
| 510 | { 0xA5B0, 0xA5B9 }, |
| 511 | { 0xA5C1, 0xA5D8 }, |
| 512 | { 0xA5E1, 0xA5F8 }, |
| 513 | { 0xA641, 0xA65A }, |
| 514 | { 0xA661, 0xA67A }, |
| 515 | { 0xA681, 0xA6E4 }, |
| 516 | { 0xA741, 0xA75A }, |
| 517 | { 0xA761, 0xA77A }, |
| 518 | { 0xA781, 0xA7EF }, |
| 519 | { 0xA841, 0xA85A }, |
| 520 | { 0xA861, 0xA87A }, |
| 521 | { 0xA881, 0xA8A4 }, |
| 522 | { 0xA8A6, 0xA8A6 }, |
| 523 | { 0xA8A8, 0xA8AF }, |
| 524 | { 0xA8B1, 0xA8FE }, |
| 525 | { 0xA941, 0xA95A }, |
| 526 | { 0xA961, 0xA97A }, |
| 527 | { 0xA981, 0xA9FE }, |
| 528 | { 0xAA41, 0xAA5A }, |
| 529 | { 0xAA61, 0xAA7A }, |
| 530 | { 0xAA81, 0xAAF3 }, |
| 531 | { 0xAB41, 0xAB5A }, |
| 532 | { 0xAB61, 0xAB7A }, |
| 533 | { 0xAB81, 0xABF6 }, |
| 534 | { 0xAC41, 0xAC5A }, |
| 535 | { 0xAC61, 0xAC7A }, |
| 536 | { 0xAC81, 0xACC1 }, |
| 537 | { 0xACD1, 0xACF1 }, |
| 538 | { 0xAD41, 0xAD5A }, |
| 539 | { 0xAD61, 0xAD7A }, |
| 540 | { 0xAD81, 0xADA0 }, |
| 541 | { 0xAE41, 0xAE5A }, |
| 542 | { 0xAE61, 0xAE7A }, |
| 543 | { 0xAE81, 0xAEA0 }, |
| 544 | { 0xAF41, 0xAF5A }, |
| 545 | { 0xAF61, 0xAF7A }, |
| 546 | { 0xAF81, 0xAFA0 }, |
| 547 | { 0xB041, 0xB05A }, |
| 548 | { 0xB061, 0xB07A }, |
| 549 | { 0xB081, 0xB0FE }, |
| 550 | { 0xB141, 0xB15A }, |
| 551 | { 0xB161, 0xB17A }, |
| 552 | { 0xB181, 0xB1FE }, |
| 553 | { 0xB241, 0xB25A }, |
| 554 | { 0xB261, 0xB27A }, |
| 555 | { 0xB281, 0xB2FE }, |
| 556 | { 0xB341, 0xB35A }, |
| 557 | { 0xB361, 0xB37A }, |
| 558 | { 0xB381, 0xB3FE }, |
| 559 | { 0xB441, 0xB45A }, |
| 560 | { 0xB461, 0xB47A }, |
| 561 | { 0xB481, 0xB4FE }, |
| 562 | { 0xB541, 0xB55A }, |
| 563 | { 0xB561, 0xB57A }, |
| 564 | { 0xB581, 0xB5FE }, |
| 565 | { 0xB641, 0xB65A }, |
| 566 | { 0xB661, 0xB67A }, |
| 567 | { 0xB681, 0xB6FE }, |
| 568 | { 0xB741, 0xB75A }, |
| 569 | { 0xB761, 0xB77A }, |
| 570 | { 0xB781, 0xB7FE }, |
| 571 | { 0xB841, 0xB85A }, |
| 572 | { 0xB861, 0xB87A }, |
| 573 | { 0xB881, 0xB8FE }, |
| 574 | { 0xB941, 0xB95A }, |
| 575 | { 0xB961, 0xB97A }, |
| 576 | { 0xB981, 0xB9FE }, |
| 577 | { 0xBA41, 0xBA5A }, |
| 578 | { 0xBA61, 0xBA7A }, |
| 579 | { 0xBA81, 0xBAFE }, |
| 580 | { 0xBB41, 0xBB5A }, |
| 581 | { 0xBB61, 0xBB7A }, |
| 582 | { 0xBB81, 0xBBFE }, |
| 583 | { 0xBC41, 0xBC5A }, |
| 584 | { 0xBC61, 0xBC7A }, |
| 585 | { 0xBC81, 0xBCFE }, |
| 586 | { 0xBD41, 0xBD5A }, |
| 587 | { 0xBD61, 0xBD7A }, |
| 588 | { 0xBD81, 0xBDFE }, |
| 589 | { 0xBE41, 0xBE5A }, |
| 590 | { 0xBE61, 0xBE7A }, |
| 591 | { 0xBE81, 0xBEFE }, |
| 592 | { 0xBF41, 0xBF5A }, |
| 593 | { 0xBF61, 0xBF7A }, |
| 594 | { 0xBF81, 0xBFFE }, |
| 595 | { 0xC041, 0xC05A }, |
| 596 | { 0xC061, 0xC07A }, |
| 597 | { 0xC081, 0xC0FE }, |
| 598 | { 0xC141, 0xC15A }, |
| 599 | { 0xC161, 0xC17A }, |
| 600 | { 0xC181, 0xC1FE }, |
| 601 | { 0xC241, 0xC25A }, |
| 602 | { 0xC261, 0xC27A }, |
| 603 | { 0xC281, 0xC2FE }, |
| 604 | { 0xC341, 0xC35A }, |
| 605 | { 0xC361, 0xC37A }, |
| 606 | { 0xC381, 0xC3FE }, |
| 607 | { 0xC441, 0xC45A }, |
| 608 | { 0xC461, 0xC47A }, |
| 609 | { 0xC481, 0xC4FE }, |
| 610 | { 0xC541, 0xC55A }, |
| 611 | { 0xC561, 0xC57A }, |
| 612 | { 0xC581, 0xC5FE }, |
| 613 | { 0xC641, 0xC652 }, |
| 614 | { 0xC6A1, 0xC6FE }, |
| 615 | { 0xC7A1, 0xC7FE }, |
| 616 | { 0xC8A1, 0xC8FE }, |
| 617 | { 0xCAA1, 0xCAFE }, |
| 618 | { 0xCBA1, 0xCBFE }, |
| 619 | { 0xCCA1, 0xCCFE }, |
| 620 | { 0xCDA1, 0xCDFE }, |
| 621 | { 0xCEA1, 0xCEFE }, |
| 622 | { 0xCFA1, 0xCFFE }, |
| 623 | { 0xD0A1, 0xD0FE }, |
| 624 | { 0xD1A1, 0xD1FE }, |
| 625 | { 0xD2A1, 0xD2FE }, |
| 626 | { 0xD3A1, 0xD3FE }, |
| 627 | { 0xD4A1, 0xD4FE }, |
| 628 | { 0xD5A1, 0xD5FE }, |
| 629 | { 0xD6A1, 0xD6FE }, |
| 630 | { 0xD7A1, 0xD7FE }, |
| 631 | { 0xD8A1, 0xD8FE }, |
| 632 | { 0xD9A1, 0xD9FE }, |
| 633 | { 0xDAA1, 0xDAFE }, |
| 634 | { 0xDBA1, 0xDBFE }, |
| 635 | { 0xDCA1, 0xDCFE }, |
| 636 | { 0xDDA1, 0xDDFE }, |
| 637 | { 0xDEA1, 0xDEFE }, |
| 638 | { 0xDFA1, 0xDFFE }, |
| 639 | { 0xE0A1, 0xE0FE }, |
| 640 | { 0xE1A1, 0xE1FE }, |
| 641 | { 0xE2A1, 0xE2FE }, |
| 642 | { 0xE3A1, 0xE3FE }, |
| 643 | { 0xE4A1, 0xE4FE }, |
| 644 | { 0xE5A1, 0xE5FE }, |
| 645 | { 0xE6A1, 0xE6FE }, |
| 646 | { 0xE7A1, 0xE7FE }, |
| 647 | { 0xE8A1, 0xE8FE }, |
| 648 | { 0xE9A1, 0xE9FE }, |
| 649 | { 0xEAA1, 0xEAFE }, |
| 650 | { 0xEBA1, 0xEBFE }, |
| 651 | { 0xECA1, 0xECFE }, |
| 652 | { 0xEDA1, 0xEDFE }, |
| 653 | { 0xEEA1, 0xEEFE }, |
| 654 | { 0xEFA1, 0xEFFE }, |
| 655 | { 0xF0A1, 0xF0FE }, |
| 656 | { 0xF1A1, 0xF1FE }, |
| 657 | { 0xF2A1, 0xF2FE }, |
| 658 | { 0xF3A1, 0xF3FE }, |
| 659 | { 0xF4A1, 0xF4FE }, |
| 660 | { 0xF5A1, 0xF5FE }, |
| 661 | { 0xF6A1, 0xF6FE }, |
| 662 | { 0xF7A1, 0xF7FE }, |
| 663 | { 0xF8A1, 0xF8FE }, |
| 664 | { 0xF9A1, 0xF9FE }, |
| 665 | { 0xFAA1, 0xFAFE }, |
| 666 | { 0xFBA1, 0xFBFE }, |
| 667 | { 0xFCA1, 0xFCFE }, |
| 668 | { 0xFDA1, 0xFDFE }, |
| 669 | }; |
| 670 | |
| 671 | // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT |
| 672 | static const CharRange kBig5Ranges[] = { |
| 673 | { 0xA140, 0xA17E }, |
| 674 | { 0xA1A1, 0xA1FE }, |
| 675 | { 0xA240, 0xA27E }, |
| 676 | { 0xA2A1, 0xA2FE }, |
| 677 | { 0xA340, 0xA37E }, |
| 678 | { 0xA3A1, 0xA3BF }, |
| 679 | { 0xA3E1, 0xA3E1 }, |
| 680 | { 0xA440, 0xA47E }, |
| 681 | { 0xA4A1, 0xA4FE }, |
| 682 | { 0xA540, 0xA57E }, |
| 683 | { 0xA5A1, 0xA5FE }, |
| 684 | { 0xA640, 0xA67E }, |
| 685 | { 0xA6A1, 0xA6FE }, |
| 686 | { 0xA740, 0xA77E }, |
| 687 | { 0xA7A1, 0xA7FE }, |
| 688 | { 0xA840, 0xA87E }, |
| 689 | { 0xA8A1, 0xA8FE }, |
| 690 | { 0xA940, 0xA97E }, |
| 691 | { 0xA9A1, 0xA9FE }, |
| 692 | { 0xAA40, 0xAA7E }, |
| 693 | { 0xAAA1, 0xAAFE }, |
| 694 | { 0xAB40, 0xAB7E }, |
| 695 | { 0xABA1, 0xABFE }, |
| 696 | { 0xAC40, 0xAC7E }, |
| 697 | { 0xACA1, 0xACFE }, |
| 698 | { 0xAD40, 0xAD7E }, |
| 699 | { 0xADA1, 0xADFE }, |
| 700 | { 0xAE40, 0xAE7E }, |
| 701 | { 0xAEA1, 0xAEFE }, |
| 702 | { 0xAF40, 0xAF7E }, |
| 703 | { 0xAFA1, 0xAFFE }, |
| 704 | { 0xB040, 0xB07E }, |
| 705 | { 0xB0A1, 0xB0FE }, |
| 706 | { 0xB140, 0xB17E }, |
| 707 | { 0xB1A1, 0xB1FE }, |
| 708 | { 0xB240, 0xB27E }, |
| 709 | { 0xB2A1, 0xB2FE }, |
| 710 | { 0xB340, 0xB37E }, |
| 711 | { 0xB3A1, 0xB3FE }, |
| 712 | { 0xB440, 0xB47E }, |
| 713 | { 0xB4A1, 0xB4FE }, |
| 714 | { 0xB540, 0xB57E }, |
| 715 | { 0xB5A1, 0xB5FE }, |
| 716 | { 0xB640, 0xB67E }, |
| 717 | { 0xB6A1, 0xB6FE }, |
| 718 | { 0xB740, 0xB77E }, |
| 719 | { 0xB7A1, 0xB7FE }, |
| 720 | { 0xB840, 0xB87E }, |
| 721 | { 0xB8A1, 0xB8FE }, |
| 722 | { 0xB940, 0xB97E }, |
| 723 | { 0xB9A1, 0xB9FE }, |
| 724 | { 0xBA40, 0xBA7E }, |
| 725 | { 0xBAA1, 0xBAFE }, |
| 726 | { 0xBB40, 0xBB7E }, |
| 727 | { 0xBBA1, 0xBBFE }, |
| 728 | { 0xBC40, 0xBC7E }, |
| 729 | { 0xBCA1, 0xBCFE }, |
| 730 | { 0xBD40, 0xBD7E }, |
| 731 | { 0xBDA1, 0xBDFE }, |
| 732 | { 0xBE40, 0xBE7E }, |
| 733 | { 0xBEA1, 0xBEFE }, |
| 734 | { 0xBF40, 0xBF7E }, |
| 735 | { 0xBFA1, 0xBFFE }, |
| 736 | { 0xC040, 0xC07E }, |
| 737 | { 0xC0A1, 0xC0FE }, |
| 738 | { 0xC140, 0xC17E }, |
| 739 | { 0xC1A1, 0xC1FE }, |
| 740 | { 0xC240, 0xC27E }, |
| 741 | { 0xC2A1, 0xC2FE }, |
| 742 | { 0xC340, 0xC37E }, |
| 743 | { 0xC3A1, 0xC3FE }, |
| 744 | { 0xC440, 0xC47E }, |
| 745 | { 0xC4A1, 0xC4FE }, |
| 746 | { 0xC540, 0xC57E }, |
| 747 | { 0xC5A1, 0xC5FE }, |
| 748 | { 0xC640, 0xC67E }, |
| 749 | { 0xC940, 0xC97E }, |
| 750 | { 0xC9A1, 0xC9FE }, |
| 751 | { 0xCA40, 0xCA7E }, |
| 752 | { 0xCAA1, 0xCAFE }, |
| 753 | { 0xCB40, 0xCB7E }, |
| 754 | { 0xCBA1, 0xCBFE }, |
| 755 | { 0xCC40, 0xCC7E }, |
| 756 | { 0xCCA1, 0xCCFE }, |
| 757 | { 0xCD40, 0xCD7E }, |
| 758 | { 0xCDA1, 0xCDFE }, |
| 759 | { 0xCE40, 0xCE7E }, |
| 760 | { 0xCEA1, 0xCEFE }, |
| 761 | { 0xCF40, 0xCF7E }, |
| 762 | { 0xCFA1, 0xCFFE }, |
| 763 | { 0xD040, 0xD07E }, |
| 764 | { 0xD0A1, 0xD0FE }, |
| 765 | { 0xD140, 0xD17E }, |
| 766 | { 0xD1A1, 0xD1FE }, |
| 767 | { 0xD240, 0xD27E }, |
| 768 | { 0xD2A1, 0xD2FE }, |
| 769 | { 0xD340, 0xD37E }, |
| 770 | { 0xD3A1, 0xD3FE }, |
| 771 | { 0xD440, 0xD47E }, |
| 772 | { 0xD4A1, 0xD4FE }, |
| 773 | { 0xD540, 0xD57E }, |
| 774 | { 0xD5A1, 0xD5FE }, |
| 775 | { 0xD640, 0xD67E }, |
| 776 | { 0xD6A1, 0xD6FE }, |
| 777 | { 0xD740, 0xD77E }, |
| 778 | { 0xD7A1, 0xD7FE }, |
| 779 | { 0xD840, 0xD87E }, |
| 780 | { 0xD8A1, 0xD8FE }, |
| 781 | { 0xD940, 0xD97E }, |
| 782 | { 0xD9A1, 0xD9FE }, |
| 783 | { 0xDA40, 0xDA7E }, |
| 784 | { 0xDAA1, 0xDAFE }, |
| 785 | { 0xDB40, 0xDB7E }, |
| 786 | { 0xDBA1, 0xDBFE }, |
| 787 | { 0xDC40, 0xDC7E }, |
| 788 | { 0xDCA1, 0xDCFE }, |
| 789 | { 0xDD40, 0xDD7E }, |
| 790 | { 0xDDA1, 0xDDFE }, |
| 791 | { 0xDE40, 0xDE7E }, |
| 792 | { 0xDEA1, 0xDEFE }, |
| 793 | { 0xDF40, 0xDF7E }, |
| 794 | { 0xDFA1, 0xDFFE }, |
| 795 | { 0xE040, 0xE07E }, |
| 796 | { 0xE0A1, 0xE0FE }, |
| 797 | { 0xE140, 0xE17E }, |
| 798 | { 0xE1A1, 0xE1FE }, |
| 799 | { 0xE240, 0xE27E }, |
| 800 | { 0xE2A1, 0xE2FE }, |
| 801 | { 0xE340, 0xE37E }, |
| 802 | { 0xE3A1, 0xE3FE }, |
| 803 | { 0xE440, 0xE47E }, |
| 804 | { 0xE4A1, 0xE4FE }, |
| 805 | { 0xE540, 0xE57E }, |
| 806 | { 0xE5A1, 0xE5FE }, |
| 807 | { 0xE640, 0xE67E }, |
| 808 | { 0xE6A1, 0xE6FE }, |
| 809 | { 0xE740, 0xE77E }, |
| 810 | { 0xE7A1, 0xE7FE }, |
| 811 | { 0xE840, 0xE87E }, |
| 812 | { 0xE8A1, 0xE8FE }, |
| 813 | { 0xE940, 0xE97E }, |
| 814 | { 0xE9A1, 0xE9FE }, |
| 815 | { 0xEA40, 0xEA7E }, |
| 816 | { 0xEAA1, 0xEAFE }, |
| 817 | { 0xEB40, 0xEB7E }, |
| 818 | { 0xEBA1, 0xEBFE }, |
| 819 | { 0xEC40, 0xEC7E }, |
| 820 | { 0xECA1, 0xECFE }, |
| 821 | { 0xED40, 0xED7E }, |
| 822 | { 0xEDA1, 0xEDFE }, |
| 823 | { 0xEE40, 0xEE7E }, |
| 824 | { 0xEEA1, 0xEEFE }, |
| 825 | { 0xEF40, 0xEF7E }, |
| 826 | { 0xEFA1, 0xEFFE }, |
| 827 | { 0xF040, 0xF07E }, |
| 828 | { 0xF0A1, 0xF0FE }, |
| 829 | { 0xF140, 0xF17E }, |
| 830 | { 0xF1A1, 0xF1FE }, |
| 831 | { 0xF240, 0xF27E }, |
| 832 | { 0xF2A1, 0xF2FE }, |
| 833 | { 0xF340, 0xF37E }, |
| 834 | { 0xF3A1, 0xF3FE }, |
| 835 | { 0xF440, 0xF47E }, |
| 836 | { 0xF4A1, 0xF4FE }, |
| 837 | { 0xF540, 0xF57E }, |
| 838 | { 0xF5A1, 0xF5FE }, |
| 839 | { 0xF640, 0xF67E }, |
| 840 | { 0xF6A1, 0xF6FE }, |
| 841 | { 0xF740, 0xF77E }, |
| 842 | { 0xF7A1, 0xF7FE }, |
| 843 | { 0xF840, 0xF87E }, |
| 844 | { 0xF8A1, 0xF8FE }, |
| 845 | { 0xF940, 0xF97E }, |
| 846 | { 0xF9A1, 0xF9FE }, |
| 847 | }; |
| 848 | |
| 849 | static bool charMatchesEncoding(int ch, const CharRange* encodingRanges, int rangeCount) { |
| 850 | // Use binary search to see if the character is contained in the encoding |
| 851 | int low = 0; |
| 852 | int high = rangeCount; |
| 853 | |
| 854 | while (low < high) { |
| 855 | int i = (low + high) / 2; |
| 856 | const CharRange* range = &encodingRanges[i]; |
| 857 | if (ch >= range->first && ch <= range->last) |
| 858 | return true; |
| 859 | if (ch > range->last) |
| 860 | low = i + 1; |
| 861 | else |
| 862 | high = i; |
| 863 | } |
| 864 | |
| 865 | return false; |
| 866 | } |
| 867 | |
| 868 | extern uint32_t findPossibleEncodings(int ch) |
| 869 | { |
| 870 | // ASCII matches everything |
| 871 | if (ch < 256) return kEncodingAll; |
| 872 | |
| 873 | int result = kEncodingNone; |
| 874 | |
| 875 | if (charMatchesEncoding(ch, kShiftJISRanges, ARRAY_SIZE(kShiftJISRanges))) |
| 876 | result |= kEncodingShiftJIS; |
| 877 | if (charMatchesEncoding(ch, kGBKRanges, ARRAY_SIZE(kGBKRanges))) |
| 878 | result |= kEncodingGBK; |
| 879 | if (charMatchesEncoding(ch, kBig5Ranges, ARRAY_SIZE(kBig5Ranges))) |
| 880 | result |= kEncodingBig5; |
| 881 | if (charMatchesEncoding(ch, kEUCKRRanges, ARRAY_SIZE(kEUCKRRanges))) |
| 882 | result |= kEncodingEUCKR; |
| 883 | |
| 884 | return result; |
| 885 | } |