/*
 * conversion between BIG5 and Mule Internal Code (CNS 11643-1992
 * plane 1 and plane 2).
 * This program is partially copied from lv (Multilingual file viewer)
 * and slightly modified. lv is written and copyrighted by NARITA Tomio
 * (nrt@web.ad.jp).
 *
 * 1999/1/15 Tatsuo Ishii
 *
 * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c
 */
| 12 | |
| 13 | /* can be used in either frontend or backend */ |
| 14 | #include "postgres_fe.h" |
| 15 | |
| 16 | #include "mb/pg_wchar.h" |
| 17 | |
/*
 * One entry of a range-mapping table.
 *
 * Each entry marks the start of a run of consecutive codes in the source
 * character set and gives the code that the first member of the run maps to
 * in the target character set.
 */
typedef struct
{
	unsigned short code;		/* first source code of the range */
	unsigned short peer;		/* target code for "code"; 0 means the range
								 * has no mapping */
} codes_t;
| 23 | |
| 24 | /* map Big5 Level 1 to CNS 11643-1992 Plane 1 */ |
| 25 | static const codes_t big5Level1ToCnsPlane1[25] = { /* range */ |
| 26 | {0xA140, 0x2121}, |
| 27 | {0xA1F6, 0x2258}, |
| 28 | {0xA1F7, 0x2257}, |
| 29 | {0xA1F8, 0x2259}, |
| 30 | {0xA2AF, 0x2421}, |
| 31 | {0xA3C0, 0x4221}, |
| 32 | {0xa3e1, 0x0000}, |
| 33 | {0xA440, 0x4421}, |
| 34 | {0xACFE, 0x5753}, |
| 35 | {0xacff, 0x0000}, |
| 36 | {0xAD40, 0x5323}, |
| 37 | {0xAFD0, 0x5754}, |
| 38 | {0xBBC8, 0x6B51}, |
| 39 | {0xBE52, 0x6B50}, |
| 40 | {0xBE53, 0x6F5C}, |
| 41 | {0xC1AB, 0x7536}, |
| 42 | {0xC2CB, 0x7535}, |
| 43 | {0xC2CC, 0x7737}, |
| 44 | {0xC361, 0x782E}, |
| 45 | {0xC3B9, 0x7865}, |
| 46 | {0xC3BA, 0x7864}, |
| 47 | {0xC3BB, 0x7866}, |
| 48 | {0xC456, 0x782D}, |
| 49 | {0xC457, 0x7962}, |
| 50 | {0xc67f, 0x0000} |
| 51 | }; |
| 52 | |
| 53 | /* map CNS 11643-1992 Plane 1 to Big5 Level 1 */ |
| 54 | static const codes_t cnsPlane1ToBig5Level1[26] = { /* range */ |
| 55 | {0x2121, 0xA140}, |
| 56 | {0x2257, 0xA1F7}, |
| 57 | {0x2258, 0xA1F6}, |
| 58 | {0x2259, 0xA1F8}, |
| 59 | {0x234f, 0x0000}, |
| 60 | {0x2421, 0xA2AF}, |
| 61 | {0x2571, 0x0000}, |
| 62 | {0x4221, 0xA3C0}, |
| 63 | {0x4242, 0x0000}, |
| 64 | {0x4421, 0xA440}, |
| 65 | {0x5323, 0xAD40}, |
| 66 | {0x5753, 0xACFE}, |
| 67 | {0x5754, 0xAFD0}, |
| 68 | {0x6B50, 0xBE52}, |
| 69 | {0x6B51, 0xBBC8}, |
| 70 | {0x6F5C, 0xBE53}, |
| 71 | {0x7535, 0xC2CB}, |
| 72 | {0x7536, 0xC1AB}, |
| 73 | {0x7737, 0xC2CC}, |
| 74 | {0x782D, 0xC456}, |
| 75 | {0x782E, 0xC361}, |
| 76 | {0x7864, 0xC3BA}, |
| 77 | {0x7865, 0xC3B9}, |
| 78 | {0x7866, 0xC3BB}, |
| 79 | {0x7962, 0xC457}, |
| 80 | {0x7d4c, 0x0000} |
| 81 | }; |
| 82 | |
| 83 | /* map Big5 Level 2 to CNS 11643-1992 Plane 2 */ |
| 84 | static const codes_t big5Level2ToCnsPlane2[48] = { /* range */ |
| 85 | {0xC940, 0x2121}, |
| 86 | {0xc94a, 0x0000}, |
| 87 | {0xC94B, 0x212B}, |
| 88 | {0xC96C, 0x214D}, |
| 89 | {0xC9BE, 0x214C}, |
| 90 | {0xC9BF, 0x217D}, |
| 91 | {0xC9ED, 0x224E}, |
| 92 | {0xCAF7, 0x224D}, |
| 93 | {0xCAF8, 0x2439}, |
| 94 | {0xD77A, 0x3F6A}, |
| 95 | {0xD77B, 0x387E}, |
| 96 | {0xDBA7, 0x3F6B}, |
| 97 | {0xDDFC, 0x4176}, |
| 98 | {0xDDFD, 0x4424}, |
| 99 | {0xE8A3, 0x554C}, |
| 100 | {0xE976, 0x5723}, |
| 101 | {0xEB5B, 0x5A29}, |
| 102 | {0xEBF1, 0x554B}, |
| 103 | {0xEBF2, 0x5B3F}, |
| 104 | {0xECDE, 0x5722}, |
| 105 | {0xECDF, 0x5C6A}, |
| 106 | {0xEDAA, 0x5D75}, |
| 107 | {0xEEEB, 0x642F}, |
| 108 | {0xEEEC, 0x6039}, |
| 109 | {0xF056, 0x5D74}, |
| 110 | {0xF057, 0x6243}, |
| 111 | {0xF0CB, 0x5A28}, |
| 112 | {0xF0CC, 0x6337}, |
| 113 | {0xF163, 0x6430}, |
| 114 | {0xF16B, 0x6761}, |
| 115 | {0xF16C, 0x6438}, |
| 116 | {0xF268, 0x6934}, |
| 117 | {0xF269, 0x6573}, |
| 118 | {0xF2C3, 0x664E}, |
| 119 | {0xF375, 0x6762}, |
| 120 | {0xF466, 0x6935}, |
| 121 | {0xF4B5, 0x664D}, |
| 122 | {0xF4B6, 0x6962}, |
| 123 | {0xF4FD, 0x6A4C}, |
| 124 | {0xF663, 0x6A4B}, |
| 125 | {0xF664, 0x6C52}, |
| 126 | {0xF977, 0x7167}, |
| 127 | {0xF9C4, 0x7166}, |
| 128 | {0xF9C5, 0x7234}, |
| 129 | {0xF9C6, 0x7240}, |
| 130 | {0xF9C7, 0x7235}, |
| 131 | {0xF9D2, 0x7241}, |
| 132 | {0xf9d6, 0x0000} |
| 133 | }; |
| 134 | |
| 135 | /* map CNS 11643-1992 Plane 2 to Big5 Level 2 */ |
| 136 | static const codes_t cnsPlane2ToBig5Level2[49] = { /* range */ |
| 137 | {0x2121, 0xC940}, |
| 138 | {0x212B, 0xC94B}, |
| 139 | {0x214C, 0xC9BE}, |
| 140 | {0x214D, 0xC96C}, |
| 141 | {0x217D, 0xC9BF}, |
| 142 | {0x224D, 0xCAF7}, |
| 143 | {0x224E, 0xC9ED}, |
| 144 | {0x2439, 0xCAF8}, |
| 145 | {0x387E, 0xD77B}, |
| 146 | {0x3F6A, 0xD77A}, |
| 147 | {0x3F6B, 0xDBA7}, |
| 148 | {0x4424, 0x0000}, |
| 149 | {0x4176, 0xDDFC}, |
| 150 | {0x4177, 0x0000}, |
| 151 | {0x4424, 0xDDFD}, |
| 152 | {0x554B, 0xEBF1}, |
| 153 | {0x554C, 0xE8A3}, |
| 154 | {0x5722, 0xECDE}, |
| 155 | {0x5723, 0xE976}, |
| 156 | {0x5A28, 0xF0CB}, |
| 157 | {0x5A29, 0xEB5B}, |
| 158 | {0x5B3F, 0xEBF2}, |
| 159 | {0x5C6A, 0xECDF}, |
| 160 | {0x5D74, 0xF056}, |
| 161 | {0x5D75, 0xEDAA}, |
| 162 | {0x6039, 0xEEEC}, |
| 163 | {0x6243, 0xF057}, |
| 164 | {0x6337, 0xF0CC}, |
| 165 | {0x642F, 0xEEEB}, |
| 166 | {0x6430, 0xF163}, |
| 167 | {0x6438, 0xF16C}, |
| 168 | {0x6573, 0xF269}, |
| 169 | {0x664D, 0xF4B5}, |
| 170 | {0x664E, 0xF2C3}, |
| 171 | {0x6761, 0xF16B}, |
| 172 | {0x6762, 0xF375}, |
| 173 | {0x6934, 0xF268}, |
| 174 | {0x6935, 0xF466}, |
| 175 | {0x6962, 0xF4B6}, |
| 176 | {0x6A4B, 0xF663}, |
| 177 | {0x6A4C, 0xF4FD}, |
| 178 | {0x6C52, 0xF664}, |
| 179 | {0x7166, 0xF9C4}, |
| 180 | {0x7167, 0xF977}, |
| 181 | {0x7234, 0xF9C5}, |
| 182 | {0x7235, 0xF9C7}, |
| 183 | {0x7240, 0xF9C6}, |
| 184 | {0x7241, 0xF9D2}, |
| 185 | {0x7245, 0x0000} |
| 186 | }; |
| 187 | |
/*
 * Individual Big5 Level 1 codes that correspond to CNS 11643-1992 Plane 4.
 * Column 0 is the Big5 code, column 1 the CNS code (without the 0x8080 bits).
 */
static const unsigned short b1c4[][2] = {
	{0xC879, 0x2123},
	{0xC87B, 0x2124},
	{0xC87D, 0x212A},
	{0xC8A2, 0x2152}
};
| 195 | |
/*
 * Individual Big5 Level 2 codes that correspond to CNS 11643-1992 Plane 3.
 * Column 0 is the Big5 code, column 1 the CNS code (without the 0x8080 bits).
 */
static const unsigned short b2c3[][2] = {
	{0xF9D6, 0x4337},
	{0xF9D7, 0x4F50},
	{0xF9D8, 0x444E},
	{0xF9D9, 0x504A},
	{0xF9DA, 0x2C5D},
	{0xF9DB, 0x3D7E},
	{0xF9DC, 0x4B5C}
};
| 206 | |
| 207 | static unsigned short BinarySearchRange |
| 208 | (const codes_t *array, int high, unsigned short code) |
| 209 | { |
| 210 | int low, |
| 211 | mid, |
| 212 | distance, |
| 213 | tmp; |
| 214 | |
| 215 | low = 0; |
| 216 | mid = high >> 1; |
| 217 | |
| 218 | for (; low <= high; mid = (low + high) >> 1) |
| 219 | { |
| 220 | if ((array[mid].code <= code) && (array[mid + 1].code > code)) |
| 221 | { |
| 222 | if (0 == array[mid].peer) |
| 223 | return 0; |
| 224 | if (code >= 0xa140U) |
| 225 | { |
| 226 | /* big5 to cns */ |
| 227 | tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; |
| 228 | high = code & 0x00ff; |
| 229 | low = array[mid].code & 0x00ff; |
| 230 | |
| 231 | /* |
| 232 | * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e, |
| 233 | * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is |
| 234 | * 0x9d. [region_low, region_high] We |
| 235 | * should remember big5 has two different regions (above). |
| 236 | * There is a bias for the distance between these regions. |
| 237 | * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is |
| 238 | * 1.) bias = - 0x22. |
| 239 | */ |
| 240 | distance = tmp * 0x9d + high - low + |
| 241 | (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22) |
| 242 | : (low >= 0xa1 ? +0x22 : 0)); |
| 243 | |
| 244 | /* |
| 245 | * NOTE: we have to convert the distance into a code point. |
| 246 | * The code point's low_byte is 0x21 plus mod_0x5e. In the |
| 247 | * first, we extract the mod_0x5e of the starting code point, |
| 248 | * subtracting 0x21, and add distance to it. Then we calculate |
| 249 | * again mod_0x5e of them, and restore the final codepoint, |
| 250 | * adding 0x21. |
| 251 | */ |
| 252 | tmp = (array[mid].peer & 0x00ff) + distance - 0x21; |
| 253 | tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8) |
| 254 | + 0x21 + tmp % 0x5e; |
| 255 | return tmp; |
| 256 | } |
| 257 | else |
| 258 | { |
| 259 | /* cns to big5 */ |
| 260 | tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; |
| 261 | |
| 262 | /* |
| 263 | * NOTE: ISO charsets ranges between 0x21-0xfe (94charset). |
| 264 | * Its radix is 0x5e. But there is no distance bias like big5. |
| 265 | */ |
| 266 | distance = tmp * 0x5e |
| 267 | + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff)); |
| 268 | |
| 269 | /* |
| 270 | * NOTE: Similar to big5 to cns conversion, we extract |
| 271 | * mod_0x9d and restore mod_0x9d into a code point. |
| 272 | */ |
| 273 | low = array[mid].peer & 0x00ff; |
| 274 | tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40); |
| 275 | low = tmp % 0x9d; |
| 276 | tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8) |
| 277 | + (low > 0x3e ? 0x62 : 0x40) + low; |
| 278 | return tmp; |
| 279 | } |
| 280 | } |
| 281 | else if (array[mid].code > code) |
| 282 | high = mid - 1; |
| 283 | else |
| 284 | low = mid + 1; |
| 285 | } |
| 286 | |
| 287 | return 0; |
| 288 | } |
| 289 | |
| 290 | |
| 291 | unsigned short |
| 292 | BIG5toCNS(unsigned short big5, unsigned char *lc) |
| 293 | { |
| 294 | unsigned short cns = 0; |
| 295 | int i; |
| 296 | |
| 297 | if (big5 < 0xc940U) |
| 298 | { |
| 299 | /* level 1 */ |
| 300 | |
| 301 | for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++) |
| 302 | { |
| 303 | if (b1c4[i][0] == big5) |
| 304 | { |
| 305 | *lc = LC_CNS11643_4; |
| 306 | return (b1c4[i][1] | 0x8080U); |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5))) |
| 311 | *lc = LC_CNS11643_1; |
| 312 | } |
| 313 | else if (big5 == 0xc94aU) |
| 314 | { |
| 315 | /* level 2 */ |
| 316 | *lc = LC_CNS11643_1; |
| 317 | cns = 0x4442; |
| 318 | } |
| 319 | else |
| 320 | { |
| 321 | /* level 2 */ |
| 322 | for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++) |
| 323 | { |
| 324 | if (b2c3[i][0] == big5) |
| 325 | { |
| 326 | *lc = LC_CNS11643_3; |
| 327 | return (b2c3[i][1] | 0x8080U); |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5))) |
| 332 | *lc = LC_CNS11643_2; |
| 333 | } |
| 334 | |
| 335 | if (0 == cns) |
| 336 | { /* no mapping Big5 to CNS 11643-1992 */ |
| 337 | *lc = 0; |
| 338 | return (unsigned short) '?'; |
| 339 | } |
| 340 | |
| 341 | return cns | 0x8080; |
| 342 | } |
| 343 | |
| 344 | unsigned short |
| 345 | CNStoBIG5(unsigned short cns, unsigned char lc) |
| 346 | { |
| 347 | int i; |
| 348 | unsigned int big5 = 0; |
| 349 | |
| 350 | cns &= 0x7f7f; |
| 351 | |
| 352 | switch (lc) |
| 353 | { |
| 354 | case LC_CNS11643_1: |
| 355 | big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns); |
| 356 | break; |
| 357 | case LC_CNS11643_2: |
| 358 | big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns); |
| 359 | break; |
| 360 | case LC_CNS11643_3: |
| 361 | for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++) |
| 362 | { |
| 363 | if (b2c3[i][1] == cns) |
| 364 | return b2c3[i][0]; |
| 365 | } |
| 366 | break; |
| 367 | case LC_CNS11643_4: |
| 368 | for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++) |
| 369 | { |
| 370 | if (b1c4[i][1] == cns) |
| 371 | return b1c4[i][0]; |
| 372 | } |
| 373 | default: |
| 374 | break; |
| 375 | } |
| 376 | return big5; |
| 377 | } |
| 378 | |