/*
 * conversion between BIG5 and Mule Internal Code (CNS 11643-1992
 * plane 1 and plane 2).
 * This program is partially copied from lv (Multilingual file viewer)
 * and slightly modified. lv is written and copyrighted by NARITA Tomio
 * (nrt@web.ad.jp).
 *
 * 1999/1/15 Tatsuo Ishii
 *
 * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c
 */
12 | |
13 | /* can be used in either frontend or backend */ |
14 | #include "postgres_fe.h" |
15 | |
16 | #include "mb/pg_wchar.h" |
17 | |
/*
 * One entry of a range-mapping table.
 *
 * "code" is the first code of a range in the source character set and
 * "peer" is the code it corresponds to in the target character set.
 * A peer of 0 is treated by BinarySearchRange() as "no mapping".
 */
typedef struct
{
	unsigned short code;		/* first source code of the range */
	unsigned short peer;		/* matching target code, or 0 if unmapped */
} codes_t;
23 | |
24 | /* map Big5 Level 1 to CNS 11643-1992 Plane 1 */ |
25 | static const codes_t big5Level1ToCnsPlane1[25] = { /* range */ |
26 | {0xA140, 0x2121}, |
27 | {0xA1F6, 0x2258}, |
28 | {0xA1F7, 0x2257}, |
29 | {0xA1F8, 0x2259}, |
30 | {0xA2AF, 0x2421}, |
31 | {0xA3C0, 0x4221}, |
32 | {0xa3e1, 0x0000}, |
33 | {0xA440, 0x4421}, |
34 | {0xACFE, 0x5753}, |
35 | {0xacff, 0x0000}, |
36 | {0xAD40, 0x5323}, |
37 | {0xAFD0, 0x5754}, |
38 | {0xBBC8, 0x6B51}, |
39 | {0xBE52, 0x6B50}, |
40 | {0xBE53, 0x6F5C}, |
41 | {0xC1AB, 0x7536}, |
42 | {0xC2CB, 0x7535}, |
43 | {0xC2CC, 0x7737}, |
44 | {0xC361, 0x782E}, |
45 | {0xC3B9, 0x7865}, |
46 | {0xC3BA, 0x7864}, |
47 | {0xC3BB, 0x7866}, |
48 | {0xC456, 0x782D}, |
49 | {0xC457, 0x7962}, |
50 | {0xc67f, 0x0000} |
51 | }; |
52 | |
53 | /* map CNS 11643-1992 Plane 1 to Big5 Level 1 */ |
54 | static const codes_t cnsPlane1ToBig5Level1[26] = { /* range */ |
55 | {0x2121, 0xA140}, |
56 | {0x2257, 0xA1F7}, |
57 | {0x2258, 0xA1F6}, |
58 | {0x2259, 0xA1F8}, |
59 | {0x234f, 0x0000}, |
60 | {0x2421, 0xA2AF}, |
61 | {0x2571, 0x0000}, |
62 | {0x4221, 0xA3C0}, |
63 | {0x4242, 0x0000}, |
64 | {0x4421, 0xA440}, |
65 | {0x5323, 0xAD40}, |
66 | {0x5753, 0xACFE}, |
67 | {0x5754, 0xAFD0}, |
68 | {0x6B50, 0xBE52}, |
69 | {0x6B51, 0xBBC8}, |
70 | {0x6F5C, 0xBE53}, |
71 | {0x7535, 0xC2CB}, |
72 | {0x7536, 0xC1AB}, |
73 | {0x7737, 0xC2CC}, |
74 | {0x782D, 0xC456}, |
75 | {0x782E, 0xC361}, |
76 | {0x7864, 0xC3BA}, |
77 | {0x7865, 0xC3B9}, |
78 | {0x7866, 0xC3BB}, |
79 | {0x7962, 0xC457}, |
80 | {0x7d4c, 0x0000} |
81 | }; |
82 | |
83 | /* map Big5 Level 2 to CNS 11643-1992 Plane 2 */ |
84 | static const codes_t big5Level2ToCnsPlane2[48] = { /* range */ |
85 | {0xC940, 0x2121}, |
86 | {0xc94a, 0x0000}, |
87 | {0xC94B, 0x212B}, |
88 | {0xC96C, 0x214D}, |
89 | {0xC9BE, 0x214C}, |
90 | {0xC9BF, 0x217D}, |
91 | {0xC9ED, 0x224E}, |
92 | {0xCAF7, 0x224D}, |
93 | {0xCAF8, 0x2439}, |
94 | {0xD77A, 0x3F6A}, |
95 | {0xD77B, 0x387E}, |
96 | {0xDBA7, 0x3F6B}, |
97 | {0xDDFC, 0x4176}, |
98 | {0xDDFD, 0x4424}, |
99 | {0xE8A3, 0x554C}, |
100 | {0xE976, 0x5723}, |
101 | {0xEB5B, 0x5A29}, |
102 | {0xEBF1, 0x554B}, |
103 | {0xEBF2, 0x5B3F}, |
104 | {0xECDE, 0x5722}, |
105 | {0xECDF, 0x5C6A}, |
106 | {0xEDAA, 0x5D75}, |
107 | {0xEEEB, 0x642F}, |
108 | {0xEEEC, 0x6039}, |
109 | {0xF056, 0x5D74}, |
110 | {0xF057, 0x6243}, |
111 | {0xF0CB, 0x5A28}, |
112 | {0xF0CC, 0x6337}, |
113 | {0xF163, 0x6430}, |
114 | {0xF16B, 0x6761}, |
115 | {0xF16C, 0x6438}, |
116 | {0xF268, 0x6934}, |
117 | {0xF269, 0x6573}, |
118 | {0xF2C3, 0x664E}, |
119 | {0xF375, 0x6762}, |
120 | {0xF466, 0x6935}, |
121 | {0xF4B5, 0x664D}, |
122 | {0xF4B6, 0x6962}, |
123 | {0xF4FD, 0x6A4C}, |
124 | {0xF663, 0x6A4B}, |
125 | {0xF664, 0x6C52}, |
126 | {0xF977, 0x7167}, |
127 | {0xF9C4, 0x7166}, |
128 | {0xF9C5, 0x7234}, |
129 | {0xF9C6, 0x7240}, |
130 | {0xF9C7, 0x7235}, |
131 | {0xF9D2, 0x7241}, |
132 | {0xf9d6, 0x0000} |
133 | }; |
134 | |
135 | /* map CNS 11643-1992 Plane 2 to Big5 Level 2 */ |
136 | static const codes_t cnsPlane2ToBig5Level2[49] = { /* range */ |
137 | {0x2121, 0xC940}, |
138 | {0x212B, 0xC94B}, |
139 | {0x214C, 0xC9BE}, |
140 | {0x214D, 0xC96C}, |
141 | {0x217D, 0xC9BF}, |
142 | {0x224D, 0xCAF7}, |
143 | {0x224E, 0xC9ED}, |
144 | {0x2439, 0xCAF8}, |
145 | {0x387E, 0xD77B}, |
146 | {0x3F6A, 0xD77A}, |
147 | {0x3F6B, 0xDBA7}, |
148 | {0x4424, 0x0000}, |
149 | {0x4176, 0xDDFC}, |
150 | {0x4177, 0x0000}, |
151 | {0x4424, 0xDDFD}, |
152 | {0x554B, 0xEBF1}, |
153 | {0x554C, 0xE8A3}, |
154 | {0x5722, 0xECDE}, |
155 | {0x5723, 0xE976}, |
156 | {0x5A28, 0xF0CB}, |
157 | {0x5A29, 0xEB5B}, |
158 | {0x5B3F, 0xEBF2}, |
159 | {0x5C6A, 0xECDF}, |
160 | {0x5D74, 0xF056}, |
161 | {0x5D75, 0xEDAA}, |
162 | {0x6039, 0xEEEC}, |
163 | {0x6243, 0xF057}, |
164 | {0x6337, 0xF0CC}, |
165 | {0x642F, 0xEEEB}, |
166 | {0x6430, 0xF163}, |
167 | {0x6438, 0xF16C}, |
168 | {0x6573, 0xF269}, |
169 | {0x664D, 0xF4B5}, |
170 | {0x664E, 0xF2C3}, |
171 | {0x6761, 0xF16B}, |
172 | {0x6762, 0xF375}, |
173 | {0x6934, 0xF268}, |
174 | {0x6935, 0xF466}, |
175 | {0x6962, 0xF4B6}, |
176 | {0x6A4B, 0xF663}, |
177 | {0x6A4C, 0xF4FD}, |
178 | {0x6C52, 0xF664}, |
179 | {0x7166, 0xF9C4}, |
180 | {0x7167, 0xF977}, |
181 | {0x7234, 0xF9C5}, |
182 | {0x7235, 0xF9C7}, |
183 | {0x7240, 0xF9C6}, |
184 | {0x7241, 0xF9D2}, |
185 | {0x7245, 0x0000} |
186 | }; |
187 | |
/*
 * Individual Big5 Level 1 codes that correspond to CNS 11643-1992 Plane 4.
 * Each row is {Big5 code, CNS code}; the rows are scanned linearly.
 */
static const unsigned short b1c4[][2] = {
	/* Big5    CNS plane 4 */
	{0xC879, 0x2123},
	{0xC87B, 0x2124},
	{0xC87D, 0x212A},
	{0xC8A2, 0x2152}
};
195 | |
/*
 * Individual Big5 Level 2 codes that correspond to CNS 11643-1992 Plane 3.
 * Each row is {Big5 code, CNS code}; the rows are scanned linearly.
 */
static const unsigned short b2c3[][2] = {
	/* Big5    CNS plane 3 */
	{0xF9D6, 0x4337},
	{0xF9D7, 0x4F50},
	{0xF9D8, 0x444E},
	{0xF9D9, 0x504A},
	{0xF9DA, 0x2C5D},
	{0xF9DB, 0x3D7E},
	{0xF9DC, 0x4B5C}
};
206 | |
207 | static unsigned short BinarySearchRange |
208 | (const codes_t *array, int high, unsigned short code) |
209 | { |
210 | int low, |
211 | mid, |
212 | distance, |
213 | tmp; |
214 | |
215 | low = 0; |
216 | mid = high >> 1; |
217 | |
218 | for (; low <= high; mid = (low + high) >> 1) |
219 | { |
220 | if ((array[mid].code <= code) && (array[mid + 1].code > code)) |
221 | { |
222 | if (0 == array[mid].peer) |
223 | return 0; |
224 | if (code >= 0xa140U) |
225 | { |
226 | /* big5 to cns */ |
227 | tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; |
228 | high = code & 0x00ff; |
229 | low = array[mid].code & 0x00ff; |
230 | |
231 | /* |
232 | * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e, |
233 | * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is |
234 | * 0x9d. [region_low, region_high] We |
235 | * should remember big5 has two different regions (above). |
236 | * There is a bias for the distance between these regions. |
237 | * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is |
238 | * 1.) bias = - 0x22. |
239 | */ |
240 | distance = tmp * 0x9d + high - low + |
241 | (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22) |
242 | : (low >= 0xa1 ? +0x22 : 0)); |
243 | |
244 | /* |
245 | * NOTE: we have to convert the distance into a code point. |
246 | * The code point's low_byte is 0x21 plus mod_0x5e. In the |
247 | * first, we extract the mod_0x5e of the starting code point, |
248 | * subtracting 0x21, and add distance to it. Then we calculate |
249 | * again mod_0x5e of them, and restore the final codepoint, |
250 | * adding 0x21. |
251 | */ |
252 | tmp = (array[mid].peer & 0x00ff) + distance - 0x21; |
253 | tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8) |
254 | + 0x21 + tmp % 0x5e; |
255 | return tmp; |
256 | } |
257 | else |
258 | { |
259 | /* cns to big5 */ |
260 | tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; |
261 | |
262 | /* |
263 | * NOTE: ISO charsets ranges between 0x21-0xfe (94charset). |
264 | * Its radix is 0x5e. But there is no distance bias like big5. |
265 | */ |
266 | distance = tmp * 0x5e |
267 | + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff)); |
268 | |
269 | /* |
270 | * NOTE: Similar to big5 to cns conversion, we extract |
271 | * mod_0x9d and restore mod_0x9d into a code point. |
272 | */ |
273 | low = array[mid].peer & 0x00ff; |
274 | tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40); |
275 | low = tmp % 0x9d; |
276 | tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8) |
277 | + (low > 0x3e ? 0x62 : 0x40) + low; |
278 | return tmp; |
279 | } |
280 | } |
281 | else if (array[mid].code > code) |
282 | high = mid - 1; |
283 | else |
284 | low = mid + 1; |
285 | } |
286 | |
287 | return 0; |
288 | } |
289 | |
290 | |
291 | unsigned short |
292 | BIG5toCNS(unsigned short big5, unsigned char *lc) |
293 | { |
294 | unsigned short cns = 0; |
295 | int i; |
296 | |
297 | if (big5 < 0xc940U) |
298 | { |
299 | /* level 1 */ |
300 | |
301 | for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++) |
302 | { |
303 | if (b1c4[i][0] == big5) |
304 | { |
305 | *lc = LC_CNS11643_4; |
306 | return (b1c4[i][1] | 0x8080U); |
307 | } |
308 | } |
309 | |
310 | if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5))) |
311 | *lc = LC_CNS11643_1; |
312 | } |
313 | else if (big5 == 0xc94aU) |
314 | { |
315 | /* level 2 */ |
316 | *lc = LC_CNS11643_1; |
317 | cns = 0x4442; |
318 | } |
319 | else |
320 | { |
321 | /* level 2 */ |
322 | for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++) |
323 | { |
324 | if (b2c3[i][0] == big5) |
325 | { |
326 | *lc = LC_CNS11643_3; |
327 | return (b2c3[i][1] | 0x8080U); |
328 | } |
329 | } |
330 | |
331 | if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5))) |
332 | *lc = LC_CNS11643_2; |
333 | } |
334 | |
335 | if (0 == cns) |
336 | { /* no mapping Big5 to CNS 11643-1992 */ |
337 | *lc = 0; |
338 | return (unsigned short) '?'; |
339 | } |
340 | |
341 | return cns | 0x8080; |
342 | } |
343 | |
344 | unsigned short |
345 | CNStoBIG5(unsigned short cns, unsigned char lc) |
346 | { |
347 | int i; |
348 | unsigned int big5 = 0; |
349 | |
350 | cns &= 0x7f7f; |
351 | |
352 | switch (lc) |
353 | { |
354 | case LC_CNS11643_1: |
355 | big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns); |
356 | break; |
357 | case LC_CNS11643_2: |
358 | big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns); |
359 | break; |
360 | case LC_CNS11643_3: |
361 | for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++) |
362 | { |
363 | if (b2c3[i][1] == cns) |
364 | return b2c3[i][0]; |
365 | } |
366 | break; |
367 | case LC_CNS11643_4: |
368 | for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++) |
369 | { |
370 | if (b1c4[i][1] == cns) |
371 | return b1c4[i][0]; |
372 | } |
373 | default: |
374 | break; |
375 | } |
376 | return big5; |
377 | } |
378 | |