1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * EUC_JIS_2004, SHIFT_JIS_2004 |
4 | * |
5 | * Copyright (c) 2007-2019, PostgreSQL Global Development Group |
6 | * |
7 | * IDENTIFICATION |
8 | * src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c |
9 | * |
10 | *------------------------------------------------------------------------- |
11 | */ |
12 | |
13 | #include "postgres.h" |
14 | #include "fmgr.h" |
15 | #include "mb/pg_wchar.h" |
16 | |
/* Module magic block for this conversion library. */
PG_MODULE_MAGIC;

/* fmgr info records for the two SQL-callable conversion entry points. */
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);

/*
 * Internal workers.  Each converts "len" source bytes and writes the
 * NUL-terminated result to "p".
 */
static void euc_jis_20042shift_jis_2004(const unsigned char *euc,
										unsigned char *p,
										int len);
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis,
										unsigned char *p,
										int len);
24 | |
25 | /* ---------- |
26 | * conv_proc( |
27 | * INTEGER, -- source encoding id |
28 | * INTEGER, -- destination encoding id |
29 | * CSTRING, -- source string (null terminated C string) |
30 | * CSTRING, -- destination string (null terminated C string) |
31 | * INTEGER -- source string length |
32 | * ) returns VOID; |
33 | * ---------- |
34 | */ |
35 | |
36 | Datum |
37 | euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS) |
38 | { |
39 | unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); |
40 | unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); |
41 | int len = PG_GETARG_INT32(4); |
42 | |
43 | CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004); |
44 | |
45 | euc_jis_20042shift_jis_2004(src, dest, len); |
46 | |
47 | PG_RETURN_VOID(); |
48 | } |
49 | |
50 | Datum |
51 | shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS) |
52 | { |
53 | unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); |
54 | unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); |
55 | int len = PG_GETARG_INT32(4); |
56 | |
57 | CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004); |
58 | |
59 | shift_jis_20042euc_jis_2004(src, dest, len); |
60 | |
61 | PG_RETURN_VOID(); |
62 | } |
63 | |
64 | /* |
65 | * EUC_JIS_2004 -> SHIFT_JIS_2004 |
66 | */ |
67 | static void |
68 | euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) |
69 | { |
70 | int c1, |
71 | ku, |
72 | ten; |
73 | int l; |
74 | |
75 | while (len > 0) |
76 | { |
77 | c1 = *euc; |
78 | if (!IS_HIGHBIT_SET(c1)) |
79 | { |
80 | /* ASCII */ |
81 | if (c1 == 0) |
82 | report_invalid_encoding(PG_EUC_JIS_2004, |
83 | (const char *) euc, len); |
84 | *p++ = c1; |
85 | euc++; |
86 | len--; |
87 | continue; |
88 | } |
89 | |
90 | l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len); |
91 | |
92 | if (l < 0) |
93 | report_invalid_encoding(PG_EUC_JIS_2004, |
94 | (const char *) euc, len); |
95 | |
96 | if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */ |
97 | { |
98 | *p++ = euc[1]; |
99 | } |
100 | else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */ |
101 | { |
102 | ku = euc[1] - 0xa0; |
103 | ten = euc[2] - 0xa0; |
104 | |
105 | switch (ku) |
106 | { |
107 | case 1: |
108 | case 3: |
109 | case 4: |
110 | case 5: |
111 | case 8: |
112 | case 12: |
113 | case 13: |
114 | case 14: |
115 | case 15: |
116 | *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3; |
117 | break; |
118 | default: |
119 | if (ku >= 78 && ku <= 94) |
120 | { |
121 | *p++ = (ku + 0x19b) >> 1; |
122 | } |
123 | else |
124 | report_invalid_encoding(PG_EUC_JIS_2004, |
125 | (const char *) euc, len); |
126 | } |
127 | |
128 | if (ku % 2) |
129 | { |
130 | if (ten >= 1 && ten <= 63) |
131 | *p++ = ten + 0x3f; |
132 | else if (ten >= 64 && ten <= 94) |
133 | *p++ = ten + 0x40; |
134 | else |
135 | report_invalid_encoding(PG_EUC_JIS_2004, |
136 | (const char *) euc, len); |
137 | } |
138 | else |
139 | *p++ = ten + 0x9e; |
140 | } |
141 | |
142 | else if (l == 2) /* JIS X 0213 plane 1? */ |
143 | { |
144 | ku = c1 - 0xa0; |
145 | ten = euc[1] - 0xa0; |
146 | |
147 | if (ku >= 1 && ku <= 62) |
148 | *p++ = (ku + 0x101) >> 1; |
149 | else if (ku >= 63 && ku <= 94) |
150 | *p++ = (ku + 0x181) >> 1; |
151 | else |
152 | report_invalid_encoding(PG_EUC_JIS_2004, |
153 | (const char *) euc, len); |
154 | |
155 | if (ku % 2) |
156 | { |
157 | if (ten >= 1 && ten <= 63) |
158 | *p++ = ten + 0x3f; |
159 | else if (ten >= 64 && ten <= 94) |
160 | *p++ = ten + 0x40; |
161 | else |
162 | report_invalid_encoding(PG_EUC_JIS_2004, |
163 | (const char *) euc, len); |
164 | } |
165 | else |
166 | *p++ = ten + 0x9e; |
167 | } |
168 | else |
169 | report_invalid_encoding(PG_EUC_JIS_2004, |
170 | (const char *) euc, len); |
171 | |
172 | euc += l; |
173 | len -= l; |
174 | } |
175 | *p = '\0'; |
176 | } |
177 | |
/*
 * Decode the second byte "b" of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" (cell) number, or -1 if "b" is not a valid second byte.
 * The parity of the "ku" (row) implied by the byte is stored in *ku:
 *	*ku = 0: "ku" is even (b in 0x9f..0xfc)
 *	*ku = 1: "ku" is odd  (b in 0x40..0x7e or 0x80..0x9e)
 * On error *ku is set to 0 so that it always holds a defined value.
 */
static int
get_ten(int b, int *ku)
{
	/* upper range: even row */
	if (b >= 0x9f && b <= 0xfc)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* odd row, cells 64..94 (0x7f is skipped by the encoding) */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	/* odd row, cells 1..63 */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* not a valid second byte */
	*ku = 0;
	return -1;
}
210 | |
211 | /* |
212 | * SHIFT_JIS_2004 ---> EUC_JIS_2004 |
213 | */ |
214 | |
215 | static void |
216 | shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len) |
217 | { |
218 | int c1; |
219 | int ku, |
220 | ten, |
221 | kubun; |
222 | int plane; |
223 | int l; |
224 | |
225 | while (len > 0) |
226 | { |
227 | c1 = *sjis; |
228 | |
229 | if (!IS_HIGHBIT_SET(c1)) |
230 | { |
231 | /* ASCII */ |
232 | if (c1 == 0) |
233 | report_invalid_encoding(PG_SHIFT_JIS_2004, |
234 | (const char *) sjis, len); |
235 | *p++ = c1; |
236 | sjis++; |
237 | len--; |
238 | continue; |
239 | } |
240 | |
241 | l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len); |
242 | |
243 | if (l < 0 || l > len) |
244 | report_invalid_encoding(PG_SHIFT_JIS_2004, |
245 | (const char *) sjis, len); |
246 | |
247 | if (c1 >= 0xa1 && c1 <= 0xdf && l == 1) |
248 | { |
249 | /* JIS X0201 (1 byte kana) */ |
250 | *p++ = SS2; |
251 | *p++ = c1; |
252 | } |
253 | else if (l == 2) |
254 | { |
255 | int c2 = sjis[1]; |
256 | |
257 | plane = 1; |
258 | ku = 1; |
259 | ten = 1; |
260 | |
261 | /* |
262 | * JIS X 0213 |
263 | */ |
264 | if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */ |
265 | { |
266 | ku = (c1 << 1) - 0x100; |
267 | ten = get_ten(c2, &kubun); |
268 | if (ten < 0) |
269 | report_invalid_encoding(PG_SHIFT_JIS_2004, |
270 | (const char *) sjis, len); |
271 | ku -= kubun; |
272 | } |
273 | else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */ |
274 | { |
275 | ku = (c1 << 1) - 0x180; |
276 | ten = get_ten(c2, &kubun); |
277 | if (ten < 0) |
278 | report_invalid_encoding(PG_SHIFT_JIS_2004, |
279 | |
280 | (const char *) sjis, len); |
281 | ku -= kubun; |
282 | } |
283 | else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2 |
284 | * 1,3,4,5,8,12,13,14,15 ku */ |
285 | { |
286 | plane = 2; |
287 | ten = get_ten(c2, &kubun); |
288 | if (ten < 0) |
289 | report_invalid_encoding(PG_SHIFT_JIS_2004, |
290 | (const char *) sjis, len); |
291 | switch (c1) |
292 | { |
293 | case 0xf0: |
294 | ku = kubun == 0 ? 8 : 1; |
295 | break; |
296 | case 0xf1: |
297 | ku = kubun == 0 ? 4 : 3; |
298 | break; |
299 | case 0xf2: |
300 | ku = kubun == 0 ? 12 : 5; |
301 | break; |
302 | default: |
303 | ku = kubun == 0 ? 14 : 13; |
304 | break; |
305 | } |
306 | } |
307 | else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */ |
308 | { |
309 | plane = 2; |
310 | ten = get_ten(c2, &kubun); |
311 | if (ten < 0) |
312 | report_invalid_encoding(PG_SHIFT_JIS_2004, |
313 | (const char *) sjis, len); |
314 | if (c1 == 0xf4 && kubun == 1) |
315 | ku = 15; |
316 | else |
317 | ku = (c1 << 1) - 0x19a - kubun; |
318 | } |
319 | else |
320 | report_invalid_encoding(PG_SHIFT_JIS_2004, |
321 | (const char *) sjis, len); |
322 | |
323 | if (plane == 2) |
324 | *p++ = SS3; |
325 | |
326 | *p++ = ku + 0xa0; |
327 | *p++ = ten + 0xa0; |
328 | } |
329 | sjis += l; |
330 | len -= l; |
331 | } |
332 | *p = '\0'; |
333 | } |
334 | |