1/*-------------------------------------------------------------------------
2 *
3 * EUC_JIS_2004, SHIFT_JIS_2004
4 *
5 * Copyright (c) 2007-2019, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
9 *
10 *-------------------------------------------------------------------------
11 */
12
13#include "postgres.h"
14#include "fmgr.h"
15#include "mb/pg_wchar.h"
16
17PG_MODULE_MAGIC;
18
19PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
21
22static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
23static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
24
25/* ----------
26 * conv_proc(
27 * INTEGER, -- source encoding id
28 * INTEGER, -- destination encoding id
29 * CSTRING, -- source string (null terminated C string)
30 * CSTRING, -- destination string (null terminated C string)
31 * INTEGER -- source string length
32 * ) returns VOID;
33 * ----------
34 */
35
36Datum
37euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
38{
39 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
40 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
41 int len = PG_GETARG_INT32(4);
42
43 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
44
45 euc_jis_20042shift_jis_2004(src, dest, len);
46
47 PG_RETURN_VOID();
48}
49
50Datum
51shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
52{
53 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
54 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
55 int len = PG_GETARG_INT32(4);
56
57 CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
58
59 shift_jis_20042euc_jis_2004(src, dest, len);
60
61 PG_RETURN_VOID();
62}
63
64/*
65 * EUC_JIS_2004 -> SHIFT_JIS_2004
66 */
67static void
68euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
69{
70 int c1,
71 ku,
72 ten;
73 int l;
74
75 while (len > 0)
76 {
77 c1 = *euc;
78 if (!IS_HIGHBIT_SET(c1))
79 {
80 /* ASCII */
81 if (c1 == 0)
82 report_invalid_encoding(PG_EUC_JIS_2004,
83 (const char *) euc, len);
84 *p++ = c1;
85 euc++;
86 len--;
87 continue;
88 }
89
90 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
91
92 if (l < 0)
93 report_invalid_encoding(PG_EUC_JIS_2004,
94 (const char *) euc, len);
95
96 if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
97 {
98 *p++ = euc[1];
99 }
100 else if (c1 == SS3 && l == 3) /* JIS X 0213 plane 2? */
101 {
102 ku = euc[1] - 0xa0;
103 ten = euc[2] - 0xa0;
104
105 switch (ku)
106 {
107 case 1:
108 case 3:
109 case 4:
110 case 5:
111 case 8:
112 case 12:
113 case 13:
114 case 14:
115 case 15:
116 *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
117 break;
118 default:
119 if (ku >= 78 && ku <= 94)
120 {
121 *p++ = (ku + 0x19b) >> 1;
122 }
123 else
124 report_invalid_encoding(PG_EUC_JIS_2004,
125 (const char *) euc, len);
126 }
127
128 if (ku % 2)
129 {
130 if (ten >= 1 && ten <= 63)
131 *p++ = ten + 0x3f;
132 else if (ten >= 64 && ten <= 94)
133 *p++ = ten + 0x40;
134 else
135 report_invalid_encoding(PG_EUC_JIS_2004,
136 (const char *) euc, len);
137 }
138 else
139 *p++ = ten + 0x9e;
140 }
141
142 else if (l == 2) /* JIS X 0213 plane 1? */
143 {
144 ku = c1 - 0xa0;
145 ten = euc[1] - 0xa0;
146
147 if (ku >= 1 && ku <= 62)
148 *p++ = (ku + 0x101) >> 1;
149 else if (ku >= 63 && ku <= 94)
150 *p++ = (ku + 0x181) >> 1;
151 else
152 report_invalid_encoding(PG_EUC_JIS_2004,
153 (const char *) euc, len);
154
155 if (ku % 2)
156 {
157 if (ten >= 1 && ten <= 63)
158 *p++ = ten + 0x3f;
159 else if (ten >= 64 && ten <= 94)
160 *p++ = ten + 0x40;
161 else
162 report_invalid_encoding(PG_EUC_JIS_2004,
163 (const char *) euc, len);
164 }
165 else
166 *p++ = ten + 0x9e;
167 }
168 else
169 report_invalid_encoding(PG_EUC_JIS_2004,
170 (const char *) euc, len);
171
172 euc += l;
173 len -= l;
174 }
175 *p = '\0';
176}
177
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" number (1..94) indicated by the byte b, or -1 if b is
 * not in any of the accepted ranges.  As a side result, *ku tells whether
 * the character's "ku" is odd or even:
 *	*ku = 0: "ku" = even
 *	*ku = 1: "ku" = odd
 * On error *ku is set to 0, merely so that it always has a defined value.
 */
static int
get_ten(int b, int *ku)
{
	/* 0x40..0x7e: odd "ku", "ten" 1..63 */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* 0x80..0x9e: odd "ku", "ten" 64..94 (0x7f is skipped) */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	/* 0x9f..0xfc: even "ku", "ten" 1..94 */
	if (b >= 0x9f && b <= 0xfc)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* anything else is an error */
	*ku = 0;					/* keep compiler quiet */
	return -1;
}
210
211/*
212 * SHIFT_JIS_2004 ---> EUC_JIS_2004
213 */
214
215static void
216shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
217{
218 int c1;
219 int ku,
220 ten,
221 kubun;
222 int plane;
223 int l;
224
225 while (len > 0)
226 {
227 c1 = *sjis;
228
229 if (!IS_HIGHBIT_SET(c1))
230 {
231 /* ASCII */
232 if (c1 == 0)
233 report_invalid_encoding(PG_SHIFT_JIS_2004,
234 (const char *) sjis, len);
235 *p++ = c1;
236 sjis++;
237 len--;
238 continue;
239 }
240
241 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
242
243 if (l < 0 || l > len)
244 report_invalid_encoding(PG_SHIFT_JIS_2004,
245 (const char *) sjis, len);
246
247 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
248 {
249 /* JIS X0201 (1 byte kana) */
250 *p++ = SS2;
251 *p++ = c1;
252 }
253 else if (l == 2)
254 {
255 int c2 = sjis[1];
256
257 plane = 1;
258 ku = 1;
259 ten = 1;
260
261 /*
262 * JIS X 0213
263 */
264 if (c1 >= 0x81 && c1 <= 0x9f) /* plane 1 1ku-62ku */
265 {
266 ku = (c1 << 1) - 0x100;
267 ten = get_ten(c2, &kubun);
268 if (ten < 0)
269 report_invalid_encoding(PG_SHIFT_JIS_2004,
270 (const char *) sjis, len);
271 ku -= kubun;
272 }
273 else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
274 {
275 ku = (c1 << 1) - 0x180;
276 ten = get_ten(c2, &kubun);
277 if (ten < 0)
278 report_invalid_encoding(PG_SHIFT_JIS_2004,
279
280 (const char *) sjis, len);
281 ku -= kubun;
282 }
283 else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
284 * 1,3,4,5,8,12,13,14,15 ku */
285 {
286 plane = 2;
287 ten = get_ten(c2, &kubun);
288 if (ten < 0)
289 report_invalid_encoding(PG_SHIFT_JIS_2004,
290 (const char *) sjis, len);
291 switch (c1)
292 {
293 case 0xf0:
294 ku = kubun == 0 ? 8 : 1;
295 break;
296 case 0xf1:
297 ku = kubun == 0 ? 4 : 3;
298 break;
299 case 0xf2:
300 ku = kubun == 0 ? 12 : 5;
301 break;
302 default:
303 ku = kubun == 0 ? 14 : 13;
304 break;
305 }
306 }
307 else if (c1 >= 0xf4 && c1 <= 0xfc) /* plane 2 78-94ku */
308 {
309 plane = 2;
310 ten = get_ten(c2, &kubun);
311 if (ten < 0)
312 report_invalid_encoding(PG_SHIFT_JIS_2004,
313 (const char *) sjis, len);
314 if (c1 == 0xf4 && kubun == 1)
315 ku = 15;
316 else
317 ku = (c1 << 1) - 0x19a - kubun;
318 }
319 else
320 report_invalid_encoding(PG_SHIFT_JIS_2004,
321 (const char *) sjis, len);
322
323 if (plane == 2)
324 *p++ = SS3;
325
326 *p++ = ku + 0xa0;
327 *p++ = ten + 0xa0;
328 }
329 sjis += l;
330 len -= l;
331 }
332 *p = '\0';
333}
334