wchar.c source code [PostgreSQL/src/backend/utils/mb/wchar.c]

1	/*
2	* conversion functions between pg_wchar and multibyte streams.
3	* Tatsuo Ishii
4	* src/backend/utils/mb/wchar.c
5	*
6	*/
7	/* can be used in either frontend or backend */
8	#ifdef FRONTEND
9	#include "postgres_fe.h"
10	#else
11	#include "postgres.h"
12	#endif
13
14	#include "mb/pg_wchar.h"
15
16
17	/*
18	* Operations on multi-byte encodings are driven by a table of helper
19	* functions.
20	*
21	* To add an encoding support, define mblen(), dsplen() and verifier() for
22	* the encoding. For server-encodings, also define mb2wchar() and wchar2mb()
23	* conversion functions.
24	*
25	* These functions generally assume that their input is validly formed.
26	* The "verifier" functions, further down in the file, have to be more
27	* paranoid.
28	*
29	* We expect that mblen() does not need to examine more than the first byte
30	* of the character to discover the correct length. GB18030 is an exception
31	* to that rule, though, as it also looks at second byte. But even that
32	* behaves in a predictable way, if you only pass the first byte: it will
33	* treat 4-byte encoded characters as two 2-byte encoded characters, which is
34	* good enough for all current uses.
35	*
36	* Note: for the display output of psql to work properly, the return values
37	* of the dsplen functions must conform to the Unicode standard. In particular
38	* the NUL character is zero width and control characters are generally
39	* width -1. It is recommended that non-ASCII encodings refer their ASCII
40	* subset to the ASCII routines to ensure consistency.
41	*/
42
43	/*
44	* SQL/ASCII
45	*/
46	static int
47	pg_ascii2wchar_with_len(const unsigned char from, pg_wchar to, int len)
48	{
49	int cnt = `0`;
50
51	while (len > `0` && *from)
52	{
53	to++ = from++;
54	len--;
55	cnt++;
56	}
57	*to = `0`;
58	return cnt;
59	}
60
/*
 * Byte length of the character at "s": always 1 for SQL_ASCII.
 * The argument is not inspected.
 */
static int
pg_ascii_mblen(const unsigned char *s)
{
	return 1;
}
66
/*
 * Display width of the single-byte character at "s".
 *
 * Returns 0 for NUL, -1 for bytes below 0x20 and for 0x7f (DEL),
 * and 1 for every other byte value.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
	if (*s == '\0')
		return 0;
	/* compare the byte value, not the pointer */
	if (*s < 0x20 || *s == 0x7f)
		return -1;

	return 1;
}
77
78	/*
79	* EUC
80	*/
81	static int
82	pg_euc2wchar_with_len(const unsigned char from, pg_wchar to, int len)
83	{
84	int cnt = `0`;
85
86	while (len > `0` && *from)
87	{
88	if (from == SS2 && len >= `2`) /* JIS X 0201 (so called "1 byte*
89	* KANA") */
90	{
91	from++;
92	to = (SS2 << `8`) \| from++;
93	len -= `2`;
94	}
95	else if (from == SS3 && len >= `3`) /* JIS X 0212 KANJI /
96	{
97	from++;
98	to = (SS3 << `16`) \| (from++ << `8`);
99	to \|= from++;
100	len -= `3`;
101	}
102	else if (IS_HIGHBIT_SET(from) && len >= `2`) /* JIS X 0208 KANJI /
103	{
104	to = from++ << `8`;
105	to \|= from++;
106	len -= `2`;
107	}
108	else / must be ASCII /
109	{
110	to = from++;
111	len--;
112	}
113	to++;
114	cnt++;
115	}
116	*to = `0`;
117	return cnt;
118	}
119
120	static inline int
121	pg_euc_mblen(const unsigned char *s)
122	{
123	int len;
124
125	if (*s == SS2)
126	len = `2`;
127	else if (*s == SS3)
128	len = `3`;
129	else if (IS_HIGHBIT_SET(*s))
130	len = `2`;
131	else
132	len = `1`;
133	return len;
134	}
135
136	static inline int
137	pg_euc_dsplen(const unsigned char *s)
138	{
139	int len;
140
141	if (*s == SS2)
142	len = `2`;
143	else if (*s == SS3)
144	len = `2`;
145	else if (IS_HIGHBIT_SET(*s))
146	len = `2`;
147	else
148	len = pg_ascii_dsplen(s);
149	return len;
150	}
151
152	/*
153	* EUC_JP
154	*/
155	static int
156	pg_eucjp2wchar_with_len(const unsigned char from, pg_wchar to, int len)
157	{
158	return pg_euc2wchar_with_len(from, to, len);
159	}
160
/* EUC_JP character byte length: same rule as pg_euc_mblen(). */
static int
pg_eucjp_mblen(const unsigned char *s)
{
	return pg_euc_mblen(s);
}
166
167	static int
168	pg_eucjp_dsplen(const unsigned char *s)
169	{
170	int len;
171
172	if (*s == SS2)
173	len = `1`;
174	else if (*s == SS3)
175	len = `2`;
176	else if (IS_HIGHBIT_SET(*s))
177	len = `2`;
178	else
179	len = pg_ascii_dsplen(s);
180	return len;
181	}
182
183	/*
184	* EUC_KR
185	*/
186	static int
187	pg_euckr2wchar_with_len(const unsigned char from, pg_wchar to, int len)
188	{
189	return pg_euc2wchar_with_len(from, to, len);
190	}
191
/* EUC_KR character byte length: same rule as pg_euc_mblen(). */
static int
pg_euckr_mblen(const unsigned char *s)
{
	return pg_euc_mblen(s);
}
197
/* EUC_KR display width: same rule as pg_euc_dsplen(). */
static int
pg_euckr_dsplen(const unsigned char *s)
{
	return pg_euc_dsplen(s);
}
203
204	/*
205	* EUC_CN
206	*
207	*/
208	static int
209	pg_euccn2wchar_with_len(const unsigned char from, pg_wchar to, int len)
210	{
211	int cnt = `0`;
212
213	while (len > `0` && *from)
214	{
215	if (from == SS2 && len >= `3`) /* code set 2 (unused?) /
216	{
217	from++;
218	to = (SS2 << `16`) \| (from++ << `8`);
219	to \|= from++;
220	len -= `3`;
221	}
222	else if (from == SS3 && len >= `3`) /* code set 3 (unused ?) /
223	{
224	from++;
225	to = (SS3 << `16`) \| (from++ << `8`);
226	to \|= from++;
227	len -= `3`;
228	}
229	else if (IS_HIGHBIT_SET(from) && len >= `2`) /* code set 1 /
230	{
231	to = from++ << `8`;
232	to \|= from++;
233	len -= `2`;
234	}
235	else
236	{
237	to = from++;
238	len--;
239	}
240	to++;
241	cnt++;
242	}
243	*to = `0`;
244	return cnt;
245	}
246
/* EUC_CN character byte length: 2 if the first byte has its high bit set, else 1. */
static int
pg_euccn_mblen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;
	else
		len = 1;
	return len;
}
258
/*
 * EUC_CN display width: 2 columns for a high-bit lead byte, otherwise
 * whatever pg_ascii_dsplen() reports.
 */
static int
pg_euccn_dsplen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;
	else
		len = pg_ascii_dsplen(s);
	return len;
}
270
271	/*
272	* EUC_TW
273	*
274	*/
275	static int
276	pg_euctw2wchar_with_len(const unsigned char from, pg_wchar to, int len)
277	{
278	int cnt = `0`;
279
280	while (len > `0` && *from)
281	{
282	if (from == SS2 && len >= `4`) /* code set 2 /
283	{
284	from++;
285	to = (((uint32) SS2) << `24`) \| (from++ << `16`);
286	to \|= from++ << `8`;
287	to \|= from++;
288	len -= `4`;
289	}
290	else if (from == SS3 && len >= `3`) /* code set 3 (unused?) /
291	{
292	from++;
293	to = (SS3 << `16`) \| (from++ << `8`);
294	to \|= from++;
295	len -= `3`;
296	}
297	else if (IS_HIGHBIT_SET(from) && len >= `2`) /* code set 2 /
298	{
299	to = from++ << `8`;
300	to \|= from++;
301	len -= `2`;
302	}
303	else
304	{
305	to = from++;
306	len--;
307	}
308	to++;
309	cnt++;
310	}
311	*to = `0`;
312	return cnt;
313	}
314
315	static int
316	pg_euctw_mblen(const unsigned char *s)
317	{
318	int len;
319
320	if (*s == SS2)
321	len = `4`;
322	else if (*s == SS3)
323	len = `3`;
324	else if (IS_HIGHBIT_SET(*s))
325	len = `2`;
326	else
327	len = `1`;
328	return len;
329	}
330
331	static int
332	pg_euctw_dsplen(const unsigned char *s)
333	{
334	int len;
335
336	if (*s == SS2)
337	len = `2`;
338	else if (*s == SS3)
339	len = `2`;
340	else if (IS_HIGHBIT_SET(*s))
341	len = `2`;
342	else
343	len = pg_ascii_dsplen(s);
344	return len;
345	}
346
347	/*
348	* Convert pg_wchar to EUC_* encoding.
349	* caller must allocate enough space for "to", including a trailing zero!
350	* len: length of from.
351	* "from" not necessarily null terminated.
352	*/
353	static int
354	pg_wchar2euc_with_len(const pg_wchar from, unsigned* char to, int* len)
355	{
356	int cnt = `0`;
357
358	while (len > `0` && *from)
359	{
360	unsigned char c;
361
362	if ((c = (*from >> `24`)))
363	{
364	*to++ = c;
365	to++ = (from >> `16`) & `0xff`;
366	to++ = (from >> `8`) & `0xff`;
367	to++ = from & `0xff`;
368	cnt += `4`;
369	}
370	else if ((c = (*from >> `16`)))
371	{
372	*to++ = c;
373	to++ = (from >> `8`) & `0xff`;
374	to++ = from & `0xff`;
375	cnt += `3`;
376	}
377	else if ((c = (*from >> `8`)))
378	{
379	*to++ = c;
380	to++ = from & `0xff`;
381	cnt += `2`;
382	}
383	else
384	{
385	to++ = from;
386	cnt++;
387	}
388	from++;
389	len--;
390	}
391	*to = `0`;
392	return cnt;
393	}
394
395
396	/*
397	* JOHAB
398	*/
/* JOHAB character byte length: reuses the generic EUC rule. */
static int
pg_johab_mblen(const unsigned char *s)
{
	return pg_euc_mblen(s);
}
404
/* JOHAB display width: reuses the generic EUC rule. */
static int
pg_johab_dsplen(const unsigned char *s)
{
	return pg_euc_dsplen(s);
}
410
411	/*
412	* convert UTF8 string to pg_wchar (UCS-4)
413	* caller must allocate enough space for "to", including a trailing zero!
414	* len: length of from.
415	* "from" not necessarily null terminated.
416	*/
417	static int
418	pg_utf2wchar_with_len(const unsigned char from, pg_wchar to, int len)
419	{
420	int cnt = `0`;
421	uint32 c1,
422	c2,
423	c3,
424	c4;
425
426	while (len > `0` && *from)
427	{
428	if ((*from & `0x80`) == `0`)
429	{
430	to = from++;
431	len--;
432	}
433	else if ((*from & `0xe0`) == `0xc0`)
434	{
435	if (len < `2`)
436	break; / drop trailing incomplete char /
437	c1 = *from++ & `0x1f`;
438	c2 = *from++ & `0x3f`;
439	*to = (c1 << `6`) \| c2;
440	len -= `2`;
441	}
442	else if ((*from & `0xf0`) == `0xe0`)
443	{
444	if (len < `3`)
445	break; / drop trailing incomplete char /
446	c1 = *from++ & `0x0f`;
447	c2 = *from++ & `0x3f`;
448	c3 = *from++ & `0x3f`;
449	*to = (c1 << `12`) \| (c2 << `6`) \| c3;
450	len -= `3`;
451	}
452	else if ((*from & `0xf8`) == `0xf0`)
453	{
454	if (len < `4`)
455	break; / drop trailing incomplete char /
456	c1 = *from++ & `0x07`;
457	c2 = *from++ & `0x3f`;
458	c3 = *from++ & `0x3f`;
459	c4 = *from++ & `0x3f`;
460	*to = (c1 << `18`) \| (c2 << `12`) \| (c3 << `6`) \| c4;
461	len -= `4`;
462	}
463	else
464	{
465	/ treat a bogus char as length 1; not ours to raise error /
466	to = from++;
467	len--;
468	}
469	to++;
470	cnt++;
471	}
472	*to = `0`;
473	return cnt;
474	}
475
476
477	/*
478	* Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
479	* space allocated.
480	*/
481	unsigned char *
482	unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
483	{
484	if (c <= `0x7F`)
485	{
486	utf8string[`0`] = c;
487	}
488	else if (c <= `0x7FF`)
489	{
490	utf8string[`0`] = `0xC0` \| ((c >> `6`) & `0x1F`);
491	utf8string[`1`] = `0x80` \| (c & `0x3F`);
492	}
493	else if (c <= `0xFFFF`)
494	{
495	utf8string[`0`] = `0xE0` \| ((c >> `12`) & `0x0F`);
496	utf8string[`1`] = `0x80` \| ((c >> `6`) & `0x3F`);
497	utf8string[`2`] = `0x80` \| (c & `0x3F`);
498	}
499	else
500	{
501	utf8string[`0`] = `0xF0` \| ((c >> `18`) & `0x07`);
502	utf8string[`1`] = `0x80` \| ((c >> `12`) & `0x3F`);
503	utf8string[`2`] = `0x80` \| ((c >> `6`) & `0x3F`);
504	utf8string[`3`] = `0x80` \| (c & `0x3F`);
505	}
506
507	return utf8string;
508	}
509
510	/*
511	* Trivial conversion from pg_wchar to UTF-8.
512	* caller should allocate enough space for "to"
513	* len: length of from.
514	* "from" not necessarily null terminated.
515	*/
516	static int
517	pg_wchar2utf_with_len(const pg_wchar from, unsigned* char to, int* len)
518	{
519	int cnt = `0`;
520
521	while (len > `0` && *from)
522	{
523	int char_len;
524
525	unicode_to_utf8(*from, to);
526	char_len = pg_utf_mblen(to);
527	cnt += char_len;
528	to += char_len;
529	from++;
530	len--;
531	}
532	*to = `0`;
533	return cnt;
534	}
535
536	/*
537	* Return the byte length of a UTF8 character pointed to by s
538	*
539	* Note: in the current implementation we do not support UTF8 sequences
540	* of more than 4 bytes; hence do NOT return a value larger than 4.
541	* We return "1" for any leading byte that is either flat-out illegal or
542	* indicates a length larger than we support.
543	*
544	* pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
545	* other places would need to be fixed to change this.
546	*/
/*
 * Byte length of the UTF-8 character starting at "s", judged from the lead
 * byte alone.  Returns 1..4; a continuation byte or a lead byte announcing
 * a 5- or 6-byte form is reported as length 1 (the longer forms are
 * compiled only under NOT_USED).
 */
int
pg_utf_mblen(const unsigned char *s)
{
	int			len;

	if ((*s & 0x80) == 0)
		len = 1;
	else if ((*s & 0xe0) == 0xc0)
		len = 2;
	else if ((*s & 0xf0) == 0xe0)
		len = 3;
	else if ((*s & 0xf8) == 0xf0)
		len = 4;
#ifdef NOT_USED
	else if ((*s & 0xfc) == 0xf8)
		len = 5;
	else if ((*s & 0xfe) == 0xfc)
		len = 6;
#endif
	else
		len = 1;
	return len;
}
570
571	/*
572	* This is an implementation of wcwidth() and wcswidth() as defined in
573	* "The Single UNIX Specification, Version 2, The Open Group, 1997"
574	* <http://www.UNIX-systems.org/online.html>
575	*
576	* Markus Kuhn -- 2001-09-08 -- public domain
577	*
578	* customised for PostgreSQL
579	*
580	* original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
581	*/
582
/* One closed range [first, last] of code points in a sorted lookup table. */
struct mbinterval
{
	unsigned short first;		/* lowest code point in the range */
	unsigned short last;		/* highest code point in the range */
};
588
589	/ auxiliary function for binary search in interval table /
590	static int
591	mbbisearch(pg_wchar ucs, const struct mbinterval table, int* max)
592	{
593	int min = `0`;
594	int mid;
595
596	if (ucs < table[`0`].first \|\| ucs > table[max].last)
597	return `0`;
598	while (max >= min)
599	{
600	mid = (min + max) / `2`;
601	if (ucs > table[mid].last)
602	min = mid + `1`;
603	else if (ucs < table[mid].first)
604	max = mid - `1`;
605	else
606	return `1`;
607	}
608
609	return `0`;
610	}
611
612
613	/* The following functions define the column width of an ISO 10646
614	* character as follows:
615	*
616	* - The null character (U+0000) has a column width of 0.
617	*
618	* - Other C0/C1 control characters and DEL will lead to a return
619	* value of -1.
620	*
621	* - Non-spacing and enclosing combining characters (general
622	* category code Mn or Me in the Unicode database) have a
623	* column width of 0.
624	*
625	* - Other format characters (general category code Cf in the Unicode
626	* database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
627	*
628	* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
629	* have a column width of 0.
630	*
631	* - Spacing characters in the East Asian Wide (W) or East Asian
632	* FullWidth (F) category as defined in Unicode Technical
633	* Report #11 have a column width of 2.
634	*
635	* - All remaining characters (including all printable
636	* ISO 8859-1 and WGL4 characters, Unicode control characters,
637	* etc.) have a column width of 1.
638	*
639	* This implementation assumes that wchar_t characters are encoded
640	* in ISO 10646.
641	*/
642
643	static int
644	ucs_wcwidth(pg_wchar ucs)
645	{
646	/ sorted list of non-overlapping intervals of non-spacing characters /
647	static const struct mbinterval combining[] = {
648	{`0x0300`, `0x036F`}, {`0x0483`, `0x0489`}, {`0x0591`, `0x05BD`},
649	{`0x05BF`, `0x05BF`}, {`0x05C1`, `0x05C2`}, {`0x05C4`, `0x05C5`},
650	{`0x05C7`, `0x05C7`}, {`0x0610`, `0x061A`}, {`0x064B`, `0x065F`},
651	{`0x0670`, `0x0670`}, {`0x06D6`, `0x06DC`}, {`0x06DF`, `0x06E4`},
652	{`0x06E7`, `0x06E8`}, {`0x06EA`, `0x06ED`}, {`0x0711`, `0x0711`},
653	{`0x0730`, `0x074A`}, {`0x07A6`, `0x07B0`}, {`0x07EB`, `0x07F3`},
654	{`0x07FD`, `0x07FD`}, {`0x0816`, `0x0819`}, {`0x081B`, `0x0823`},
655	{`0x0825`, `0x0827`}, {`0x0829`, `0x082D`}, {`0x0859`, `0x085B`},
656	{`0x08D3`, `0x08E1`}, {`0x08E3`, `0x0902`}, {`0x093A`, `0x093A`},
657	{`0x093C`, `0x093C`}, {`0x0941`, `0x0948`}, {`0x094D`, `0x094D`},
658	{`0x0951`, `0x0957`}, {`0x0962`, `0x0963`}, {`0x0981`, `0x0981`},
659	{`0x09BC`, `0x09BC`}, {`0x09C1`, `0x09C4`}, {`0x09CD`, `0x09CD`},
660	{`0x09E2`, `0x09E3`}, {`0x09FE`, `0x0A02`}, {`0x0A3C`, `0x0A3C`},
661	{`0x0A41`, `0x0A51`}, {`0x0A70`, `0x0A71`}, {`0x0A75`, `0x0A75`},
662	{`0x0A81`, `0x0A82`}, {`0x0ABC`, `0x0ABC`}, {`0x0AC1`, `0x0AC8`},
663	{`0x0ACD`, `0x0ACD`}, {`0x0AE2`, `0x0AE3`}, {`0x0AFA`, `0x0B01`},
664	{`0x0B3C`, `0x0B3C`}, {`0x0B3F`, `0x0B3F`}, {`0x0B41`, `0x0B44`},
665	{`0x0B4D`, `0x0B56`}, {`0x0B62`, `0x0B63`}, {`0x0B82`, `0x0B82`},
666	{`0x0BC0`, `0x0BC0`}, {`0x0BCD`, `0x0BCD`}, {`0x0C00`, `0x0C00`},
667	{`0x0C04`, `0x0C04`}, {`0x0C3E`, `0x0C40`}, {`0x0C46`, `0x0C56`},
668	{`0x0C62`, `0x0C63`}, {`0x0C81`, `0x0C81`}, {`0x0CBC`, `0x0CBC`},
669	{`0x0CBF`, `0x0CBF`}, {`0x0CC6`, `0x0CC6`}, {`0x0CCC`, `0x0CCD`},
670	{`0x0CE2`, `0x0CE3`}, {`0x0D00`, `0x0D01`}, {`0x0D3B`, `0x0D3C`},
671	{`0x0D41`, `0x0D44`}, {`0x0D4D`, `0x0D4D`}, {`0x0D62`, `0x0D63`},
672	{`0x0DCA`, `0x0DCA`}, {`0x0DD2`, `0x0DD6`}, {`0x0E31`, `0x0E31`},
673	{`0x0E34`, `0x0E3A`}, {`0x0E47`, `0x0E4E`}, {`0x0EB1`, `0x0EB1`},
674	{`0x0EB4`, `0x0EBC`}, {`0x0EC8`, `0x0ECD`}, {`0x0F18`, `0x0F19`},
675	{`0x0F35`, `0x0F35`}, {`0x0F37`, `0x0F37`}, {`0x0F39`, `0x0F39`},
676	{`0x0F71`, `0x0F7E`}, {`0x0F80`, `0x0F84`}, {`0x0F86`, `0x0F87`},
677	{`0x0F8D`, `0x0FBC`}, {`0x0FC6`, `0x0FC6`}, {`0x102D`, `0x1030`},
678	{`0x1032`, `0x1037`}, {`0x1039`, `0x103A`}, {`0x103D`, `0x103E`},
679	{`0x1058`, `0x1059`}, {`0x105E`, `0x1060`}, {`0x1071`, `0x1074`},
680	{`0x1082`, `0x1082`}, {`0x1085`, `0x1086`}, {`0x108D`, `0x108D`},
681	{`0x109D`, `0x109D`}, {`0x135D`, `0x135F`}, {`0x1712`, `0x1714`},
682	{`0x1732`, `0x1734`}, {`0x1752`, `0x1753`}, {`0x1772`, `0x1773`},
683	{`0x17B4`, `0x17B5`}, {`0x17B7`, `0x17BD`}, {`0x17C6`, `0x17C6`},
684	{`0x17C9`, `0x17D3`}, {`0x17DD`, `0x17DD`}, {`0x180B`, `0x180D`},
685	{`0x1885`, `0x1886`}, {`0x18A9`, `0x18A9`}, {`0x1920`, `0x1922`},
686	{`0x1927`, `0x1928`}, {`0x1932`, `0x1932`}, {`0x1939`, `0x193B`},
687	{`0x1A17`, `0x1A18`}, {`0x1A1B`, `0x1A1B`}, {`0x1A56`, `0x1A56`},
688	{`0x1A58`, `0x1A60`}, {`0x1A62`, `0x1A62`}, {`0x1A65`, `0x1A6C`},
689	{`0x1A73`, `0x1A7F`}, {`0x1AB0`, `0x1B03`}, {`0x1B34`, `0x1B34`},
690	{`0x1B36`, `0x1B3A`}, {`0x1B3C`, `0x1B3C`}, {`0x1B42`, `0x1B42`},
691	{`0x1B6B`, `0x1B73`}, {`0x1B80`, `0x1B81`}, {`0x1BA2`, `0x1BA5`},
692	{`0x1BA8`, `0x1BA9`}, {`0x1BAB`, `0x1BAD`}, {`0x1BE6`, `0x1BE6`},
693	{`0x1BE8`, `0x1BE9`}, {`0x1BED`, `0x1BED`}, {`0x1BEF`, `0x1BF1`},
694	{`0x1C2C`, `0x1C33`}, {`0x1C36`, `0x1C37`}, {`0x1CD0`, `0x1CD2`},
695	{`0x1CD4`, `0x1CE0`}, {`0x1CE2`, `0x1CE8`}, {`0x1CED`, `0x1CED`},
696	{`0x1CF4`, `0x1CF4`}, {`0x1CF8`, `0x1CF9`}, {`0x1DC0`, `0x1DFF`},
697	{`0x20D0`, `0x20F0`}, {`0x2CEF`, `0x2CF1`}, {`0x2D7F`, `0x2D7F`},
698	{`0x2DE0`, `0x2DFF`}, {`0x302A`, `0x302D`}, {`0x3099`, `0x309A`},
699	{`0xA66F`, `0xA672`}, {`0xA674`, `0xA67D`}, {`0xA69E`, `0xA69F`},
700	{`0xA6F0`, `0xA6F1`}, {`0xA802`, `0xA802`}, {`0xA806`, `0xA806`},
701	{`0xA80B`, `0xA80B`}, {`0xA825`, `0xA826`}, {`0xA8C4`, `0xA8C5`},
702	{`0xA8E0`, `0xA8F1`}, {`0xA8FF`, `0xA8FF`}, {`0xA926`, `0xA92D`},
703	{`0xA947`, `0xA951`}, {`0xA980`, `0xA982`}, {`0xA9B3`, `0xA9B3`},
704	{`0xA9B6`, `0xA9B9`}, {`0xA9BC`, `0xA9BD`}, {`0xA9E5`, `0xA9E5`},
705	{`0xAA29`, `0xAA2E`}, {`0xAA31`, `0xAA32`}, {`0xAA35`, `0xAA36`},
706	{`0xAA43`, `0xAA43`}, {`0xAA4C`, `0xAA4C`}, {`0xAA7C`, `0xAA7C`},
707	{`0xAAB0`, `0xAAB0`}, {`0xAAB2`, `0xAAB4`}, {`0xAAB7`, `0xAAB8`},
708	{`0xAABE`, `0xAABF`}, {`0xAAC1`, `0xAAC1`}, {`0xAAEC`, `0xAAED`},
709	{`0xAAF6`, `0xAAF6`}, {`0xABE5`, `0xABE5`}, {`0xABE8`, `0xABE8`},
710	{`0xABED`, `0xABED`}, {`0xFB1E`, `0xFB1E`}, {`0xFE00`, `0xFE0F`},
711	{`0xFE20`, `0xFE2F`},
712	};
713
714	/ test for 8-bit control characters /
715	if (ucs == `0`)
716	return `0`;
717
718	if (ucs < `0x20` \|\| (ucs >= `0x7f` && ucs < `0xa0`) \|\| ucs > `0x0010ffff`)
719	return -`1`;
720
721	/ binary search in table of non-spacing characters /
722	if (mbbisearch(ucs, combining,
723	sizeof(combining) / sizeof(struct mbinterval) - `1`))
724	return `0`;
725
726	/*
727	* if we arrive here, ucs is not a combining or C0/C1 control character
728	*/
729
730	return `1` +
731	(ucs >= `0x1100` &&
732	(ucs <= `0x115f` \|\| / Hangul Jamo init. consonants /
733	(ucs >= `0x2e80` && ucs <= `0xa4cf` && (ucs & ~`0x0011`) != `0x300a` &&
734	ucs != `0x303f`) \|\| / CJK ... Yi /
735	(ucs >= `0xac00` && ucs <= `0xd7a3`) \|\| / Hangul Syllables /
736	(ucs >= `0xf900` && ucs <= `0xfaff`) \|\| / CJK Compatibility*
737	* Ideographs */
738	(ucs >= `0xfe30` && ucs <= `0xfe6f`) \|\| / CJK Compatibility Forms /
739	(ucs >= `0xff00` && ucs <= `0xff5f`) \|\| / Fullwidth Forms /
740	(ucs >= `0xffe0` && ucs <= `0xffe6`) \|\|
741	(ucs >= `0x20000` && ucs <= `0x2ffff`)));
742	}
743
744	/*
745	* Convert a UTF-8 character to a Unicode code point.
746	* This is a one-character version of pg_utf2wchar_with_len.
747	*
748	* No error checks here, c must point to a long-enough string.
749	*/
750	pg_wchar
751	utf8_to_unicode(const unsigned char *c)
752	{
753	if ((*c & `0x80`) == `0`)
754	return (pg_wchar) c[`0`];
755	else if ((*c & `0xe0`) == `0xc0`)
756	return (pg_wchar) (((c[`0`] & `0x1f`) << `6`) \|
757	(c[`1`] & `0x3f`));
758	else if ((*c & `0xf0`) == `0xe0`)
759	return (pg_wchar) (((c[`0`] & `0x0f`) << `12`) \|
760	((c[`1`] & `0x3f`) << `6`) \|
761	(c[`2`] & `0x3f`));
762	else if ((*c & `0xf8`) == `0xf0`)
763	return (pg_wchar) (((c[`0`] & `0x07`) << `18`) \|
764	((c[`1`] & `0x3f`) << `12`) \|
765	((c[`2`] & `0x3f`) << `6`) \|
766	(c[`3`] & `0x3f`));
767	else
768	/ that is an invalid code on purpose /
769	return `0xffffffff`;
770	}
771
/* Display width of the UTF-8 character at "s": decode it, then ask ucs_wcwidth(). */
static int
pg_utf_dsplen(const unsigned char *s)
{
	return ucs_wcwidth(utf8_to_unicode(s));
}
777
778	/*
779	* convert mule internal code to pg_wchar
780	* caller should allocate enough space for "to"
781	* len: length of from.
782	* "from" not necessarily null terminated.
783	*/
784	static int
785	pg_mule2wchar_with_len(const unsigned char from, pg_wchar to, int len)
786	{
787	int cnt = `0`;
788
789	while (len > `0` && *from)
790	{
791	if (IS_LC1(*from) && len >= `2`)
792	{
793	to = from++ << `16`;
794	to \|= from++;
795	len -= `2`;
796	}
797	else if (IS_LCPRV1(*from) && len >= `3`)
798	{
799	from++;
800	to = from++ << `16`;
801	to \|= from++;
802	len -= `3`;
803	}
804	else if (IS_LC2(*from) && len >= `3`)
805	{
806	to = from++ << `16`;
807	to \|= from++ << `8`;
808	to \|= from++;
809	len -= `3`;
810	}
811	else if (IS_LCPRV2(*from) && len >= `4`)
812	{
813	from++;
814	to = from++ << `16`;
815	to \|= from++ << `8`;
816	to \|= from++;
817	len -= `4`;
818	}
819	else
820	{ / assume ASCII /
821	to = (unsigned* char) *from++;
822	len--;
823	}
824	to++;
825	cnt++;
826	}
827	*to = `0`;
828	return cnt;
829	}
830
831	/*
832	* convert pg_wchar to mule internal code
833	* caller should allocate enough space for "to"
834	* len: length of from.
835	* "from" not necessarily null terminated.
836	*/
837	static int
838	pg_wchar2mule_with_len(const pg_wchar from, unsigned* char to, int* len)
839	{
840	int cnt = `0`;
841
842	while (len > `0` && *from)
843	{
844	unsigned char lb;
845
846	lb = (*from >> `16`) & `0xff`;
847	if (IS_LC1(lb))
848	{
849	*to++ = lb;
850	to++ = from & `0xff`;
851	cnt += `2`;
852	}
853	else if (IS_LC2(lb))
854	{
855	*to++ = lb;
856	to++ = (from >> `8`) & `0xff`;
857	to++ = from & `0xff`;
858	cnt += `3`;
859	}
860	else if (IS_LCPRV1_A_RANGE(lb))
861	{
862	*to++ = LCPRV1_A;
863	*to++ = lb;
864	to++ = from & `0xff`;
865	cnt += `3`;
866	}
867	else if (IS_LCPRV1_B_RANGE(lb))
868	{
869	*to++ = LCPRV1_B;
870	*to++ = lb;
871	to++ = from & `0xff`;
872	cnt += `3`;
873	}
874	else if (IS_LCPRV2_A_RANGE(lb))
875	{
876	*to++ = LCPRV2_A;
877	*to++ = lb;
878	to++ = (from >> `8`) & `0xff`;
879	to++ = from & `0xff`;
880	cnt += `4`;
881	}
882	else if (IS_LCPRV2_B_RANGE(lb))
883	{
884	*to++ = LCPRV2_B;
885	*to++ = lb;
886	to++ = (from >> `8`) & `0xff`;
887	to++ = from & `0xff`;
888	cnt += `4`;
889	}
890	else
891	{
892	to++ = from & `0xff`;
893	cnt += `1`;
894	}
895	from++;
896	len--;
897	}
898	*to = `0`;
899	return cnt;
900	}
901
/*
 * Byte length of a MULE internal-code character, from its first byte:
 * LC1 -> 2, LCPRV1 -> 3, LC2 -> 3, LCPRV2 -> 4, anything else -> 1.
 */
int
pg_mule_mblen(const unsigned char *s)
{
	int			len;

	if (IS_LC1(*s))
		len = 2;
	else if (IS_LCPRV1(*s))
		len = 3;
	else if (IS_LC2(*s))
		len = 3;
	else if (IS_LCPRV2(*s))
		len = 4;
	else
		len = 1;				/* assume ASCII */
	return len;
}
919
/*
 * Display width of a MULE internal-code character: 1 column for the LC1 and
 * LCPRV1 forms, 2 for LC2 and LCPRV2, and 1 for everything else.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
	int			len;

	/*
	 * Note: it's not really appropriate to assume that all multibyte charsets
	 * are double-wide on screen.  But this seems an okay approximation for
	 * the MULE charsets we currently support.
	 */

	if (IS_LC1(*s))
		len = 1;
	else if (IS_LCPRV1(*s))
		len = 1;
	else if (IS_LC2(*s))
		len = 2;
	else if (IS_LCPRV2(*s))
		len = 2;
	else
		len = 1;				/* assume ASCII */

	return len;
}
944
945	/*
946	* ISO8859-1
947	*/
948	static int
949	pg_latin12wchar_with_len(const unsigned char from, pg_wchar to, int len)
950	{
951	int cnt = `0`;
952
953	while (len > `0` && *from)
954	{
955	to++ = from++;
956	len--;
957	cnt++;
958	}
959	*to = `0`;
960	return cnt;
961	}
962
963	/*
964	* Trivial conversion from pg_wchar to single byte encoding. Just ignores
965	* high bits.
966	* caller should allocate enough space for "to"
967	* len: length of from.
968	* "from" not necessarily null terminated.
969	*/
970	static int
971	pg_wchar2single_with_len(const pg_wchar from, unsigned* char to, int* len)
972	{
973	int cnt = `0`;
974
975	while (len > `0` && *from)
976	{
977	to++ = from++;
978	len--;
979	cnt++;
980	}
981	*to = `0`;
982	return cnt;
983	}
984
/* Byte length of a single-byte-encoding character: always 1. */
static int
pg_latin1_mblen(const unsigned char *s)
{
	return 1;
}
990
/* Display width for single-byte encodings: delegated to pg_ascii_dsplen(). */
static int
pg_latin1_dsplen(const unsigned char *s)
{
	return pg_ascii_dsplen(s);
}
996
997	/*
998	* SJIS
999	*/
/*
 * Byte length of a Shift-JIS character from its first byte: bytes in
 * 0xa1..0xdf are single-byte, any other high-bit byte starts a 2-byte
 * character, and the rest are single-byte.
 */
static int
pg_sjis_mblen(const unsigned char *s)
{
	int			len;

	if (*s >= 0xa1 && *s <= 0xdf)
		len = 1;				/* 1 byte kana? */
	else if (IS_HIGHBIT_SET(*s))
		len = 2;				/* kanji? */
	else
		len = 1;				/* should be ASCII */
	return len;
}
1013
/*
 * Display width of a Shift-JIS character: 1 column for bytes 0xa1..0xdf,
 * 2 for any other high-bit lead byte, otherwise pg_ascii_dsplen().
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
	int			len;

	if (*s >= 0xa1 && *s <= 0xdf)
		len = 1;				/* 1 byte kana? */
	else if (IS_HIGHBIT_SET(*s))
		len = 2;				/* kanji? */
	else
		len = pg_ascii_dsplen(s);	/* should be ASCII */
	return len;
}
1027
1028	/*
1029	* Big5
1030	*/
/* Big5 character byte length: 2 if the first byte has its high bit set, else 1. */
static int
pg_big5_mblen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;				/* kanji? */
	else
		len = 1;				/* should be ASCII */
	return len;
}
1042
/* Big5 display width: 2 columns for a high-bit lead byte, else pg_ascii_dsplen(). */
static int
pg_big5_dsplen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;				/* kanji? */
	else
		len = pg_ascii_dsplen(s);	/* should be ASCII */
	return len;
}
1054
1055	/*
1056	* GBK
1057	*/
/* GBK character byte length: 2 if the first byte has its high bit set, else 1. */
static int
pg_gbk_mblen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;				/* kanji? */
	else
		len = 1;				/* should be ASCII */
	return len;
}
1069
/* GBK display width: 2 columns for a high-bit lead byte, else pg_ascii_dsplen(). */
static int
pg_gbk_dsplen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;				/* kanji? */
	else
		len = pg_ascii_dsplen(s);	/* should be ASCII */
	return len;
}
1081
1082	/*
1083	* UHC
1084	*/
/* UHC character byte length: 2 if the first byte has its high bit set, else 1. */
static int
pg_uhc_mblen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;				/* 2byte? */
	else
		len = 1;				/* should be ASCII */
	return len;
}
1096
/* UHC display width: 2 columns for a high-bit lead byte, else pg_ascii_dsplen(). */
static int
pg_uhc_dsplen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;				/* 2byte? */
	else
		len = pg_ascii_dsplen(s);	/* should be ASCII */
	return len;
}
1108
1109	/*
1110	* GB18030
1111	* Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
1112	*/
1113
1114	/*
1115	* Unlike all other mblen() functions, this also looks at the second byte of
1116	* the input. However, if you only pass the first byte of a multi-byte
1117	* string, and \0 as the second byte, this still works in a predictable way:
1118	* a 4-byte character will be reported as two 2-byte characters. That's
1119	* enough for all current uses, as a client-only encoding. It works that
1120	* way, because in any valid 4-byte GB18030-encoded character, the third and
1121	* fourth byte look like a 2-byte encoded character, when looked at
1122	* separately.
1123	*/
/*
 * Byte length of a GB18030 character.
 *
 * A byte without the high bit is 1 byte long.  Otherwise the SECOND byte
 * decides: a value in 0x30..0x39 marks a 4-byte character, anything else a
 * 2-byte character.  The byte at s[1] is therefore always read when the
 * high bit of s[0] is set.
 */
static int
pg_gb18030_mblen(const unsigned char *s)
{
	int			len;

	if (!IS_HIGHBIT_SET(*s))
		len = 1;				/* ASCII */
	else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
		len = 4;
	else
		len = 2;
	return len;
}
1137
/* GB18030 display width: 2 columns for a high-bit lead byte, else pg_ascii_dsplen(). */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
	int			len;

	if (IS_HIGHBIT_SET(*s))
		len = 2;
	else
		len = pg_ascii_dsplen(s);	/* ASCII */
	return len;
}
1149
1150	/*
1151	*-------------------------------------------------------------------
1152	* multibyte sequence validators
1153	*
1154	* These functions accept "s", a pointer to the first byte of a string,
1155	* and "len", the remaining length of the string. If there is a validly
1156	* encoded character beginning at *s, return its length in bytes; else
1157	* return -1.
1158	*
1159	* The functions can assume that len > 0 and that *s != '\0', but they must
1160	* test for and reject zeroes in any additional bytes of a multibyte character.
1161	*
1162	* Note that this definition allows the function for a single-byte
1163	* encoding to be just "return 1".
1164	*-------------------------------------------------------------------
1165	*/
1166
/*
 * SQL_ASCII verifier: every byte is accepted as a one-byte character, so
 * this always returns 1 without looking at "s" or "len".
 */
static int
pg_ascii_verifier(const unsigned char *s, int len)
{
	return 1;
}
1172
/* True if byte value c lies in 0xa1..0xfe inclusive.  Evaluates c twice. */
#define IS_EUC_RANGE_VALID(c)	((c) >= 0xa1 && (c) <= 0xfe)
1174
1175	static int
1176	pg_eucjp_verifier(const unsigned char s, int* len)
1177	{
1178	int l;
1179	unsigned char c1,
1180	c2;
1181
1182	c1 = *s++;
1183
1184	switch (c1)
1185	{
1186	case SS2: / JIS X 0201 /
1187	l = `2`;
1188	if (l > len)
1189	return -`1`;
1190	c2 = *s++;
1191	if (c2 < `0xa1` \|\| c2 > `0xdf`)
1192	return -`1`;
1193	break;
1194
1195	case SS3: / JIS X 0212 /
1196	l = `3`;
1197	if (l > len)
1198	return -`1`;
1199	c2 = *s++;
1200	if (!IS_EUC_RANGE_VALID(c2))
1201	return -`1`;
1202	c2 = *s++;
1203	if (!IS_EUC_RANGE_VALID(c2))
1204	return -`1`;
1205	break;
1206
1207	default:
1208	if (IS_HIGHBIT_SET(c1)) / JIS X 0208? /
1209	{
1210	l = `2`;
1211	if (l > len)
1212	return -`1`;
1213	if (!IS_EUC_RANGE_VALID(c1))
1214	return -`1`;
1215	c2 = *s++;
1216	if (!IS_EUC_RANGE_VALID(c2))
1217	return -`1`;
1218	}
1219	else
1220	/ must be ASCII /
1221	{
1222	l = `1`;
1223	}
1224	break;
1225	}
1226
1227	return l;
1228	}
1229
/*
 * Validate one EUC_KR character at "s" with "len" bytes available.
 *
 * A high-bit lead byte requires a second byte, and both must lie in
 * 0xa1..0xfe; the result is then 2.  A byte without the high bit is
 * accepted as length 1.  Returns -1 on truncation or range failure.
 */
static int
pg_euckr_verifier(const unsigned char *s, int len)
{
	int			l;
	unsigned char c1,
				c2;

	c1 = *s++;

	if (IS_HIGHBIT_SET(c1))
	{
		l = 2;
		if (l > len)
			return -1;
		if (!IS_EUC_RANGE_VALID(c1))
			return -1;
		c2 = *s++;
		if (!IS_EUC_RANGE_VALID(c2))
			return -1;
	}
	else
		/* must be ASCII */
	{
		l = 1;
	}

	return l;
}
1258
/* EUC-CN byte sequences are exactly same as EUC-KR */
#define pg_euccn_verifier	pg_euckr_verifier
1261
1262	static int
1263	pg_euctw_verifier(const unsigned char s, int* len)
1264	{
1265	int l;
1266	unsigned char c1,
1267	c2;
1268
1269	c1 = *s++;
1270
1271	switch (c1)
1272	{
1273	case SS2: / CNS 11643 Plane 1-7 /
1274	l = `4`;
1275	if (l > len)
1276	return -`1`;
1277	c2 = *s++;
1278	if (c2 < `0xa1` \|\| c2 > `0xa7`)
1279	return -`1`;
1280	c2 = *s++;
1281	if (!IS_EUC_RANGE_VALID(c2))
1282	return -`1`;
1283	c2 = *s++;
1284	if (!IS_EUC_RANGE_VALID(c2))
1285	return -`1`;
1286	break;
1287
1288	case SS3: / unused /
1289	return -`1`;
1290
1291	default:
1292	if (IS_HIGHBIT_SET(c1)) / CNS 11643 Plane 1 /
1293	{
1294	l = `2`;
1295	if (l > len)
1296	return -`1`;
1297	/ no further range check on c1? /
1298	c2 = *s++;
1299	if (!IS_EUC_RANGE_VALID(c2))
1300	return -`1`;
1301	}
1302	else
1303	/ must be ASCII /
1304	{
1305	l = `1`;
1306	}
1307	break;
1308	}
1309	return l;
1310	}
1311
/*
 * Validate one JOHAB character at "s" with "len" bytes available.
 *
 * The length comes from pg_johab_mblen().  A character that does not fit
 * in "len" is rejected.  A lead byte without the high bit is accepted
 * immediately; otherwise every trailing byte must lie in 0xa1..0xfe.
 * Returns the byte length, or -1 on failure.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
	int			l,
				mbl;
	unsigned char c;

	l = mbl = pg_johab_mblen(s);

	if (len < l)
		return -1;

	if (!IS_HIGHBIT_SET(*s))
		return mbl;

	while (--l > 0)
	{
		c = *++s;
		if (!IS_EUC_RANGE_VALID(c))
			return -1;
	}
	return mbl;
}
1335
/*
 * Validate one MULE internal-code character at "s" with "len" bytes
 * available.
 *
 * The length comes from pg_mule_mblen(); every byte after the first must
 * have its high bit set.  Returns the byte length, or -1 if the character
 * is truncated or a trailing byte fails that test.
 */
static int
pg_mule_verifier(const unsigned char *s, int len)
{
	int			l,
				mbl;
	unsigned char c;

	l = mbl = pg_mule_mblen(s);

	if (len < l)
		return -1;

	while (--l > 0)
	{
		c = *++s;
		if (!IS_HIGHBIT_SET(c))
			return -1;
	}
	return mbl;
}
1356
/*
 * Single-byte-encoding verifier: every byte is accepted as a one-byte
 * character, so this always returns 1 without looking at "s" or "len".
 */
static int
pg_latin1_verifier(const unsigned char *s, int len)
{
	return 1;
}
1362
/*
 * Verify the first Shift-JIS character of s; at most len bytes may be
 * examined.
 *
 * Returns the character's byte length as reported by pg_sjis_mblen(), or -1
 * if the character is truncated or a two-byte character fails the
 * ISSJISHEAD/ISSJISTAIL checks.
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
	int			l,
				mbl;
	unsigned char c1,
				c2;

	l = mbl = pg_sjis_mblen(s);

	if (len < l)
		return -1;

	if (l == 1)					/* pg_sjis_mblen already verified it */
		return mbl;

	c1 = *s++;
	c2 = *s;
	if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
		return -1;
	return mbl;
}
1385
/*
 * Verify the first Big5 character of s; at most len bytes may be examined.
 *
 * Only two things are checked: the character must fit within len bytes, and
 * no byte after the first may be NUL.
 *
 * Returns the character's byte length as reported by pg_big5_mblen(), or -1
 * on failure.
 */
static int
pg_big5_verifier(const unsigned char *s, int len)
{
	int			l,
				mbl;

	l = mbl = pg_big5_mblen(s);

	if (len < l)
		return -1;

	while (--l > 0)
	{
		if (*++s == '\0')
			return -1;
	}

	return mbl;
}
1405
/*
 * Verify the first GBK character of s; at most len bytes may be examined.
 *
 * Only two things are checked: the character must fit within len bytes, and
 * no byte after the first may be NUL.
 *
 * Returns the character's byte length as reported by pg_gbk_mblen(), or -1
 * on failure.
 */
static int
pg_gbk_verifier(const unsigned char *s, int len)
{
	int			l,
				mbl;

	l = mbl = pg_gbk_mblen(s);

	if (len < l)
		return -1;

	while (--l > 0)
	{
		if (*++s == '\0')
			return -1;
	}

	return mbl;
}
1425
/*
 * Verify the first UHC character of s; at most len bytes may be examined.
 *
 * Only two things are checked: the character must fit within len bytes, and
 * no byte after the first may be NUL.
 *
 * Returns the character's byte length as reported by pg_uhc_mblen(), or -1
 * on failure.
 */
static int
pg_uhc_verifier(const unsigned char *s, int len)
{
	int			l,
				mbl;

	l = mbl = pg_uhc_mblen(s);

	if (len < l)
		return -1;

	while (--l > 0)
	{
		if (*++s == '\0')
			return -1;
	}

	return mbl;
}
1445
/*
 * Verify the first GB18030 character of s; at most len bytes may be
 * examined.
 *
 * The length tests come first in each condition, so bytes beyond len are
 * never read.
 *
 * Returns 1, 2 or 4 for a well-formed character, or -1 if it is malformed
 * or truncated.
 */
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
	int			l;

	if (!IS_HIGHBIT_SET(*s))
		l = 1;					/* ASCII */
	else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
	{
		/* Should be 4-byte, validate remaining bytes */
		if (*s >= 0x81 && *s <= 0xfe &&
			*(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
			*(s + 3) >= 0x30 && *(s + 3) <= 0x39)
			l = 4;
		else
			l = -1;
	}
	else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
	{
		/* Should be 2-byte, validate */
		if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
			(*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
			l = 2;
		else
			l = -1;
	}
	else
		l = -1;
	return l;
}
1476
/*
 * Verify the first UTF-8 character of s; at most len bytes may be examined.
 *
 * The length comes from pg_utf_mblen(); once the character is known to fit
 * within len bytes, pg_utf8_islegal() decides whether it is acceptable.
 *
 * Returns the character's byte length, or -1 if it is truncated or illegal.
 */
static int
pg_utf8_verifier(const unsigned char *s, int len)
{
	int			l = pg_utf_mblen(s);

	if (len < l)
		return -1;

	if (!pg_utf8_islegal(s, l))
		return -1;

	return l;
}
1490
/*
 * Check for validity of a single UTF-8 encoded character
 *
 * This directly implements the rules in RFC3629.  The bizarre-looking
 * restrictions on the second byte are meant to ensure that there isn't
 * more than one encoding of a given Unicode character point; that is,
 * you may not use a longer-than-necessary byte sequence with high order
 * zero bits to represent a character that would fit in fewer bytes.
 * To do otherwise is to create security hazards (eg, create an apparent
 * non-ASCII character that decodes to plain ASCII).
 *
 * length is assumed to have been obtained by pg_utf_mblen(), and the
 * caller must have checked that that many bytes are present in the buffer.
 *
 * Returns true if the character is legal, false otherwise.  Any length
 * outside 1..4 is rejected.
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
	unsigned char a;

	/* Bytes are checked from last to first, falling through each case. */
	switch (length)
	{
		default:
			/* reject lengths 5 and 6 for now */
			return false;
		case 4:
			a = source[3];
			if (a < 0x80 || a > 0xBF)
				return false;
			/* FALL THRU */
		case 3:
			a = source[2];
			if (a < 0x80 || a > 0xBF)
				return false;
			/* FALL THRU */
		case 2:
			a = source[1];
			/* the allowed range of the second byte depends on the first */
			switch (*source)
			{
				case 0xE0:
					if (a < 0xA0 || a > 0xBF)
						return false;
					break;
				case 0xED:
					if (a < 0x80 || a > 0x9F)
						return false;
					break;
				case 0xF0:
					if (a < 0x90 || a > 0xBF)
						return false;
					break;
				case 0xF4:
					if (a < 0x80 || a > 0x8F)
						return false;
					break;
				default:
					if (a < 0x80 || a > 0xBF)
						return false;
					break;
			}
			/* FALL THRU */
		case 1:
			a = *source;
			if (a >= 0x80 && a < 0xC2)
				return false;
			if (a > 0xF4)
				return false;
			break;
	}
	return true;
}
1561
1562	#ifndef FRONTEND
1563
1564	/*
1565	* Generic character incrementer function.
1566	*
1567	* Not knowing anything about the properties of the encoding in use, we just
1568	* keep incrementing the last byte until we get a validly-encoded result,
1569	* or we run out of values to try. We don't bother to try incrementing
1570	* higher-order bytes, so there's no growth in runtime for wider characters.
1571	* (If we did try to do that, we'd need to consider the likelihood that 255
1572	* is not a valid final byte in the encoding.)
1573	*/
1574	static bool
1575	pg_generic_charinc(unsigned char charptr, int* len)
1576	{
1577	unsigned char *lastbyte = charptr + len - `1`;
1578	mbverifier mbverify;
1579
1580	/ We can just invoke the character verifier directly. /
1581	mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
1582
1583	while (lastbyte < (unsigned* char) `255`)
1584	{
1585	(*lastbyte)++;
1586	if ((*mbverify) (charptr, len) == len)
1587	return true;
1588	}
1589
1590	return false;
1591	}
1592
/*
 * UTF-8 character incrementer function.
 *
 * For a one-byte character less than 0x7F, we just increment the byte.
 *
 * For a multibyte character, every byte but the first must fall between 0x80
 * and 0xBF; and the first byte must be between 0xC0 and 0xF4.  We increment
 * the last byte that's not already at its maximum value.  If we can't find a
 * byte that's less than the maximum allowable value, we simply fail.  We also
 * need some special-case logic to skip regions used for surrogate pair
 * handling, as those should not occur in valid UTF-8.
 *
 * Note that we don't reset lower-order bytes back to their minimums, since
 * we can't afford to make an exhaustive search (see make_greater_string).
 *
 * charptr is modified in place.  Returns true if some byte was incremented,
 * false if no byte could be (or if length is outside 1..4).
 */
static bool
pg_utf8_increment(unsigned char *charptr, int length)
{
	unsigned char a;
	unsigned char limit;

	/* Try bytes from last to first; "break" means one was incremented. */
	switch (length)
	{
		default:
			/* reject lengths 5 and 6 for now */
			return false;
		case 4:
			a = charptr[3];
			if (a < 0xBF)
			{
				charptr[3]++;
				break;
			}
			/* FALL THRU */
		case 3:
			a = charptr[2];
			if (a < 0xBF)
			{
				charptr[2]++;
				break;
			}
			/* FALL THRU */
		case 2:
			a = charptr[1];
			/* the second byte's upper limit depends on the first byte */
			switch (*charptr)
			{
				case 0xED:
					limit = 0x9F;
					break;
				case 0xF4:
					limit = 0x8F;
					break;
				default:
					limit = 0xBF;
					break;
			}
			if (a < limit)
			{
				charptr[1]++;
				break;
			}
			/* FALL THRU */
		case 1:
			a = *charptr;
			/* these first-byte values cannot be incremented */
			if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
				return false;
			charptr[0]++;
			break;
	}

	return true;
}
1665
1666	/*
1667	* EUC-JP character incrementer function.
1668	*
1669	* If the sequence starts with SS2 (0x8e), it must be a two-byte sequence
1670	* representing JIS X 0201 characters with the second byte ranging between
1671	* 0xa1 and 0xdf. We just increment the last byte if it's less than 0xdf,
1672	* and otherwise rewrite the whole sequence to 0xa1 0xa1.
1673	*
1674	* If the sequence starts with SS3 (0x8f), it must be a three-byte sequence
1675	* in which the last two bytes range between 0xa1 and 0xfe. The last byte
1676	* is incremented if possible, otherwise the second-to-last byte.
1677	*
1678	* If the sequence starts with a value other than the above and its MSB
1679	* is set, it must be a two-byte sequence representing JIS X 0208 characters
1680	* with both bytes ranging between 0xa1 and 0xfe. The last byte is
1681	* incremented if possible, otherwise the second-to-last byte.
1682	*
1683	* Otherwise, the sequence is a single-byte ASCII character. It is
1684	* incremented up to 0x7f.
1685	*/
1686	static bool
1687	pg_eucjp_increment(unsigned char charptr, int* length)
1688	{
1689	unsigned char c1,
1690	c2;
1691	int i;
1692
1693	c1 = *charptr;
1694
1695	switch (c1)
1696	{
1697	case SS2: / JIS X 0201 /
1698	if (length != `2`)
1699	return false;
1700
1701	c2 = charptr[`1`];
1702
1703	if (c2 >= `0xdf`)
1704	charptr[`0`] = charptr[`1`] = `0xa1`;
1705	else if (c2 < `0xa1`)
1706	charptr[`1`] = `0xa1`;
1707	else
1708	charptr[`1`]++;
1709	break;
1710
1711	case SS3: / JIS X 0212 /
1712	if (length != `3`)
1713	return false;
1714
1715	for (i = `2`; i > `0`; i--)
1716	{
1717	c2 = charptr[i];
1718	if (c2 < `0xa1`)
1719	{
1720	charptr[i] = `0xa1`;
1721	return true;
1722	}
1723	else if (c2 < `0xfe`)
1724	{
1725	charptr[i]++;
1726	return true;
1727	}
1728	}
1729
1730	/ Out of 3-byte code region /
1731	return false;
1732
1733	default:
1734	if (IS_HIGHBIT_SET(c1)) / JIS X 0208? /
1735	{
1736	if (length != `2`)
1737	return false;
1738
1739	for (i = `1`; i >= `0`; i--)
1740	{
1741	c2 = charptr[i];
1742	if (c2 < `0xa1`)
1743	{
1744	charptr[i] = `0xa1`;
1745	return true;
1746	}
1747	else if (c2 < `0xfe`)
1748	{
1749	charptr[i]++;
1750	return true;
1751	}
1752	}
1753
1754	/ Out of 2 byte code region /
1755	return false;
1756	}
1757	else
1758	{ / ASCII, single byte /
1759	if (c1 > `0x7e`)
1760	return false;
1761	(*charptr)++;
1762	}
1763	break;
1764	}
1765
1766	return true;
1767	}
1768	#endif /* !FRONTEND */
1769
1770
1771	/*
1772	*-------------------------------------------------------------------
1773	* encoding info table
1774	* XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
1775	*-------------------------------------------------------------------
1776	*/
1777	const pg_wchar_tbl pg_wchar_table[] = {
1778	{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, `1`}, / PG_SQL_ASCII /
1779	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, `3`}, / PG_EUC_JP /
1780	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, `2`}, / PG_EUC_CN /
1781	{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, `3`}, / PG_EUC_KR /
1782	{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, `4`}, / PG_EUC_TW /
1783	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, `3`}, / PG_EUC_JIS_2004 /
1784	{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, `4`}, / PG_UTF8 /
1785	{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, `4`}, / PG_MULE_INTERNAL /
1786	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN1 /
1787	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN2 /
1788	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN3 /
1789	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN4 /
1790	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN5 /
1791	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN6 /
1792	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN7 /
1793	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN8 /
1794	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN9 /
1795	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_LATIN10 /
1796	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1256 /
1797	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1258 /
1798	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN866 /
1799	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN874 /
1800	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_KOI8R /
1801	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1251 /
1802	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1252 /
1803	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / ISO-8859-5 /
1804	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / ISO-8859-6 /
1805	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / ISO-8859-7 /
1806	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / ISO-8859-8 /
1807	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1250 /
1808	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1253 /
1809	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1254 /
1810	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1255 /
1811	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_WIN1257 /
1812	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, `1`}, / PG_KOI8U /
1813	{`0`, `0`, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, `2`}, / PG_SJIS /
1814	{`0`, `0`, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, `2`}, / PG_BIG5 /
1815	{`0`, `0`, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, `2`}, / PG_GBK /
1816	{`0`, `0`, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, `2`}, / PG_UHC /
1817	{`0`, `0`, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, `4`}, / PG_GB18030 /
1818	{`0`, `0`, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, `3`}, / PG_JOHAB /
1819	{`0`, `0`, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, `2`} / PG_SHIFT_JIS_2004 /
1820	};
1821
/*
 * Returns the byte length of a character in mule internal code.
 * This is a public wrapper around pg_mule_mblen().
 */
int
pg_mic_mblen(const unsigned char *mbstr)
{
	return pg_mule_mblen(mbstr);
}
1828
1829	/*
1830	* Returns the byte length of a multibyte character.
1831	*/
1832	int
1833	pg_encoding_mblen(int encoding, const char *mbstr)
1834	{
1835	return (PG_VALID_ENCODING(encoding) ?
1836	pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
1837	pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
1838	}
1839
1840	/*
1841	* Returns the display length of a multibyte character.
1842	*/
1843	int
1844	pg_encoding_dsplen(int encoding, const char *mbstr)
1845	{
1846	return (PG_VALID_ENCODING(encoding) ?
1847	pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
1848	pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
1849	}
1850
1851	/*
1852	* Verify the first multibyte character of the given string.
1853	* Return its byte length if good, -1 if bad. (See comments above for
1854	* full details of the mbverify API.)
1855	*/
1856	int
1857	pg_encoding_verifymb(int encoding, const char mbstr, int* len)
1858	{
1859	return (PG_VALID_ENCODING(encoding) ?
1860	pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
1861	pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
1862	}
1863
1864	/*
1865	* fetch maximum length of a given encoding
1866	*/
1867	int
1868	pg_encoding_max_length(int encoding)
1869	{
1870	Assert(PG_VALID_ENCODING(encoding));
1871
1872	return pg_wchar_table[encoding].maxmblen;
1873	}
1874
1875	#ifndef FRONTEND
1876
1877	/*
1878	* fetch maximum length of the encoding for the current database
1879	*/
1880	int
1881	pg_database_encoding_max_length(void)
1882	{
1883	return pg_wchar_table[GetDatabaseEncoding()].maxmblen;
1884	}
1885
1886	/*
1887	* get the character incrementer for the encoding for the current database
1888	*/
1889	mbcharacter_incrementer
1890	pg_database_encoding_character_incrementer(void)
1891	{
1892	/*
1893	* Eventually it might be best to add a field to pg_wchar_table[], but for
1894	* now we just use a switch.
1895	*/
1896	switch (GetDatabaseEncoding())
1897	{
1898	case PG_UTF8:
1899	return pg_utf8_increment;
1900
1901	case PG_EUC_JP:
1902	return pg_eucjp_increment;
1903
1904	default:
1905	return pg_generic_charinc;
1906	}
1907	}
1908
/*
 * Verify mbstr to make sure that it is validly encoded in the current
 * database encoding.  Otherwise same as pg_verify_mbstr().
 *
 * Returns true when pg_verify_mbstr_len() reports a non-negative length.
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
	return
		pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
}
1919
/*
 * Verify mbstr to make sure that it is validly encoded in the specified
 * encoding.
 *
 * Returns true when pg_verify_mbstr_len() reports a non-negative length;
 * see that function for the meaning of len and noError.
 */
bool
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
{
	return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
}
1929
1930	/*
1931	* Verify mbstr to make sure that it is validly encoded in the specified
1932	* encoding.
1933	*
1934	* mbstr is not necessarily zero terminated; length of mbstr is
1935	* specified by len.
1936	*
1937	* If OK, return length of string in the encoding.
1938	* If a problem is found, return -1 when noError is
1939	* true; when noError is false, ereport() a descriptive message.
1940	*/
1941	int
1942	pg_verify_mbstr_len(int encoding, const char mbstr, int* len, bool noError)
1943	{
1944	mbverifier mbverify;
1945	int mb_len;
1946
1947	Assert(PG_VALID_ENCODING(encoding));
1948
1949	/*
1950	* In single-byte encodings, we need only reject nulls (\0).
1951	*/
1952	if (pg_encoding_max_length(encoding) <= `1`)
1953	{
1954	const char *nullpos = memchr(mbstr, `0`, len);
1955
1956	if (nullpos == NULL)
1957	return len;
1958	if (noError)
1959	return -`1`;
1960	report_invalid_encoding(encoding, nullpos, `1`);
1961	}
1962
1963	/ fetch function pointer just once /
1964	mbverify = pg_wchar_table[encoding].mbverify;
1965
1966	mb_len = `0`;
1967
1968	while (len > `0`)
1969	{
1970	int l;
1971
1972	/ fast path for ASCII-subset characters /
1973	if (!IS_HIGHBIT_SET(*mbstr))
1974	{
1975	if (*mbstr != `'\0'`)
1976	{
1977	mb_len++;
1978	mbstr++;
1979	len--;
1980	continue;
1981	}
1982	if (noError)
1983	return -`1`;
1984	report_invalid_encoding(encoding, mbstr, len);
1985	}
1986
1987	l = (mbverify) ((const* unsigned char *) mbstr, len);
1988
1989	if (l < `0`)
1990	{
1991	if (noError)
1992	return -`1`;
1993	report_invalid_encoding(encoding, mbstr, len);
1994	}
1995
1996	mbstr += l;
1997	len -= l;
1998	mb_len++;
1999	}
2000	return mb_len;
2001	}
2002
2003	/*
2004	* check_encoding_conversion_args: check arguments of a conversion function
2005	*
2006	* "expected" arguments can be either an encoding ID or -1 to indicate that
2007	* the caller will check whether it accepts the ID.
2008	*
2009	* Note: the errors here are not really user-facing, so elog instead of
2010	* ereport seems sufficient. Also, we trust that the "expected" encoding
2011	* arguments are valid encoding IDs, but we don't trust the actuals.
2012	*/
2013	void
2014	check_encoding_conversion_args(int src_encoding,
2015	int dest_encoding,
2016	int len,
2017	int expected_src_encoding,
2018	int expected_dest_encoding)
2019	{
2020	if (!PG_VALID_ENCODING(src_encoding))
2021	elog(ERROR, "invalid source encoding ID: %d", src_encoding);
2022	if (src_encoding != expected_src_encoding && expected_src_encoding >= `0`)
2023	elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
2024	pg_enc2name_tbl[expected_src_encoding].name,
2025	pg_enc2name_tbl[src_encoding].name);
2026	if (!PG_VALID_ENCODING(dest_encoding))
2027	elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
2028	if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= `0`)
2029	elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
2030	pg_enc2name_tbl[expected_dest_encoding].name,
2031	pg_enc2name_tbl[dest_encoding].name);
2032	if (len < `0`)
2033	elog(ERROR, "encoding conversion length must not be negative");
2034	}
2035
2036	/*
2037	* report_invalid_encoding: complain about invalid multibyte character
2038	*
2039	* note: len is remaining length of string, not length of character;
2040	* len must be greater than zero, as we always examine the first byte.
2041	*/
2042	void
2043	report_invalid_encoding(int encoding, const char mbstr, int* len)
2044	{
2045	int l = pg_encoding_mblen(encoding, mbstr);
2046	char buf[`8` * `5` + `1`];
2047	char *p = buf;
2048	int j,
2049	jlimit;
2050
2051	jlimit = Min(l, len);
2052	jlimit = Min(jlimit, `8`); / prevent buffer overrun /
2053
2054	for (j = `0`; j < jlimit; j++)
2055	{
2056	p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2057	if (j < jlimit - `1`)
2058	p += sprintf(p, " ");
2059	}
2060
2061	ereport(ERROR,
2062	(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2063	errmsg("invalid byte sequence for encoding \"%s\": %s",
2064	pg_enc2name_tbl[encoding].name,
2065	buf)));
2066	}
2067
2068	/*
2069	* report_untranslatable_char: complain about untranslatable character
2070	*
2071	* note: len is remaining length of string, not length of character;
2072	* len must be greater than zero, as we always examine the first byte.
2073	*/
2074	void
2075	report_untranslatable_char(int src_encoding, int dest_encoding,
2076	const char mbstr, int* len)
2077	{
2078	int l = pg_encoding_mblen(src_encoding, mbstr);
2079	char buf[`8` * `5` + `1`];
2080	char *p = buf;
2081	int j,
2082	jlimit;
2083
2084	jlimit = Min(l, len);
2085	jlimit = Min(jlimit, `8`); / prevent buffer overrun /
2086
2087	for (j = `0`; j < jlimit; j++)
2088	{
2089	p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
2090	if (j < jlimit - `1`)
2091	p += sprintf(p, " ");
2092	}
2093
2094	ereport(ERROR,
2095	(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
2096	errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
2097	buf,
2098	pg_enc2name_tbl[src_encoding].name,
2099	pg_enc2name_tbl[dest_encoding].name)));
2100	}
2101
2102	#endif /* !FRONTEND */
2103

Browse the source code of PostgreSQL/src/backend/utils/mb/wchar.c