ascii.c source code [PostgreSQL/src/backend/utils/adt/ascii.c]

/*-----------------------------------------------------------------------
 * ascii.c
 *	 The PostgreSQL routine for string to ascii conversion.
 *
 *	 Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/ascii.c
 *
 *-----------------------------------------------------------------------
 */
12	#include "postgres.h"
13
14	#include "mb/pg_wchar.h"
15	#include "utils/ascii.h"
16	#include "utils/builtins.h"
17
18	static void pg_to_ascii(unsigned char src, unsigned* char *src_end,
19	unsigned char dest, int* enc);
20	static text encode_to_ascii(text data, int enc);
21
22
23	/ ----------*
24	* to_ascii
25	* ----------
26	*/
27	static void
28	pg_to_ascii(unsigned char src, unsigned* char src_end, unsigned* char dest, int* enc)
29	{
30	unsigned char *x;
31	const unsigned char *ascii;
32	int range;
33
34	/*
35	* relevant start for an encoding
36	*/
37	#define RANGE_128 128
38	#define RANGE_160 160
39
40	if (enc == PG_LATIN1)
41	{
42	/*
43	* ISO-8859-1 <range: 160 -- 255>
44	*/
45	ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
46	range = RANGE_160;
47	}
48	else if (enc == PG_LATIN2)
49	{
50	/*
51	* ISO-8859-2 <range: 160 -- 255>
52	*/
53	ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
54	range = RANGE_160;
55	}
56	else if (enc == PG_LATIN9)
57	{
58	/*
59	* ISO-8859-15 <range: 160 -- 255>
60	*/
61	ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
62	range = RANGE_160;
63	}
64	else if (enc == PG_WIN1250)
65	{
66	/*
67	* Window CP1250 <range: 128 -- 255>
68	*/
69	ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
70	range = RANGE_128;
71	}
72	else
73	{
74	ereport(ERROR,
75	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
76	errmsg("encoding conversion from %s to ASCII not supported",
77	pg_encoding_to_char(enc))));
78	return; / keep compiler quiet /
79	}
80
81	/*
82	* Encode
83	*/
84	for (x = src; x < src_end; x++)
85	{
86	if (*x < `128`)
87	dest++ = x;
88	else if (*x < range)
89	dest++ = `' '`; /* bogus 128 to 'range' /
90	else
91	dest++ = ascii[x - range];
92	}
93	}
94
95	/ ----------*
96	* encode text
97	*
98	* The text datum is overwritten in-place, therefore this coding method
99	* cannot support conversions that change the string length!
100	* ----------
101	*/
102	static text *
103	encode_to_ascii(text data, int* enc)
104	{
105	pg_to_ascii((unsigned char ) VARDATA(data), /* src /
106	(unsigned char ) (data) + VARSIZE(data), /* src end /
107	(unsigned char ) VARDATA(data), /* dest /
108	enc); / encoding /
109
110	return data;
111	}
112
113	/ ----------*
114	* convert to ASCII - enc is set as 'name' arg.
115	* ----------
116	*/
117	Datum
118	to_ascii_encname(PG_FUNCTION_ARGS)
119	{
120	text *data = PG_GETARG_TEXT_P_COPY(`0`);
121	char encname = NameStr(PG_GETARG_NAME(`1`));
122	int enc = pg_char_to_encoding(encname);
123
124	if (enc < `0`)
125	ereport(ERROR,
126	(errcode(ERRCODE_UNDEFINED_OBJECT),
127	errmsg("%s is not a valid encoding name", encname)));
128
129	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
130	}
131
132	/ ----------*
133	* convert to ASCII - enc is set as int4
134	* ----------
135	*/
136	Datum
137	to_ascii_enc(PG_FUNCTION_ARGS)
138	{
139	text *data = PG_GETARG_TEXT_P_COPY(`0`);
140	int enc = PG_GETARG_INT32(`1`);
141
142	if (!PG_VALID_ENCODING(enc))
143	ereport(ERROR,
144	(errcode(ERRCODE_UNDEFINED_OBJECT),
145	errmsg("%d is not a valid encoding code", enc)));
146
147	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
148	}
149
150	/ ----------*
151	* convert to ASCII - current enc is DatabaseEncoding
152	* ----------
153	*/
154	Datum
155	to_ascii_default(PG_FUNCTION_ARGS)
156	{
157	text *data = PG_GETARG_TEXT_P_COPY(`0`);
158	int enc = GetDatabaseEncoding();
159
160	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
161	}
162
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid ASCII string by replacing non-ASCII bytes with '?'.  Otherwise the
 * behavior is identical to strlcpy(), except that we don't bother with a
 * return value.
 *
 *	dest	output buffer of at least destsiz bytes
 *	src		NUL-terminated input string
 *	destsiz	size of dest; at most destsiz - 1 bytes are copied and the
 *			result is always NUL-terminated, unless destsiz is 0, in which
 *			case dest is not touched at all
 *
 * Bytes 32..126 and the whitespace characters '\n', '\r' and '\t' are
 * copied unchanged.  Every other byte is replaced with '?'; this includes
 * the remaining control characters and DEL (127), which is not printable.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves one byte for the terminating nul */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;
		/* Keep printable ASCII characters (DEL, 127, is a control char) */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
199

Browse the source code of PostgreSQL/src/backend/utils/adt/ascii.c