1/*-----------------------------------------------------------------------
2 * ascii.c
3 * The PostgreSQL routine for string to ascii conversion.
4 *
5 * Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/utils/adt/ascii.c
9 *
10 *-----------------------------------------------------------------------
11 */
12#include "postgres.h"
13
14#include "mb/pg_wchar.h"
15#include "utils/ascii.h"
16#include "utils/builtins.h"
17
18static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
19 unsigned char *dest, int enc);
20static text *encode_to_ascii(text *data, int enc);
21
22
23/* ----------
24 * to_ascii
25 * ----------
26 */
27static void
28pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
29{
30 unsigned char *x;
31 const unsigned char *ascii;
32 int range;
33
34 /*
35 * relevant start for an encoding
36 */
37#define RANGE_128 128
38#define RANGE_160 160
39
40 if (enc == PG_LATIN1)
41 {
42 /*
43 * ISO-8859-1 <range: 160 -- 255>
44 */
45 ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
46 range = RANGE_160;
47 }
48 else if (enc == PG_LATIN2)
49 {
50 /*
51 * ISO-8859-2 <range: 160 -- 255>
52 */
53 ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
54 range = RANGE_160;
55 }
56 else if (enc == PG_LATIN9)
57 {
58 /*
59 * ISO-8859-15 <range: 160 -- 255>
60 */
61 ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
62 range = RANGE_160;
63 }
64 else if (enc == PG_WIN1250)
65 {
66 /*
67 * Window CP1250 <range: 128 -- 255>
68 */
69 ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
70 range = RANGE_128;
71 }
72 else
73 {
74 ereport(ERROR,
75 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
76 errmsg("encoding conversion from %s to ASCII not supported",
77 pg_encoding_to_char(enc))));
78 return; /* keep compiler quiet */
79 }
80
81 /*
82 * Encode
83 */
84 for (x = src; x < src_end; x++)
85 {
86 if (*x < 128)
87 *dest++ = *x;
88 else if (*x < range)
89 *dest++ = ' '; /* bogus 128 to 'range' */
90 else
91 *dest++ = ascii[*x - range];
92 }
93}
94
95/* ----------
96 * encode text
97 *
98 * The text datum is overwritten in-place, therefore this coding method
99 * cannot support conversions that change the string length!
100 * ----------
101 */
102static text *
103encode_to_ascii(text *data, int enc)
104{
105 pg_to_ascii((unsigned char *) VARDATA(data), /* src */
106 (unsigned char *) (data) + VARSIZE(data), /* src end */
107 (unsigned char *) VARDATA(data), /* dest */
108 enc); /* encoding */
109
110 return data;
111}
112
113/* ----------
114 * convert to ASCII - enc is set as 'name' arg.
115 * ----------
116 */
117Datum
118to_ascii_encname(PG_FUNCTION_ARGS)
119{
120 text *data = PG_GETARG_TEXT_P_COPY(0);
121 char *encname = NameStr(*PG_GETARG_NAME(1));
122 int enc = pg_char_to_encoding(encname);
123
124 if (enc < 0)
125 ereport(ERROR,
126 (errcode(ERRCODE_UNDEFINED_OBJECT),
127 errmsg("%s is not a valid encoding name", encname)));
128
129 PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
130}
131
132/* ----------
133 * convert to ASCII - enc is set as int4
134 * ----------
135 */
136Datum
137to_ascii_enc(PG_FUNCTION_ARGS)
138{
139 text *data = PG_GETARG_TEXT_P_COPY(0);
140 int enc = PG_GETARG_INT32(1);
141
142 if (!PG_VALID_ENCODING(enc))
143 ereport(ERROR,
144 (errcode(ERRCODE_UNDEFINED_OBJECT),
145 errmsg("%d is not a valid encoding code", enc)));
146
147 PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
148}
149
150/* ----------
151 * convert to ASCII - current enc is DatabaseEncoding
152 * ----------
153 */
154Datum
155to_ascii_default(PG_FUNCTION_ARGS)
156{
157 text *data = PG_GETARG_TEXT_P_COPY(0);
158 int enc = GetDatabaseEncoding();
159
160 PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
161}
162
/* ----------
 * ascii_safe_strlcpy
 *
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid, printable ASCII string.  Printable ASCII characters (32 .. 126)
 * and the white-space characters newline, carriage return and tab are
 * copied as-is; every other byte, i.e. non-ASCII bytes, DEL (127) and the
 * remaining control characters, is replaced with '?'.
 *
 * At most destsiz - 1 bytes are copied and dest is always nul-terminated,
 * unless destsiz is 0, in which case dest is left untouched.  Otherwise
 * the behavior is identical to strlcpy(), except that we don't bother with
 * a return value.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* the pre-decrement reserves one byte for the terminating nul */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;

		/*
		 * Keep printable ASCII characters.  The upper bound is 126, not
		 * 127: DEL is a control character and must not pass through.
		 */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
199