1 | /*----------------------------------------------------------------------- |
2 | * ascii.c |
3 | * The PostgreSQL routine for string to ascii conversion. |
4 | * |
5 | * Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group |
6 | * |
7 | * IDENTIFICATION |
8 | * src/backend/utils/adt/ascii.c |
9 | * |
10 | *----------------------------------------------------------------------- |
11 | */ |
12 | #include "postgres.h" |
13 | |
14 | #include "mb/pg_wchar.h" |
15 | #include "utils/ascii.h" |
16 | #include "utils/builtins.h" |
17 | |
/* Transliterates the bytes in [src, src_end) to ASCII for encoding enc, writing one byte per input byte to dest. */
static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
						unsigned char *dest, int enc);
/* Runs pg_to_ascii over the contents of a text value in place and returns that same value. */
static text *encode_to_ascii(text *data, int enc);
21 | |
22 | |
23 | /* ---------- |
24 | * to_ascii |
25 | * ---------- |
26 | */ |
27 | static void |
28 | pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc) |
29 | { |
30 | unsigned char *x; |
31 | const unsigned char *ascii; |
32 | int range; |
33 | |
34 | /* |
35 | * relevant start for an encoding |
36 | */ |
37 | #define RANGE_128 128 |
38 | #define RANGE_160 160 |
39 | |
40 | if (enc == PG_LATIN1) |
41 | { |
42 | /* |
43 | * ISO-8859-1 <range: 160 -- 255> |
44 | */ |
45 | ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty" ; |
46 | range = RANGE_160; |
47 | } |
48 | else if (enc == PG_LATIN2) |
49 | { |
50 | /* |
51 | * ISO-8859-2 <range: 160 -- 255> |
52 | */ |
53 | ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt." ; |
54 | range = RANGE_160; |
55 | } |
56 | else if (enc == PG_LATIN9) |
57 | { |
58 | /* |
59 | * ISO-8859-15 <range: 160 -- 255> |
60 | */ |
61 | ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty" ; |
62 | range = RANGE_160; |
63 | } |
64 | else if (enc == PG_WIN1250) |
65 | { |
66 | /* |
67 | * Window CP1250 <range: 128 -- 255> |
68 | */ |
69 | ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt " ; |
70 | range = RANGE_128; |
71 | } |
72 | else |
73 | { |
74 | ereport(ERROR, |
75 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
76 | errmsg("encoding conversion from %s to ASCII not supported" , |
77 | pg_encoding_to_char(enc)))); |
78 | return; /* keep compiler quiet */ |
79 | } |
80 | |
81 | /* |
82 | * Encode |
83 | */ |
84 | for (x = src; x < src_end; x++) |
85 | { |
86 | if (*x < 128) |
87 | *dest++ = *x; |
88 | else if (*x < range) |
89 | *dest++ = ' '; /* bogus 128 to 'range' */ |
90 | else |
91 | *dest++ = ascii[*x - range]; |
92 | } |
93 | } |
94 | |
95 | /* ---------- |
96 | * encode text |
97 | * |
98 | * The text datum is overwritten in-place, therefore this coding method |
99 | * cannot support conversions that change the string length! |
100 | * ---------- |
101 | */ |
102 | static text * |
103 | encode_to_ascii(text *data, int enc) |
104 | { |
105 | pg_to_ascii((unsigned char *) VARDATA(data), /* src */ |
106 | (unsigned char *) (data) + VARSIZE(data), /* src end */ |
107 | (unsigned char *) VARDATA(data), /* dest */ |
108 | enc); /* encoding */ |
109 | |
110 | return data; |
111 | } |
112 | |
113 | /* ---------- |
114 | * convert to ASCII - enc is set as 'name' arg. |
115 | * ---------- |
116 | */ |
117 | Datum |
118 | to_ascii_encname(PG_FUNCTION_ARGS) |
119 | { |
120 | text *data = PG_GETARG_TEXT_P_COPY(0); |
121 | char *encname = NameStr(*PG_GETARG_NAME(1)); |
122 | int enc = pg_char_to_encoding(encname); |
123 | |
124 | if (enc < 0) |
125 | ereport(ERROR, |
126 | (errcode(ERRCODE_UNDEFINED_OBJECT), |
127 | errmsg("%s is not a valid encoding name" , encname))); |
128 | |
129 | PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); |
130 | } |
131 | |
132 | /* ---------- |
133 | * convert to ASCII - enc is set as int4 |
134 | * ---------- |
135 | */ |
136 | Datum |
137 | to_ascii_enc(PG_FUNCTION_ARGS) |
138 | { |
139 | text *data = PG_GETARG_TEXT_P_COPY(0); |
140 | int enc = PG_GETARG_INT32(1); |
141 | |
142 | if (!PG_VALID_ENCODING(enc)) |
143 | ereport(ERROR, |
144 | (errcode(ERRCODE_UNDEFINED_OBJECT), |
145 | errmsg("%d is not a valid encoding code" , enc))); |
146 | |
147 | PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); |
148 | } |
149 | |
150 | /* ---------- |
151 | * convert to ASCII - current enc is DatabaseEncoding |
152 | * ---------- |
153 | */ |
154 | Datum |
155 | to_ascii_default(PG_FUNCTION_ARGS) |
156 | { |
157 | text *data = PG_GETARG_TEXT_P_COPY(0); |
158 | int enc = GetDatabaseEncoding(); |
159 | |
160 | PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); |
161 | } |
162 | |
/* ----------
 * ascii_safe_strlcpy
 *
 * Bounded copy of a string in an arbitrary backend-safe encoding that
 * sanitizes the bytes as it goes.  At most destsiz - 1 bytes of src are
 * transferred and the result is always NUL-terminated, unless destsiz is
 * zero, in which case dest is left untouched.
 *
 * Bytes with values 32 through 127, as well as newline, carriage return
 * and tab, are copied as they are; every other byte is written as '?'.
 * Apart from that the behavior follows strlcpy(), except that nothing is
 * returned.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	size_t		room;

	/* a zero-sized buffer cannot even hold the terminator */
	if (destsiz == 0)
		return;

	/* one slot is reserved for the trailing NUL; fill the remainder */
	for (room = destsiz - 1; room > 0; room--)
	{
		/* unsigned, so the range test below also works for high bytes */
		unsigned char byte = (unsigned char) *src++;
		int			keep;

		if (byte == '\0')
			break;

		keep = (byte >= 32 && byte <= 127) ||
			byte == '\n' || byte == '\r' || byte == '\t';

		*dest++ = keep ? (char) byte : '?';
	}

	*dest = '\0';
}
199 | |