| 1 | /*----------------------------------------------------------------------- |
| 2 | * ascii.c |
| 3 | * The PostgreSQL routine for string to ascii conversion. |
| 4 | * |
| 5 | * Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group |
| 6 | * |
| 7 | * IDENTIFICATION |
| 8 | * src/backend/utils/adt/ascii.c |
| 9 | * |
| 10 | *----------------------------------------------------------------------- |
| 11 | */ |
| 12 | #include "postgres.h" |
| 13 | |
| 14 | #include "mb/pg_wchar.h" |
| 15 | #include "utils/ascii.h" |
| 16 | #include "utils/builtins.h" |
| 17 | |
| 18 | static void pg_to_ascii(unsigned char *src, unsigned char *src_end, |
| 19 | unsigned char *dest, int enc); |
| 20 | static text *encode_to_ascii(text *data, int enc); |
| 21 | |
| 22 | |
| 23 | /* ---------- |
| 24 | * to_ascii |
| 25 | * ---------- |
| 26 | */ |
| 27 | static void |
| 28 | pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc) |
| 29 | { |
| 30 | unsigned char *x; |
| 31 | const unsigned char *ascii; |
| 32 | int range; |
| 33 | |
| 34 | /* |
| 35 | * relevant start for an encoding |
| 36 | */ |
| 37 | #define RANGE_128 128 |
| 38 | #define RANGE_160 160 |
| 39 | |
| 40 | if (enc == PG_LATIN1) |
| 41 | { |
| 42 | /* |
| 43 | * ISO-8859-1 <range: 160 -- 255> |
| 44 | */ |
| 45 | ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty" ; |
| 46 | range = RANGE_160; |
| 47 | } |
| 48 | else if (enc == PG_LATIN2) |
| 49 | { |
| 50 | /* |
| 51 | * ISO-8859-2 <range: 160 -- 255> |
| 52 | */ |
| 53 | ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt." ; |
| 54 | range = RANGE_160; |
| 55 | } |
| 56 | else if (enc == PG_LATIN9) |
| 57 | { |
| 58 | /* |
| 59 | * ISO-8859-15 <range: 160 -- 255> |
| 60 | */ |
| 61 | ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty" ; |
| 62 | range = RANGE_160; |
| 63 | } |
| 64 | else if (enc == PG_WIN1250) |
| 65 | { |
| 66 | /* |
| 67 | * Window CP1250 <range: 128 -- 255> |
| 68 | */ |
| 69 | ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt " ; |
| 70 | range = RANGE_128; |
| 71 | } |
| 72 | else |
| 73 | { |
| 74 | ereport(ERROR, |
| 75 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 76 | errmsg("encoding conversion from %s to ASCII not supported" , |
| 77 | pg_encoding_to_char(enc)))); |
| 78 | return; /* keep compiler quiet */ |
| 79 | } |
| 80 | |
| 81 | /* |
| 82 | * Encode |
| 83 | */ |
| 84 | for (x = src; x < src_end; x++) |
| 85 | { |
| 86 | if (*x < 128) |
| 87 | *dest++ = *x; |
| 88 | else if (*x < range) |
| 89 | *dest++ = ' '; /* bogus 128 to 'range' */ |
| 90 | else |
| 91 | *dest++ = ascii[*x - range]; |
| 92 | } |
| 93 | } |
| 94 | |
| 95 | /* ---------- |
| 96 | * encode text |
| 97 | * |
| 98 | * The text datum is overwritten in-place, therefore this coding method |
| 99 | * cannot support conversions that change the string length! |
| 100 | * ---------- |
| 101 | */ |
| 102 | static text * |
| 103 | encode_to_ascii(text *data, int enc) |
| 104 | { |
| 105 | pg_to_ascii((unsigned char *) VARDATA(data), /* src */ |
| 106 | (unsigned char *) (data) + VARSIZE(data), /* src end */ |
| 107 | (unsigned char *) VARDATA(data), /* dest */ |
| 108 | enc); /* encoding */ |
| 109 | |
| 110 | return data; |
| 111 | } |
| 112 | |
| 113 | /* ---------- |
| 114 | * convert to ASCII - enc is set as 'name' arg. |
| 115 | * ---------- |
| 116 | */ |
| 117 | Datum |
| 118 | to_ascii_encname(PG_FUNCTION_ARGS) |
| 119 | { |
| 120 | text *data = PG_GETARG_TEXT_P_COPY(0); |
| 121 | char *encname = NameStr(*PG_GETARG_NAME(1)); |
| 122 | int enc = pg_char_to_encoding(encname); |
| 123 | |
| 124 | if (enc < 0) |
| 125 | ereport(ERROR, |
| 126 | (errcode(ERRCODE_UNDEFINED_OBJECT), |
| 127 | errmsg("%s is not a valid encoding name" , encname))); |
| 128 | |
| 129 | PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); |
| 130 | } |
| 131 | |
| 132 | /* ---------- |
| 133 | * convert to ASCII - enc is set as int4 |
| 134 | * ---------- |
| 135 | */ |
| 136 | Datum |
| 137 | to_ascii_enc(PG_FUNCTION_ARGS) |
| 138 | { |
| 139 | text *data = PG_GETARG_TEXT_P_COPY(0); |
| 140 | int enc = PG_GETARG_INT32(1); |
| 141 | |
| 142 | if (!PG_VALID_ENCODING(enc)) |
| 143 | ereport(ERROR, |
| 144 | (errcode(ERRCODE_UNDEFINED_OBJECT), |
| 145 | errmsg("%d is not a valid encoding code" , enc))); |
| 146 | |
| 147 | PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); |
| 148 | } |
| 149 | |
| 150 | /* ---------- |
| 151 | * convert to ASCII - current enc is DatabaseEncoding |
| 152 | * ---------- |
| 153 | */ |
| 154 | Datum |
| 155 | to_ascii_default(PG_FUNCTION_ARGS) |
| 156 | { |
| 157 | text *data = PG_GETARG_TEXT_P_COPY(0); |
| 158 | int enc = GetDatabaseEncoding(); |
| 159 | |
| 160 | PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); |
| 161 | } |
| 162 | |
/* ----------
 * ascii_safe_strlcpy
 *
 * Bounded string copy that sanitizes as it goes.  At most destsiz - 1 bytes
 * of src are copied to dest, and dest is always NUL-terminated unless
 * destsiz is zero, in which case dest is not touched at all.
 *
 * Bytes in the range 32..127 and the white-space characters '\n', '\r' and
 * '\t' are copied as they are; every other byte (other control characters
 * and anything above 127) is stored as '?'.  Apart from that substitution
 * this works like strlcpy(), except that nothing is returned.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	size_t		room;

	/* Without any space there is not even room for the terminator */
	if (destsiz == 0)
		return;

	/* One byte stays reserved for the trailing NUL */
	for (room = destsiz - 1; room > 0; room--)
	{
		/* look at the byte as unsigned so the range tests behave */
		unsigned char c = *src++;

		if (c == '\0')
			break;

		if ((c >= 32 && c <= 127) || c == '\n' || c == '\r' || c == '\t')
			*dest++ = c;		/* acceptable as-is */
		else
			*dest++ = '?';		/* anything else is masked */
	}

	*dest = '\0';
}
| 199 | |