1/*
2 * psql - the PostgreSQL interactive terminal
3 *
4 * Copyright (c) 2000-2019, PostgreSQL Global Development Group
5 *
6 * src/bin/psql/stringutils.c
7 */
8#include "postgres_fe.h"
9
10#include <ctype.h>
11
12#include "common.h"
13#include "stringutils.h"
14
15
/*
 * Helper for strtokx(): terminate the token that ends just before 'p'.
 *
 * If 'p' is already at the end of the string nothing is changed and 'p' is
 * returned.  Otherwise a null is stored at 'p'; when the character at 'p' is
 * not in 'whitespace' (and so must not be lost) the remainder of the string
 * is first shifted up by one byte to make room.  The return value is the
 * position at which scanning for the next token should resume.
 *
 * The caller must guarantee there is room for the one-byte shift; strtokx()
 * does so by allocating twice the length of the source string.
 */
static char *
strtokx_terminate_token(char *p, const char *whitespace)
{
	if (*p == '\0')
		return p;				/* at end of string, so no extra work */

	if (!strchr(whitespace, *p))
		memmove(p + 1, p, strlen(p) + 1);
	*p = '\0';

	return p + 1;
}

/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -			string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -		set of non-whitespace separator characters (or NULL)
 * quote -		set of characters that can quote a token (NULL if none)
 * escape -		character that can quote quotes (0 if none)
 * e_strings -	if true, treat E'...' syntax as a valid token
 * del_quotes - if true, strip quotes from the returned token, else return
 *				it exactly as found in the string
 * encoding -	the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both true is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation; the
 * function works on a private copy.  Returned tokens point into that copy,
 * which is freed when a new string is supplied or when NULL is returned at
 * end of input, so callers must copy any token they want to keep longer.
 *
 * The parsing state lives in static variables, so only one string can be
 * tokenized at a time.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
		const char *whitespace,
		const char *delim,
		const char *quote,
		char escape,
		bool e_strings,
		bool del_quotes,
		int encoding)
{
	static char *storage = NULL;	/* store the local copy of the users
									 * string here */
	static char *string = NULL; /* pointer into storage where to continue on
								 * next call */

	size_t		offset;
	char	   *start;
	char	   *p;

	if (s)
	{
		free(storage);

		/*
		 * We may need extra space to insert delimiter nulls for adjacent
		 * tokens.  2X the space is a gross overestimate, but it's unlikely
		 * that this code will be used on huge strings anyway.
		 */
		storage = pg_malloc(2 * strlen(s) + 1);
		strcpy(storage, s);
		string = storage;
	}

	if (!storage)
		return NULL;

	/* skip leading whitespace */
	offset = strspn(string, whitespace);
	start = &string[offset];

	/* end of string reached? */
	if (*start == '\0')
	{
		/* technically we don't need to free here, but we're nice */
		free(storage);
		storage = NULL;
		string = NULL;
		return NULL;
	}

	/* test if delimiter character: it is a one-character token by itself */
	if (delim && strchr(delim, *start))
	{
		string = strtokx_terminate_token(start + 1, whitespace);
		return start;
	}

	/* check for E string */
	p = start;
	if (e_strings &&
		(*p == 'E' || *p == 'e') &&
		p[1] == '\'')
	{
		quote = "'";
		escape = '\\';			/* if std strings before, not any more */
		p++;
	}

	/* test if quoting character */
	if (quote && strchr(quote, *p))
	{
		/* okay, we have a quoted token, now scan for the closer */
		char		thisquote = *p++;

		while (*p)
		{
			int			len;

			if (*p == escape && p[1] != '\0')
				p++;			/* process escaped anything */
			else if (*p == thisquote && p[1] == thisquote)
				p++;			/* process doubled quote */
			else if (*p == thisquote)
			{
				p++;			/* skip trailing quote */
				break;
			}

			/*
			 * Step over one (possibly multibyte) character.  *p is known
			 * not to be '\0' here.  Never advance beyond the terminator,
			 * even if the reported character length says the character
			 * continues past it (truncated or invalid multibyte input).
			 */
			len = PQmblen(p, encoding);
			do
			{
				p++;
			} while (--len > 0 && *p != '\0');
		}

		/*
		 * If not at end of string, a null is inserted to terminate the
		 * returned token.
		 */
		string = strtokx_terminate_token(p, whitespace);

		/* Clean up the token if caller wants that */
		if (del_quotes)
			strip_quotes(start, thisquote, escape, encoding);

		return start;
	}

	/*
	 * Otherwise no quoting character.  Scan till next whitespace, delimiter
	 * or quote.  NB: at this point, *start is known not to be '\0',
	 * whitespace, delim, or quote, so we will consume at least one character.
	 */
	offset = strcspn(start, whitespace);

	if (delim)
	{
		size_t		offset2 = strcspn(start, delim);

		if (offset > offset2)
			offset = offset2;
	}

	if (quote)
	{
		size_t		offset2 = strcspn(start, quote);

		if (offset > offset2)
			offset = offset2;
	}

	p = start + offset;

	/*
	 * If not at end of string, a null is inserted to terminate the returned
	 * token.
	 */
	string = strtokx_terminate_token(p, whitespace);

	return start;
}
227
228
/*
 * strip_quotes
 *
 * Remove quotes from the string at *source.  Leading and trailing occurrences
 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
 * significance of next character.
 *
 * source -		string to clean up; must not be NULL
 * quote -		the quoting character; must not be '\0'
 * escape -		the escape character, or 0 if none
 * encoding -	the active character-set encoding
 *
 * Note that the source string is overwritten in-place.  The result is never
 * longer than the input.
 */
void
strip_quotes(char *source, char quote, char escape, int encoding)
{
	char	   *src;
	char	   *dst;

	Assert(source != NULL);
	Assert(quote != '\0');

	src = dst = source;

	if (*src && *src == quote)
		src++;					/* skip leading quote */

	while (*src)
	{
		char		c = *src;
		int			i;

		if (c == quote && src[1] == '\0')
			break;				/* skip trailing quote */
		else if (c == quote && src[1] == quote)
			src++;				/* process doubled quote */
		else if (c == escape && src[1] != '\0')
			src++;				/* process escaped character */

		/*
		 * Copy one (possibly multibyte) character.  *src is known not to be
		 * '\0' here.  Stop at the terminator even if the reported character
		 * length claims more bytes, so that a truncated or invalid multibyte
		 * sequence at the end of the string cannot make us run off the end.
		 */
		i = PQmblen(src, encoding);
		do
		{
			*dst++ = *src++;
		} while (--i > 0 && *src != '\0');
	}

	*dst = '\0';
}
272
273
/*
 * quote_if_needed
 *
 * Opposite of strip_quotes().  If "source" denotes itself literally without
 * quoting or escaping, returns NULL.  Otherwise, returns a malloc'd copy with
 * quoting and escaping applied:
 *
 * source -			string to parse; must not be NULL
 * entails_quote -	any of these present?  need outer quotes
 * quote -			doubled within string, affixed to both ends; must not
 *					be '\0'
 * escape -			doubled within string
 * encoding -		the active character-set encoding
 *
 * A non-NULL result is owned by the caller, who must free() it.
 *
 * Do not use this as a substitute for PQescapeStringConn().  Use it for
 * strings to be parsed by strtokx() or psql_scan_slash_option().
 */
char *
quote_if_needed(const char *source, const char *entails_quote,
				char quote, char escape, int encoding)
{
	const char *src;
	char	   *ret;
	char	   *dst;
	bool		need_quotes = false;

	Assert(source != NULL);
	Assert(quote != '\0');

	src = source;

	/*
	 * Worst case: every source byte is doubled, plus the two outer quotes
	 * and the terminating null.
	 */
	dst = ret = pg_malloc(2 * strlen(src) + 3); /* excess */

	*dst++ = quote;

	while (*src)
	{
		char		c = *src;
		int			i;

		if (c == quote)
		{
			need_quotes = true;
			*dst++ = quote;
		}
		else if (c == escape)
		{
			need_quotes = true;
			*dst++ = escape;
		}
		else if (strchr(entails_quote, c))
			need_quotes = true;

		/*
		 * Copy one (possibly multibyte) character.  Stop at the terminator
		 * even if the reported character length claims more bytes;
		 * otherwise a truncated or invalid multibyte sequence at the end of
		 * the input would make us read past the source string and write
		 * past the space computed above.
		 */
		i = PQmblen(src, encoding);
		do
		{
			*dst++ = *src++;
		} while (--i > 0 && *src != '\0');
	}

	*dst++ = quote;
	*dst = '\0';

	if (!need_quotes)
	{
		free(ret);
		ret = NULL;
	}

	return ret;
}
341