stringutils.c source code [PostgreSQL/src/bin/psql/stringutils.c]

1	/*
2	* psql - the PostgreSQL interactive terminal
3	*
4	* Copyright (c) 2000-2019, PostgreSQL Global Development Group
5	*
6	* src/bin/psql/stringutils.c
7	*/
8	#include "postgres_fe.h"
9
10	#include <ctype.h>
11
12	#include "common.h"
13	#include "stringutils.h"
14
15
/*
 * Helper for strtokx(): terminate the token that ends just before 'p' and
 * return the position from which the next scan should resume.
 *
 * If 'p' is already at the end of the string there is nothing to do.
 * Otherwise a NUL must be written at 'p'.  When *p is a whitespace character
 * it can simply be overwritten; if not, the remainder of the string
 * (including its terminator) is first shifted right by one byte so that no
 * data is lost.  The caller's buffer must have room for that shift.
 */
static char *
strtokx_terminate_token(char *p, const char *whitespace)
{
	if (*p == '\0')
		return p;				/* at end of string, so no extra work */

	if (strchr(whitespace, *p) == NULL)
		memmove(p + 1, p, strlen(p) + 1);
	*p = '\0';

	return p + 1;
}

/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -			string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -		set of non-whitespace separator characters (or NULL)
 * quote -		set of characters that can quote a token (NULL if none)
 * escape -		character that can quote quotes (0 if none)
 * e_strings -	if true, treat E'...' syntax as a valid token
 * del_quotes - if true, strip quotes from the returned token, else return
 *				it exactly as found in the string
 * encoding -	the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both true is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation; the
 * function works on a private copy.  The returned token points into that
 * copy, which is freed when the end of the string is reached or when a new
 * string is supplied.  Because the scan state lives in static variables,
 * only one string can be tokenized at a time.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
		const char *whitespace,
		const char *delim,
		const char *quote,
		char escape,
		bool e_strings,
		bool del_quotes,
		int encoding)
{
	static char *storage = NULL;	/* local, writable copy of the user's
									 * string */
	static char *string = NULL; /* pointer into storage where to continue on
								 * next call */

	size_t		offset;
	char	   *start;
	char	   *p;

	if (s)
	{
		free(storage);

		/*
		 * We may need extra space to insert delimiter nulls for adjacent
		 * tokens.  2X the space is a gross overestimate, but it's unlikely
		 * that this code will be used on huge strings anyway.
		 */
		storage = pg_malloc(2 * strlen(s) + 1);
		strcpy(storage, s);
		string = storage;
	}

	if (!storage)
		return NULL;

	/* skip leading whitespace */
	offset = strspn(string, whitespace);
	start = &string[offset];

	/* end of string reached? */
	if (*start == '\0')
	{
		/* technically we don't need to free here, but we're nice */
		free(storage);
		storage = NULL;
		string = NULL;
		return NULL;
	}

	/* test if delimiter character: it forms a one-character token */
	if (delim && strchr(delim, *start))
	{
		string = strtokx_terminate_token(start + 1, whitespace);
		return start;
	}

	/* check for E string */
	p = start;
	if (e_strings &&
		(*p == 'E' || *p == 'e') &&
		p[1] == '\'')
	{
		quote = "'";
		escape = '\\';			/* if std strings before, not any more */
		p++;
	}

	/* test if quoting character */
	if (quote && strchr(quote, *p))
	{
		/* okay, we have a quoted token, now scan for the closer */
		char		thisquote = *p++;

		while (*p != '\0')
		{
			int			len;

			if (*p == escape && p[1] != '\0')
				p++;			/* process escaped anything */
			else if (*p == thisquote && p[1] == thisquote)
				p++;			/* process doubled quote */
			else if (*p == thisquote)
			{
				p++;			/* skip trailing quote */
				break;
			}

			/*
			 * Advance over one character, but never step past the
			 * terminating NUL.  (Assumption: PQmblen() may report more
			 * bytes than actually remain when the input ends in the middle
			 * of a multibyte character; the clamp is harmless otherwise.)
			 */
			for (len = PQmblen(p, encoding); len > 0 && *p != '\0'; len--)
				p++;
		}

		/* terminate the token before (optionally) rewriting it in place */
		string = strtokx_terminate_token(p, whitespace);

		/* Clean up the token if caller wants that */
		if (del_quotes)
			strip_quotes(start, thisquote, escape, encoding);

		return start;
	}

	/*
	 * Otherwise no quoting character.  Scan till next whitespace, delimiter
	 * or quote.  NB: at this point, *start is known not to be '\0',
	 * whitespace, delim, or quote, so we will consume at least one character.
	 */
	offset = strcspn(start, whitespace);

	if (delim)
	{
		size_t		delim_offset = strcspn(start, delim);

		if (delim_offset < offset)
			offset = delim_offset;
	}

	if (quote)
	{
		size_t		quote_offset = strcspn(start, quote);

		if (quote_offset < offset)
			offset = quote_offset;
	}

	string = strtokx_terminate_token(start + offset, whitespace);

	return start;
}
227
228
/*
 * strip_quotes
 *
 * Remove quotes from the string at *source.  Leading and trailing occurrences
 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
 * significance of next character.
 *
 * source -		NUL-terminated string to rewrite; must not be NULL
 * quote -		the quoting character; must not be '\0'
 * escape -		the escape character, or 0 if none
 * encoding -	the active character-set encoding, passed to PQmblen()
 *
 * Note that the source string is overwritten in-place.  The result is never
 * longer than the input.
 */
void
strip_quotes(char *source, char quote, char escape, int encoding)
{
	char	   *src;
	char	   *dst;

	Assert(source != NULL);
	Assert(quote != '\0');

	src = dst = source;

	if (*src && *src == quote)
		src++;					/* skip leading quote */

	while (*src)
	{
		char		c = *src;
		int			i;

		if (c == quote && src[1] == '\0')
			break;				/* skip trailing quote */
		else if (c == quote && src[1] == quote)
			src++;				/* process doubled quote */
		else if (c == escape && src[1] != '\0')
			src++;				/* process escaped character */

		/*
		 * Copy one whole character, but stop at the terminating NUL so that
		 * a string ending in the middle of a multibyte character cannot make
		 * us read or write past the end.  (Assumption: PQmblen() may report
		 * more bytes than actually remain in that case.)
		 */
		i = PQmblen(src, encoding);
		while (i-- > 0 && *src != '\0')
			*dst++ = *src++;
	}

	*dst = '\0';
}
272
273
/*
 * quote_if_needed
 *
 * Opposite of strip_quotes().  If "source" denotes itself literally without
 * quoting or escaping, returns NULL.  Otherwise, returns a malloc'd copy with
 * quoting and escaping applied:
 *
 * source -			string to parse; must not be NULL
 * entails_quote -	any of these present?  need outer quotes
 * quote -			doubled within string, affixed to both ends; must not
 *					be '\0'
 * escape -			doubled within string
 * encoding -		the active character-set encoding
 *
 * A non-NULL result is allocated with pg_malloc() and must be released by
 * the caller with free().
 *
 * Do not use this as a substitute for PQescapeStringConn().  Use it for
 * strings to be parsed by strtokx() or psql_scan_slash_option().
 */
char *
quote_if_needed(const char *source, const char *entails_quote,
				char quote, char escape, int encoding)
{
	const char *src;
	char	   *ret;
	char	   *dst;
	bool		need_quotes = false;

	Assert(source != NULL);
	Assert(quote != '\0');

	src = source;

	/*
	 * Worst case every source byte gets one extra byte in front of it, plus
	 * the two enclosing quotes and the terminating NUL.
	 */
	dst = ret = pg_malloc(2 * strlen(src) + 3); /* excess */

	*dst++ = quote;

	while (*src)
	{
		char		c = *src;
		int			i;

		if (c == quote)
		{
			need_quotes = true;
			*dst++ = quote;		/* double the quote character */
		}
		else if (c == escape)
		{
			need_quotes = true;
			*dst++ = escape;	/* double the escape character */
		}
		else if (strchr(entails_quote, c))
			need_quotes = true;

		/*
		 * Copy one whole character, but stop at the terminating NUL so that
		 * a string ending in the middle of a multibyte character cannot make
		 * us overrun either the source or the output buffer.  (Assumption:
		 * PQmblen() may report more bytes than actually remain in that
		 * case.)
		 */
		i = PQmblen(src, encoding);
		while (i-- > 0 && *src != '\0')
			*dst++ = *src++;
	}

	*dst++ = quote;
	*dst = '\0';

	/* the quoted copy was built speculatively; discard it if not needed */
	if (!need_quotes)
	{
		free(ret);
		ret = NULL;
	}

	return ret;
}
341

Browse the source code of PostgreSQL/src/bin/psql/stringutils.c