tokenize.c source code [sqlite/src/tokenize.c]

1	/*
2	** 2001 September 15
3	**
4	** The author disclaims copyright to this source code. In place of
5	** a legal notice, here is a blessing:
6	**
7	** May you do good and not evil.
8	** May you find forgiveness for yourself and forgive others.
9	** May you share freely, never taking more than you give.
10	**
11	*************************************************************************
12	** A tokenizer for SQL
13	**
14	** This file contains C code that splits an SQL input string up into
15	** individual tokens and sends those tokens one-by-one over to the
16	** parser for analysis.
17	*/
18	#include "sqliteInt.h"
19	#include <stdlib.h>
20
/* Character classes for tokenizing
**
** In the sqlite3GetToken() function, a switch() on aiClass[c] is implemented
** using a lookup table, whereas a switch() directly on c uses a binary search.
** The lookup table is much faster.  To maximize speed, and to ensure that
** a lookup table is used, all of the classes need to be small integers and
** all of them need to be used within the switch.
*/
#define CC_X          0    /* The letter 'x', or start of BLOB literal */
#define CC_KYWD0      1    /* First letter of a keyword */
#define CC_KYWD       2    /* Alphabetics or '_'.  Usable in a keyword */
#define CC_DIGIT      3    /* Digits */
#define CC_DOLLAR     4    /* '$' */
#define CC_VARALPHA   5    /* '@', '#', ':'.  Alphabetic SQL variables */
#define CC_VARNUM     6    /* '?'.  Numeric SQL variables */
#define CC_SPACE      7    /* Space characters */
#define CC_QUOTE      8    /* '"', '\'', or '`'.  String literals, quoted ids */
#define CC_QUOTE2     9    /* '['.   [...] style quoted ids */
#define CC_PIPE      10    /* '|'.   Bitwise OR or concatenate */
#define CC_MINUS     11    /* '-'.  Minus or SQL-style comment */
#define CC_LT        12    /* '<'.  Part of < or <= or <> */
#define CC_GT        13    /* '>'.  Part of > or >= */
#define CC_EQ        14    /* '='.  Part of = or == */
#define CC_BANG      15    /* '!'.  Part of != */
#define CC_SLASH     16    /* '/'.  / or c-style comment */
#define CC_LP        17    /* '(' */
#define CC_RP        18    /* ')' */
#define CC_SEMI      19    /* ';' */
#define CC_PLUS      20    /* '+' */
#define CC_STAR      21    /* '*' */
#define CC_PERCENT   22    /* '%' */
#define CC_COMMA     23    /* ',' */
#define CC_AND       24    /* '&' */
#define CC_TILDA     25    /* '~' */
#define CC_DOT       26    /* '.' */
#define CC_ID        27    /* unicode characters usable in IDs */
#define CC_ILLEGAL   28    /* Illegal character */
#define CC_NUL       29    /* 0x00 */
#define CC_BOM       30    /* First byte of UTF8 BOM:  0xEF 0xBB 0xBF */
60
/*
** aiClass[c] is the character class (one of the CC_* codes, stored as a
** small integer) of input byte c.  Exactly one of SQLITE_ASCII or
** SQLITE_EBCDIC is expected to be defined so that the table receives
** 256 entries.
*/
static const unsigned char aiClass[] = {
#ifdef SQLITE_ASCII
/*         x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf */
/* 0x */   29, 28, 28, 28, 28, 28, 28, 28, 28,  7,  7, 28,  7,  7, 28, 28,
/* 1x */   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 2x */    7, 15,  8,  5,  4, 22, 24,  8, 17, 18, 21, 20, 23, 11, 26, 16,
/* 3x */    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  5, 19, 12, 14, 13,  6,
/* 4x */    5,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 5x */    1,  1,  1,  1,  1,  1,  1,  1,  0,  2,  2,  9, 28, 28, 28,  2,
/* 6x */    8,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 7x */    1,  1,  1,  1,  1,  1,  1,  1,  0,  2,  2, 28, 10, 28, 25, 28,
/* 8x */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* 9x */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Ax */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Bx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Cx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Dx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
/* Ex */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 30,
/* Fx */   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27
#endif
#ifdef SQLITE_EBCDIC
/*         x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf */
/* 0x */   29, 28, 28, 28, 28,  7, 28, 28, 28, 28, 28, 28,  7,  7, 28, 28,
/* 1x */   28, 28, 28, 28, 28,  7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 2x */   28, 28, 28, 28, 28,  7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 3x */   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
/* 4x */    7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 26, 12, 17, 20, 10,
/* 5x */   24, 28, 28, 28, 28, 28, 28, 28, 28, 28, 15,  4, 21, 18, 19, 28,
/* 6x */   11, 16, 28, 28, 28, 28, 28, 28, 28, 28, 28, 23, 22,  2, 13,  6,
/* 7x */   28, 28, 28, 28, 28, 28, 28, 28, 28,  8,  5,  5,  5,  8, 14,  8,
/* 8x */   28,  1,  1,  1,  1,  1,  1,  1,  1,  1, 28, 28, 28, 28, 28, 28,
/* 9x */   28,  1,  1,  1,  1,  1,  1,  1,  1,  1, 28, 28, 28, 28, 28, 28,
/* Ax */   28, 25,  1,  1,  1,  1,  1,  0,  2,  2, 28, 28, 28, 28, 28, 28,
/* Bx */   28, 28, 28, 28, 28, 28, 28, 28, 28, 28,  9, 28, 28, 28, 28, 28,
/* Cx */   28,  1,  1,  1,  1,  1,  1,  1,  1,  1, 28, 28, 28, 28, 28, 28,
/* Dx */   28,  1,  1,  1,  1,  1,  1,  1,  1,  1, 28, 28, 28, 28, 28, 28,
/* Ex */   28, 28,  1,  1,  1,  1,  1,  0,  2,  2, 28, 28, 28, 28, 28, 28,
/* Fx */    3,  3,  3,  3,  3,  3,  3,  3,  3,  3, 28, 28, 28, 28, 28, 28,
#endif
};
101
/*
** The charMap() macro maps alphabetic characters (only) into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  The mapping is only valid for alphabetics
** which are the only characters for which this feature is used.
**
** Used by keywordhash.h
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
136
137	/*
138	** The sqlite3KeywordCode function looks up an identifier to determine if
139	** it is a keyword. If it is a keyword, the token code of that keyword is
140	** returned. If the input is not a keyword, TK_ID is returned.
141	**
142	** The implementation of this routine was generated by a program,
143	** mkkeywordhash.c, located in the tool subdirectory of the distribution.
144	** The output of the mkkeywordhash.c program is written into a file
145	** named keywordhash.h and then included into this source file by
146	** the #include below.
147	*/
148	#include "keywordhash.h"
149
150
/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters,
** sqlite3IsIdChar[X] must be 1.
**
** For EBCDIC, the rules are more complex but have the same
** end result.
**
** Ticket #1066.  the SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
**
** Note: the EBCDIC form of IdChar() assigns its argument to a variable
** named "c" that must be in scope (and writable) at the point of use.
*/
#ifdef SQLITE_ASCII
#define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)(C)]&0x46)!=0)
#endif
#ifdef SQLITE_EBCDIC
const char sqlite3IsEbcdicIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
};
/* The table starts at code point 0x40, hence the c-0x40 index. */
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
#endif
188
189	/ Make the IdChar function accessible from ctime.c and alter.c /
190	int sqlite3IsIdChar(u8 c){ return IdChar(c); }
191
192	#ifndef SQLITE_OMIT_WINDOWFUNC
193	/*
194	** Return the id of the next token in string (*pz). Before returning, set
195	** (*pz) to point to the byte following the parsed token.
196	*/
197	static int getToken(const unsigned char **pz){
198	const unsigned char z = pz;
199	int t; / Token type to return /
200	do {
201	z += sqlite3GetToken(z, &t);
202	}while( t==TK_SPACE );
203	if( t==TK_ID
204	\|\| t==TK_STRING
205	\|\| t==TK_JOIN_KW
206	\|\| t==TK_WINDOW
207	\|\| t==TK_OVER
208	\|\| sqlite3ParserFallback(t)==TK_ID
209	){
210	t = TK_ID;
211	}
212	*pz = z;
213	return t;
214	}
215
216	/*
217	** The following three functions are called immediately after the tokenizer
218	** reads the keywords WINDOW, OVER and FILTER, respectively, to determine
219	** whether the token should be treated as a keyword or an SQL identifier.
220	** This cannot be handled by the usual lemon %fallback method, due to
221	** the ambiguity in some constructions. e.g.
222	**
223	** SELECT sum(x) OVER ...
224	**
225	** In the above, "OVER" might be a keyword, or it might be an alias for the
226	** sum(x) expression. If a "%fallback ID OVER" directive were added to
227	** grammar, then SQLite would always treat "OVER" as an alias, making it
228	** impossible to call a window-function without a FILTER clause.
229	**
230	** WINDOW is treated as a keyword if:
231	**
232	** * the following token is an identifier, or a keyword that can fallback
233	** to being an identifier, and
234	** * the token after than one is TK_AS.
235	**
236	** OVER is a keyword if:
237	**
238	** * the previous token was TK_RP, and
239	** * the next token is either TK_LP or an identifier.
240	**
241	** FILTER is a keyword if:
242	**
243	** * the previous token was TK_RP, and
244	** * the next token is TK_LP.
245	*/
246	static int analyzeWindowKeyword(const unsigned char *z){
247	int t;
248	t = getToken(&z);
249	if( t!=TK_ID ) return TK_ID;
250	t = getToken(&z);
251	if( t!=TK_AS ) return TK_ID;
252	return TK_WINDOW;
253	}
254	static int analyzeOverKeyword(const unsigned char z, int* lastToken){
255	if( lastToken==TK_RP ){
256	int t = getToken(&z);
257	if( t==TK_LP \|\| t==TK_ID ) return TK_OVER;
258	}
259	return TK_ID;
260	}
261	static int analyzeFilterKeyword(const unsigned char z, int* lastToken){
262	if( lastToken==TK_RP && getToken(&z)==TK_LP ){
263	return TK_FILTER;
264	}
265	return TK_ID;
266	}
267	#endif /* SQLITE_OMIT_WINDOWFUNC */
268
269	/*
270	** Return the length (in bytes) of the token that begins at z[0].
271	** Store the token type in *tokenType before returning.
272	*/
273	int sqlite3GetToken(const unsigned char z, int* *tokenType){
274	int i, c;
275	switch( aiClass[z] ){ /* Switch on the character-class of the first byte*
276	** of the token. See the comment on the CC_ defines
277	** above. */
278	case CC_SPACE: {
279	testcase( z[`0`]==`' '` );
280	testcase( z[`0`]==`'\t'` );
281	testcase( z[`0`]==`'\n'` );
282	testcase( z[`0`]==`'\f'` );
283	testcase( z[`0`]==`'\r'` );
284	for(i=`1`; sqlite3Isspace(z[i]); i++){}
285	*tokenType = TK_SPACE;
286	return i;
287	}
288	case CC_MINUS: {
289	if( z[`1`]==`'-'` ){
290	for(i=`2`; (c=z[i])!=`0` && c!=`'\n'`; i++){}
291	tokenType = TK_SPACE; /* IMP: R-22934-25134 /
292	return i;
293	}else if( z[`1`]==`'>'` ){
294	*tokenType = TK_PTR;
295	return `2` + (z[`2`]==`'>'`);
296	}
297	*tokenType = TK_MINUS;
298	return `1`;
299	}
300	case CC_LP: {
301	*tokenType = TK_LP;
302	return `1`;
303	}
304	case CC_RP: {
305	*tokenType = TK_RP;
306	return `1`;
307	}
308	case CC_SEMI: {
309	*tokenType = TK_SEMI;
310	return `1`;
311	}
312	case CC_PLUS: {
313	*tokenType = TK_PLUS;
314	return `1`;
315	}
316	case CC_STAR: {
317	*tokenType = TK_STAR;
318	return `1`;
319	}
320	case CC_SLASH: {
321	if( z[`1`]!=`'*'` \|\| z[`2`]==`0` ){
322	*tokenType = TK_SLASH;
323	return `1`;
324	}
325	for(i=`3`, c=z[`2`]; (c!=`'*'` \|\| z[i]!=`'/'`) && (c=z[i])!=`0`; i++){}
326	if( c ) i++;
327	tokenType = TK_SPACE; /* IMP: R-22934-25134 /
328	return i;
329	}
330	case CC_PERCENT: {
331	*tokenType = TK_REM;
332	return `1`;
333	}
334	case CC_EQ: {
335	*tokenType = TK_EQ;
336	return `1` + (z[`1`]==`'='`);
337	}
338	case CC_LT: {
339	if( (c=z[`1`])==`'='` ){
340	*tokenType = TK_LE;
341	return `2`;
342	}else if( c==`'>'` ){
343	*tokenType = TK_NE;
344	return `2`;
345	}else if( c==`'<'` ){
346	*tokenType = TK_LSHIFT;
347	return `2`;
348	}else{
349	*tokenType = TK_LT;
350	return `1`;
351	}
352	}
353	case CC_GT: {
354	if( (c=z[`1`])==`'='` ){
355	*tokenType = TK_GE;
356	return `2`;
357	}else if( c==`'>'` ){
358	*tokenType = TK_RSHIFT;
359	return `2`;
360	}else{
361	*tokenType = TK_GT;
362	return `1`;
363	}
364	}
365	case CC_BANG: {
366	if( z[`1`]!=`'='` ){
367	*tokenType = TK_ILLEGAL;
368	return `1`;
369	}else{
370	*tokenType = TK_NE;
371	return `2`;
372	}
373	}
374	case CC_PIPE: {
375	if( z[`1`]!=`'\|'` ){
376	*tokenType = TK_BITOR;
377	return `1`;
378	}else{
379	*tokenType = TK_CONCAT;
380	return `2`;
381	}
382	}
383	case CC_COMMA: {
384	*tokenType = TK_COMMA;
385	return `1`;
386	}
387	case CC_AND: {
388	*tokenType = TK_BITAND;
389	return `1`;
390	}
391	case CC_TILDA: {
392	*tokenType = TK_BITNOT;
393	return `1`;
394	}
395	case CC_QUOTE: {
396	int delim = z[`0`];
397	testcase( delim=='`' );
398	testcase( delim==`'\''` );
399	testcase( delim==`'"'` );
400	for(i=`1`; (c=z[i])!=`0`; i++){
401	if( c==delim ){
402	if( z[i+`1`]==delim ){
403	i++;
404	}else{
405	break;
406	}
407	}
408	}
409	if( c==`'\''` ){
410	*tokenType = TK_STRING;
411	return i+`1`;
412	}else if( c!=`0` ){
413	*tokenType = TK_ID;
414	return i+`1`;
415	}else{
416	*tokenType = TK_ILLEGAL;
417	return i;
418	}
419	}
420	case CC_DOT: {
421	#ifndef SQLITE_OMIT_FLOATING_POINT
422	if( !sqlite3Isdigit(z[`1`]) )
423	#endif
424	{
425	*tokenType = TK_DOT;
426	return `1`;
427	}
428	/ If the next character is a digit, this is a floating point*
429	** number that begins with ".". Fall thru into the next case */
430	/ no break / deliberate_fall_through
431	}
432	case CC_DIGIT: {
433	testcase( z[`0`]==`'0'` ); testcase( z[`0`]==`'1'` ); testcase( z[`0`]==`'2'` );
434	testcase( z[`0`]==`'3'` ); testcase( z[`0`]==`'4'` ); testcase( z[`0`]==`'5'` );
435	testcase( z[`0`]==`'6'` ); testcase( z[`0`]==`'7'` ); testcase( z[`0`]==`'8'` );
436	testcase( z[`0`]==`'9'` );
437	*tokenType = TK_INTEGER;
438	#ifndef SQLITE_OMIT_HEX_INTEGER
439	if( z[`0`]==`'0'` && (z[`1`]==`'x'` \|\| z[`1`]==`'X'`) && sqlite3Isxdigit(z[`2`]) ){
440	for(i=`3`; sqlite3Isxdigit(z[i]); i++){}
441	return i;
442	}
443	#endif
444	for(i=`0`; sqlite3Isdigit(z[i]); i++){}
445	#ifndef SQLITE_OMIT_FLOATING_POINT
446	if( z[i]==`'.'` ){
447	i++;
448	while( sqlite3Isdigit(z[i]) ){ i++; }
449	*tokenType = TK_FLOAT;
450	}
451	if( (z[i]==`'e'` \|\| z[i]==`'E'`) &&
452	( sqlite3Isdigit(z[i+`1`])
453	\|\| ((z[i+`1`]==`'+'` \|\| z[i+`1`]==`'-'`) && sqlite3Isdigit(z[i+`2`]))
454	)
455	){
456	i += `2`;
457	while( sqlite3Isdigit(z[i]) ){ i++; }
458	*tokenType = TK_FLOAT;
459	}
460	#endif
461	while( IdChar(z[i]) ){
462	*tokenType = TK_ILLEGAL;
463	i++;
464	}
465	return i;
466	}
467	case CC_QUOTE2: {
468	for(i=`1`, c=z[`0`]; c!=`']'` && (c=z[i])!=`0`; i++){}
469	*tokenType = c==`']'` ? TK_ID : TK_ILLEGAL;
470	return i;
471	}
472	case CC_VARNUM: {
473	*tokenType = TK_VARIABLE;
474	for(i=`1`; sqlite3Isdigit(z[i]); i++){}
475	return i;
476	}
477	case CC_DOLLAR:
478	case CC_VARALPHA: {
479	int n = `0`;
480	testcase( z[`0`]==`'$'` ); testcase( z[`0`]==`'@'` );
481	testcase( z[`0`]==`':'` ); testcase( z[`0`]==`'#'` );
482	*tokenType = TK_VARIABLE;
483	for(i=`1`; (c=z[i])!=`0`; i++){
484	if( IdChar(c) ){
485	n++;
486	#ifndef SQLITE_OMIT_TCL_VARIABLE
487	}else if( c==`'('` && n>`0` ){
488	do{
489	i++;
490	}while( (c=z[i])!=`0` && !sqlite3Isspace(c) && c!=`')'` );
491	if( c==`')'` ){
492	i++;
493	}else{
494	*tokenType = TK_ILLEGAL;
495	}
496	break;
497	}else if( c==`':'` && z[i+`1`]==`':'` ){
498	i++;
499	#endif
500	}else{
501	break;
502	}
503	}
504	if( n==`0` ) *tokenType = TK_ILLEGAL;
505	return i;
506	}
507	case CC_KYWD0: {
508	for(i=`1`; aiClass[z[i]]<=CC_KYWD; i++){}
509	if( IdChar(z[i]) ){
510	/ This token started out using characters that can appear in keywords,*
511	** but z[i] is a character not allowed within keywords, so this must
512	** be an identifier instead */
513	i++;
514	break;
515	}
516	*tokenType = TK_ID;
517	return keywordCode((char*)z, i, tokenType);
518	}
519	case CC_X: {
520	#ifndef SQLITE_OMIT_BLOB_LITERAL
521	testcase( z[`0`]==`'x'` ); testcase( z[`0`]==`'X'` );
522	if( z[`1`]==`'\''` ){
523	*tokenType = TK_BLOB;
524	for(i=`2`; sqlite3Isxdigit(z[i]); i++){}
525	if( z[i]!=`'\''` \|\| i%`2` ){
526	*tokenType = TK_ILLEGAL;
527	while( z[i] && z[i]!=`'\''` ){ i++; }
528	}
529	if( z[i] ) i++;
530	return i;
531	}
532	#endif
533	/ If it is not a BLOB literal, then it must be an ID, since no*
534	** SQL keywords start with the letter 'x'. Fall through */
535	/ no break / deliberate_fall_through
536	}
537	case CC_KYWD:
538	case CC_ID: {
539	i = `1`;
540	break;
541	}
542	case CC_BOM: {
543	if( z[`1`]==`0xbb` && z[`2`]==`0xbf` ){
544	*tokenType = TK_SPACE;
545	return `3`;
546	}
547	i = `1`;
548	break;
549	}
550	case CC_NUL: {
551	*tokenType = TK_ILLEGAL;
552	return `0`;
553	}
554	default: {
555	*tokenType = TK_ILLEGAL;
556	return `1`;
557	}
558	}
559	while( IdChar(z[i]) ){ i++; }
560	*tokenType = TK_ID;
561	return i;
562	}
563
564	/*
565	** Run the parser on the given SQL string.
566	*/
567	int sqlite3RunParser(Parse pParse, const* char *zSql){
568	int nErr = `0`; / Number of errors encountered /
569	void pEngine; /* The LEMON-generated LALR(1) parser /
570	int n = `0`; / Length of the next token token /
571	int tokenType; / type of the next token /
572	int lastTokenParsed = -`1`; / type of the previous token /
573	sqlite3 db = pParse->db; /* The database connection /
574	int mxSqlLen; / Max length of an SQL string /
575	Parse pParentParse = `0`; /* Outer parse context, if any /
576	#ifdef sqlite3Parser_ENGINEALWAYSONSTACK
577	yyParser sEngine; / Space to hold the Lemon-generated Parser object /
578	#endif
579	VVA_ONLY( u8 startedWithOom = db->mallocFailed );
580
581	assert( zSql!=`0` );
582	mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH];
583	if( db->nVdbeActive==`0` ){
584	AtomicStore(&db->u1.isInterrupted, `0`);
585	}
586	pParse->rc = SQLITE_OK;
587	pParse->zTail = zSql;
588	#ifdef SQLITE_DEBUG
589	if( db->flags & SQLITE_ParserTrace ){
590	printf("parser: [[[%s]]]\n", zSql);
591	sqlite3ParserTrace(stdout, "parser: ");
592	}else{
593	sqlite3ParserTrace(`0`, `0`);
594	}
595	#endif
596	#ifdef sqlite3Parser_ENGINEALWAYSONSTACK
597	pEngine = &sEngine;
598	sqlite3ParserInit(pEngine, pParse);
599	#else
600	pEngine = sqlite3ParserAlloc(sqlite3Malloc, pParse);
601	if( pEngine==`0` ){
602	sqlite3OomFault(db);
603	return SQLITE_NOMEM_BKPT;
604	}
605	#endif
606	assert( pParse->pNewTable==`0` );
607	assert( pParse->pNewTrigger==`0` );
608	assert( pParse->nVar==`0` );
609	assert( pParse->pVList==`0` );
610	pParentParse = db->pParse;
611	db->pParse = pParse;
612	while( `1` ){
613	n = sqlite3GetToken((u8*)zSql, &tokenType);
614	mxSqlLen -= n;
615	if( mxSqlLen<`0` ){
616	pParse->rc = SQLITE_TOOBIG;
617	pParse->nErr++;
618	break;
619	}
620	#ifndef SQLITE_OMIT_WINDOWFUNC
621	if( tokenType>=TK_WINDOW ){
622	assert( tokenType==TK_SPACE \|\| tokenType==TK_OVER \|\| tokenType==TK_FILTER
623	\|\| tokenType==TK_ILLEGAL \|\| tokenType==TK_WINDOW
624	);
625	#else
626	if( tokenType>=TK_SPACE ){
627	assert( tokenType==TK_SPACE \|\| tokenType==TK_ILLEGAL );
628	#endif /* SQLITE_OMIT_WINDOWFUNC */
629	if( AtomicLoad(&db->u1.isInterrupted) ){
630	pParse->rc = SQLITE_INTERRUPT;
631	pParse->nErr++;
632	break;
633	}
634	if( tokenType==TK_SPACE ){
635	zSql += n;
636	continue;
637	}
638	if( zSql[`0`]==`0` ){
639	/ Upon reaching the end of input, call the parser two more times*
640	** with tokens TK_SEMI and 0, in that order. */
641	if( lastTokenParsed==TK_SEMI ){
642	tokenType = `0`;
643	}else if( lastTokenParsed==`0` ){
644	break;
645	}else{
646	tokenType = TK_SEMI;
647	}
648	n = `0`;
649	#ifndef SQLITE_OMIT_WINDOWFUNC
650	}else if( tokenType==TK_WINDOW ){
651	assert( n==`6` );
652	tokenType = analyzeWindowKeyword((const u8*)&zSql[`6`]);
653	}else if( tokenType==TK_OVER ){
654	assert( n==`4` );
655	tokenType = analyzeOverKeyword((const u8*)&zSql[`4`], lastTokenParsed);
656	}else if( tokenType==TK_FILTER ){
657	assert( n==`6` );
658	tokenType = analyzeFilterKeyword((const u8*)&zSql[`6`], lastTokenParsed);
659	#endif /* SQLITE_OMIT_WINDOWFUNC */
660	}else{
661	Token x;
662	x.z = zSql;
663	x.n = n;
664	sqlite3ErrorMsg(pParse, "unrecognized token: \"%T\"", &x);
665	break;
666	}
667	}
668	pParse->sLastToken.z = zSql;
669	pParse->sLastToken.n = n;
670	sqlite3Parser(pEngine, tokenType, pParse->sLastToken);
671	lastTokenParsed = tokenType;
672	zSql += n;
673	assert( db->mallocFailed==`0` \|\| pParse->rc!=SQLITE_OK \|\| startedWithOom );
674	if( pParse->rc!=SQLITE_OK ) break;
675	}
676	assert( nErr==`0` );
677	#ifdef YYTRACKMAXSTACKDEPTH
678	sqlite3_mutex_enter(sqlite3MallocMutex());
679	sqlite3StatusHighwater(SQLITE_STATUS_PARSER_STACK,
680	sqlite3ParserStackPeak(pEngine)
681	);
682	sqlite3_mutex_leave(sqlite3MallocMutex());
683	#endif /* YYDEBUG */
684	#ifdef sqlite3Parser_ENGINEALWAYSONSTACK
685	sqlite3ParserFinalize(pEngine);
686	#else
687	sqlite3ParserFree(pEngine, sqlite3_free);
688	#endif
689	if( db->mallocFailed ){
690	pParse->rc = SQLITE_NOMEM_BKPT;
691	}
692	if( pParse->zErrMsg \|\| (pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE) ){
693	if( pParse->zErrMsg==`0` ){
694	pParse->zErrMsg = sqlite3MPrintf(db, "%s", sqlite3ErrStr(pParse->rc));
695	}
696	sqlite3_log(pParse->rc, "%s in \"%s\"", pParse->zErrMsg, pParse->zTail);
697	nErr++;
698	}
699	pParse->zTail = zSql;
700	#ifndef SQLITE_OMIT_VIRTUALTABLE
701	sqlite3_free(pParse->apVtabLock);
702	#endif
703
704	if( pParse->pNewTable && !IN_SPECIAL_PARSE ){
705	/ If the pParse->declareVtab flag is set, do not delete any table*
706	** structure built up in pParse->pNewTable. The calling code (see vtab.c)
707	** will take responsibility for freeing the Table structure.
708	*/
709	sqlite3DeleteTable(db, pParse->pNewTable);
710	}
711	if( pParse->pNewTrigger && !IN_RENAME_OBJECT ){
712	sqlite3DeleteTrigger(db, pParse->pNewTrigger);
713	}
714	if( pParse->pVList ) sqlite3DbNNFreeNN(db, pParse->pVList);
715	db->pParse = pParentParse;
716	assert( nErr==`0` \|\| pParse->rc!=SQLITE_OK );
717	return nErr;
718	}
719
720
721	#ifdef SQLITE_ENABLE_NORMALIZE
722	/*
723	** Insert a single space character into pStr if the current string
724	** ends with an identifier
725	*/
726	static void addSpaceSeparator(sqlite3_str *pStr){
727	if( pStr->nChar && sqlite3IsIdChar(pStr->zText[pStr->nChar-`1`]) ){
728	sqlite3_str_append(pStr, " ", `1`);
729	}
730	}
731
732	/*
733	** Compute a normalization of the SQL given by zSql[0..nSql-1]. Return
734	** the normalization in space obtained from sqlite3DbMalloc(). Or return
735	** NULL if anything goes wrong or if zSql is NULL.
736	*/
737	char *sqlite3Normalize(
738	Vdbe pVdbe, /* VM being reprepared /
739	const char zSql /* The original SQL string /
740	){
741	sqlite3 db; /* The database connection /
742	int i; / Next unread byte of zSql[] /
743	int n; / length of current token /
744	int tokenType; / type of current token /
745	int prevType = `0`; / Previous non-whitespace token /
746	int nParen; / Number of nested levels of parentheses /
747	int iStartIN; / Start of RHS of IN operator in z[] /
748	int nParenAtIN; / Value of nParent at start of RHS of IN operator /
749	u32 j; / Bytes of normalized SQL generated so far /
750	sqlite3_str pStr; /* The normalized SQL string under construction /
751
752	db = sqlite3VdbeDb(pVdbe);
753	tokenType = -`1`;
754	nParen = iStartIN = nParenAtIN = `0`;
755	pStr = sqlite3_str_new(db);
756	assert( pStr!=`0` ); / sqlite3_str_new() never returns NULL /
757	for(i=`0`; zSql[i] && pStr->accError==`0`; i+=n){
758	if( tokenType!=TK_SPACE ){
759	prevType = tokenType;
760	}
761	n = sqlite3GetToken((unsigned char*)zSql+i, &tokenType);
762	if( NEVER(n<=`0`) ) break;
763	switch( tokenType ){
764	case TK_SPACE: {
765	break;
766	}
767	case TK_NULL: {
768	if( prevType==TK_IS \|\| prevType==TK_NOT ){
769	sqlite3_str_append(pStr, " NULL", `5`);
770	break;
771	}
772	/ Fall through /
773	}
774	case TK_STRING:
775	case TK_INTEGER:
776	case TK_FLOAT:
777	case TK_VARIABLE:
778	case TK_BLOB: {
779	sqlite3_str_append(pStr, "?", `1`);
780	break;
781	}
782	case TK_LP: {
783	nParen++;
784	if( prevType==TK_IN ){
785	iStartIN = pStr->nChar;
786	nParenAtIN = nParen;
787	}
788	sqlite3_str_append(pStr, "(", `1`);
789	break;
790	}
791	case TK_RP: {
792	if( iStartIN>`0` && nParen==nParenAtIN ){
793	assert( pStr->nChar>=(u32)iStartIN );
794	pStr->nChar = iStartIN+`1`;
795	sqlite3_str_append(pStr, "?,?,?", `5`);
796	iStartIN = `0`;
797	}
798	nParen--;
799	sqlite3_str_append(pStr, ")", `1`);
800	break;
801	}
802	case TK_ID: {
803	iStartIN = `0`;
804	j = pStr->nChar;
805	if( sqlite3Isquote(zSql[i]) ){
806	char *zId = sqlite3DbStrNDup(db, zSql+i, n);
807	int nId;
808	int eType = `0`;
809	if( zId==`0` ) break;
810	sqlite3Dequote(zId);
811	if( zSql[i]==`'"'` && sqlite3VdbeUsesDoubleQuotedString(pVdbe, zId) ){
812	sqlite3_str_append(pStr, "?", `1`);
813	sqlite3DbFree(db, zId);
814	break;
815	}
816	nId = sqlite3Strlen30(zId);
817	if( sqlite3GetToken((u8*)zId, &eType)==nId && eType==TK_ID ){
818	addSpaceSeparator(pStr);
819	sqlite3_str_append(pStr, zId, nId);
820	}else{
821	sqlite3_str_appendf(pStr, "\"%w\"", zId);
822	}
823	sqlite3DbFree(db, zId);
824	}else{
825	addSpaceSeparator(pStr);
826	sqlite3_str_append(pStr, zSql+i, n);
827	}
828	while( j<pStr->nChar ){
829	pStr->zText[j] = sqlite3Tolower(pStr->zText[j]);
830	j++;
831	}
832	break;
833	}
834	case TK_SELECT: {
835	iStartIN = `0`;
836	/ fall through /
837	}
838	default: {
839	if( sqlite3IsIdChar(zSql[i]) ) addSpaceSeparator(pStr);
840	j = pStr->nChar;
841	sqlite3_str_append(pStr, zSql+i, n);
842	while( j<pStr->nChar ){
843	pStr->zText[j] = sqlite3Toupper(pStr->zText[j]);
844	j++;
845	}
846	break;
847	}
848	}
849	}
850	if( tokenType!=TK_SEMI ) sqlite3_str_append(pStr, ";", `1`);
851	return sqlite3_str_finish(pStr);
852	}
853	#endif /* SQLITE_ENABLE_NORMALIZE */
854

Browse the source code of sqlite/src/tokenize.c