1 | /* |
2 | ** 2001 September 15 |
3 | ** |
4 | ** The author disclaims copyright to this source code. In place of |
5 | ** a legal notice, here is a blessing: |
6 | ** |
7 | ** May you do good and not evil. |
8 | ** May you find forgiveness for yourself and forgive others. |
9 | ** May you share freely, never taking more than you give. |
10 | ** |
11 | ************************************************************************* |
** A tokenizer for SQL
13 | ** |
14 | ** This file contains C code that implements the sqlite3_complete() API. |
15 | ** This code used to be part of the tokenizer.c source file. But by |
16 | ** separating it out, the code will be automatically omitted from |
17 | ** static links that do not use it. |
18 | */ |
19 | #include "sqliteInt.h" |
20 | #ifndef SQLITE_OMIT_COMPLETE |
21 | |
22 | /* |
23 | ** This is defined in tokenize.c. We just have to import the definition. |
24 | */ |
#ifndef SQLITE_AMALGAMATION
#ifdef SQLITE_ASCII
/* True if byte C can be part of a keyword or unquoted identifier.  The
** macro argument is parenthesized so that the cast applies to the whole
** argument expression and not merely to its first operand.
*/
#define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)(C)]&0x46)!=0)
#endif
#ifdef SQLITE_EBCDIC
extern const char sqlite3IsEbcdicIdChar[];
/* EBCDIC variant of the same test.  This macro assigns its argument to a
** variable named "c", so the caller must declare such a variable in the
** scope where IdChar() is used.
*/
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
#endif
#endif /* SQLITE_AMALGAMATION */
34 | |
35 | |
36 | /* |
37 | ** Token types used by the sqlite3_complete() routine. See the header |
38 | ** comments on that procedure for additional information. |
39 | */ |
/* The trigger-only token types are listed first so that the enumerator
** list never ends in a comma, whichever way SQLITE_OMIT_TRIGGER is set.
** The numeric values are used as column indexes into the transition
** table of sqlite3_complete() and must not change.
*/
enum {
#ifndef SQLITE_OMIT_TRIGGER
  tkEXPLAIN = 3,    /* The "explain" keyword */
  tkCREATE  = 4,    /* The "create" keyword */
  tkTEMP    = 5,    /* The "temp" or "temporary" keyword */
  tkTRIGGER = 6,    /* The "trigger" keyword */
  tkEND     = 7,    /* The "end" keyword */
#endif
  tkSEMI    = 0,    /* A semicolon */
  tkWS      = 1,    /* Whitespace or a comment */
  tkOTHER   = 2     /* Any other SQL token */
};
50 | |
51 | /* |
52 | ** Return TRUE if the given SQL string ends in a semicolon. |
53 | ** |
** Special handling is required for CREATE TRIGGER statements.
55 | ** Whenever the CREATE TRIGGER keywords are seen, the statement |
56 | ** must end with ";END;". |
57 | ** |
58 | ** This implementation uses a state machine with 8 states: |
59 | ** |
60 | ** (0) INVALID We have not yet seen a non-whitespace character. |
61 | ** |
62 | ** (1) START At the beginning or end of an SQL statement. This routine |
63 | ** returns 1 if it ends in the START state and 0 if it ends |
64 | ** in any other state. |
65 | ** |
** (2) NORMAL We are in the middle of a statement which ends with a single
67 | ** semicolon. |
68 | ** |
69 | ** (3) EXPLAIN The keyword EXPLAIN has been seen at the beginning of |
70 | ** a statement. |
71 | ** |
72 | ** (4) CREATE The keyword CREATE has been seen at the beginning of a |
73 | ** statement, possibly preceded by EXPLAIN and/or followed by |
74 | ** TEMP or TEMPORARY |
75 | ** |
76 | ** (5) TRIGGER We are in the middle of a trigger definition that must be |
77 | ** ended by a semicolon, the keyword END, and another semicolon. |
78 | ** |
79 | ** (6) SEMI We've seen the first semicolon in the ";END;" that occurs at |
80 | ** the end of a trigger definition. |
81 | ** |
82 | ** (7) END We've seen the ";END" of the ";END;" that occurs at the end |
83 | ** of a trigger definition. |
84 | ** |
85 | ** Transitions between states above are determined by tokens extracted |
86 | ** from the input. The following tokens are significant: |
87 | ** |
88 | ** (0) tkSEMI A semicolon. |
89 | ** (1) tkWS Whitespace. |
90 | ** (2) tkOTHER Any other SQL token. |
91 | ** (3) tkEXPLAIN The "explain" keyword. |
92 | ** (4) tkCREATE The "create" keyword. |
93 | ** (5) tkTEMP The "temp" or "temporary" keyword. |
94 | ** (6) tkTRIGGER The "trigger" keyword. |
95 | ** (7) tkEND The "end" keyword. |
96 | ** |
97 | ** Whitespace never causes a state transition and is always ignored. |
98 | ** This means that a SQL string of all whitespace is invalid. |
99 | ** |
100 | ** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed |
101 | ** to recognize the end of a trigger can be omitted. All we have to do |
** is look for a semicolon that is not part of a string or comment.
103 | */ |
104 | int sqlite3_complete(const char *zSql){ |
105 | u8 state = 0; /* Current state, using numbers defined in header comment */ |
106 | u8 token; /* Value of the next token */ |
107 | |
108 | #ifndef SQLITE_OMIT_TRIGGER |
109 | /* A complex statement machine used to detect the end of a CREATE TRIGGER |
110 | ** statement. This is the normal case. |
111 | */ |
112 | static const u8 trans[8][8] = { |
113 | /* Token: */ |
114 | /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */ |
115 | /* 0 INVALID: */ { 1, 0, 2, 3, 4, 2, 2, 2, }, |
116 | /* 1 START: */ { 1, 1, 2, 3, 4, 2, 2, 2, }, |
117 | /* 2 NORMAL: */ { 1, 2, 2, 2, 2, 2, 2, 2, }, |
118 | /* 3 EXPLAIN: */ { 1, 3, 3, 2, 4, 2, 2, 2, }, |
119 | /* 4 CREATE: */ { 1, 4, 2, 2, 2, 4, 5, 2, }, |
120 | /* 5 TRIGGER: */ { 6, 5, 5, 5, 5, 5, 5, 5, }, |
121 | /* 6 SEMI: */ { 6, 6, 5, 5, 5, 5, 5, 7, }, |
122 | /* 7 END: */ { 1, 7, 5, 5, 5, 5, 5, 5, }, |
123 | }; |
124 | #else |
125 | /* If triggers are not supported by this compile then the statement machine |
126 | ** used to detect the end of a statement is much simpler |
127 | */ |
128 | static const u8 trans[3][3] = { |
129 | /* Token: */ |
130 | /* State: ** SEMI WS OTHER */ |
131 | /* 0 INVALID: */ { 1, 0, 2, }, |
132 | /* 1 START: */ { 1, 1, 2, }, |
133 | /* 2 NORMAL: */ { 1, 2, 2, }, |
134 | }; |
135 | #endif /* SQLITE_OMIT_TRIGGER */ |
136 | |
137 | #ifdef SQLITE_ENABLE_API_ARMOR |
138 | if( zSql==0 ){ |
139 | (void)SQLITE_MISUSE_BKPT; |
140 | return 0; |
141 | } |
142 | #endif |
143 | |
144 | while( *zSql ){ |
145 | switch( *zSql ){ |
146 | case ';': { /* A semicolon */ |
147 | token = tkSEMI; |
148 | break; |
149 | } |
150 | case ' ': |
151 | case '\r': |
152 | case '\t': |
153 | case '\n': |
154 | case '\f': { /* White space is ignored */ |
155 | token = tkWS; |
156 | break; |
157 | } |
158 | case '/': { /* C-style comments */ |
159 | if( zSql[1]!='*' ){ |
160 | token = tkOTHER; |
161 | break; |
162 | } |
163 | zSql += 2; |
164 | while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } |
165 | if( zSql[0]==0 ) return 0; |
166 | zSql++; |
167 | token = tkWS; |
168 | break; |
169 | } |
170 | case '-': { /* SQL-style comments from "--" to end of line */ |
171 | if( zSql[1]!='-' ){ |
172 | token = tkOTHER; |
173 | break; |
174 | } |
175 | while( *zSql && *zSql!='\n' ){ zSql++; } |
176 | if( *zSql==0 ) return state==1; |
177 | token = tkWS; |
178 | break; |
179 | } |
180 | case '[': { /* Microsoft-style identifiers in [...] */ |
181 | zSql++; |
182 | while( *zSql && *zSql!=']' ){ zSql++; } |
183 | if( *zSql==0 ) return 0; |
184 | token = tkOTHER; |
185 | break; |
186 | } |
187 | case '`': /* Grave-accent quoted symbols used by MySQL */ |
188 | case '"': /* single- and double-quoted strings */ |
189 | case '\'': { |
190 | int c = *zSql; |
191 | zSql++; |
192 | while( *zSql && *zSql!=c ){ zSql++; } |
193 | if( *zSql==0 ) return 0; |
194 | token = tkOTHER; |
195 | break; |
196 | } |
197 | default: { |
198 | #ifdef SQLITE_EBCDIC |
199 | unsigned char c; |
200 | #endif |
201 | if( IdChar((u8)*zSql) ){ |
202 | /* Keywords and unquoted identifiers */ |
203 | int nId; |
204 | for(nId=1; IdChar(zSql[nId]); nId++){} |
205 | #ifdef SQLITE_OMIT_TRIGGER |
206 | token = tkOTHER; |
207 | #else |
208 | switch( *zSql ){ |
209 | case 'c': case 'C': { |
210 | if( nId==6 && sqlite3StrNICmp(zSql, "create" , 6)==0 ){ |
211 | token = tkCREATE; |
212 | }else{ |
213 | token = tkOTHER; |
214 | } |
215 | break; |
216 | } |
217 | case 't': case 'T': { |
218 | if( nId==7 && sqlite3StrNICmp(zSql, "trigger" , 7)==0 ){ |
219 | token = tkTRIGGER; |
220 | }else if( nId==4 && sqlite3StrNICmp(zSql, "temp" , 4)==0 ){ |
221 | token = tkTEMP; |
222 | }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary" , 9)==0 ){ |
223 | token = tkTEMP; |
224 | }else{ |
225 | token = tkOTHER; |
226 | } |
227 | break; |
228 | } |
229 | case 'e': case 'E': { |
230 | if( nId==3 && sqlite3StrNICmp(zSql, "end" , 3)==0 ){ |
231 | token = tkEND; |
232 | }else |
233 | #ifndef SQLITE_OMIT_EXPLAIN |
234 | if( nId==7 && sqlite3StrNICmp(zSql, "explain" , 7)==0 ){ |
235 | token = tkEXPLAIN; |
236 | }else |
237 | #endif |
238 | { |
239 | token = tkOTHER; |
240 | } |
241 | break; |
242 | } |
243 | default: { |
244 | token = tkOTHER; |
245 | break; |
246 | } |
247 | } |
248 | #endif /* SQLITE_OMIT_TRIGGER */ |
249 | zSql += nId-1; |
250 | }else{ |
251 | /* Operators and special symbols */ |
252 | token = tkOTHER; |
253 | } |
254 | break; |
255 | } |
256 | } |
257 | state = trans[state][token]; |
258 | zSql++; |
259 | } |
260 | return state==1; |
261 | } |
262 | |
263 | #ifndef SQLITE_OMIT_UTF16 |
264 | /* |
265 | ** This routine is the same as the sqlite3_complete() routine described |
266 | ** above, except that the parameter is required to be UTF-16 encoded, not |
267 | ** UTF-8. |
268 | */ |
269 | int sqlite3_complete16(const void *zSql){ |
270 | sqlite3_value *pVal; |
271 | char const *zSql8; |
272 | int rc; |
273 | |
274 | #ifndef SQLITE_OMIT_AUTOINIT |
275 | rc = sqlite3_initialize(); |
276 | if( rc ) return rc; |
277 | #endif |
278 | pVal = sqlite3ValueNew(0); |
279 | sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); |
280 | zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); |
281 | if( zSql8 ){ |
282 | rc = sqlite3_complete(zSql8); |
283 | }else{ |
284 | rc = SQLITE_NOMEM_BKPT; |
285 | } |
286 | sqlite3ValueFree(pVal); |
287 | return rc & 0xff; |
288 | } |
289 | #endif /* SQLITE_OMIT_UTF16 */ |
290 | #endif /* SQLITE_OMIT_COMPLETE */ |
291 | |