1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include <wctype.h> |
11 | #include "sql_mem.h" |
12 | #include "sql_scan.h" |
13 | #include "sql_types.h" |
14 | #include "sql_symbol.h" |
15 | #include "sql_mvc.h" |
16 | #include "sql_parser.tab.h" |
17 | #include "sql_semantic.h" |
18 | #include "sql_parser.h" /* for sql_error() */ |
19 | |
20 | #include "stream.h" |
21 | #include <unistd.h> |
22 | #include <string.h> |
23 | #include <ctype.h> |
24 | #include "sql_keyword.h" |
25 | #ifdef HAVE_HGE |
26 | #include "mal.h" /* for have_hge */ |
27 | #endif |
28 | |
/*
 * Return a freshly allocated copy of query with comments stripped and
 * white space normalized, or NULL if the allocation fails.
 *
 * - C style comments and "--" comments (up to the end of the line)
 *   are dropped.
 * - Outside quotes, a space or tab is only copied (as a single space)
 *   when the previously written character is not a space, and a
 *   newline is only copied when the previously written character is
 *   not a newline; neither is written at the very start of the result.
 * - Text inside '..', ".." and {..} is copied verbatim; a backslash
 *   inside such a quoted section keeps the next character from
 *   terminating it.
 *
 * The result is never longer than the input, so strlen(query) + 1
 * bytes always suffice.  The buffer comes from GDKmalloc(); releasing
 * it is left to the caller.
 */
char *
query_cleaned(const char *query)
{
	char *q, *r;
	int quote = 0;			/* inside quotes ('..', "..", {..}) */
	bool bs = false;		/* seen a backslash in a quoted string */
	bool incomment1 = false;	/* inside traditional C style comment */
	bool incomment2 = false;	/* inside comment starting with -- */

	r = GDKmalloc(strlen(query) + 1);
	if (!r)
		return NULL;

	for (q = r; *query; query++) {
		if (incomment1) {
			/* look ahead instead of behind so that the star of
			 * the comment opener can never be taken for the
			 * star of the terminator (as in slash-star-slash) */
			if (*query == '*' && query[1] == '/') {
				query++;	/* consume the closing slash too */
				incomment1 = false;
			}
		} else if (incomment2) {
			if (*query == '\n') {
				incomment2 = false;
				/* add newline only if comment doesn't
				 * occupy whole line */
				if (q > r && q[-1] != '\n')
					*q++ = '\n';
			}
		} else if (quote) {
			if (bs) {
				/* escaped character: copy it, end the escape */
				bs = false;
			} else if (*query == '\\') {
				bs = true;
			} else if (*query == quote) {
				quote = 0;
			}
			*q++ = *query;
		} else if (*query == '"' || *query == '\'') {
			quote = *query;
			*q++ = *query;
		} else if (*query == '{') {
			/* a {..} section ends at the matching close brace */
			quote = '}';
			*q++ = *query;
		} else if (*query == '-' && query[1] == '-') {
			incomment2 = true;
		} else if (*query == '/' && query[1] == '*') {
			/* query[1] is known to be the star, so stepping
			 * over it cannot run past the terminating NUL */
			query++;
			incomment1 = true;
		} else if (*query == '\n') {
			/* collapse newlines */
			if (q > r && q[-1] != '\n')
				*q++ = '\n';
		} else if (*query == ' ' || *query == '\t') {
			/* collapse white space */
			if (q > r && q[-1] != ' ')
				*q++ = ' ';
		} else {
			*q++ = *query;
		}
	}
	*q = 0;
	return r;
}
88 | |
89 | int |
90 | scanner_init_keywords(void) |
91 | { |
92 | int failed = 0; |
93 | |
94 | failed += keywords_insert("false" , BOOL_FALSE); |
95 | failed += keywords_insert("true" , BOOL_TRUE); |
96 | |
97 | failed += keywords_insert("ALTER" , ALTER); |
98 | failed += keywords_insert("ADD" , ADD); |
99 | failed += keywords_insert("AND" , AND); |
100 | failed += keywords_insert("MEDIAN" , AGGR); |
101 | failed += keywords_insert("CORR" , AGGR2); |
102 | failed += keywords_insert("QUANTILE" , AGGR2); |
103 | failed += keywords_insert("AVG" , AGGR); |
104 | failed += keywords_insert("MIN" , AGGR); |
105 | failed += keywords_insert("MAX" , AGGR); |
106 | failed += keywords_insert("SUM" , AGGR); |
107 | failed += keywords_insert("PROD" , AGGR); |
108 | failed += keywords_insert("COUNT" , AGGR); |
109 | |
110 | failed += keywords_insert("RANK" , RANK); |
111 | failed += keywords_insert("DENSE_RANK" , RANK); |
112 | failed += keywords_insert("PERCENT_RANK" , RANK); |
113 | failed += keywords_insert("CUME_DIST" , RANK); |
114 | failed += keywords_insert("ROW_NUMBER" , RANK); |
115 | failed += keywords_insert("NTILE" , RANK); |
116 | failed += keywords_insert("LAG" , RANK); |
117 | failed += keywords_insert("LEAD" , RANK); |
118 | failed += keywords_insert("FIRST_VALUE" , RANK); |
119 | failed += keywords_insert("LAST_VALUE" , RANK); |
120 | failed += keywords_insert("NTH_VALUE" , RANK); |
121 | |
122 | failed += keywords_insert("BEST" , BEST); |
123 | failed += keywords_insert("EFFORT" , EFFORT); |
124 | |
125 | failed += keywords_insert("AS" , AS); |
126 | failed += keywords_insert("ASC" , ASC); |
127 | failed += keywords_insert("AUTHORIZATION" , AUTHORIZATION); |
128 | failed += keywords_insert("BETWEEN" , BETWEEN); |
129 | failed += keywords_insert("SYMMETRIC" , SYMMETRIC); |
130 | failed += keywords_insert("ASYMMETRIC" , ASYMMETRIC); |
131 | failed += keywords_insert("BY" , BY); |
132 | failed += keywords_insert("CAST" , CAST); |
133 | failed += keywords_insert("CONVERT" , CONVERT); |
134 | failed += keywords_insert("CHARACTER" , CHARACTER); |
135 | failed += keywords_insert("CHAR" , CHARACTER); |
136 | failed += keywords_insert("VARYING" , VARYING); |
137 | failed += keywords_insert("VARCHAR" , VARCHAR); |
138 | failed += keywords_insert("BINARY" , BINARY); |
139 | failed += keywords_insert("LARGE" , LARGE); |
140 | failed += keywords_insert("OBJECT" , OBJECT); |
141 | failed += keywords_insert("CLOB" , CLOB); |
142 | failed += keywords_insert("BLOB" , sqlBLOB); |
143 | failed += keywords_insert("TEXT" , sqlTEXT); |
144 | failed += keywords_insert("TINYTEXT" , sqlTEXT); |
145 | failed += keywords_insert("STRING" , CLOB); /* ? */ |
146 | failed += keywords_insert("CHECK" , CHECK); |
147 | failed += keywords_insert("CLIENT" , CLIENT); |
148 | failed += keywords_insert("SERVER" , SERVER); |
149 | failed += keywords_insert("COMMENT" , COMMENT); |
150 | failed += keywords_insert("CONSTRAINT" , CONSTRAINT); |
151 | failed += keywords_insert("CREATE" , CREATE); |
152 | failed += keywords_insert("CROSS" , CROSS); |
153 | failed += keywords_insert("COPY" , COPY); |
154 | failed += keywords_insert("RECORDS" , RECORDS); |
155 | failed += keywords_insert("DELIMITERS" , DELIMITERS); |
156 | failed += keywords_insert("STDIN" , STDIN); |
157 | failed += keywords_insert("STDOUT" , STDOUT); |
158 | |
159 | failed += keywords_insert("TINYINT" , TINYINT); |
160 | failed += keywords_insert("SMALLINT" , SMALLINT); |
161 | failed += keywords_insert("INTEGER" , sqlINTEGER); |
162 | failed += keywords_insert("INT" , sqlINTEGER); |
163 | failed += keywords_insert("MEDIUMINT" , sqlINTEGER); |
164 | failed += keywords_insert("BIGINT" , BIGINT); |
165 | #ifdef HAVE_HGE |
166 | if (have_hge) |
167 | failed += keywords_insert("HUGEINT" , HUGEINT); |
168 | #endif |
169 | failed += keywords_insert("DEC" , sqlDECIMAL); |
170 | failed += keywords_insert("DECIMAL" , sqlDECIMAL); |
171 | failed += keywords_insert("NUMERIC" , sqlDECIMAL); |
172 | failed += keywords_insert("DECLARE" , DECLARE); |
173 | failed += keywords_insert("DEFAULT" , DEFAULT); |
174 | failed += keywords_insert("DESC" , DESC); |
175 | failed += keywords_insert("DISTINCT" , DISTINCT); |
176 | failed += keywords_insert("DOUBLE" , sqlDOUBLE); |
177 | failed += keywords_insert("REAL" , sqlREAL); |
178 | failed += keywords_insert("DROP" , DROP); |
179 | failed += keywords_insert("ESCAPE" , ESCAPE); |
180 | failed += keywords_insert("EXISTS" , EXISTS); |
181 | failed += keywords_insert("UESCAPE" , UESCAPE); |
182 | failed += keywords_insert("EXTRACT" , EXTRACT); |
183 | failed += keywords_insert("FLOAT" , sqlFLOAT); |
184 | failed += keywords_insert("FOR" , FOR); |
185 | failed += keywords_insert("FOREIGN" , FOREIGN); |
186 | failed += keywords_insert("FROM" , FROM); |
187 | failed += keywords_insert("FWF" , FWF); |
188 | |
189 | failed += keywords_insert("REFERENCES" , REFERENCES); |
190 | |
191 | failed += keywords_insert("MATCH" , MATCH); |
192 | failed += keywords_insert("FULL" , FULL); |
193 | failed += keywords_insert("PARTIAL" , PARTIAL); |
194 | failed += keywords_insert("SIMPLE" , SIMPLE); |
195 | |
196 | failed += keywords_insert("INSERT" , INSERT); |
197 | failed += keywords_insert("UPDATE" , UPDATE); |
198 | failed += keywords_insert("DELETE" , sqlDELETE); |
199 | failed += keywords_insert("TRUNCATE" , TRUNCATE); |
200 | failed += keywords_insert("MATCHED" , MATCHED); |
201 | |
202 | failed += keywords_insert("ACTION" , ACTION); |
203 | failed += keywords_insert("CASCADE" , CASCADE); |
204 | failed += keywords_insert("RESTRICT" , RESTRICT); |
205 | failed += keywords_insert("FIRST" , FIRST); |
206 | failed += keywords_insert("GLOBAL" , GLOBAL); |
207 | failed += keywords_insert("GROUP" , sqlGROUP); |
208 | failed += keywords_insert("HAVING" , HAVING); |
209 | failed += keywords_insert("ILIKE" , ILIKE); |
210 | failed += keywords_insert("IMPRINTS" , IMPRINTS); |
211 | failed += keywords_insert("IN" , sqlIN); |
212 | failed += keywords_insert("INNER" , INNER); |
213 | failed += keywords_insert("INTO" , INTO); |
214 | failed += keywords_insert("IS" , IS); |
215 | failed += keywords_insert("JOIN" , JOIN); |
216 | failed += keywords_insert("KEY" , KEY); |
217 | failed += keywords_insert("LATERAL" , LATERAL); |
218 | failed += keywords_insert("LEFT" , LEFT); |
219 | failed += keywords_insert("LIKE" , LIKE); |
220 | failed += keywords_insert("LIMIT" , LIMIT); |
221 | failed += keywords_insert("SAMPLE" , SAMPLE); |
222 | failed += keywords_insert("SEED" , SEED); |
223 | failed += keywords_insert("LAST" , LAST); |
224 | failed += keywords_insert("LOCAL" , LOCAL); |
225 | failed += keywords_insert("LOCKED" , LOCKED); |
226 | failed += keywords_insert("NATURAL" , NATURAL); |
227 | failed += keywords_insert("NOT" , NOT); |
228 | failed += keywords_insert("NULL" , sqlNULL); |
229 | failed += keywords_insert("NULLS" , NULLS); |
230 | failed += keywords_insert("OFFSET" , OFFSET); |
231 | failed += keywords_insert("ON" , ON); |
232 | failed += keywords_insert("OPTIONS" , OPTIONS); |
233 | failed += keywords_insert("OPTION" , OPTION); |
234 | failed += keywords_insert("OR" , OR); |
235 | failed += keywords_insert("ORDER" , ORDER); |
236 | failed += keywords_insert("ORDERED" , ORDERED); |
237 | failed += keywords_insert("OUTER" , OUTER); |
238 | failed += keywords_insert("OVER" , OVER); |
239 | failed += keywords_insert("PARTITION" , PARTITION); |
240 | failed += keywords_insert("PATH" , PATH); |
241 | failed += keywords_insert("PRECISION" , PRECISION); |
242 | failed += keywords_insert("PRIMARY" , PRIMARY); |
243 | |
244 | failed += keywords_insert("USER" , USER); |
245 | failed += keywords_insert("RENAME" , RENAME); |
246 | failed += keywords_insert("UNENCRYPTED" , UNENCRYPTED); |
247 | failed += keywords_insert("ENCRYPTED" , ENCRYPTED); |
248 | failed += keywords_insert("PASSWORD" , PASSWORD); |
249 | failed += keywords_insert("GRANT" , GRANT); |
250 | failed += keywords_insert("REVOKE" , REVOKE); |
251 | failed += keywords_insert("ROLE" , ROLE); |
252 | failed += keywords_insert("ADMIN" , ADMIN); |
253 | failed += keywords_insert("PRIVILEGES" , PRIVILEGES); |
254 | failed += keywords_insert("PUBLIC" , PUBLIC); |
255 | failed += keywords_insert("CURRENT_USER" , CURRENT_USER); |
256 | failed += keywords_insert("CURRENT_ROLE" , CURRENT_ROLE); |
257 | failed += keywords_insert("SESSION_USER" , SESSION_USER); |
258 | failed += keywords_insert("SESSION" , sqlSESSION); |
259 | |
260 | failed += keywords_insert("RIGHT" , RIGHT); |
261 | failed += keywords_insert("SCHEMA" , SCHEMA); |
262 | failed += keywords_insert("SELECT" , SELECT); |
263 | failed += keywords_insert("SET" , SET); |
264 | failed += keywords_insert("AUTO_COMMIT" , AUTO_COMMIT); |
265 | |
266 | failed += keywords_insert("ALL" , ALL); |
267 | failed += keywords_insert("ANY" , ANY); |
268 | failed += keywords_insert("SOME" , SOME); |
269 | failed += keywords_insert("EVERY" , ANY); |
270 | /* |
271 | failed += keywords_insert("SQLCODE", SQLCODE ); |
272 | */ |
273 | failed += keywords_insert("COLUMN" , COLUMN); |
274 | failed += keywords_insert("TABLE" , TABLE); |
275 | failed += keywords_insert("TEMPORARY" , TEMPORARY); |
276 | failed += keywords_insert("TEMP" , TEMP); |
277 | failed += keywords_insert("STREAM" , STREAM); |
278 | failed += keywords_insert("REMOTE" , REMOTE); |
279 | failed += keywords_insert("MERGE" , MERGE); |
280 | failed += keywords_insert("REPLICA" , REPLICA); |
281 | failed += keywords_insert("TO" , TO); |
282 | failed += keywords_insert("UNION" , UNION); |
283 | failed += keywords_insert("EXCEPT" , EXCEPT); |
284 | failed += keywords_insert("INTERSECT" , INTERSECT); |
285 | failed += keywords_insert("CORRESPONDING" , CORRESPONDING); |
286 | failed += keywords_insert("UNIQUE" , UNIQUE); |
287 | failed += keywords_insert("USING" , USING); |
288 | failed += keywords_insert("VALUES" , VALUES); |
289 | failed += keywords_insert("VIEW" , VIEW); |
290 | failed += keywords_insert("WHERE" , WHERE); |
291 | failed += keywords_insert("WITH" , WITH); |
292 | failed += keywords_insert("DATA" , DATA); |
293 | |
294 | failed += keywords_insert("DATE" , sqlDATE); |
295 | failed += keywords_insert("TIME" , TIME); |
296 | failed += keywords_insert("TIMESTAMP" , TIMESTAMP); |
297 | failed += keywords_insert("INTERVAL" , INTERVAL); |
298 | failed += keywords_insert("CURRENT_DATE" , CURRENT_DATE); |
299 | failed += keywords_insert("CURRENT_TIME" , CURRENT_TIME); |
300 | failed += keywords_insert("CURRENT_TIMESTAMP" , CURRENT_TIMESTAMP); |
301 | failed += keywords_insert("NOW" , CURRENT_TIMESTAMP); |
302 | failed += keywords_insert("LOCALTIME" , LOCALTIME); |
303 | failed += keywords_insert("LOCALTIMESTAMP" , LOCALTIMESTAMP); |
304 | failed += keywords_insert("ZONE" , ZONE); |
305 | |
306 | failed += keywords_insert("CENTURY" , CENTURY); |
307 | failed += keywords_insert("DECADE" , DECADE); |
308 | failed += keywords_insert("YEAR" , YEAR); |
309 | failed += keywords_insert("QUARTER" , QUARTER); |
310 | failed += keywords_insert("MONTH" , MONTH); |
311 | failed += keywords_insert("WEEK" , WEEK); |
312 | failed += keywords_insert("DOW" , DOW); |
313 | failed += keywords_insert("DOY" , DOY); |
314 | failed += keywords_insert("DAY" , DAY); |
315 | failed += keywords_insert("HOUR" , HOUR); |
316 | failed += keywords_insert("MINUTE" , MINUTE); |
317 | failed += keywords_insert("SECOND" , SECOND); |
318 | |
319 | failed += keywords_insert("POSITION" , POSITION); |
320 | failed += keywords_insert("SUBSTRING" , SUBSTRING); |
321 | failed += keywords_insert("SPLIT_PART" , SPLIT_PART); |
322 | |
323 | failed += keywords_insert("CASE" , CASE); |
324 | failed += keywords_insert("WHEN" , WHEN); |
325 | failed += keywords_insert("THEN" , THEN); |
326 | failed += keywords_insert("ELSE" , ELSE); |
327 | failed += keywords_insert("END" , END); |
328 | failed += keywords_insert("NULLIF" , NULLIF); |
329 | failed += keywords_insert("COALESCE" , COALESCE); |
330 | failed += keywords_insert("ELSEIF" , ELSEIF); |
331 | failed += keywords_insert("IF" , IF); |
332 | failed += keywords_insert("WHILE" , WHILE); |
333 | failed += keywords_insert("DO" , DO); |
334 | |
335 | failed += keywords_insert("COMMIT" , COMMIT); |
336 | failed += keywords_insert("ROLLBACK" , ROLLBACK); |
337 | failed += keywords_insert("SAVEPOINT" , SAVEPOINT); |
338 | failed += keywords_insert("RELEASE" , RELEASE); |
339 | failed += keywords_insert("WORK" , WORK); |
340 | failed += keywords_insert("CHAIN" , CHAIN); |
341 | failed += keywords_insert("PRESERVE" , PRESERVE); |
342 | failed += keywords_insert("ROWS" , ROWS); |
343 | failed += keywords_insert("NO" , NO); |
344 | failed += keywords_insert("START" , START); |
345 | failed += keywords_insert("TRANSACTION" , TRANSACTION); |
346 | failed += keywords_insert("READ" , READ); |
347 | failed += keywords_insert("WRITE" , WRITE); |
348 | failed += keywords_insert("ONLY" , ONLY); |
349 | failed += keywords_insert("ISOLATION" , ISOLATION); |
350 | failed += keywords_insert("LEVEL" , LEVEL); |
351 | failed += keywords_insert("UNCOMMITTED" , UNCOMMITTED); |
352 | failed += keywords_insert("COMMITTED" , COMMITTED); |
353 | failed += keywords_insert("REPEATABLE" , sqlREPEATABLE); |
354 | failed += keywords_insert("SERIALIZABLE" , SERIALIZABLE); |
355 | failed += keywords_insert("DIAGNOSTICS" , DIAGNOSTICS); |
356 | failed += keywords_insert("SIZE" , sqlSIZE); |
357 | failed += keywords_insert("STORAGE" , STORAGE); |
358 | |
359 | failed += keywords_insert("TYPE" , TYPE); |
360 | failed += keywords_insert("PROCEDURE" , PROCEDURE); |
361 | failed += keywords_insert("FUNCTION" , FUNCTION); |
362 | failed += keywords_insert("LOADER" , sqlLOADER); |
363 | failed += keywords_insert("REPLACE" , REPLACE); |
364 | |
365 | failed += keywords_insert("FILTER" , FILTER); |
366 | failed += keywords_insert("AGGREGATE" , AGGREGATE); |
367 | failed += keywords_insert("RETURNS" , RETURNS); |
368 | failed += keywords_insert("EXTERNAL" , EXTERNAL); |
369 | failed += keywords_insert("NAME" , sqlNAME); |
370 | failed += keywords_insert("RETURN" , RETURN); |
371 | failed += keywords_insert("CALL" , CALL); |
372 | failed += keywords_insert("LANGUAGE" , LANGUAGE); |
373 | |
374 | failed += keywords_insert("ANALYZE" , ANALYZE); |
375 | failed += keywords_insert("MINMAX" , MINMAX); |
376 | failed += keywords_insert("EXPLAIN" , SQL_EXPLAIN); |
377 | failed += keywords_insert("PLAN" , SQL_PLAN); |
378 | failed += keywords_insert("DEBUG" , SQL_DEBUG); |
379 | failed += keywords_insert("TRACE" , SQL_TRACE); |
380 | failed += keywords_insert("PREPARE" , PREPARE); |
381 | failed += keywords_insert("PREP" , PREP); |
382 | failed += keywords_insert("EXECUTE" , EXECUTE); |
383 | failed += keywords_insert("EXEC" , EXEC); |
384 | |
385 | failed += keywords_insert("INDEX" , INDEX); |
386 | |
387 | failed += keywords_insert("SEQUENCE" , SEQUENCE); |
388 | failed += keywords_insert("RESTART" , RESTART); |
389 | failed += keywords_insert("INCREMENT" , INCREMENT); |
390 | failed += keywords_insert("MAXVALUE" , MAXVALUE); |
391 | failed += keywords_insert("MINVALUE" , MINVALUE); |
392 | failed += keywords_insert("CYCLE" , CYCLE); |
393 | failed += keywords_insert("CACHE" , CACHE); |
394 | failed += keywords_insert("NEXT" , NEXT); |
395 | failed += keywords_insert("VALUE" , VALUE); |
396 | failed += keywords_insert("GENERATED" , GENERATED); |
397 | failed += keywords_insert("ALWAYS" , ALWAYS); |
398 | failed += keywords_insert("IDENTITY" , IDENTITY); |
399 | failed += keywords_insert("SERIAL" , SERIAL); |
400 | failed += keywords_insert("BIGSERIAL" , BIGSERIAL); |
401 | failed += keywords_insert("AUTO_INCREMENT" , AUTO_INCREMENT); |
402 | failed += keywords_insert("CONTINUE" , CONTINUE); |
403 | |
404 | failed += keywords_insert("TRIGGER" , TRIGGER); |
405 | failed += keywords_insert("ATOMIC" , ATOMIC); |
406 | failed += keywords_insert("BEGIN" , BEGIN); |
407 | failed += keywords_insert("OF" , OF); |
408 | failed += keywords_insert("BEFORE" , BEFORE); |
409 | failed += keywords_insert("AFTER" , AFTER); |
410 | failed += keywords_insert("ROW" , ROW); |
411 | failed += keywords_insert("STATEMENT" , STATEMENT); |
412 | failed += keywords_insert("NEW" , sqlNEW); |
413 | failed += keywords_insert("OLD" , OLD); |
414 | failed += keywords_insert("EACH" , EACH); |
415 | failed += keywords_insert("REFERENCING" , REFERENCING); |
416 | |
417 | failed += keywords_insert("RANGE" , RANGE); |
418 | failed += keywords_insert("UNBOUNDED" , UNBOUNDED); |
419 | failed += keywords_insert("PRECEDING" , PRECEDING); |
420 | failed += keywords_insert("FOLLOWING" , FOLLOWING); |
421 | failed += keywords_insert("CURRENT" , CURRENT); |
422 | failed += keywords_insert("EXCLUDE" , EXCLUDE); |
423 | failed += keywords_insert("OTHERS" , OTHERS); |
424 | failed += keywords_insert("TIES" , TIES); |
425 | failed += keywords_insert("GROUPS" , GROUPS); |
426 | failed += keywords_insert("WINDOW" , WINDOW); |
427 | |
428 | /* special SQL/XML keywords */ |
429 | failed += keywords_insert("XMLCOMMENT" , XMLCOMMENT); |
430 | failed += keywords_insert("XMLCONCAT" , XMLCONCAT); |
431 | failed += keywords_insert("XMLDOCUMENT" , XMLDOCUMENT); |
432 | failed += keywords_insert("XMLELEMENT" , XMLELEMENT); |
433 | failed += keywords_insert("XMLATTRIBUTES" , XMLATTRIBUTES); |
434 | failed += keywords_insert("XMLFOREST" , XMLFOREST); |
435 | failed += keywords_insert("XMLPARSE" , XMLPARSE); |
436 | failed += keywords_insert("STRIP" , STRIP); |
437 | failed += keywords_insert("WHITESPACE" , WHITESPACE); |
438 | failed += keywords_insert("XMLPI" , XMLPI); |
439 | failed += keywords_insert("XMLQUERY" , XMLQUERY); |
440 | failed += keywords_insert("PASSING" , PASSING); |
441 | failed += keywords_insert("XMLTEXT" , XMLTEXT); |
442 | failed += keywords_insert("NIL" , NIL); |
443 | failed += keywords_insert("REF" , REF); |
444 | failed += keywords_insert("ABSENT" , ABSENT); |
445 | failed += keywords_insert("DOCUMENT" , DOCUMENT); |
446 | failed += keywords_insert("ELEMENT" , ELEMENT); |
447 | failed += keywords_insert("CONTENT" , CONTENT); |
448 | failed += keywords_insert("XMLNAMESPACES" , XMLNAMESPACES); |
449 | failed += keywords_insert("NAMESPACE" , NAMESPACE); |
450 | failed += keywords_insert("XMLVALIDATE" , XMLVALIDATE); |
451 | failed += keywords_insert("RETURNING" , RETURNING); |
452 | failed += keywords_insert("LOCATION" , LOCATION); |
453 | failed += keywords_insert("ID" , ID); |
454 | failed += keywords_insert("ACCORDING" , ACCORDING); |
455 | failed += keywords_insert("XMLSCHEMA" , XMLSCHEMA); |
456 | failed += keywords_insert("URI" , URI); |
457 | failed += keywords_insert("XMLAGG" , XMLAGG); |
458 | |
459 | /* keywords for opengis */ |
460 | failed += keywords_insert("GEOMETRY" , GEOMETRY); |
461 | |
462 | failed += keywords_insert("POINT" , GEOMETRYSUBTYPE); |
463 | failed += keywords_insert("LINESTRING" , GEOMETRYSUBTYPE); |
464 | failed += keywords_insert("POLYGON" , GEOMETRYSUBTYPE); |
465 | failed += keywords_insert("MULTIPOINT" , GEOMETRYSUBTYPE); |
466 | failed += keywords_insert("MULTILINESTRING" , GEOMETRYSUBTYPE); |
467 | failed += keywords_insert("MULTIPOLYGON" , GEOMETRYSUBTYPE); |
468 | failed += keywords_insert("GEOMETRYCOLLECTION" , GEOMETRYSUBTYPE); |
469 | |
470 | failed += keywords_insert("POINTZ" , GEOMETRYSUBTYPE); |
471 | failed += keywords_insert("LINESTRINGZ" , GEOMETRYSUBTYPE); |
472 | failed += keywords_insert("POLYGONZ" , GEOMETRYSUBTYPE); |
473 | failed += keywords_insert("MULTIPOINTZ" , GEOMETRYSUBTYPE); |
474 | failed += keywords_insert("MULTILINESTRINGZ" , GEOMETRYSUBTYPE); |
475 | failed += keywords_insert("MULTIPOLYGONZ" , GEOMETRYSUBTYPE); |
476 | failed += keywords_insert("GEOMETRYCOLLECTIONZ" , GEOMETRYSUBTYPE); |
477 | |
478 | failed += keywords_insert("POINTM" , GEOMETRYSUBTYPE); |
479 | failed += keywords_insert("LINESTRINGM" , GEOMETRYSUBTYPE); |
480 | failed += keywords_insert("POLYGONM" , GEOMETRYSUBTYPE); |
481 | failed += keywords_insert("MULTIPOINTM" , GEOMETRYSUBTYPE); |
482 | failed += keywords_insert("MULTILINESTRINGM" , GEOMETRYSUBTYPE); |
483 | failed += keywords_insert("MULTIPOLYGONM" , GEOMETRYSUBTYPE); |
484 | failed += keywords_insert("GEOMETRYCOLLECTIONM" , GEOMETRYSUBTYPE); |
485 | |
486 | failed += keywords_insert("POINTZM" , GEOMETRYSUBTYPE); |
487 | failed += keywords_insert("LINESTRINGZM" , GEOMETRYSUBTYPE); |
488 | failed += keywords_insert("POLYGONZM" , GEOMETRYSUBTYPE); |
489 | failed += keywords_insert("MULTIPOINTZM" , GEOMETRYSUBTYPE); |
490 | failed += keywords_insert("MULTILINESTRINGZM" , GEOMETRYSUBTYPE); |
491 | failed += keywords_insert("MULTIPOLYGONZM" , GEOMETRYSUBTYPE); |
492 | failed += keywords_insert("GEOMETRYCOLLECTIONZM" , GEOMETRYSUBTYPE); |
493 | |
494 | return failed; |
495 | } |
496 | |
/* Look up the keyword whose text starts s bytes past the current read
 * position (rs->pos) of the input buffer of scanner lc; evaluates to
 * whatever find_keyword() returns.  Both arguments are parenthesized so
 * that expressions such as &scanner or a + b expand correctly; note
 * that lc is evaluated twice. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
498 | |
499 | void |
500 | scanner_init(struct scanner *s, bstream *rs, stream *ws) |
501 | { |
502 | s->rs = rs; |
503 | s->ws = ws; |
504 | s->log = NULL; |
505 | |
506 | s->yynext = 0; |
507 | s->yylast = 0; |
508 | s->yyval = 0; |
509 | s->yybak = 0; /* keep backup of char replaced by EOS */ |
510 | s->yycur = 0; |
511 | |
512 | s->key = 0; /* keep a hash key of the query */ |
513 | s->started = 0; |
514 | s->as = 0; |
515 | |
516 | s->mode = LINE_N; |
517 | s->schema = NULL; |
518 | } |
519 | |
520 | void |
521 | scanner_query_processed(struct scanner *s) |
522 | { |
523 | int cur; |
524 | |
525 | if (s->yybak) { |
526 | s->rs->buf[s->rs->pos + s->yycur] = s->yybak; |
527 | s->yybak = 0; |
528 | } |
529 | if (s->rs) { |
530 | s->rs->pos += s->yycur; |
531 | /* completely eat the query including white space after the ; */ |
532 | while (s->rs->pos < s->rs->len && |
533 | (cur = s->rs->buf[s->rs->pos], iswspace(cur))) { |
534 | s->rs->pos++; |
535 | } |
536 | } |
537 | /*assert(s->rs->pos <= s->rs->len);*/ |
538 | s->yycur = 0; |
539 | s->key = 0; /* keep a hash key of the query */ |
540 | s->started = 0; |
541 | s->as = 0; |
542 | s->schema = NULL; |
543 | } |
544 | |
/* Reset the hash key of the query kept in scanner s (s->key) to 0;
 * nothing else in the scanner is touched. */
void
scanner_reset_key(struct scanner *s)
{
	s->key = 0;
}
550 | |
551 | static int |
552 | scanner_error(mvc *lc, int cur) |
553 | { |
554 | switch (cur) { |
555 | case EOF: |
556 | (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input" ); |
557 | return -1; /* EOF needs -1 result */ |
558 | default: |
559 | /* on Windows at least, iswcntrl returns TRUE for |
560 | * U+FEFF, but we just want consistent error |
561 | * messages */ |
562 | (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)" , iswcntrl(cur) && cur != 0xFEFF ? " control" : "" , (unsigned) cur); |
563 | } |
564 | return LEX_ERROR; |
565 | } |
566 | |
567 | |
568 | /* |
569 | UTF-8 encoding is as follows: |
570 | U-00000000 - U-0000007F: 0xxxxxxx |
571 | U-00000080 - U-000007FF: 110xxxxx 10xxxxxx |
572 | U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx |
573 | U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
574 | U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
575 | U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
576 | */ |
577 | /* To be correctly coded UTF-8, the sequence should be the shortest |
578 | possible encoding of the value being encoded. This means that for |
579 | an encoding of length n+1 (1 <= n <= 5), at least one of the bits in |
580 | utf8chkmsk[n] should be non-zero (else the encoding could be |
581 | shorter). |
582 | */ |
/* Shortest-encoding check masks, indexed by the number of 10xxxxxx
 * continuation bytes in a sequence.  Entry n holds the value bits that
 * only become available with n continuation bytes; a decoded value with
 * none of them set would have fit in a shorter sequence.  The table is
 * only ever read, hence const. */
static const int utf8chkmsk[] = {
	0x0000007f,	/* 1 byte:  bits  0-6  */
	0x00000780,	/* 2 bytes: bits  7-10 */
	0x0000f800,	/* 3 bytes: bits 11-15 */
	0x001f0000,	/* 4 bytes: bits 16-20 */
	0x03e00000,	/* 5 bytes: bits 21-25 */
	0x7c000000	/* 6 bytes: bits 26-30 */
};
591 | |
592 | static void |
593 | utf8_putchar(struct scanner *lc, int ch) |
594 | { |
595 | if ((ch) < 0x80) { |
596 | lc->yycur--; |
597 | } else if ((ch) < 0x800) { |
598 | lc->yycur -= 2; |
599 | } else if ((ch) < 0x10000) { |
600 | lc->yycur -= 3; |
601 | } else { |
602 | lc->yycur -= 4; |
603 | } |
604 | } |
605 | |
606 | static inline int |
607 | scanner_read_more(struct scanner *lc, size_t n) |
608 | { |
609 | bstream *b = lc->rs; |
610 | bool more = false; |
611 | |
612 | |
613 | while (b->len < b->pos + lc->yycur + n) { |
614 | |
615 | if (lc->mode == LINE_1 || !lc->started) |
616 | return EOF; |
617 | |
618 | /* query is not finished ask for more */ |
619 | if (b->eof || !isa_block_stream(b->s)) { |
620 | if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1) |
621 | mnstr_flush(lc->ws); |
622 | b->eof = false; |
623 | more = true; |
624 | } |
625 | /* we need more query text */ |
626 | if (bstream_next(b) < 0 || |
627 | /* we asked for more data but didn't get any */ |
628 | (more && b->eof && b->len < b->pos + lc->yycur + n)) |
629 | return EOF; |
630 | } |
631 | return 1; |
632 | } |
633 | |
/*
 * Read the next character from the input of scanner lc, decoding
 * UTF-8, and advance yycur past the bytes consumed.
 *
 * Returns the decoded code point, or EOF when no input is available or
 * the byte sequence is not valid UTF-8.  lc->errstr is set to a message
 * for end of input and for each kind of malformed sequence; it is NULL
 * after a successful read, and also when the input ends in the middle
 * of a multi-byte sequence.
 */
static inline int
scanner_getc(struct scanner *lc)
{
	bstream *b = lc->rs;
	unsigned char *s = NULL;
	int c, m, n, mask;

	if (scanner_read_more(lc, 1) == EOF) {
		lc->errstr = SQLSTATE(42000) "end of input stream" ;
		return EOF;
	}
	lc->errstr = NULL;

	/* fetch the lead byte and step over it */
	s = (unsigned char *) b->buf + b->pos + lc->yycur++;
	if (((c = *s) & 0x80) == 0) {
		/* 7-bit char */
		return c;
	}
	/* count the 1 bits that follow the top bit of the lead byte */
	for (n = 0, m = 0x40; c & m; n++, m >>= 1)
		;
	/* n now is number of 10xxxxxx bytes that should follow */
	if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
		/* incorrect UTF-8 sequence */
		/* n==0: c == 10xxxxxx */
		/* n>=6: c == 1111111x */
		lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence" ;
		goto error;
	}

	/* make sure all continuation bytes are in the buffer; s is
	 * recomputed afterwards from b->buf and b->pos */
	if (scanner_read_more(lc, (size_t) n) == EOF)
		return EOF;
	s = (unsigned char *) b->buf + b->pos + lc->yycur;

	mask = utf8chkmsk[n];
	c &= ~(0xFFC0 >> n);	/* remove non-x bits */
	/* append the 6 payload bits of each continuation byte */
	while (--n >= 0) {
		c <<= 6;
		lc->yycur++;
		if (((m = *s++) & 0xC0) != 0x80) {
			/* incorrect UTF-8 sequence: byte is not 10xxxxxx */
			/* this includes end-of-string (m == 0) */
			lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence" ;
			goto error;
		}
		c |= m & 0x3F;
	}
	if ((c & mask) == 0) {
		/* incorrect UTF-8 sequence: not shortest possible */
		lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence" ;
		goto error;
	}

	return c;

error:
	if (b->pos + lc->yycur < b->len)	/* skip bogus char */
		lc->yycur++;
	return EOF;
}
693 | |
694 | static int |
695 | scanner_token(struct scanner *lc, int token) |
696 | { |
697 | lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur]; |
698 | lc->rs->buf[lc->rs->pos + lc->yycur] = 0; |
699 | lc->yyval = token; |
700 | return lc->yyval; |
701 | } |
702 | |
/*
 * Scan the remainder of a quoted string directly in the input buffer.
 *
 * quote is the delimiter that ends the string; when escapes is true a
 * backslash keeps the character after it from ending the string.
 * Scanning starts at the current position (presumably just past the
 * opening quote -- confirm against the caller).
 *
 * Returns the result of scanner_token(lc, STRING) once the closing
 * quote is found.  Returns LEX_ERROR, after reporting through
 * sql_error(), when the string is too long, contains a NUL byte,
 * contains invalid UTF-8, or the input ends first.
 */
static int
scanner_string(mvc *c, int quote, bool escapes)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = quote;	/* any value other than EOF gets the loop going */
	bool escape = false;
	/* length limit: 1 << 11 for double-quoted text, 1 << 30 otherwise */
	const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;

	lc->started = 1;
	while (cur != EOF) {
		size_t pos = 0;
		const size_t yycur = rs->pos + lc->yycur;

		/* fast path: walk over 7-bit characters until a NUL, a byte
		 * with the high bit set, an unescaped quote, or the limit */
		while (cur != EOF && pos < limit &&
		       (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
		       cur && (cur != quote || escape)) {
			if (escapes && cur == '\\')
				escape = !escape;
			else
				escape = false;
		}
		if (pos == limit) {
			(void) sql_error(c, 2, SQLSTATE(42000) "string too long" );
			return LEX_ERROR;
		}
		if (cur == EOF)
			break;
		lc->yycur += pos;
		/* check for quote escaped quote: Obscure SQL Rule */
		/* TODO also handle double "" */
		if (cur == quote && rs->buf[yycur + pos] == quote) {
			/* with escapes on, the first quote of the pair is
			 * overwritten in the buffer with a backslash */
			if (escapes)
				rs->buf[yycur + pos - 1] = '\\';
			lc->yycur++;
			continue;
		}
		assert(yycur + pos <= rs->len + 1);
		if (cur == quote && !escape) {
			return scanner_token(lc, STRING);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		/* long utf8, if correct isn't the quote */
		if (!cur) {
			/* a NUL inside the buffered data is an error; a NUL
			 * at its end means more input has to be read */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string" );
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: decode (and validate)
			 * the multi-byte character */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, "%s" , lc->errstr ? lc->errstr : SQLSTATE(42000) "unexpected end of input" );
	return LEX_ERROR;
}
759 | |
/* Scan a structure {blah} into a single X_BODY token.  Only the nesting
 * of { and } is tracked; the scan ends at the close brace that matches
 * the opening one, unless a backslash escape is pending at that point.
 * Braces inside string literals are not treated specially (yet).
 *
 * On entry the opening brace has already been consumed (asserted
 * below).  Returns the result of scanner_token(lc, X_BODY) on success,
 * or LEX_ERROR, after reporting through sql_error(), on a NUL byte,
 * invalid UTF-8 or end of input.
 */

static int
scanner_body(mvc *c)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = (int) 'x';	/* any value other than EOF gets the loop going */
	int blk = 1;		/* current brace nesting depth */
	bool escape = false;

	lc->started = 1;
	assert(rs->buf[rs->pos + lc->yycur-1] == '{');
	while (cur != EOF) {
		size_t pos = rs->pos + lc->yycur;

		/* fast path: walk over 7-bit characters while the block is
		 * still open, stopping at a NUL or a high-bit byte */
		while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
			if (cur != '\\')
				escape = false;
			else
				escape = !escape;
			blk += cur =='{';
			blk -= cur =='}';
		}
		lc->yycur = pos - rs->pos;
		assert(pos <= rs->len + 1);
		if (blk == 0 && !escape){
			lc->yycur--;	/* go back to current (possibly invalid) char */
			return scanner_token(lc, X_BODY);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		if (!cur) {
			/* a NUL inside the buffered data is an error; a NUL
			 * at its end means more input has to be read */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string" );
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: decode (and validate)
			 * the multi-byte character */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input" );
	return LEX_ERROR;
}
806 | |
807 | static int |
808 | keyword_or_ident(mvc * c, int cur) |
809 | { |
810 | struct scanner *lc = &c->scanner; |
811 | keyword *k = NULL; |
812 | size_t s; |
813 | |
814 | lc->started = 1; |
815 | utf8_putchar(lc, cur); |
816 | s = lc->yycur; |
817 | lc->yyval = IDENT; |
818 | while ((cur = scanner_getc(lc)) != EOF) { |
819 | if (!iswalnum(cur) && cur != '_') { |
820 | utf8_putchar(lc, cur); |
821 | (void)scanner_token(lc, IDENT); |
822 | k = find_keyword_bs(lc,s); |
823 | if (k) |
824 | lc->yyval = k->token; |
825 | /* find keyword in SELECT/JOIN/UNION FUNCTIONS */ |
826 | else if (sql_find_func(c->sa, cur_schema(c), lc->rs->buf+lc->rs->pos+s, -1, F_FILT, NULL)) |
827 | lc->yyval = FILTER_FUNC; |
828 | return lc->yyval; |
829 | } |
830 | } |
831 | (void)scanner_token(lc, IDENT); |
832 | k = find_keyword_bs(lc,s); |
833 | if (k) |
834 | lc->yyval = k->token; |
835 | /* find keyword in SELECT/JOIN/UNION FUNCTIONS */ |
836 | else if (sql_find_func(c->sa, cur_schema(c), lc->rs->buf+lc->rs->pos+s, -1, F_FILT, NULL)) |
837 | lc->yyval = FILTER_FUNC; |
838 | return lc->yyval; |
839 | } |
840 | |
841 | static int |
842 | skip_white_space(struct scanner * lc) |
843 | { |
844 | int cur; |
845 | |
846 | do { |
847 | lc->yysval = lc->yycur; |
848 | } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur)); |
849 | return cur; |
850 | } |
851 | |
852 | static int |
853 | (struct scanner * lc) |
854 | { |
855 | int cur; |
856 | int prev = 0; |
857 | int started = lc->started; |
858 | int depth = 1; |
859 | |
860 | lc->started = 1; |
861 | while (depth > 0 && (cur = scanner_getc(lc)) != EOF) { |
862 | if (prev == '*' && cur == '/') |
863 | depth--; |
864 | else if (prev == '/' && cur == '*') { |
865 | /* block comments can nest */ |
866 | cur = 0; /* prevent slash-star-slash from matching */ |
867 | depth++; |
868 | } |
869 | prev = cur; |
870 | } |
871 | lc->yysval = lc->yycur; |
872 | lc->started = started; |
873 | /* a comment is equivalent to a newline */ |
874 | return cur == EOF ? cur : '\n'; |
875 | } |
876 | |
877 | static int |
878 | (struct scanner * lc) |
879 | { |
880 | int cur; |
881 | int started = lc->started; |
882 | |
883 | lc->started = 1; |
884 | while ((cur = scanner_getc(lc)) != EOF && (cur != '\n')) |
885 | ; |
886 | lc->yysval = lc->yycur; |
887 | lc->started = started; |
888 | /* a comment is equivalent to a newline */ |
889 | return cur; |
890 | } |
891 | |
892 | static int tokenize(mvc * lc, int cur); |
893 | |
894 | static int |
895 | number(mvc * c, int cur) |
896 | { |
897 | struct scanner *lc = &c->scanner; |
898 | int token = sqlINT; |
899 | int before_cur = EOF; |
900 | |
901 | lc->started = 1; |
902 | if (cur == '0' && (cur = scanner_getc(lc)) == 'x') { |
903 | while ((cur = scanner_getc(lc)) != EOF && |
904 | (iswdigit(cur) || |
905 | (cur >= 'A' && cur <= 'F') || |
906 | (cur >= 'a' && cur <= 'f'))) |
907 | token = HEXADECIMAL; |
908 | if (token == sqlINT) |
909 | before_cur = 'x'; |
910 | } else { |
911 | if (iswdigit(cur)) |
912 | while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur)) |
913 | ; |
914 | if (cur == '@') { |
915 | token = OIDNUM; |
916 | cur = scanner_getc(lc); |
917 | if (cur == '0') |
918 | cur = scanner_getc(lc); |
919 | } |
920 | |
921 | if (cur == '.') { |
922 | token = INTNUM; |
923 | |
924 | while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur)) |
925 | ; |
926 | } |
927 | if (cur == 'e' || cur == 'E') { |
928 | token = APPROXNUM; |
929 | cur = scanner_getc(lc); |
930 | if (cur == '-' || cur == '+') |
931 | token = 0; |
932 | while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur)) |
933 | token = APPROXNUM; |
934 | } |
935 | } |
936 | |
937 | if (cur == EOF && lc->rs->buf == NULL) /* malloc failure */ |
938 | return EOF; |
939 | |
940 | if (token) { |
941 | if (cur != EOF) |
942 | utf8_putchar(lc, cur); |
943 | if (before_cur != EOF) |
944 | utf8_putchar(lc, before_cur); |
945 | return scanner_token(lc, token); |
946 | } else { |
947 | (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc" , (wint_t) cur); |
948 | return LEX_ERROR; |
949 | } |
950 | } |
951 | |
952 | static |
953 | int scanner_symbol(mvc * c, int cur) |
954 | { |
955 | struct scanner *lc = &c->scanner; |
956 | int next = 0; |
957 | int started = lc->started; |
958 | |
959 | switch (cur) { |
960 | case '/': |
961 | lc->started = 1; |
962 | next = scanner_getc(lc); |
963 | if (next == '*') { |
964 | lc->started = started; |
965 | cur = skip_c_comment(lc); |
966 | if (cur < 0) |
967 | return EOF; |
968 | return tokenize(c, cur); |
969 | } else { |
970 | utf8_putchar(lc, next); |
971 | return scanner_token(lc, cur); |
972 | } |
973 | case '0': |
974 | case '1': |
975 | case '2': |
976 | case '3': |
977 | case '4': |
978 | case '5': |
979 | case '6': |
980 | case '7': |
981 | case '8': |
982 | case '9': |
983 | return number(c, cur); |
984 | case '#': |
985 | if ((cur = skip_sql_comment(lc)) == EOF) |
986 | return cur; |
987 | return tokenize(c, cur); |
988 | case '\'': |
989 | case '"': |
990 | return scanner_string(c, cur, |
991 | #if 0 |
992 | false |
993 | #else |
994 | cur == '\'' |
995 | #endif |
996 | ); |
997 | case '{': |
998 | return scanner_body(c); |
999 | case '-': |
1000 | lc->started = 1; |
1001 | next = scanner_getc(lc); |
1002 | if (next == '-') { |
1003 | lc->started = started; |
1004 | if ((cur = skip_sql_comment(lc)) == EOF) |
1005 | return cur; |
1006 | return tokenize(c, cur); |
1007 | } |
1008 | lc->started = 1; |
1009 | utf8_putchar(lc, next); |
1010 | return scanner_token(lc, cur); |
1011 | case '~': /* binary not */ |
1012 | lc->started = 1; |
1013 | next = scanner_getc(lc); |
1014 | if (next == '=') |
1015 | return scanner_token(lc, GEOM_MBR_EQUAL); |
1016 | utf8_putchar(lc, next); |
1017 | return scanner_token(lc, cur); |
1018 | case '^': /* binary xor */ |
1019 | case '*': |
1020 | case '?': |
1021 | case '%': |
1022 | case '+': |
1023 | case '(': |
1024 | case ')': |
1025 | case ',': |
1026 | case '=': |
1027 | case '[': |
1028 | case ']': |
1029 | lc->started = 1; |
1030 | return scanner_token(lc, cur); |
1031 | case '&': |
1032 | lc->started = 1; |
1033 | cur = scanner_getc(lc); |
1034 | if(cur == '<') { |
1035 | next = scanner_getc(lc); |
1036 | if(next == '|') { |
1037 | return scanner_token(lc, GEOM_OVERLAP_OR_BELOW); |
1038 | } else { |
1039 | utf8_putchar(lc, next); //put the char back |
1040 | return scanner_token(lc, GEOM_OVERLAP_OR_LEFT); |
1041 | } |
1042 | } else if(cur == '>') |
1043 | return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT); |
1044 | else if(cur == '&') |
1045 | return scanner_token(lc, GEOM_OVERLAP); |
1046 | else {/* binary and */ |
1047 | utf8_putchar(lc, cur); //put the char back |
1048 | return scanner_token(lc, '&'); |
1049 | } |
1050 | case '@': |
1051 | lc->started = 1; |
1052 | return scanner_token(lc, AT); |
1053 | case ';': |
1054 | lc->started = 0; |
1055 | return scanner_token(lc, SCOLON); |
1056 | case '<': |
1057 | lc->started = 1; |
1058 | cur = scanner_getc(lc); |
1059 | if (cur == '=') { |
1060 | return scanner_token( lc, COMPARISON); |
1061 | } else if (cur == '>') { |
1062 | return scanner_token( lc, COMPARISON); |
1063 | } else if (cur == '<') { |
1064 | next = scanner_getc(lc); |
1065 | if (next == '=') { |
1066 | return scanner_token( lc, LEFT_SHIFT_ASSIGN); |
1067 | } else if (next == '|') { |
1068 | return scanner_token(lc, GEOM_BELOW); |
1069 | } else { |
1070 | utf8_putchar(lc, next); //put the char back |
1071 | return scanner_token( lc, LEFT_SHIFT); |
1072 | } |
1073 | } else if(cur == '-') { |
1074 | next = scanner_getc(lc); |
1075 | if(next == '>') { |
1076 | return scanner_token(lc, GEOM_DIST); |
1077 | } else { |
1078 | //put the characters back and fall in the next possible case |
1079 | utf8_putchar(lc, next); |
1080 | utf8_putchar(lc, cur); |
1081 | return scanner_token( lc, COMPARISON); |
1082 | } |
1083 | } else { |
1084 | utf8_putchar(lc, cur); |
1085 | return scanner_token( lc, COMPARISON); |
1086 | } |
1087 | case '>': |
1088 | lc->started = 1; |
1089 | cur = scanner_getc(lc); |
1090 | if (cur == '>') { |
1091 | cur = scanner_getc(lc); |
1092 | if (cur == '=') |
1093 | return scanner_token( lc, RIGHT_SHIFT_ASSIGN); |
1094 | utf8_putchar(lc, cur); |
1095 | return scanner_token( lc, RIGHT_SHIFT); |
1096 | } else if (cur != '=') { |
1097 | utf8_putchar(lc, cur); |
1098 | return scanner_token( lc, COMPARISON); |
1099 | } else { |
1100 | return scanner_token( lc, COMPARISON); |
1101 | } |
1102 | case '.': |
1103 | lc->started = 1; |
1104 | cur = scanner_getc(lc); |
1105 | if (!iswdigit(cur)) { |
1106 | utf8_putchar(lc, cur); |
1107 | return scanner_token( lc, '.'); |
1108 | } else { |
1109 | utf8_putchar(lc, cur); |
1110 | cur = '.'; |
1111 | return number(c, cur); |
1112 | } |
1113 | case '|': /* binary or or string concat */ |
1114 | lc->started = 1; |
1115 | cur = scanner_getc(lc); |
1116 | if (cur == '|') { |
1117 | return scanner_token(lc, CONCATSTRING); |
1118 | } else if (cur == '&') { |
1119 | next = scanner_getc(lc); |
1120 | if(next == '>') { |
1121 | return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE); |
1122 | } else { |
1123 | utf8_putchar(lc, next); //put the char back |
1124 | utf8_putchar(lc, cur); //put the char back |
1125 | return scanner_token(lc, '|'); |
1126 | } |
1127 | } else if (cur == '>') { |
1128 | next = scanner_getc(lc); |
1129 | if(next == '>') { |
1130 | return scanner_token(lc, GEOM_ABOVE); |
1131 | } else { |
1132 | utf8_putchar(lc, next); //put the char back |
1133 | utf8_putchar(lc, cur); //put the char back |
1134 | return scanner_token(lc, '|'); |
1135 | } |
1136 | } else { |
1137 | utf8_putchar(lc, cur); |
1138 | return scanner_token(lc, '|'); |
1139 | } |
1140 | } |
1141 | (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)" , (wint_t) cur); |
1142 | return LEX_ERROR; |
1143 | } |
1144 | |
1145 | static int |
1146 | tokenize(mvc * c, int cur) |
1147 | { |
1148 | struct scanner *lc = &c->scanner; |
1149 | while (1) { |
1150 | if (cur == 0xFEFF) { |
1151 | /* on Linux at least, iswpunct returns TRUE |
1152 | * for U+FEFF, but we don't want that, we just |
1153 | * want to go to the scanner_error case |
1154 | * below */ |
1155 | ; |
1156 | } else if (iswspace(cur)) { |
1157 | if ((cur = skip_white_space(lc)) == EOF) |
1158 | return cur; |
1159 | continue; /* try again */ |
1160 | } else if (iswdigit(cur)) { |
1161 | return number(c, cur); |
1162 | } else if (iswalpha(cur) || cur == '_') { |
1163 | if ((cur == 'E' || cur == 'e') && |
1164 | lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') { |
1165 | return scanner_string(c, scanner_getc(lc), true); |
1166 | } |
1167 | if ((cur == 'X' || cur == 'x') && |
1168 | lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') { |
1169 | return scanner_string(c, scanner_getc(lc), true); |
1170 | } |
1171 | if ((cur == 'U' || cur == 'u') && |
1172 | lc->rs->buf[lc->rs->pos + lc->yycur] == '&' && |
1173 | (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' || |
1174 | lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) { |
1175 | cur = scanner_getc(lc); /* '&' */ |
1176 | return scanner_string(c, scanner_getc(lc), false); |
1177 | } |
1178 | return keyword_or_ident(c, cur); |
1179 | } else if (iswpunct(cur)) { |
1180 | return scanner_symbol(c, cur); |
1181 | } |
1182 | if (cur == EOF) { |
1183 | if (lc->mode == LINE_1 || !lc->started ) |
1184 | return cur; |
1185 | return scanner_error(c, cur); |
1186 | } |
1187 | /* none of the above: error */ |
1188 | return scanner_error(c, cur); |
1189 | } |
1190 | } |
1191 | |
/* Validate a quoted identifier and copy it to `dst`.
 *
 * `s` is the identifier text without its surrounding quotes.  A doubled
 * double quote ("") in the input is copied as a single '"'.
 *
 * The identifier is rejected (false is returned) when
 *   1. its first character is '%', or
 *   2. 1024 or more characters would have to be copied.
 * On rejection because of length, `dst` holds a partial copy that is
 * not NUL-terminated.
 *
 * On success `dst` is NUL-terminated and true is returned.
 */
static bool
valid_ident(const char *restrict s, char *restrict dst)
{
	int len = 0;

	if (s[0] == '%')
		return false;

	for (const char *src = s; *src != '\0'; ) {
		char ch = *src++;

		dst[len++] = ch;
		/* collapse an escaped quote ("") into one character */
		if (ch == '"' && *src == '"')
			src++;
		if (len >= 1024)
			return false;
	}
	dst[len] = '\0';
	return true;
}
1216 | |
1217 | static inline int |
1218 | sql_get_next_token(YYSTYPE *yylval, void *parm) |
1219 | { |
1220 | mvc *c = (mvc*)parm; |
1221 | struct scanner *lc = &c->scanner; |
1222 | int token = 0, cur = 0; |
1223 | |
1224 | if (lc->rs->buf == NULL) /* malloc failure */ |
1225 | return EOF; |
1226 | |
1227 | if (lc->yynext) { |
1228 | int next = lc->yynext; |
1229 | |
1230 | lc->yynext = 0; |
1231 | return(next); |
1232 | } |
1233 | |
1234 | if (lc->yybak) { |
1235 | lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak; |
1236 | lc->yybak = 0; |
1237 | } |
1238 | |
1239 | lc->yysval = lc->yycur; |
1240 | lc->yylast = lc->yyval; |
1241 | cur = scanner_getc(lc); |
1242 | if (cur < 0) |
1243 | return EOF; |
1244 | token = tokenize(c, cur); |
1245 | |
1246 | yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval); |
1247 | |
1248 | /* This is needed as ALIAS and aTYPE get defined too late, see |
1249 | sql_keyword.h */ |
1250 | if (token == KW_ALIAS) |
1251 | token = ALIAS; |
1252 | |
1253 | if (token == KW_TYPE) |
1254 | token = aTYPE; |
1255 | |
1256 | if (token == IDENT || token == COMPARISON || token == FILTER_FUNC || |
1257 | token == AGGR || token == AGGR2 || token == RANK || |
1258 | token == aTYPE || token == ALIAS) |
1259 | yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval); |
1260 | else if (token == STRING) { |
1261 | char quote = *yylval->sval; |
1262 | char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 ); |
1263 | assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x'); |
1264 | |
1265 | lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0; |
1266 | if (quote == '"') { |
1267 | if (valid_ident(yylval->sval+1,str)) { |
1268 | token = IDENT; |
1269 | } else { |
1270 | sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'" , yylval->sval+1); |
1271 | return LEX_ERROR; |
1272 | } |
1273 | } else if (quote == 'E' || quote == 'e') { |
1274 | assert(yylval->sval[1] == '\''); |
1275 | GDKstrFromStr((unsigned char *) str, |
1276 | (unsigned char *) yylval->sval + 2, |
1277 | lc->yycur-lc->yysval - 2); |
1278 | quote = '\''; |
1279 | } else if (quote == 'U' || quote == 'u') { |
1280 | assert(yylval->sval[1] == '&'); |
1281 | assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"'); |
1282 | strcpy(str, yylval->sval + 3); |
1283 | token = yylval->sval[2] == '\'' ? USTRING : UIDENT; |
1284 | quote = yylval->sval[2]; |
1285 | } else if (quote == 'X' || quote == 'x') { |
1286 | assert(yylval->sval[1] == '\''); |
1287 | char *dst = str; |
1288 | for (char *src = yylval->sval + 2; *src; dst++) |
1289 | if ((*dst = *src++) == '\'' && *src == '\'') |
1290 | src++; |
1291 | *dst = 0; |
1292 | quote = '\''; |
1293 | token = XSTRING; |
1294 | } else { |
1295 | #if 0 |
1296 | char *dst = str; |
1297 | for (char *src = yylval->sval + 1; *src; dst++) |
1298 | if ((*dst = *src++) == '\'' && *src == '\'') |
1299 | src++; |
1300 | *dst = 0; |
1301 | #else |
1302 | GDKstrFromStr((unsigned char *) str, |
1303 | (unsigned char *) yylval->sval + 1, |
1304 | lc->yycur-lc->yysval - 1); |
1305 | #endif |
1306 | } |
1307 | yylval->sval = str; |
1308 | |
1309 | /* reset original */ |
1310 | lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote; |
1311 | } |
1312 | |
1313 | return(token); |
1314 | } |
1315 | |
1316 | /* also see sql_parser.y */ |
1317 | extern int sqllex( YYSTYPE *yylval, void *m ); |
1318 | |
1319 | int |
1320 | sqllex(YYSTYPE * yylval, void *parm) |
1321 | { |
1322 | int token; |
1323 | mvc *c = (mvc *) parm; |
1324 | struct scanner *lc = &c->scanner; |
1325 | size_t pos; |
1326 | |
1327 | /* store position for when view's query ends */ |
1328 | pos = lc->rs->pos + lc->yycur; |
1329 | |
1330 | token = sql_get_next_token(yylval, parm); |
1331 | |
1332 | if (token == NOT) { |
1333 | int next = sqllex(yylval, parm); |
1334 | |
1335 | if (next == NOT) { |
1336 | return sqllex(yylval, parm); |
1337 | } else if (next == BETWEEN) { |
1338 | token = NOT_BETWEEN; |
1339 | } else if (next == sqlIN) { |
1340 | token = NOT_IN; |
1341 | } else if (next == LIKE) { |
1342 | token = NOT_LIKE; |
1343 | } else if (next == ILIKE) { |
1344 | token = NOT_ILIKE; |
1345 | } else { |
1346 | lc->yynext = next; |
1347 | } |
1348 | } else if (token == UNION) { |
1349 | int next = sqllex(yylval, parm); |
1350 | |
1351 | if (next == JOIN) { |
1352 | token = UNIONJOIN; |
1353 | } else { |
1354 | lc->yynext = next; |
1355 | } |
1356 | } else if (token == SCOLON) { |
1357 | /* ignore semi-colon(s) following a semi-colon */ |
1358 | if (lc->yylast == SCOLON) { |
1359 | size_t prev = lc->yycur; |
1360 | while ((token = sql_get_next_token(yylval, parm)) == SCOLON) |
1361 | prev = lc->yycur; |
1362 | |
1363 | /* skip the skipped stuff also in the buffer */ |
1364 | lc->rs->pos += prev; |
1365 | lc->yycur -= prev; |
1366 | } |
1367 | } |
1368 | |
1369 | if (lc->log) |
1370 | mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1); |
1371 | |
1372 | /* Don't include literals in the calculation of the key */ |
1373 | if (token != STRING && token != USTRING && token != sqlINT && token != OIDNUM && token != INTNUM && token != APPROXNUM && token != sqlNULL) |
1374 | lc->key ^= token; |
1375 | lc->started += (token != EOF); |
1376 | return token; |
1377 | } |
1378 | |