1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9#include "monetdb_config.h"
10#include <wctype.h>
11#include "sql_mem.h"
12#include "sql_scan.h"
13#include "sql_types.h"
14#include "sql_symbol.h"
15#include "sql_mvc.h"
16#include "sql_parser.tab.h"
17#include "sql_semantic.h"
18#include "sql_parser.h" /* for sql_error() */
19
20#include "stream.h"
21#include <unistd.h>
22#include <string.h>
23#include <ctype.h>
24#include "sql_keyword.h"
25#ifdef HAVE_HGE
26#include "mal.h" /* for have_hge */
27#endif
28
/*
 * Return a newly allocated (GDKmalloc) copy of `query` with comments
 * stripped and white space normalised, or NULL when the allocation
 * fails.
 *
 * - C style comments and "--" comments are dropped.  The newline that
 *   ends a "--" comment is kept only if the output is non-empty and does
 *   not already end in a newline.
 * - Text inside '..', ".." and {..} is copied verbatim; inside such a
 *   region a backslash protects the following character from being
 *   taken as the closing delimiter.
 * - Outside quotes, runs of newlines collapse to one newline, runs of
 *   spaces/tabs collapse to one space, and leading white space is
 *   dropped.
 *
 * The result is never longer than the input, so strlen(query) + 1 bytes
 * always suffice.  The caller owns the returned buffer (presumably to be
 * released with GDKfree -- confirm against callers).
 */
char *
query_cleaned(const char *query)
{
	char *q, *r;
	int quote = 0;		/* closing delimiter we wait for, 0 if not inside '..', "..", {..} */
	bool bs = false;	/* seen a backslash in a quoted string */
	bool incomment1 = false; /* inside traditional C style comment */
	bool incomment2 = false; /* inside comment starting with -- */
	bool star = false;	/* previous character inside the C comment was '*' */

	r = GDKmalloc(strlen(query) + 1);
	if (!r)
		return NULL;

	for (q = r; *query; query++) {
		if (incomment1) {
			/* only a '*' seen inside the comment may close
			 * it, so that slash-star-slash is not mistaken
			 * for a complete comment */
			if (*query == '/' && star)
				incomment1 = false;
			star = *query == '*';
		} else if (incomment2) {
			if (*query == '\n') {
				incomment2 = false;
				/* add newline only if comment doesn't
				 * occupy whole line */
				if (q > r && q[-1] != '\n')
					*q++ = '\n';
			}
		} else if (quote) {
			if (bs) {
				bs = false;
			} else if (*query == '\\') {
				bs = true;
			} else if (*query == quote) {
				quote = 0;
			}
			*q++ = *query;
		} else if (*query == '"' || *query == '\'') {
			quote = *query;
			*q++ = *query;
		} else if (*query == '{') {
			quote = '}';
			*q++ = *query;
		} else if (*query == '-' && query[1] == '-') {
			incomment2 = true;
		} else if (*query == '/' && query[1] == '*') {
			incomment1 = true;
			star = false;
			/* step over the opening '*' (known to be present,
			 * so this cannot run past the terminator) */
			query++;
		} else if (*query == '\n') {
			/* collapse newlines */
			if (q > r && q[-1] != '\n')
				*q++ = '\n';
		} else if (*query == ' ' || *query == '\t') {
			/* collapse white space */
			if (q > r && q[-1] != ' ')
				*q++ = ' ';
		} else {
			*q++ = *query;
		}
	}
	*q = 0;
	return r;
}
88
89int
90scanner_init_keywords(void)
91{
92 int failed = 0;
93
94 failed += keywords_insert("false", BOOL_FALSE);
95 failed += keywords_insert("true", BOOL_TRUE);
96
97 failed += keywords_insert("ALTER", ALTER);
98 failed += keywords_insert("ADD", ADD);
99 failed += keywords_insert("AND", AND);
100 failed += keywords_insert("MEDIAN", AGGR);
101 failed += keywords_insert("CORR", AGGR2);
102 failed += keywords_insert("QUANTILE", AGGR2);
103 failed += keywords_insert("AVG", AGGR);
104 failed += keywords_insert("MIN", AGGR);
105 failed += keywords_insert("MAX", AGGR);
106 failed += keywords_insert("SUM", AGGR);
107 failed += keywords_insert("PROD", AGGR);
108 failed += keywords_insert("COUNT", AGGR);
109
110 failed += keywords_insert("RANK", RANK);
111 failed += keywords_insert("DENSE_RANK", RANK);
112 failed += keywords_insert("PERCENT_RANK", RANK);
113 failed += keywords_insert("CUME_DIST", RANK);
114 failed += keywords_insert("ROW_NUMBER", RANK);
115 failed += keywords_insert("NTILE", RANK);
116 failed += keywords_insert("LAG", RANK);
117 failed += keywords_insert("LEAD", RANK);
118 failed += keywords_insert("FIRST_VALUE", RANK);
119 failed += keywords_insert("LAST_VALUE", RANK);
120 failed += keywords_insert("NTH_VALUE", RANK);
121
122 failed += keywords_insert("BEST", BEST);
123 failed += keywords_insert("EFFORT", EFFORT);
124
125 failed += keywords_insert("AS", AS);
126 failed += keywords_insert("ASC", ASC);
127 failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
128 failed += keywords_insert("BETWEEN", BETWEEN);
129 failed += keywords_insert("SYMMETRIC", SYMMETRIC);
130 failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
131 failed += keywords_insert("BY", BY);
132 failed += keywords_insert("CAST", CAST);
133 failed += keywords_insert("CONVERT", CONVERT);
134 failed += keywords_insert("CHARACTER", CHARACTER);
135 failed += keywords_insert("CHAR", CHARACTER);
136 failed += keywords_insert("VARYING", VARYING);
137 failed += keywords_insert("VARCHAR", VARCHAR);
138 failed += keywords_insert("BINARY", BINARY);
139 failed += keywords_insert("LARGE", LARGE);
140 failed += keywords_insert("OBJECT", OBJECT);
141 failed += keywords_insert("CLOB", CLOB);
142 failed += keywords_insert("BLOB", sqlBLOB);
143 failed += keywords_insert("TEXT", sqlTEXT);
144 failed += keywords_insert("TINYTEXT", sqlTEXT);
145 failed += keywords_insert("STRING", CLOB); /* ? */
146 failed += keywords_insert("CHECK", CHECK);
147 failed += keywords_insert("CLIENT", CLIENT);
148 failed += keywords_insert("SERVER", SERVER);
149 failed += keywords_insert("COMMENT", COMMENT);
150 failed += keywords_insert("CONSTRAINT", CONSTRAINT);
151 failed += keywords_insert("CREATE", CREATE);
152 failed += keywords_insert("CROSS", CROSS);
153 failed += keywords_insert("COPY", COPY);
154 failed += keywords_insert("RECORDS", RECORDS);
155 failed += keywords_insert("DELIMITERS", DELIMITERS);
156 failed += keywords_insert("STDIN", STDIN);
157 failed += keywords_insert("STDOUT", STDOUT);
158
159 failed += keywords_insert("TINYINT", TINYINT);
160 failed += keywords_insert("SMALLINT", SMALLINT);
161 failed += keywords_insert("INTEGER", sqlINTEGER);
162 failed += keywords_insert("INT", sqlINTEGER);
163 failed += keywords_insert("MEDIUMINT", sqlINTEGER);
164 failed += keywords_insert("BIGINT", BIGINT);
165#ifdef HAVE_HGE
166 if (have_hge)
167 failed += keywords_insert("HUGEINT", HUGEINT);
168#endif
169 failed += keywords_insert("DEC", sqlDECIMAL);
170 failed += keywords_insert("DECIMAL", sqlDECIMAL);
171 failed += keywords_insert("NUMERIC", sqlDECIMAL);
172 failed += keywords_insert("DECLARE", DECLARE);
173 failed += keywords_insert("DEFAULT", DEFAULT);
174 failed += keywords_insert("DESC", DESC);
175 failed += keywords_insert("DISTINCT", DISTINCT);
176 failed += keywords_insert("DOUBLE", sqlDOUBLE);
177 failed += keywords_insert("REAL", sqlREAL);
178 failed += keywords_insert("DROP", DROP);
179 failed += keywords_insert("ESCAPE", ESCAPE);
180 failed += keywords_insert("EXISTS", EXISTS);
181 failed += keywords_insert("UESCAPE", UESCAPE);
182 failed += keywords_insert("EXTRACT", EXTRACT);
183 failed += keywords_insert("FLOAT", sqlFLOAT);
184 failed += keywords_insert("FOR", FOR);
185 failed += keywords_insert("FOREIGN", FOREIGN);
186 failed += keywords_insert("FROM", FROM);
187 failed += keywords_insert("FWF", FWF);
188
189 failed += keywords_insert("REFERENCES", REFERENCES);
190
191 failed += keywords_insert("MATCH", MATCH);
192 failed += keywords_insert("FULL", FULL);
193 failed += keywords_insert("PARTIAL", PARTIAL);
194 failed += keywords_insert("SIMPLE", SIMPLE);
195
196 failed += keywords_insert("INSERT", INSERT);
197 failed += keywords_insert("UPDATE", UPDATE);
198 failed += keywords_insert("DELETE", sqlDELETE);
199 failed += keywords_insert("TRUNCATE", TRUNCATE);
200 failed += keywords_insert("MATCHED", MATCHED);
201
202 failed += keywords_insert("ACTION", ACTION);
203 failed += keywords_insert("CASCADE", CASCADE);
204 failed += keywords_insert("RESTRICT", RESTRICT);
205 failed += keywords_insert("FIRST", FIRST);
206 failed += keywords_insert("GLOBAL", GLOBAL);
207 failed += keywords_insert("GROUP", sqlGROUP);
208 failed += keywords_insert("HAVING", HAVING);
209 failed += keywords_insert("ILIKE", ILIKE);
210 failed += keywords_insert("IMPRINTS", IMPRINTS);
211 failed += keywords_insert("IN", sqlIN);
212 failed += keywords_insert("INNER", INNER);
213 failed += keywords_insert("INTO", INTO);
214 failed += keywords_insert("IS", IS);
215 failed += keywords_insert("JOIN", JOIN);
216 failed += keywords_insert("KEY", KEY);
217 failed += keywords_insert("LATERAL", LATERAL);
218 failed += keywords_insert("LEFT", LEFT);
219 failed += keywords_insert("LIKE", LIKE);
220 failed += keywords_insert("LIMIT", LIMIT);
221 failed += keywords_insert("SAMPLE", SAMPLE);
222 failed += keywords_insert("SEED", SEED);
223 failed += keywords_insert("LAST", LAST);
224 failed += keywords_insert("LOCAL", LOCAL);
225 failed += keywords_insert("LOCKED", LOCKED);
226 failed += keywords_insert("NATURAL", NATURAL);
227 failed += keywords_insert("NOT", NOT);
228 failed += keywords_insert("NULL", sqlNULL);
229 failed += keywords_insert("NULLS", NULLS);
230 failed += keywords_insert("OFFSET", OFFSET);
231 failed += keywords_insert("ON", ON);
232 failed += keywords_insert("OPTIONS", OPTIONS);
233 failed += keywords_insert("OPTION", OPTION);
234 failed += keywords_insert("OR", OR);
235 failed += keywords_insert("ORDER", ORDER);
236 failed += keywords_insert("ORDERED", ORDERED);
237 failed += keywords_insert("OUTER", OUTER);
238 failed += keywords_insert("OVER", OVER);
239 failed += keywords_insert("PARTITION", PARTITION);
240 failed += keywords_insert("PATH", PATH);
241 failed += keywords_insert("PRECISION", PRECISION);
242 failed += keywords_insert("PRIMARY", PRIMARY);
243
244 failed += keywords_insert("USER", USER);
245 failed += keywords_insert("RENAME", RENAME);
246 failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
247 failed += keywords_insert("ENCRYPTED", ENCRYPTED);
248 failed += keywords_insert("PASSWORD", PASSWORD);
249 failed += keywords_insert("GRANT", GRANT);
250 failed += keywords_insert("REVOKE", REVOKE);
251 failed += keywords_insert("ROLE", ROLE);
252 failed += keywords_insert("ADMIN", ADMIN);
253 failed += keywords_insert("PRIVILEGES", PRIVILEGES);
254 failed += keywords_insert("PUBLIC", PUBLIC);
255 failed += keywords_insert("CURRENT_USER", CURRENT_USER);
256 failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
257 failed += keywords_insert("SESSION_USER", SESSION_USER);
258 failed += keywords_insert("SESSION", sqlSESSION);
259
260 failed += keywords_insert("RIGHT", RIGHT);
261 failed += keywords_insert("SCHEMA", SCHEMA);
262 failed += keywords_insert("SELECT", SELECT);
263 failed += keywords_insert("SET", SET);
264 failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
265
266 failed += keywords_insert("ALL", ALL);
267 failed += keywords_insert("ANY", ANY);
268 failed += keywords_insert("SOME", SOME);
269 failed += keywords_insert("EVERY", ANY);
270 /*
271 failed += keywords_insert("SQLCODE", SQLCODE );
272 */
273 failed += keywords_insert("COLUMN", COLUMN);
274 failed += keywords_insert("TABLE", TABLE);
275 failed += keywords_insert("TEMPORARY", TEMPORARY);
276 failed += keywords_insert("TEMP", TEMP);
277 failed += keywords_insert("STREAM", STREAM);
278 failed += keywords_insert("REMOTE", REMOTE);
279 failed += keywords_insert("MERGE", MERGE);
280 failed += keywords_insert("REPLICA", REPLICA);
281 failed += keywords_insert("TO", TO);
282 failed += keywords_insert("UNION", UNION);
283 failed += keywords_insert("EXCEPT", EXCEPT);
284 failed += keywords_insert("INTERSECT", INTERSECT);
285 failed += keywords_insert("CORRESPONDING", CORRESPONDING);
286 failed += keywords_insert("UNIQUE", UNIQUE);
287 failed += keywords_insert("USING", USING);
288 failed += keywords_insert("VALUES", VALUES);
289 failed += keywords_insert("VIEW", VIEW);
290 failed += keywords_insert("WHERE", WHERE);
291 failed += keywords_insert("WITH", WITH);
292 failed += keywords_insert("DATA", DATA);
293
294 failed += keywords_insert("DATE", sqlDATE);
295 failed += keywords_insert("TIME", TIME);
296 failed += keywords_insert("TIMESTAMP", TIMESTAMP);
297 failed += keywords_insert("INTERVAL", INTERVAL);
298 failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
299 failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
300 failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
301 failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
302 failed += keywords_insert("LOCALTIME", LOCALTIME);
303 failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
304 failed += keywords_insert("ZONE", ZONE);
305
306 failed += keywords_insert("CENTURY", CENTURY);
307 failed += keywords_insert("DECADE", DECADE);
308 failed += keywords_insert("YEAR", YEAR);
309 failed += keywords_insert("QUARTER", QUARTER);
310 failed += keywords_insert("MONTH", MONTH);
311 failed += keywords_insert("WEEK", WEEK);
312 failed += keywords_insert("DOW", DOW);
313 failed += keywords_insert("DOY", DOY);
314 failed += keywords_insert("DAY", DAY);
315 failed += keywords_insert("HOUR", HOUR);
316 failed += keywords_insert("MINUTE", MINUTE);
317 failed += keywords_insert("SECOND", SECOND);
318
319 failed += keywords_insert("POSITION", POSITION);
320 failed += keywords_insert("SUBSTRING", SUBSTRING);
321 failed += keywords_insert("SPLIT_PART", SPLIT_PART);
322
323 failed += keywords_insert("CASE", CASE);
324 failed += keywords_insert("WHEN", WHEN);
325 failed += keywords_insert("THEN", THEN);
326 failed += keywords_insert("ELSE", ELSE);
327 failed += keywords_insert("END", END);
328 failed += keywords_insert("NULLIF", NULLIF);
329 failed += keywords_insert("COALESCE", COALESCE);
330 failed += keywords_insert("ELSEIF", ELSEIF);
331 failed += keywords_insert("IF", IF);
332 failed += keywords_insert("WHILE", WHILE);
333 failed += keywords_insert("DO", DO);
334
335 failed += keywords_insert("COMMIT", COMMIT);
336 failed += keywords_insert("ROLLBACK", ROLLBACK);
337 failed += keywords_insert("SAVEPOINT", SAVEPOINT);
338 failed += keywords_insert("RELEASE", RELEASE);
339 failed += keywords_insert("WORK", WORK);
340 failed += keywords_insert("CHAIN", CHAIN);
341 failed += keywords_insert("PRESERVE", PRESERVE);
342 failed += keywords_insert("ROWS", ROWS);
343 failed += keywords_insert("NO", NO);
344 failed += keywords_insert("START", START);
345 failed += keywords_insert("TRANSACTION", TRANSACTION);
346 failed += keywords_insert("READ", READ);
347 failed += keywords_insert("WRITE", WRITE);
348 failed += keywords_insert("ONLY", ONLY);
349 failed += keywords_insert("ISOLATION", ISOLATION);
350 failed += keywords_insert("LEVEL", LEVEL);
351 failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
352 failed += keywords_insert("COMMITTED", COMMITTED);
353 failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
354 failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
355 failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
356 failed += keywords_insert("SIZE", sqlSIZE);
357 failed += keywords_insert("STORAGE", STORAGE);
358
359 failed += keywords_insert("TYPE", TYPE);
360 failed += keywords_insert("PROCEDURE", PROCEDURE);
361 failed += keywords_insert("FUNCTION", FUNCTION);
362 failed += keywords_insert("LOADER", sqlLOADER);
363 failed += keywords_insert("REPLACE", REPLACE);
364
365 failed += keywords_insert("FILTER", FILTER);
366 failed += keywords_insert("AGGREGATE", AGGREGATE);
367 failed += keywords_insert("RETURNS", RETURNS);
368 failed += keywords_insert("EXTERNAL", EXTERNAL);
369 failed += keywords_insert("NAME", sqlNAME);
370 failed += keywords_insert("RETURN", RETURN);
371 failed += keywords_insert("CALL", CALL);
372 failed += keywords_insert("LANGUAGE", LANGUAGE);
373
374 failed += keywords_insert("ANALYZE", ANALYZE);
375 failed += keywords_insert("MINMAX", MINMAX);
376 failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
377 failed += keywords_insert("PLAN", SQL_PLAN);
378 failed += keywords_insert("DEBUG", SQL_DEBUG);
379 failed += keywords_insert("TRACE", SQL_TRACE);
380 failed += keywords_insert("PREPARE", PREPARE);
381 failed += keywords_insert("PREP", PREP);
382 failed += keywords_insert("EXECUTE", EXECUTE);
383 failed += keywords_insert("EXEC", EXEC);
384
385 failed += keywords_insert("INDEX", INDEX);
386
387 failed += keywords_insert("SEQUENCE", SEQUENCE);
388 failed += keywords_insert("RESTART", RESTART);
389 failed += keywords_insert("INCREMENT", INCREMENT);
390 failed += keywords_insert("MAXVALUE", MAXVALUE);
391 failed += keywords_insert("MINVALUE", MINVALUE);
392 failed += keywords_insert("CYCLE", CYCLE);
393 failed += keywords_insert("CACHE", CACHE);
394 failed += keywords_insert("NEXT", NEXT);
395 failed += keywords_insert("VALUE", VALUE);
396 failed += keywords_insert("GENERATED", GENERATED);
397 failed += keywords_insert("ALWAYS", ALWAYS);
398 failed += keywords_insert("IDENTITY", IDENTITY);
399 failed += keywords_insert("SERIAL", SERIAL);
400 failed += keywords_insert("BIGSERIAL", BIGSERIAL);
401 failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
402 failed += keywords_insert("CONTINUE", CONTINUE);
403
404 failed += keywords_insert("TRIGGER", TRIGGER);
405 failed += keywords_insert("ATOMIC", ATOMIC);
406 failed += keywords_insert("BEGIN", BEGIN);
407 failed += keywords_insert("OF", OF);
408 failed += keywords_insert("BEFORE", BEFORE);
409 failed += keywords_insert("AFTER", AFTER);
410 failed += keywords_insert("ROW", ROW);
411 failed += keywords_insert("STATEMENT", STATEMENT);
412 failed += keywords_insert("NEW", sqlNEW);
413 failed += keywords_insert("OLD", OLD);
414 failed += keywords_insert("EACH", EACH);
415 failed += keywords_insert("REFERENCING", REFERENCING);
416
417 failed += keywords_insert("RANGE", RANGE);
418 failed += keywords_insert("UNBOUNDED", UNBOUNDED);
419 failed += keywords_insert("PRECEDING", PRECEDING);
420 failed += keywords_insert("FOLLOWING", FOLLOWING);
421 failed += keywords_insert("CURRENT", CURRENT);
422 failed += keywords_insert("EXCLUDE", EXCLUDE);
423 failed += keywords_insert("OTHERS", OTHERS);
424 failed += keywords_insert("TIES", TIES);
425 failed += keywords_insert("GROUPS", GROUPS);
426 failed += keywords_insert("WINDOW", WINDOW);
427
428 /* special SQL/XML keywords */
429 failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
430 failed += keywords_insert("XMLCONCAT", XMLCONCAT);
431 failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
432 failed += keywords_insert("XMLELEMENT", XMLELEMENT);
433 failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
434 failed += keywords_insert("XMLFOREST", XMLFOREST);
435 failed += keywords_insert("XMLPARSE", XMLPARSE);
436 failed += keywords_insert("STRIP", STRIP);
437 failed += keywords_insert("WHITESPACE", WHITESPACE);
438 failed += keywords_insert("XMLPI", XMLPI);
439 failed += keywords_insert("XMLQUERY", XMLQUERY);
440 failed += keywords_insert("PASSING", PASSING);
441 failed += keywords_insert("XMLTEXT", XMLTEXT);
442 failed += keywords_insert("NIL", NIL);
443 failed += keywords_insert("REF", REF);
444 failed += keywords_insert("ABSENT", ABSENT);
445 failed += keywords_insert("DOCUMENT", DOCUMENT);
446 failed += keywords_insert("ELEMENT", ELEMENT);
447 failed += keywords_insert("CONTENT", CONTENT);
448 failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
449 failed += keywords_insert("NAMESPACE", NAMESPACE);
450 failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
451 failed += keywords_insert("RETURNING", RETURNING);
452 failed += keywords_insert("LOCATION", LOCATION);
453 failed += keywords_insert("ID", ID);
454 failed += keywords_insert("ACCORDING", ACCORDING);
455 failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
456 failed += keywords_insert("URI", URI);
457 failed += keywords_insert("XMLAGG", XMLAGG);
458
459 /* keywords for opengis */
460 failed += keywords_insert("GEOMETRY", GEOMETRY);
461
462 failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
463 failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
464 failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
465 failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
466 failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
467 failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
468 failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
469
470 failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
471 failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
472 failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
473 failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
474 failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
475 failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
476 failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
477
478 failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
479 failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
480 failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
481 failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
482 failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
483 failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
484 failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
485
486 failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
487 failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
488 failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
489 failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
490 failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
491 failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
492 failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
493
494 return failed;
495}
496
/* Look up the keyword whose text starts `s` bytes past the current read
 * position of scanner `lc`, i.e. at lc->rs->buf + lc->rs->pos + s.
 * Arguments are parenthesised so that arbitrary expressions can be
 * passed safely. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
498
499void
500scanner_init(struct scanner *s, bstream *rs, stream *ws)
501{
502 s->rs = rs;
503 s->ws = ws;
504 s->log = NULL;
505
506 s->yynext = 0;
507 s->yylast = 0;
508 s->yyval = 0;
509 s->yybak = 0; /* keep backup of char replaced by EOS */
510 s->yycur = 0;
511
512 s->key = 0; /* keep a hash key of the query */
513 s->started = 0;
514 s->as = 0;
515
516 s->mode = LINE_N;
517 s->schema = NULL;
518}
519
520void
521scanner_query_processed(struct scanner *s)
522{
523 int cur;
524
525 if (s->yybak) {
526 s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
527 s->yybak = 0;
528 }
529 if (s->rs) {
530 s->rs->pos += s->yycur;
531 /* completely eat the query including white space after the ; */
532 while (s->rs->pos < s->rs->len &&
533 (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
534 s->rs->pos++;
535 }
536 }
537 /*assert(s->rs->pos <= s->rs->len);*/
538 s->yycur = 0;
539 s->key = 0; /* keep a hash key of the query */
540 s->started = 0;
541 s->as = 0;
542 s->schema = NULL;
543}
544
545void
546scanner_reset_key(struct scanner *s)
547{
548 s->key = 0;
549}
550
551static int
552scanner_error(mvc *lc, int cur)
553{
554 switch (cur) {
555 case EOF:
556 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
557 return -1; /* EOF needs -1 result */
558 default:
559 /* on Windows at least, iswcntrl returns TRUE for
560 * U+FEFF, but we just want consistent error
561 * messages */
562 (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
563 }
564 return LEX_ERROR;
565}
566
567
568/*
569 UTF-8 encoding is as follows:
570U-00000000 - U-0000007F: 0xxxxxxx
571U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
572U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
573U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
574U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
575U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
576*/
577/* To be correctly coded UTF-8, the sequence should be the shortest
578 possible encoding of the value being encoded. This means that for
579 an encoding of length n+1 (1 <= n <= 5), at least one of the bits in
580 utf8chkmsk[n] should be non-zero (else the encoding could be
581 shorter).
582*/
/* Indexed by the number of continuation bytes; the table is read-only,
 * hence const. */
static const int utf8chkmsk[] = {
	0x0000007f,		/* 1 byte:  bits  0..6  */
	0x00000780,		/* 2 bytes: bits  7..10 */
	0x0000f800,		/* 3 bytes: bits 11..15 */
	0x001f0000,		/* 4 bytes: bits 16..20 */
	0x03e00000,		/* 5 bytes: bits 21..25 */
	0x7c000000		/* 6 bytes: bits 26..30 */
};
591
592static void
593utf8_putchar(struct scanner *lc, int ch)
594{
595 if ((ch) < 0x80) {
596 lc->yycur--;
597 } else if ((ch) < 0x800) {
598 lc->yycur -= 2;
599 } else if ((ch) < 0x10000) {
600 lc->yycur -= 3;
601 } else {
602 lc->yycur -= 4;
603 }
604}
605
606static inline int
607scanner_read_more(struct scanner *lc, size_t n)
608{
609 bstream *b = lc->rs;
610 bool more = false;
611
612
613 while (b->len < b->pos + lc->yycur + n) {
614
615 if (lc->mode == LINE_1 || !lc->started)
616 return EOF;
617
618 /* query is not finished ask for more */
619 if (b->eof || !isa_block_stream(b->s)) {
620 if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
621 mnstr_flush(lc->ws);
622 b->eof = false;
623 more = true;
624 }
625 /* we need more query text */
626 if (bstream_next(b) < 0 ||
627 /* we asked for more data but didn't get any */
628 (more && b->eof && b->len < b->pos + lc->yycur + n))
629 return EOF;
630 }
631 return 1;
632}
633
634static inline int
635scanner_getc(struct scanner *lc)
636{
637 bstream *b = lc->rs;
638 unsigned char *s = NULL;
639 int c, m, n, mask;
640
641 if (scanner_read_more(lc, 1) == EOF) {
642 lc->errstr = SQLSTATE(42000) "end of input stream";
643 return EOF;
644 }
645 lc->errstr = NULL;
646
647 s = (unsigned char *) b->buf + b->pos + lc->yycur++;
648 if (((c = *s) & 0x80) == 0) {
649 /* 7-bit char */
650 return c;
651 }
652 for (n = 0, m = 0x40; c & m; n++, m >>= 1)
653 ;
654 /* n now is number of 10xxxxxx bytes that should follow */
655 if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
656 /* incorrect UTF-8 sequence */
657 /* n==0: c == 10xxxxxx */
658 /* n>=6: c == 1111111x */
659 lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
660 goto error;
661 }
662
663 if (scanner_read_more(lc, (size_t) n) == EOF)
664 return EOF;
665 s = (unsigned char *) b->buf + b->pos + lc->yycur;
666
667 mask = utf8chkmsk[n];
668 c &= ~(0xFFC0 >> n); /* remove non-x bits */
669 while (--n >= 0) {
670 c <<= 6;
671 lc->yycur++;
672 if (((m = *s++) & 0xC0) != 0x80) {
673 /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
674 /* this includes end-of-string (m == 0) */
675 lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
676 goto error;
677 }
678 c |= m & 0x3F;
679 }
680 if ((c & mask) == 0) {
681 /* incorrect UTF-8 sequence: not shortest possible */
682 lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
683 goto error;
684 }
685
686 return c;
687
688error:
689 if (b->pos + lc->yycur < b->len) /* skip bogus char */
690 lc->yycur++;
691 return EOF;
692}
693
694static int
695scanner_token(struct scanner *lc, int token)
696{
697 lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
698 lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
699 lc->yyval = token;
700 return lc->yyval;
701}
702
/*
 * Scan a quoted string or quoted identifier; the opening `quote`
 * character has already been consumed.
 *
 * quote   - the delimiter, ' or "
 * escapes - when true, a backslash escapes the following character
 *
 * Returns STRING (via scanner_token) once the closing quote is found,
 * or LEX_ERROR after reporting an error through sql_error().
 */
static int
scanner_string(mvc *c, int quote, bool escapes)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = quote;
	bool escape = false;
	/* maximum number of bytes scanned in one pass of the inner loop:
	 * 2048 for "-quoted text, 2^30 otherwise */
	const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;

	/* allow scanner_read_more() to fetch further input */
	lc->started = 1;
	while (cur != EOF) {
		size_t pos = 0;
		const size_t yycur = rs->pos + lc->yycur;

		/* fast path: walk over plain ASCII bytes until we hit the
		 * limit, a byte with the high bit set, a NUL byte, or an
		 * unescaped quote */
		while (cur != EOF && pos < limit &&
		       (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
		       cur && (cur != quote || escape)) {
			if (escapes && cur == '\\')
				escape = !escape;
			else
				escape = false;
		}
		if (pos == limit) {
			(void) sql_error(c, 2, SQLSTATE(42000) "string too long");
			return LEX_ERROR;
		}
		if (cur == EOF)
			break;
		/* cursor now points just past the byte that stopped the loop */
		lc->yycur += pos;
		/* check for quote escaped quote: Obscure SQL Rule */
		/* TODO also handle double "" */
		if (cur == quote && rs->buf[yycur + pos] == quote) {
			/* with escapes enabled the first quote of the pair is
			 * overwritten in place by a backslash */
			if (escapes)
				rs->buf[yycur + pos - 1] = '\\';
			/* step over the second quote and keep scanning */
			lc->yycur++;
			continue;
		}
		assert(yycur + pos <= rs->len + 1);
		if (cur == quote && !escape) {
			return scanner_token(lc, STRING);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		/* long utf8, if correct isn't the quote */
		if (!cur) {
			/* a NUL inside the buffered data is an error; a NUL
			 * at the end of the data means we need more input */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			/* result is 1 or EOF; the outer loop rescans on 1 */
			cur = scanner_read_more(lc, 1);
		} else {
			/* high-bit byte: let scanner_getc() decode and
			 * validate the multi-byte character */
			cur = scanner_getc(lc);
		}
	}
	/* EOF: report the scanner_getc() message if there is one */
	(void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "unexpected end of input");
	return LEX_ERROR;
}
759
/* Scan a structure {blah} into a single X_BODY token; the opening '{'
 * has already been consumed.  Nested braces are tracked by counting
 * '{' and '}' until the count returns to zero.  Braces inside string
 * literals are not treated specially yet.
 *
 * NOTE(review): the brace counter is updated for every '{' and '}',
 * also when preceded by a backslash; `escape` only influences when the
 * scan stops -- confirm this is the intended meaning of "escaped".
 *
 * Returns X_BODY (via scanner_token) on success, or LEX_ERROR after
 * reporting an error through sql_error().
 */

static int
scanner_body(mvc *c)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = (int) 'x';	/* any value other than EOF to enter the loop */
	int blk = 1;		/* number of currently open braces */
	bool escape = false;

	/* allow scanner_read_more() to fetch further input */
	lc->started = 1;
	assert(rs->buf[rs->pos + lc->yycur-1] == '{');
	while (cur != EOF) {
		size_t pos = rs->pos + lc->yycur;

		/* fast path over ASCII bytes; stops at a high-bit byte, a
		 * NUL byte, or on the byte following the brace that closed
		 * the outermost block */
		while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
			if (cur != '\\')
				escape = false;
			else
				escape = !escape;
			blk += cur =='{';
			blk -= cur =='}';
		}
		lc->yycur = pos - rs->pos;
		assert(pos <= rs->len + 1);
		if (blk == 0 && !escape){
			lc->yycur--;	/* go back to current (possibly invalid) char */
			return scanner_token(lc, X_BODY);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		if (!cur) {
			/* a NUL inside the buffered data is an error; a NUL
			 * at the end of the data means we need more input */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			/* result is 1 or EOF; the outer loop rescans on 1 */
			cur = scanner_read_more(lc, 1);
		} else {
			/* high-bit byte: let scanner_getc() decode and
			 * validate the multi-byte character */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
	return LEX_ERROR;
}
806
807static int
808keyword_or_ident(mvc * c, int cur)
809{
810 struct scanner *lc = &c->scanner;
811 keyword *k = NULL;
812 size_t s;
813
814 lc->started = 1;
815 utf8_putchar(lc, cur);
816 s = lc->yycur;
817 lc->yyval = IDENT;
818 while ((cur = scanner_getc(lc)) != EOF) {
819 if (!iswalnum(cur) && cur != '_') {
820 utf8_putchar(lc, cur);
821 (void)scanner_token(lc, IDENT);
822 k = find_keyword_bs(lc,s);
823 if (k)
824 lc->yyval = k->token;
825 /* find keyword in SELECT/JOIN/UNION FUNCTIONS */
826 else if (sql_find_func(c->sa, cur_schema(c), lc->rs->buf+lc->rs->pos+s, -1, F_FILT, NULL))
827 lc->yyval = FILTER_FUNC;
828 return lc->yyval;
829 }
830 }
831 (void)scanner_token(lc, IDENT);
832 k = find_keyword_bs(lc,s);
833 if (k)
834 lc->yyval = k->token;
835 /* find keyword in SELECT/JOIN/UNION FUNCTIONS */
836 else if (sql_find_func(c->sa, cur_schema(c), lc->rs->buf+lc->rs->pos+s, -1, F_FILT, NULL))
837 lc->yyval = FILTER_FUNC;
838 return lc->yyval;
839}
840
841static int
842skip_white_space(struct scanner * lc)
843{
844 int cur;
845
846 do {
847 lc->yysval = lc->yycur;
848 } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
849 return cur;
850}
851
852static int
853skip_c_comment(struct scanner * lc)
854{
855 int cur;
856 int prev = 0;
857 int started = lc->started;
858 int depth = 1;
859
860 lc->started = 1;
861 while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
862 if (prev == '*' && cur == '/')
863 depth--;
864 else if (prev == '/' && cur == '*') {
865 /* block comments can nest */
866 cur = 0; /* prevent slash-star-slash from matching */
867 depth++;
868 }
869 prev = cur;
870 }
871 lc->yysval = lc->yycur;
872 lc->started = started;
873 /* a comment is equivalent to a newline */
874 return cur == EOF ? cur : '\n';
875}
876
877static int
878skip_sql_comment(struct scanner * lc)
879{
880 int cur;
881 int started = lc->started;
882
883 lc->started = 1;
884 while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
885 ;
886 lc->yysval = lc->yycur;
887 lc->started = started;
888 /* a comment is equivalent to a newline */
889 return cur;
890}
891
892static int tokenize(mvc * lc, int cur);
893
894static int
895number(mvc * c, int cur)
896{
897 struct scanner *lc = &c->scanner;
898 int token = sqlINT;
899 int before_cur = EOF;
900
901 lc->started = 1;
902 if (cur == '0' && (cur = scanner_getc(lc)) == 'x') {
903 while ((cur = scanner_getc(lc)) != EOF &&
904 (iswdigit(cur) ||
905 (cur >= 'A' && cur <= 'F') ||
906 (cur >= 'a' && cur <= 'f')))
907 token = HEXADECIMAL;
908 if (token == sqlINT)
909 before_cur = 'x';
910 } else {
911 if (iswdigit(cur))
912 while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur))
913 ;
914 if (cur == '@') {
915 token = OIDNUM;
916 cur = scanner_getc(lc);
917 if (cur == '0')
918 cur = scanner_getc(lc);
919 }
920
921 if (cur == '.') {
922 token = INTNUM;
923
924 while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur))
925 ;
926 }
927 if (cur == 'e' || cur == 'E') {
928 token = APPROXNUM;
929 cur = scanner_getc(lc);
930 if (cur == '-' || cur == '+')
931 token = 0;
932 while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur))
933 token = APPROXNUM;
934 }
935 }
936
937 if (cur == EOF && lc->rs->buf == NULL) /* malloc failure */
938 return EOF;
939
940 if (token) {
941 if (cur != EOF)
942 utf8_putchar(lc, cur);
943 if (before_cur != EOF)
944 utf8_putchar(lc, before_cur);
945 return scanner_token(lc, token);
946 } else {
947 (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
948 return LEX_ERROR;
949 }
950}
951
/*
 * Scan a token that starts with the punctuation character cur (already
 * read).  Multi-character operators are recognised by reading ahead
 * with scanner_getc(); characters that turn out not to belong to the
 * operator are given back with utf8_putchar().
 *
 * Comments (slash-star blocks, "--" and '#' line comments) are skipped
 * here, after which scanning continues through tokenize() with the
 * character the comment skipper returned.  While looking for a comment
 * opener lc->started is set to 1; when a comment is found the value it
 * had on entry is restored before skipping.
 *
 * Returns the value of scanner_token() for the recognised token, the
 * result of number(), scanner_string(), scanner_body() or tokenize()
 * where the work is delegated, EOF when the input ends inside or right
 * after a comment, or LEX_ERROR (after reporting the error) for a
 * character that has no case in the switch.
 *
 * NOTE(review): unlike number(), the put-backs here are not guarded
 * against the read-ahead character being EOF -- confirm utf8_putchar()
 * copes with that.
 */
static
int scanner_symbol(mvc * c, int cur)
{
	struct scanner *lc = &c->scanner;
	int next = 0;
	int started = lc->started;

	switch (cur) {
	case '/':
		/* either the start of a block comment or a plain '/' */
		lc->started = 1;
		next = scanner_getc(lc);
		if (next == '*') {
			lc->started = started;
			cur = skip_c_comment(lc);
			if (cur < 0)
				return EOF;
			return tokenize(c, cur);
		} else {
			utf8_putchar(lc, next);
			return scanner_token(lc, cur);
		}
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
		return number(c, cur);
	case '#':
		/* '#' starts a comment that runs to the end of the line */
		if ((cur = skip_sql_comment(lc)) == EOF)
			return cur;
		return tokenize(c, cur);
	case '\'':
	case '"':
		/* the last argument is true for '...' strings and false
		 * for "..." quoted text */
		return scanner_string(c, cur,
#if 0
				      false
#else
				      cur == '\''
#endif
			);
	case '{':
		return scanner_body(c);
	case '-':
		/* either the start of a "--" line comment or a plain '-' */
		lc->started = 1;
		next = scanner_getc(lc);
		if (next == '-') {
			lc->started = started;
			if ((cur = skip_sql_comment(lc)) == EOF)
				return cur;
			return tokenize(c, cur);
		}
		lc->started = 1;
		utf8_putchar(lc, next);
		return scanner_token(lc, cur);
	case '~': /* binary not */
		/* "~=" is GEOM_MBR_EQUAL, a lone '~' is returned as is */
		lc->started = 1;
		next = scanner_getc(lc);
		if (next == '=')
			return scanner_token(lc, GEOM_MBR_EQUAL);
		utf8_putchar(lc, next);
		return scanner_token(lc, cur);
	case '^': /* binary xor */
	case '*':
	case '?':
	case '%':
	case '+':
	case '(':
	case ')':
	case ',':
	case '=':
	case '[':
	case ']':
		/* single character tokens, returned as their own
		 * character code */
		lc->started = 1;
		return scanner_token(lc, cur);
	case '&':
		/* "&<|", "&<", "&>", "&&" or a plain '&' */
		lc->started = 1;
		cur = scanner_getc(lc);
		if(cur == '<') {
			next = scanner_getc(lc);
			if(next == '|') {
				return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
			} else {
				utf8_putchar(lc, next); //put the char back
				return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
			}
		} else if(cur == '>')
			return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
		else if(cur == '&')
			return scanner_token(lc, GEOM_OVERLAP);
		else {/* binary and */
			utf8_putchar(lc, cur); //put the char back
			return scanner_token(lc, '&');
		}
	case '@':
		lc->started = 1;
		return scanner_token(lc, AT);
	case ';':
		/* a semicolon resets the started flag to 0 */
		lc->started = 0;
		return scanner_token(lc, SCOLON);
	case '<':
		/* "<=", "<>", "<<=", "<<|", "<<", "<->" or a plain '<' */
		lc->started = 1;
		cur = scanner_getc(lc);
		if (cur == '=') {
			return scanner_token( lc, COMPARISON);
		} else if (cur == '>') {
			return scanner_token( lc, COMPARISON);
		} else if (cur == '<') {
			next = scanner_getc(lc);
			if (next == '=') {
				return scanner_token( lc, LEFT_SHIFT_ASSIGN);
			} else if (next == '|') {
				return scanner_token(lc, GEOM_BELOW);
			} else {
				utf8_putchar(lc, next); //put the char back
				return scanner_token( lc, LEFT_SHIFT);
			}
		} else if(cur == '-') {
			next = scanner_getc(lc);
			if(next == '>') {
				return scanner_token(lc, GEOM_DIST);
			} else {
				/* not "<->": give back both characters read
				 * ahead, the token is just the '<' */
				utf8_putchar(lc, next);
				utf8_putchar(lc, cur);
				return scanner_token( lc, COMPARISON);
			}
		} else {
			utf8_putchar(lc, cur);
			return scanner_token( lc, COMPARISON);
		}
	case '>':
		/* ">>=", ">>", ">=" or a plain '>' */
		lc->started = 1;
		cur = scanner_getc(lc);
		if (cur == '>') {
			cur = scanner_getc(lc);
			if (cur == '=')
				return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
			utf8_putchar(lc, cur);
			return scanner_token( lc, RIGHT_SHIFT);
		} else if (cur != '=') {
			utf8_putchar(lc, cur);
			return scanner_token( lc, COMPARISON);
		} else {
			return scanner_token( lc, COMPARISON);
		}
	case '.':
		/* a '.' directly followed by a digit starts a number,
		 * otherwise it is a token of its own */
		lc->started = 1;
		cur = scanner_getc(lc);
		if (!iswdigit(cur)) {
			utf8_putchar(lc, cur);
			return scanner_token( lc, '.');
		} else {
			/* give the digit back and let number() scan the
			 * literal starting from the '.' */
			utf8_putchar(lc, cur);
			cur = '.';
			return number(c, cur);
		}
	case '|': /* binary or or string concat */
		/* "||", "|&>", "|>>" or a plain '|' */
		lc->started = 1;
		cur = scanner_getc(lc);
		if (cur == '|') {
			return scanner_token(lc, CONCATSTRING);
		} else if (cur == '&') {
			next = scanner_getc(lc);
			if(next == '>') {
				return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
			} else {
				utf8_putchar(lc, next); //put the char back
				utf8_putchar(lc, cur); //put the char back
				return scanner_token(lc, '|');
			}
		} else if (cur == '>') {
			next = scanner_getc(lc);
			if(next == '>') {
				return scanner_token(lc, GEOM_ABOVE);
			} else {
				utf8_putchar(lc, next); //put the char back
				utf8_putchar(lc, cur); //put the char back
				return scanner_token(lc, '|');
			}
		} else {
			utf8_putchar(lc, cur);
			return scanner_token(lc, '|');
		}
	}
	/* only reached for characters without a case above */
	(void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
	return LEX_ERROR;
}
1144
1145static int
1146tokenize(mvc * c, int cur)
1147{
1148 struct scanner *lc = &c->scanner;
1149 while (1) {
1150 if (cur == 0xFEFF) {
1151 /* on Linux at least, iswpunct returns TRUE
1152 * for U+FEFF, but we don't want that, we just
1153 * want to go to the scanner_error case
1154 * below */
1155 ;
1156 } else if (iswspace(cur)) {
1157 if ((cur = skip_white_space(lc)) == EOF)
1158 return cur;
1159 continue; /* try again */
1160 } else if (iswdigit(cur)) {
1161 return number(c, cur);
1162 } else if (iswalpha(cur) || cur == '_') {
1163 if ((cur == 'E' || cur == 'e') &&
1164 lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1165 return scanner_string(c, scanner_getc(lc), true);
1166 }
1167 if ((cur == 'X' || cur == 'x') &&
1168 lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1169 return scanner_string(c, scanner_getc(lc), true);
1170 }
1171 if ((cur == 'U' || cur == 'u') &&
1172 lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
1173 (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
1174 lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
1175 cur = scanner_getc(lc); /* '&' */
1176 return scanner_string(c, scanner_getc(lc), false);
1177 }
1178 return keyword_or_ident(c, cur);
1179 } else if (iswpunct(cur)) {
1180 return scanner_symbol(c, cur);
1181 }
1182 if (cur == EOF) {
1183 if (lc->mode == LINE_1 || !lc->started )
1184 return cur;
1185 return scanner_error(c, cur);
1186 }
1187 /* none of the above: error */
1188 return scanner_error(c, cur);
1189 }
1190}
1191
/* SQL 'quoted' idents consist of a set of any character of
 * the source language character set other than a 'quote'
 * (a quote inside the identifier is written as two quotes).
 *
 * MonetDB has 2 restrictions:
 * 	1 we disallow '%' as the first character.
 * 	2 the unquoted result must be shorter than 1024 bytes,
 * 	  i.e. at most 1023 bytes are accepted.
 *
 * Copies s to dst while collapsing every doubled quote into a single
 * one.  Returns true and NUL terminates dst when the identifier is
 * acceptable; returns false otherwise, in which case dst is not
 * terminated.  dst must have room for strlen(s) + 1 bytes.
 */
static bool
valid_ident(const char *restrict s, char *restrict dst)
{
	char *out = dst;

	if (s[0] == '%')
		return false;

	for (; *s != '\0'; s++) {
		*out++ = *s;
		/* a doubled quote stands for one quote: skip the second */
		if (s[0] == '"' && s[1] == '"')
			s++;
		if (out - dst >= 1024)
			return false;
	}
	*out = '\0';
	return true;
}
1216
1217static inline int
1218sql_get_next_token(YYSTYPE *yylval, void *parm)
1219{
1220 mvc *c = (mvc*)parm;
1221 struct scanner *lc = &c->scanner;
1222 int token = 0, cur = 0;
1223
1224 if (lc->rs->buf == NULL) /* malloc failure */
1225 return EOF;
1226
1227 if (lc->yynext) {
1228 int next = lc->yynext;
1229
1230 lc->yynext = 0;
1231 return(next);
1232 }
1233
1234 if (lc->yybak) {
1235 lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
1236 lc->yybak = 0;
1237 }
1238
1239 lc->yysval = lc->yycur;
1240 lc->yylast = lc->yyval;
1241 cur = scanner_getc(lc);
1242 if (cur < 0)
1243 return EOF;
1244 token = tokenize(c, cur);
1245
1246 yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);
1247
1248 /* This is needed as ALIAS and aTYPE get defined too late, see
1249 sql_keyword.h */
1250 if (token == KW_ALIAS)
1251 token = ALIAS;
1252
1253 if (token == KW_TYPE)
1254 token = aTYPE;
1255
1256 if (token == IDENT || token == COMPARISON || token == FILTER_FUNC ||
1257 token == AGGR || token == AGGR2 || token == RANK ||
1258 token == aTYPE || token == ALIAS)
1259 yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
1260 else if (token == STRING) {
1261 char quote = *yylval->sval;
1262 char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
1263 assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x');
1264
1265 lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
1266 if (quote == '"') {
1267 if (valid_ident(yylval->sval+1,str)) {
1268 token = IDENT;
1269 } else {
1270 sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
1271 return LEX_ERROR;
1272 }
1273 } else if (quote == 'E' || quote == 'e') {
1274 assert(yylval->sval[1] == '\'');
1275 GDKstrFromStr((unsigned char *) str,
1276 (unsigned char *) yylval->sval + 2,
1277 lc->yycur-lc->yysval - 2);
1278 quote = '\'';
1279 } else if (quote == 'U' || quote == 'u') {
1280 assert(yylval->sval[1] == '&');
1281 assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
1282 strcpy(str, yylval->sval + 3);
1283 token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
1284 quote = yylval->sval[2];
1285 } else if (quote == 'X' || quote == 'x') {
1286 assert(yylval->sval[1] == '\'');
1287 char *dst = str;
1288 for (char *src = yylval->sval + 2; *src; dst++)
1289 if ((*dst = *src++) == '\'' && *src == '\'')
1290 src++;
1291 *dst = 0;
1292 quote = '\'';
1293 token = XSTRING;
1294 } else {
1295#if 0
1296 char *dst = str;
1297 for (char *src = yylval->sval + 1; *src; dst++)
1298 if ((*dst = *src++) == '\'' && *src == '\'')
1299 src++;
1300 *dst = 0;
1301#else
1302 GDKstrFromStr((unsigned char *) str,
1303 (unsigned char *) yylval->sval + 1,
1304 lc->yycur-lc->yysval - 1);
1305#endif
1306 }
1307 yylval->sval = str;
1308
1309 /* reset original */
1310 lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
1311 }
1312
1313 return(token);
1314}
1315
1316/* also see sql_parser.y */
1317extern int sqllex( YYSTYPE *yylval, void *m );
1318
1319int
1320sqllex(YYSTYPE * yylval, void *parm)
1321{
1322 int token;
1323 mvc *c = (mvc *) parm;
1324 struct scanner *lc = &c->scanner;
1325 size_t pos;
1326
1327 /* store position for when view's query ends */
1328 pos = lc->rs->pos + lc->yycur;
1329
1330 token = sql_get_next_token(yylval, parm);
1331
1332 if (token == NOT) {
1333 int next = sqllex(yylval, parm);
1334
1335 if (next == NOT) {
1336 return sqllex(yylval, parm);
1337 } else if (next == BETWEEN) {
1338 token = NOT_BETWEEN;
1339 } else if (next == sqlIN) {
1340 token = NOT_IN;
1341 } else if (next == LIKE) {
1342 token = NOT_LIKE;
1343 } else if (next == ILIKE) {
1344 token = NOT_ILIKE;
1345 } else {
1346 lc->yynext = next;
1347 }
1348 } else if (token == UNION) {
1349 int next = sqllex(yylval, parm);
1350
1351 if (next == JOIN) {
1352 token = UNIONJOIN;
1353 } else {
1354 lc->yynext = next;
1355 }
1356 } else if (token == SCOLON) {
1357 /* ignore semi-colon(s) following a semi-colon */
1358 if (lc->yylast == SCOLON) {
1359 size_t prev = lc->yycur;
1360 while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
1361 prev = lc->yycur;
1362
1363 /* skip the skipped stuff also in the buffer */
1364 lc->rs->pos += prev;
1365 lc->yycur -= prev;
1366 }
1367 }
1368
1369 if (lc->log)
1370 mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
1371
1372 /* Don't include literals in the calculation of the key */
1373 if (token != STRING && token != USTRING && token != sqlINT && token != OIDNUM && token != INTNUM && token != APPROXNUM && token != sqlNULL)
1374 lc->key ^= token;
1375 lc->started += (token != EOF);
1376 return token;
1377}
1378