| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | #include "monetdb_config.h" |
| 10 | #include <wctype.h> |
| 11 | #include "sql_mem.h" |
| 12 | #include "sql_scan.h" |
| 13 | #include "sql_types.h" |
| 14 | #include "sql_symbol.h" |
| 15 | #include "sql_mvc.h" |
| 16 | #include "sql_parser.tab.h" |
| 17 | #include "sql_semantic.h" |
| 18 | #include "sql_parser.h" /* for sql_error() */ |
| 19 | |
| 20 | #include "stream.h" |
| 21 | #include <unistd.h> |
| 22 | #include <string.h> |
| 23 | #include <ctype.h> |
| 24 | #include "sql_keyword.h" |
| 25 | #ifdef HAVE_HGE |
| 26 | #include "mal.h" /* for have_hge */ |
| 27 | #endif |
| 28 | |
/*
 * Return a newly allocated, normalised copy of the SQL text in `query`.
 *
 * - C style comments and "--" comments (up to the end of the line) are
 *   dropped.
 * - Text inside quotes ('..', "..", {..}) is copied unchanged; a
 *   backslash inside quotes protects the character that follows it.
 * - Outside quotes, a space or tab is only emitted when the output is
 *   non-empty and does not already end in a space; a newline is only
 *   emitted when the output is non-empty and does not already end in a
 *   newline.
 *
 * The result is allocated with GDKmalloc() and is owned by the caller.
 * Returns NULL when the allocation fails.
 */
char *
query_cleaned(const char *query)
{
	char *q, *r;
	int quote = 0;			/* inside quotes ('..', "..", {..}) */
	bool bs = false;		/* seen a backslash in a quoted string */
	bool incomment1 = false;	/* inside traditional C style comment */
	bool incomment2 = false;	/* inside comment starting with -- */

	/* the cleaned text is never longer than the input */
	r = GDKmalloc(strlen(query) + 1);
	if (!r)
		return NULL;

	for (q = r; *query; query++) {
		if (incomment1) {
			/* look ahead for the terminator instead of looking
			 * back, so the star of the opening marker can never
			 * be mistaken for the star of the closing one */
			if (*query == '*' && query[1] == '/') {
				query++;	/* consume the slash as well */
				incomment1 = false;
			}
		} else if (incomment2) {
			if (*query == '\n') {
				incomment2 = false;
				/* add newline only if comment doesn't
				 * occupy whole line */
				if (q > r && q[-1] != '\n')
					*q++ = '\n';
			}
		} else if (quote) {
			if (bs) {
				bs = false;
			} else if (*query == '\\') {
				bs = true;
			} else if (*query == quote) {
				quote = 0;
			}
			*q++ = *query;
		} else if (*query == '"' || *query == '\'') {
			quote = *query;
			*q++ = *query;
		} else if (*query == '{') {
			/* a brace block is closed by the matching brace */
			quote = '}';
			*q++ = *query;
		} else if (*query == '-' && query[1] == '-') {
			incomment2 = true;
		} else if (*query == '/' && query[1] == '*') {
			incomment1 = true;
			query++;	/* skip the star of the opening marker */
		} else if (*query == '\n') {
			/* collapse newlines */
			if (q > r && q[-1] != '\n')
				*q++ = '\n';
		} else if (*query == ' ' || *query == '\t') {
			/* collapse white space */
			if (q > r && q[-1] != ' ')
				*q++ = ' ';
		} else {
			*q++ = *query;
		}
	}
	*q = 0;
	return r;
}
| 88 | |
| 89 | int |
| 90 | scanner_init_keywords(void) |
| 91 | { |
| 92 | int failed = 0; |
| 93 | |
| 94 | failed += keywords_insert("false" , BOOL_FALSE); |
| 95 | failed += keywords_insert("true" , BOOL_TRUE); |
| 96 | |
| 97 | failed += keywords_insert("ALTER" , ALTER); |
| 98 | failed += keywords_insert("ADD" , ADD); |
| 99 | failed += keywords_insert("AND" , AND); |
| 100 | failed += keywords_insert("MEDIAN" , AGGR); |
| 101 | failed += keywords_insert("CORR" , AGGR2); |
| 102 | failed += keywords_insert("QUANTILE" , AGGR2); |
| 103 | failed += keywords_insert("AVG" , AGGR); |
| 104 | failed += keywords_insert("MIN" , AGGR); |
| 105 | failed += keywords_insert("MAX" , AGGR); |
| 106 | failed += keywords_insert("SUM" , AGGR); |
| 107 | failed += keywords_insert("PROD" , AGGR); |
| 108 | failed += keywords_insert("COUNT" , AGGR); |
| 109 | |
| 110 | failed += keywords_insert("RANK" , RANK); |
| 111 | failed += keywords_insert("DENSE_RANK" , RANK); |
| 112 | failed += keywords_insert("PERCENT_RANK" , RANK); |
| 113 | failed += keywords_insert("CUME_DIST" , RANK); |
| 114 | failed += keywords_insert("ROW_NUMBER" , RANK); |
| 115 | failed += keywords_insert("NTILE" , RANK); |
| 116 | failed += keywords_insert("LAG" , RANK); |
| 117 | failed += keywords_insert("LEAD" , RANK); |
| 118 | failed += keywords_insert("FIRST_VALUE" , RANK); |
| 119 | failed += keywords_insert("LAST_VALUE" , RANK); |
| 120 | failed += keywords_insert("NTH_VALUE" , RANK); |
| 121 | |
| 122 | failed += keywords_insert("BEST" , BEST); |
| 123 | failed += keywords_insert("EFFORT" , EFFORT); |
| 124 | |
| 125 | failed += keywords_insert("AS" , AS); |
| 126 | failed += keywords_insert("ASC" , ASC); |
| 127 | failed += keywords_insert("AUTHORIZATION" , AUTHORIZATION); |
| 128 | failed += keywords_insert("BETWEEN" , BETWEEN); |
| 129 | failed += keywords_insert("SYMMETRIC" , SYMMETRIC); |
| 130 | failed += keywords_insert("ASYMMETRIC" , ASYMMETRIC); |
| 131 | failed += keywords_insert("BY" , BY); |
| 132 | failed += keywords_insert("CAST" , CAST); |
| 133 | failed += keywords_insert("CONVERT" , CONVERT); |
| 134 | failed += keywords_insert("CHARACTER" , CHARACTER); |
| 135 | failed += keywords_insert("CHAR" , CHARACTER); |
| 136 | failed += keywords_insert("VARYING" , VARYING); |
| 137 | failed += keywords_insert("VARCHAR" , VARCHAR); |
| 138 | failed += keywords_insert("BINARY" , BINARY); |
| 139 | failed += keywords_insert("LARGE" , LARGE); |
| 140 | failed += keywords_insert("OBJECT" , OBJECT); |
| 141 | failed += keywords_insert("CLOB" , CLOB); |
| 142 | failed += keywords_insert("BLOB" , sqlBLOB); |
| 143 | failed += keywords_insert("TEXT" , sqlTEXT); |
| 144 | failed += keywords_insert("TINYTEXT" , sqlTEXT); |
| 145 | failed += keywords_insert("STRING" , CLOB); /* ? */ |
| 146 | failed += keywords_insert("CHECK" , CHECK); |
| 147 | failed += keywords_insert("CLIENT" , CLIENT); |
| 148 | failed += keywords_insert("SERVER" , SERVER); |
| 149 | failed += keywords_insert("COMMENT" , COMMENT); |
| 150 | failed += keywords_insert("CONSTRAINT" , CONSTRAINT); |
| 151 | failed += keywords_insert("CREATE" , CREATE); |
| 152 | failed += keywords_insert("CROSS" , CROSS); |
| 153 | failed += keywords_insert("COPY" , COPY); |
| 154 | failed += keywords_insert("RECORDS" , RECORDS); |
| 155 | failed += keywords_insert("DELIMITERS" , DELIMITERS); |
| 156 | failed += keywords_insert("STDIN" , STDIN); |
| 157 | failed += keywords_insert("STDOUT" , STDOUT); |
| 158 | |
| 159 | failed += keywords_insert("TINYINT" , TINYINT); |
| 160 | failed += keywords_insert("SMALLINT" , SMALLINT); |
| 161 | failed += keywords_insert("INTEGER" , sqlINTEGER); |
| 162 | failed += keywords_insert("INT" , sqlINTEGER); |
| 163 | failed += keywords_insert("MEDIUMINT" , sqlINTEGER); |
| 164 | failed += keywords_insert("BIGINT" , BIGINT); |
| 165 | #ifdef HAVE_HGE |
| 166 | if (have_hge) |
| 167 | failed += keywords_insert("HUGEINT" , HUGEINT); |
| 168 | #endif |
| 169 | failed += keywords_insert("DEC" , sqlDECIMAL); |
| 170 | failed += keywords_insert("DECIMAL" , sqlDECIMAL); |
| 171 | failed += keywords_insert("NUMERIC" , sqlDECIMAL); |
| 172 | failed += keywords_insert("DECLARE" , DECLARE); |
| 173 | failed += keywords_insert("DEFAULT" , DEFAULT); |
| 174 | failed += keywords_insert("DESC" , DESC); |
| 175 | failed += keywords_insert("DISTINCT" , DISTINCT); |
| 176 | failed += keywords_insert("DOUBLE" , sqlDOUBLE); |
| 177 | failed += keywords_insert("REAL" , sqlREAL); |
| 178 | failed += keywords_insert("DROP" , DROP); |
| 179 | failed += keywords_insert("ESCAPE" , ESCAPE); |
| 180 | failed += keywords_insert("EXISTS" , EXISTS); |
| 181 | failed += keywords_insert("UESCAPE" , UESCAPE); |
| 182 | failed += keywords_insert("EXTRACT" , EXTRACT); |
| 183 | failed += keywords_insert("FLOAT" , sqlFLOAT); |
| 184 | failed += keywords_insert("FOR" , FOR); |
| 185 | failed += keywords_insert("FOREIGN" , FOREIGN); |
| 186 | failed += keywords_insert("FROM" , FROM); |
| 187 | failed += keywords_insert("FWF" , FWF); |
| 188 | |
| 189 | failed += keywords_insert("REFERENCES" , REFERENCES); |
| 190 | |
| 191 | failed += keywords_insert("MATCH" , MATCH); |
| 192 | failed += keywords_insert("FULL" , FULL); |
| 193 | failed += keywords_insert("PARTIAL" , PARTIAL); |
| 194 | failed += keywords_insert("SIMPLE" , SIMPLE); |
| 195 | |
| 196 | failed += keywords_insert("INSERT" , INSERT); |
| 197 | failed += keywords_insert("UPDATE" , UPDATE); |
| 198 | failed += keywords_insert("DELETE" , sqlDELETE); |
| 199 | failed += keywords_insert("TRUNCATE" , TRUNCATE); |
| 200 | failed += keywords_insert("MATCHED" , MATCHED); |
| 201 | |
| 202 | failed += keywords_insert("ACTION" , ACTION); |
| 203 | failed += keywords_insert("CASCADE" , CASCADE); |
| 204 | failed += keywords_insert("RESTRICT" , RESTRICT); |
| 205 | failed += keywords_insert("FIRST" , FIRST); |
| 206 | failed += keywords_insert("GLOBAL" , GLOBAL); |
| 207 | failed += keywords_insert("GROUP" , sqlGROUP); |
| 208 | failed += keywords_insert("HAVING" , HAVING); |
| 209 | failed += keywords_insert("ILIKE" , ILIKE); |
| 210 | failed += keywords_insert("IMPRINTS" , IMPRINTS); |
| 211 | failed += keywords_insert("IN" , sqlIN); |
| 212 | failed += keywords_insert("INNER" , INNER); |
| 213 | failed += keywords_insert("INTO" , INTO); |
| 214 | failed += keywords_insert("IS" , IS); |
| 215 | failed += keywords_insert("JOIN" , JOIN); |
| 216 | failed += keywords_insert("KEY" , KEY); |
| 217 | failed += keywords_insert("LATERAL" , LATERAL); |
| 218 | failed += keywords_insert("LEFT" , LEFT); |
| 219 | failed += keywords_insert("LIKE" , LIKE); |
| 220 | failed += keywords_insert("LIMIT" , LIMIT); |
| 221 | failed += keywords_insert("SAMPLE" , SAMPLE); |
| 222 | failed += keywords_insert("SEED" , SEED); |
| 223 | failed += keywords_insert("LAST" , LAST); |
| 224 | failed += keywords_insert("LOCAL" , LOCAL); |
| 225 | failed += keywords_insert("LOCKED" , LOCKED); |
| 226 | failed += keywords_insert("NATURAL" , NATURAL); |
| 227 | failed += keywords_insert("NOT" , NOT); |
| 228 | failed += keywords_insert("NULL" , sqlNULL); |
| 229 | failed += keywords_insert("NULLS" , NULLS); |
| 230 | failed += keywords_insert("OFFSET" , OFFSET); |
| 231 | failed += keywords_insert("ON" , ON); |
| 232 | failed += keywords_insert("OPTIONS" , OPTIONS); |
| 233 | failed += keywords_insert("OPTION" , OPTION); |
| 234 | failed += keywords_insert("OR" , OR); |
| 235 | failed += keywords_insert("ORDER" , ORDER); |
| 236 | failed += keywords_insert("ORDERED" , ORDERED); |
| 237 | failed += keywords_insert("OUTER" , OUTER); |
| 238 | failed += keywords_insert("OVER" , OVER); |
| 239 | failed += keywords_insert("PARTITION" , PARTITION); |
| 240 | failed += keywords_insert("PATH" , PATH); |
| 241 | failed += keywords_insert("PRECISION" , PRECISION); |
| 242 | failed += keywords_insert("PRIMARY" , PRIMARY); |
| 243 | |
| 244 | failed += keywords_insert("USER" , USER); |
| 245 | failed += keywords_insert("RENAME" , RENAME); |
| 246 | failed += keywords_insert("UNENCRYPTED" , UNENCRYPTED); |
| 247 | failed += keywords_insert("ENCRYPTED" , ENCRYPTED); |
| 248 | failed += keywords_insert("PASSWORD" , PASSWORD); |
| 249 | failed += keywords_insert("GRANT" , GRANT); |
| 250 | failed += keywords_insert("REVOKE" , REVOKE); |
| 251 | failed += keywords_insert("ROLE" , ROLE); |
| 252 | failed += keywords_insert("ADMIN" , ADMIN); |
| 253 | failed += keywords_insert("PRIVILEGES" , PRIVILEGES); |
| 254 | failed += keywords_insert("PUBLIC" , PUBLIC); |
| 255 | failed += keywords_insert("CURRENT_USER" , CURRENT_USER); |
| 256 | failed += keywords_insert("CURRENT_ROLE" , CURRENT_ROLE); |
| 257 | failed += keywords_insert("SESSION_USER" , SESSION_USER); |
| 258 | failed += keywords_insert("SESSION" , sqlSESSION); |
| 259 | |
| 260 | failed += keywords_insert("RIGHT" , RIGHT); |
| 261 | failed += keywords_insert("SCHEMA" , SCHEMA); |
| 262 | failed += keywords_insert("SELECT" , SELECT); |
| 263 | failed += keywords_insert("SET" , SET); |
| 264 | failed += keywords_insert("AUTO_COMMIT" , AUTO_COMMIT); |
| 265 | |
| 266 | failed += keywords_insert("ALL" , ALL); |
| 267 | failed += keywords_insert("ANY" , ANY); |
| 268 | failed += keywords_insert("SOME" , SOME); |
| 269 | failed += keywords_insert("EVERY" , ANY); |
| 270 | /* |
| 271 | failed += keywords_insert("SQLCODE", SQLCODE ); |
| 272 | */ |
| 273 | failed += keywords_insert("COLUMN" , COLUMN); |
| 274 | failed += keywords_insert("TABLE" , TABLE); |
| 275 | failed += keywords_insert("TEMPORARY" , TEMPORARY); |
| 276 | failed += keywords_insert("TEMP" , TEMP); |
| 277 | failed += keywords_insert("STREAM" , STREAM); |
| 278 | failed += keywords_insert("REMOTE" , REMOTE); |
| 279 | failed += keywords_insert("MERGE" , MERGE); |
| 280 | failed += keywords_insert("REPLICA" , REPLICA); |
| 281 | failed += keywords_insert("TO" , TO); |
| 282 | failed += keywords_insert("UNION" , UNION); |
| 283 | failed += keywords_insert("EXCEPT" , EXCEPT); |
| 284 | failed += keywords_insert("INTERSECT" , INTERSECT); |
| 285 | failed += keywords_insert("CORRESPONDING" , CORRESPONDING); |
| 286 | failed += keywords_insert("UNIQUE" , UNIQUE); |
| 287 | failed += keywords_insert("USING" , USING); |
| 288 | failed += keywords_insert("VALUES" , VALUES); |
| 289 | failed += keywords_insert("VIEW" , VIEW); |
| 290 | failed += keywords_insert("WHERE" , WHERE); |
| 291 | failed += keywords_insert("WITH" , WITH); |
| 292 | failed += keywords_insert("DATA" , DATA); |
| 293 | |
| 294 | failed += keywords_insert("DATE" , sqlDATE); |
| 295 | failed += keywords_insert("TIME" , TIME); |
| 296 | failed += keywords_insert("TIMESTAMP" , TIMESTAMP); |
| 297 | failed += keywords_insert("INTERVAL" , INTERVAL); |
| 298 | failed += keywords_insert("CURRENT_DATE" , CURRENT_DATE); |
| 299 | failed += keywords_insert("CURRENT_TIME" , CURRENT_TIME); |
| 300 | failed += keywords_insert("CURRENT_TIMESTAMP" , CURRENT_TIMESTAMP); |
| 301 | failed += keywords_insert("NOW" , CURRENT_TIMESTAMP); |
| 302 | failed += keywords_insert("LOCALTIME" , LOCALTIME); |
| 303 | failed += keywords_insert("LOCALTIMESTAMP" , LOCALTIMESTAMP); |
| 304 | failed += keywords_insert("ZONE" , ZONE); |
| 305 | |
| 306 | failed += keywords_insert("CENTURY" , CENTURY); |
| 307 | failed += keywords_insert("DECADE" , DECADE); |
| 308 | failed += keywords_insert("YEAR" , YEAR); |
| 309 | failed += keywords_insert("QUARTER" , QUARTER); |
| 310 | failed += keywords_insert("MONTH" , MONTH); |
| 311 | failed += keywords_insert("WEEK" , WEEK); |
| 312 | failed += keywords_insert("DOW" , DOW); |
| 313 | failed += keywords_insert("DOY" , DOY); |
| 314 | failed += keywords_insert("DAY" , DAY); |
| 315 | failed += keywords_insert("HOUR" , HOUR); |
| 316 | failed += keywords_insert("MINUTE" , MINUTE); |
| 317 | failed += keywords_insert("SECOND" , SECOND); |
| 318 | |
| 319 | failed += keywords_insert("POSITION" , POSITION); |
| 320 | failed += keywords_insert("SUBSTRING" , SUBSTRING); |
| 321 | failed += keywords_insert("SPLIT_PART" , SPLIT_PART); |
| 322 | |
| 323 | failed += keywords_insert("CASE" , CASE); |
| 324 | failed += keywords_insert("WHEN" , WHEN); |
| 325 | failed += keywords_insert("THEN" , THEN); |
| 326 | failed += keywords_insert("ELSE" , ELSE); |
| 327 | failed += keywords_insert("END" , END); |
| 328 | failed += keywords_insert("NULLIF" , NULLIF); |
| 329 | failed += keywords_insert("COALESCE" , COALESCE); |
| 330 | failed += keywords_insert("ELSEIF" , ELSEIF); |
| 331 | failed += keywords_insert("IF" , IF); |
| 332 | failed += keywords_insert("WHILE" , WHILE); |
| 333 | failed += keywords_insert("DO" , DO); |
| 334 | |
| 335 | failed += keywords_insert("COMMIT" , COMMIT); |
| 336 | failed += keywords_insert("ROLLBACK" , ROLLBACK); |
| 337 | failed += keywords_insert("SAVEPOINT" , SAVEPOINT); |
| 338 | failed += keywords_insert("RELEASE" , RELEASE); |
| 339 | failed += keywords_insert("WORK" , WORK); |
| 340 | failed += keywords_insert("CHAIN" , CHAIN); |
| 341 | failed += keywords_insert("PRESERVE" , PRESERVE); |
| 342 | failed += keywords_insert("ROWS" , ROWS); |
| 343 | failed += keywords_insert("NO" , NO); |
| 344 | failed += keywords_insert("START" , START); |
| 345 | failed += keywords_insert("TRANSACTION" , TRANSACTION); |
| 346 | failed += keywords_insert("READ" , READ); |
| 347 | failed += keywords_insert("WRITE" , WRITE); |
| 348 | failed += keywords_insert("ONLY" , ONLY); |
| 349 | failed += keywords_insert("ISOLATION" , ISOLATION); |
| 350 | failed += keywords_insert("LEVEL" , LEVEL); |
| 351 | failed += keywords_insert("UNCOMMITTED" , UNCOMMITTED); |
| 352 | failed += keywords_insert("COMMITTED" , COMMITTED); |
| 353 | failed += keywords_insert("REPEATABLE" , sqlREPEATABLE); |
| 354 | failed += keywords_insert("SERIALIZABLE" , SERIALIZABLE); |
| 355 | failed += keywords_insert("DIAGNOSTICS" , DIAGNOSTICS); |
| 356 | failed += keywords_insert("SIZE" , sqlSIZE); |
| 357 | failed += keywords_insert("STORAGE" , STORAGE); |
| 358 | |
| 359 | failed += keywords_insert("TYPE" , TYPE); |
| 360 | failed += keywords_insert("PROCEDURE" , PROCEDURE); |
| 361 | failed += keywords_insert("FUNCTION" , FUNCTION); |
| 362 | failed += keywords_insert("LOADER" , sqlLOADER); |
| 363 | failed += keywords_insert("REPLACE" , REPLACE); |
| 364 | |
| 365 | failed += keywords_insert("FILTER" , FILTER); |
| 366 | failed += keywords_insert("AGGREGATE" , AGGREGATE); |
| 367 | failed += keywords_insert("RETURNS" , RETURNS); |
| 368 | failed += keywords_insert("EXTERNAL" , EXTERNAL); |
| 369 | failed += keywords_insert("NAME" , sqlNAME); |
| 370 | failed += keywords_insert("RETURN" , RETURN); |
| 371 | failed += keywords_insert("CALL" , CALL); |
| 372 | failed += keywords_insert("LANGUAGE" , LANGUAGE); |
| 373 | |
| 374 | failed += keywords_insert("ANALYZE" , ANALYZE); |
| 375 | failed += keywords_insert("MINMAX" , MINMAX); |
| 376 | failed += keywords_insert("EXPLAIN" , SQL_EXPLAIN); |
| 377 | failed += keywords_insert("PLAN" , SQL_PLAN); |
| 378 | failed += keywords_insert("DEBUG" , SQL_DEBUG); |
| 379 | failed += keywords_insert("TRACE" , SQL_TRACE); |
| 380 | failed += keywords_insert("PREPARE" , PREPARE); |
| 381 | failed += keywords_insert("PREP" , PREP); |
| 382 | failed += keywords_insert("EXECUTE" , EXECUTE); |
| 383 | failed += keywords_insert("EXEC" , EXEC); |
| 384 | |
| 385 | failed += keywords_insert("INDEX" , INDEX); |
| 386 | |
| 387 | failed += keywords_insert("SEQUENCE" , SEQUENCE); |
| 388 | failed += keywords_insert("RESTART" , RESTART); |
| 389 | failed += keywords_insert("INCREMENT" , INCREMENT); |
| 390 | failed += keywords_insert("MAXVALUE" , MAXVALUE); |
| 391 | failed += keywords_insert("MINVALUE" , MINVALUE); |
| 392 | failed += keywords_insert("CYCLE" , CYCLE); |
| 393 | failed += keywords_insert("CACHE" , CACHE); |
| 394 | failed += keywords_insert("NEXT" , NEXT); |
| 395 | failed += keywords_insert("VALUE" , VALUE); |
| 396 | failed += keywords_insert("GENERATED" , GENERATED); |
| 397 | failed += keywords_insert("ALWAYS" , ALWAYS); |
| 398 | failed += keywords_insert("IDENTITY" , IDENTITY); |
| 399 | failed += keywords_insert("SERIAL" , SERIAL); |
| 400 | failed += keywords_insert("BIGSERIAL" , BIGSERIAL); |
| 401 | failed += keywords_insert("AUTO_INCREMENT" , AUTO_INCREMENT); |
| 402 | failed += keywords_insert("CONTINUE" , CONTINUE); |
| 403 | |
| 404 | failed += keywords_insert("TRIGGER" , TRIGGER); |
| 405 | failed += keywords_insert("ATOMIC" , ATOMIC); |
| 406 | failed += keywords_insert("BEGIN" , BEGIN); |
| 407 | failed += keywords_insert("OF" , OF); |
| 408 | failed += keywords_insert("BEFORE" , BEFORE); |
| 409 | failed += keywords_insert("AFTER" , AFTER); |
| 410 | failed += keywords_insert("ROW" , ROW); |
| 411 | failed += keywords_insert("STATEMENT" , STATEMENT); |
| 412 | failed += keywords_insert("NEW" , sqlNEW); |
| 413 | failed += keywords_insert("OLD" , OLD); |
| 414 | failed += keywords_insert("EACH" , EACH); |
| 415 | failed += keywords_insert("REFERENCING" , REFERENCING); |
| 416 | |
| 417 | failed += keywords_insert("RANGE" , RANGE); |
| 418 | failed += keywords_insert("UNBOUNDED" , UNBOUNDED); |
| 419 | failed += keywords_insert("PRECEDING" , PRECEDING); |
| 420 | failed += keywords_insert("FOLLOWING" , FOLLOWING); |
| 421 | failed += keywords_insert("CURRENT" , CURRENT); |
| 422 | failed += keywords_insert("EXCLUDE" , EXCLUDE); |
| 423 | failed += keywords_insert("OTHERS" , OTHERS); |
| 424 | failed += keywords_insert("TIES" , TIES); |
| 425 | failed += keywords_insert("GROUPS" , GROUPS); |
| 426 | failed += keywords_insert("WINDOW" , WINDOW); |
| 427 | |
| 428 | /* special SQL/XML keywords */ |
| 429 | failed += keywords_insert("XMLCOMMENT" , XMLCOMMENT); |
| 430 | failed += keywords_insert("XMLCONCAT" , XMLCONCAT); |
| 431 | failed += keywords_insert("XMLDOCUMENT" , XMLDOCUMENT); |
| 432 | failed += keywords_insert("XMLELEMENT" , XMLELEMENT); |
| 433 | failed += keywords_insert("XMLATTRIBUTES" , XMLATTRIBUTES); |
| 434 | failed += keywords_insert("XMLFOREST" , XMLFOREST); |
| 435 | failed += keywords_insert("XMLPARSE" , XMLPARSE); |
| 436 | failed += keywords_insert("STRIP" , STRIP); |
| 437 | failed += keywords_insert("WHITESPACE" , WHITESPACE); |
| 438 | failed += keywords_insert("XMLPI" , XMLPI); |
| 439 | failed += keywords_insert("XMLQUERY" , XMLQUERY); |
| 440 | failed += keywords_insert("PASSING" , PASSING); |
| 441 | failed += keywords_insert("XMLTEXT" , XMLTEXT); |
| 442 | failed += keywords_insert("NIL" , NIL); |
| 443 | failed += keywords_insert("REF" , REF); |
| 444 | failed += keywords_insert("ABSENT" , ABSENT); |
| 445 | failed += keywords_insert("DOCUMENT" , DOCUMENT); |
| 446 | failed += keywords_insert("ELEMENT" , ELEMENT); |
| 447 | failed += keywords_insert("CONTENT" , CONTENT); |
| 448 | failed += keywords_insert("XMLNAMESPACES" , XMLNAMESPACES); |
| 449 | failed += keywords_insert("NAMESPACE" , NAMESPACE); |
| 450 | failed += keywords_insert("XMLVALIDATE" , XMLVALIDATE); |
| 451 | failed += keywords_insert("RETURNING" , RETURNING); |
| 452 | failed += keywords_insert("LOCATION" , LOCATION); |
| 453 | failed += keywords_insert("ID" , ID); |
| 454 | failed += keywords_insert("ACCORDING" , ACCORDING); |
| 455 | failed += keywords_insert("XMLSCHEMA" , XMLSCHEMA); |
| 456 | failed += keywords_insert("URI" , URI); |
| 457 | failed += keywords_insert("XMLAGG" , XMLAGG); |
| 458 | |
| 459 | /* keywords for opengis */ |
| 460 | failed += keywords_insert("GEOMETRY" , GEOMETRY); |
| 461 | |
| 462 | failed += keywords_insert("POINT" , GEOMETRYSUBTYPE); |
| 463 | failed += keywords_insert("LINESTRING" , GEOMETRYSUBTYPE); |
| 464 | failed += keywords_insert("POLYGON" , GEOMETRYSUBTYPE); |
| 465 | failed += keywords_insert("MULTIPOINT" , GEOMETRYSUBTYPE); |
| 466 | failed += keywords_insert("MULTILINESTRING" , GEOMETRYSUBTYPE); |
| 467 | failed += keywords_insert("MULTIPOLYGON" , GEOMETRYSUBTYPE); |
| 468 | failed += keywords_insert("GEOMETRYCOLLECTION" , GEOMETRYSUBTYPE); |
| 469 | |
| 470 | failed += keywords_insert("POINTZ" , GEOMETRYSUBTYPE); |
| 471 | failed += keywords_insert("LINESTRINGZ" , GEOMETRYSUBTYPE); |
| 472 | failed += keywords_insert("POLYGONZ" , GEOMETRYSUBTYPE); |
| 473 | failed += keywords_insert("MULTIPOINTZ" , GEOMETRYSUBTYPE); |
| 474 | failed += keywords_insert("MULTILINESTRINGZ" , GEOMETRYSUBTYPE); |
| 475 | failed += keywords_insert("MULTIPOLYGONZ" , GEOMETRYSUBTYPE); |
| 476 | failed += keywords_insert("GEOMETRYCOLLECTIONZ" , GEOMETRYSUBTYPE); |
| 477 | |
| 478 | failed += keywords_insert("POINTM" , GEOMETRYSUBTYPE); |
| 479 | failed += keywords_insert("LINESTRINGM" , GEOMETRYSUBTYPE); |
| 480 | failed += keywords_insert("POLYGONM" , GEOMETRYSUBTYPE); |
| 481 | failed += keywords_insert("MULTIPOINTM" , GEOMETRYSUBTYPE); |
| 482 | failed += keywords_insert("MULTILINESTRINGM" , GEOMETRYSUBTYPE); |
| 483 | failed += keywords_insert("MULTIPOLYGONM" , GEOMETRYSUBTYPE); |
| 484 | failed += keywords_insert("GEOMETRYCOLLECTIONM" , GEOMETRYSUBTYPE); |
| 485 | |
| 486 | failed += keywords_insert("POINTZM" , GEOMETRYSUBTYPE); |
| 487 | failed += keywords_insert("LINESTRINGZM" , GEOMETRYSUBTYPE); |
| 488 | failed += keywords_insert("POLYGONZM" , GEOMETRYSUBTYPE); |
| 489 | failed += keywords_insert("MULTIPOINTZM" , GEOMETRYSUBTYPE); |
| 490 | failed += keywords_insert("MULTILINESTRINGZM" , GEOMETRYSUBTYPE); |
| 491 | failed += keywords_insert("MULTIPOLYGONZM" , GEOMETRYSUBTYPE); |
| 492 | failed += keywords_insert("GEOMETRYCOLLECTIONZM" , GEOMETRYSUBTYPE); |
| 493 | |
| 494 | return failed; |
| 495 | } |
| 496 | |
/* Call find_keyword() on the text that starts `s` bytes past the current
 * scan position (rs->buf + rs->pos) of scanner `lc`.  Both arguments are
 * parenthesized so that arbitrary expressions can be passed safely. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
| 498 | |
| 499 | void |
| 500 | scanner_init(struct scanner *s, bstream *rs, stream *ws) |
| 501 | { |
| 502 | s->rs = rs; |
| 503 | s->ws = ws; |
| 504 | s->log = NULL; |
| 505 | |
| 506 | s->yynext = 0; |
| 507 | s->yylast = 0; |
| 508 | s->yyval = 0; |
| 509 | s->yybak = 0; /* keep backup of char replaced by EOS */ |
| 510 | s->yycur = 0; |
| 511 | |
| 512 | s->key = 0; /* keep a hash key of the query */ |
| 513 | s->started = 0; |
| 514 | s->as = 0; |
| 515 | |
| 516 | s->mode = LINE_N; |
| 517 | s->schema = NULL; |
| 518 | } |
| 519 | |
| 520 | void |
| 521 | scanner_query_processed(struct scanner *s) |
| 522 | { |
| 523 | int cur; |
| 524 | |
| 525 | if (s->yybak) { |
| 526 | s->rs->buf[s->rs->pos + s->yycur] = s->yybak; |
| 527 | s->yybak = 0; |
| 528 | } |
| 529 | if (s->rs) { |
| 530 | s->rs->pos += s->yycur; |
| 531 | /* completely eat the query including white space after the ; */ |
| 532 | while (s->rs->pos < s->rs->len && |
| 533 | (cur = s->rs->buf[s->rs->pos], iswspace(cur))) { |
| 534 | s->rs->pos++; |
| 535 | } |
| 536 | } |
| 537 | /*assert(s->rs->pos <= s->rs->len);*/ |
| 538 | s->yycur = 0; |
| 539 | s->key = 0; /* keep a hash key of the query */ |
| 540 | s->started = 0; |
| 541 | s->as = 0; |
| 542 | s->schema = NULL; |
| 543 | } |
| 544 | |
| 545 | void |
| 546 | scanner_reset_key(struct scanner *s) |
| 547 | { |
| 548 | s->key = 0; |
| 549 | } |
| 550 | |
| 551 | static int |
| 552 | scanner_error(mvc *lc, int cur) |
| 553 | { |
| 554 | switch (cur) { |
| 555 | case EOF: |
| 556 | (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input" ); |
| 557 | return -1; /* EOF needs -1 result */ |
| 558 | default: |
| 559 | /* on Windows at least, iswcntrl returns TRUE for |
| 560 | * U+FEFF, but we just want consistent error |
| 561 | * messages */ |
| 562 | (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)" , iswcntrl(cur) && cur != 0xFEFF ? " control" : "" , (unsigned) cur); |
| 563 | } |
| 564 | return LEX_ERROR; |
| 565 | } |
| 566 | |
| 567 | |
| 568 | /* |
| 569 | UTF-8 encoding is as follows: |
| 570 | U-00000000 - U-0000007F: 0xxxxxxx |
| 571 | U-00000080 - U-000007FF: 110xxxxx 10xxxxxx |
| 572 | U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx |
| 573 | U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
| 574 | U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
| 575 | U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
| 576 | */ |
/* To be correctly coded UTF-8, a sequence must be the shortest possible
   encoding of its value.  For an encoding with n continuation bytes
   (n+1 bytes in total, 1 <= n <= 5), at least one of the bits in
   utf8chkmsk[n] must therefore be set in the decoded value; otherwise
   the value would have fit in a shorter encoding.
   The table is only ever read, hence const.
*/
static const int utf8chkmsk[] = {
	0x0000007f,
	0x00000780,
	0x0000f800,
	0x001f0000,
	0x03e00000,
	0x7c000000
};
| 591 | |
| 592 | static void |
| 593 | utf8_putchar(struct scanner *lc, int ch) |
| 594 | { |
| 595 | if ((ch) < 0x80) { |
| 596 | lc->yycur--; |
| 597 | } else if ((ch) < 0x800) { |
| 598 | lc->yycur -= 2; |
| 599 | } else if ((ch) < 0x10000) { |
| 600 | lc->yycur -= 3; |
| 601 | } else { |
| 602 | lc->yycur -= 4; |
| 603 | } |
| 604 | } |
| 605 | |
| 606 | static inline int |
| 607 | scanner_read_more(struct scanner *lc, size_t n) |
| 608 | { |
| 609 | bstream *b = lc->rs; |
| 610 | bool more = false; |
| 611 | |
| 612 | |
| 613 | while (b->len < b->pos + lc->yycur + n) { |
| 614 | |
| 615 | if (lc->mode == LINE_1 || !lc->started) |
| 616 | return EOF; |
| 617 | |
| 618 | /* query is not finished ask for more */ |
| 619 | if (b->eof || !isa_block_stream(b->s)) { |
| 620 | if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1) |
| 621 | mnstr_flush(lc->ws); |
| 622 | b->eof = false; |
| 623 | more = true; |
| 624 | } |
| 625 | /* we need more query text */ |
| 626 | if (bstream_next(b) < 0 || |
| 627 | /* we asked for more data but didn't get any */ |
| 628 | (more && b->eof && b->len < b->pos + lc->yycur + n)) |
| 629 | return EOF; |
| 630 | } |
| 631 | return 1; |
| 632 | } |
| 633 | |
| 634 | static inline int |
| 635 | scanner_getc(struct scanner *lc) |
| 636 | { |
| 637 | bstream *b = lc->rs; |
| 638 | unsigned char *s = NULL; |
| 639 | int c, m, n, mask; |
| 640 | |
| 641 | if (scanner_read_more(lc, 1) == EOF) { |
| 642 | lc->errstr = SQLSTATE(42000) "end of input stream" ; |
| 643 | return EOF; |
| 644 | } |
| 645 | lc->errstr = NULL; |
| 646 | |
| 647 | s = (unsigned char *) b->buf + b->pos + lc->yycur++; |
| 648 | if (((c = *s) & 0x80) == 0) { |
| 649 | /* 7-bit char */ |
| 650 | return c; |
| 651 | } |
| 652 | for (n = 0, m = 0x40; c & m; n++, m >>= 1) |
| 653 | ; |
| 654 | /* n now is number of 10xxxxxx bytes that should follow */ |
| 655 | if (n == 0 || n >= 6 || (b->pos + n) > b->len) { |
| 656 | /* incorrect UTF-8 sequence */ |
| 657 | /* n==0: c == 10xxxxxx */ |
| 658 | /* n>=6: c == 1111111x */ |
| 659 | lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence" ; |
| 660 | goto error; |
| 661 | } |
| 662 | |
| 663 | if (scanner_read_more(lc, (size_t) n) == EOF) |
| 664 | return EOF; |
| 665 | s = (unsigned char *) b->buf + b->pos + lc->yycur; |
| 666 | |
| 667 | mask = utf8chkmsk[n]; |
| 668 | c &= ~(0xFFC0 >> n); /* remove non-x bits */ |
| 669 | while (--n >= 0) { |
| 670 | c <<= 6; |
| 671 | lc->yycur++; |
| 672 | if (((m = *s++) & 0xC0) != 0x80) { |
| 673 | /* incorrect UTF-8 sequence: byte is not 10xxxxxx */ |
| 674 | /* this includes end-of-string (m == 0) */ |
| 675 | lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence" ; |
| 676 | goto error; |
| 677 | } |
| 678 | c |= m & 0x3F; |
| 679 | } |
| 680 | if ((c & mask) == 0) { |
| 681 | /* incorrect UTF-8 sequence: not shortest possible */ |
| 682 | lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence" ; |
| 683 | goto error; |
| 684 | } |
| 685 | |
| 686 | return c; |
| 687 | |
| 688 | error: |
| 689 | if (b->pos + lc->yycur < b->len) /* skip bogus char */ |
| 690 | lc->yycur++; |
| 691 | return EOF; |
| 692 | } |
| 693 | |
| 694 | static int |
| 695 | scanner_token(struct scanner *lc, int token) |
| 696 | { |
| 697 | lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur]; |
| 698 | lc->rs->buf[lc->rs->pos + lc->yycur] = 0; |
| 699 | lc->yyval = token; |
| 700 | return lc->yyval; |
| 701 | } |
| 702 | |
/* Scan a quoted string, starting at the current scanner position
 * (rs->pos + lc->yycur).  The callers in this file pass as `quote` the
 * quote character they have just read.
 *
 * c:       the SQL context; the scanner state used is c->scanner
 * quote:   the character that terminates the string
 * escapes: when true, a backslash toggles the "escaped" state, so that a
 *          quote directly after an unpaired backslash does not end the
 *          string
 *
 * Returns the result of scanner_token(lc, STRING) when the closing quote
 * is found.  Returns LEX_ERROR, after calling sql_error(), when the scan
 * hits the length limit, meets a NUL byte inside the buffered data, or
 * the input ends before the closing quote.
 */
static int
scanner_string(mvc *c, int quote, bool escapes)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = quote;
	bool escape = false;
	/* strings opened with '"' get a limit of 2^11, all others 2^30 */
	const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;

	lc->started = 1;
	while (cur != EOF) {
		/* pos restarts at 0 on every pass of this outer loop, so
		 * `limit` bounds one run of the inner loop */
		size_t pos = 0;
		const size_t yycur = rs->pos + lc->yycur;

		/* fast path over bytes with the high bit clear; stops at a
		 * NUL byte, at a quote that is not escaped, at a byte with
		 * bit 0x80 set, or when pos reaches limit */
		while (cur != EOF && pos < limit &&
		       (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
		       cur && (cur != quote || escape)) {
			if (escapes && cur == '\\')
				escape = !escape;
			else
				escape = false;
		}
		if (pos == limit) {
			(void) sql_error(c, 2, SQLSTATE(42000) "string too long" );
			return LEX_ERROR;
		}
		if (cur == EOF)
			break;
		/* account for everything the inner loop looked at,
		 * including the byte that stopped it */
		lc->yycur += pos;
		/* check for quote escaped quote: Obscure SQL Rule */
		/* TODO also handle double "" */
		if (cur == quote && rs->buf[yycur + pos] == quote) {
			/* with escapes enabled the first quote of the pair
			 * is rewritten in place to a backslash */
			if (escapes)
				rs->buf[yycur + pos - 1] = '\\';
			lc->yycur++;	/* step over the second quote */
			continue;
		}
		assert(yycur + pos <= rs->len + 1);
		if (cur == quote && !escape) {
			return scanner_token(lc, STRING);
		}
		lc->yycur--; /* go back to current (possibly invalid) char */
		/* long utf8, if correct isn't the quote */
		if (!cur) {
			/* a NUL byte that lies inside the buffered data is
			 * an error; otherwise ask for more input */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string" );
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: let scanner_getc()
			 * read it (it returns EOF on failure) */
			cur = scanner_getc(lc);
		}
	}
	/* cur == EOF: report the scanner's own error string when one is
	 * set, otherwise a generic end-of-input error */
	(void) sql_error(c, 2, "%s" , lc->errstr ? lc->errstr : SQLSTATE(42000) "unexpected end of input" );
	return LEX_ERROR;
}
| 759 | |
/* scan a structure {blah} into a string. We only count the matching {}
 * unless escaped. We do not consider embeddings in string literals yet
 *
 * The opening '{' must be the byte just before the current position
 * (asserted below).  Returns scanner_token(lc, X_BODY) once the brace
 * depth has dropped to zero, or LEX_ERROR, after calling sql_error(),
 * when a NUL byte is found inside the buffered data or the input ends.
 *
 * NOTE(review): the depth counter is updated for every '{' and '}' the
 * inner loop sees, whether or not `escape` is set; `escape` only affects
 * when the loop and the function stop.  Confirm this is what "unless
 * escaped" above is meant to say.
 */

static int
scanner_body(mvc *c)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = (int) 'x';	/* any value other than EOF, to enter the loop */
	int blk = 1;		/* current brace depth; the opening '{' counts */
	bool escape = false;

	lc->started = 1;
	assert(rs->buf[rs->pos + lc->yycur-1] == '{');
	while (cur != EOF) {
		size_t pos = rs->pos + lc->yycur;

		/* fast path over bytes with the high bit clear; stops at a
		 * NUL byte, at a byte with bit 0x80 set, or on the byte
		 * read after the depth has reached zero */
		while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
			if (cur != '\\')
				escape = false;
			else
				escape = !escape;
			blk += cur =='{';
			blk -= cur =='}';
		}
		lc->yycur = pos - rs->pos;
		assert(pos <= rs->len + 1);
		if (blk == 0 && !escape){
			lc->yycur--; /* go back to current (possibly invalid) char */
			return scanner_token(lc, X_BODY);
		}
		lc->yycur--; /* go back to current (possibly invalid) char */
		if (!cur) {
			/* a NUL byte that lies inside the buffered data is
			 * an error; otherwise ask for more input */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string" );
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: let scanner_getc()
			 * read it (it returns EOF on failure) */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input" );
	return LEX_ERROR;
}
| 806 | |
| 807 | static int |
| 808 | keyword_or_ident(mvc * c, int cur) |
| 809 | { |
| 810 | struct scanner *lc = &c->scanner; |
| 811 | keyword *k = NULL; |
| 812 | size_t s; |
| 813 | |
| 814 | lc->started = 1; |
| 815 | utf8_putchar(lc, cur); |
| 816 | s = lc->yycur; |
| 817 | lc->yyval = IDENT; |
| 818 | while ((cur = scanner_getc(lc)) != EOF) { |
| 819 | if (!iswalnum(cur) && cur != '_') { |
| 820 | utf8_putchar(lc, cur); |
| 821 | (void)scanner_token(lc, IDENT); |
| 822 | k = find_keyword_bs(lc,s); |
| 823 | if (k) |
| 824 | lc->yyval = k->token; |
| 825 | /* find keyword in SELECT/JOIN/UNION FUNCTIONS */ |
| 826 | else if (sql_find_func(c->sa, cur_schema(c), lc->rs->buf+lc->rs->pos+s, -1, F_FILT, NULL)) |
| 827 | lc->yyval = FILTER_FUNC; |
| 828 | return lc->yyval; |
| 829 | } |
| 830 | } |
| 831 | (void)scanner_token(lc, IDENT); |
| 832 | k = find_keyword_bs(lc,s); |
| 833 | if (k) |
| 834 | lc->yyval = k->token; |
| 835 | /* find keyword in SELECT/JOIN/UNION FUNCTIONS */ |
| 836 | else if (sql_find_func(c->sa, cur_schema(c), lc->rs->buf+lc->rs->pos+s, -1, F_FILT, NULL)) |
| 837 | lc->yyval = FILTER_FUNC; |
| 838 | return lc->yyval; |
| 839 | } |
| 840 | |
| 841 | static int |
| 842 | skip_white_space(struct scanner * lc) |
| 843 | { |
| 844 | int cur; |
| 845 | |
| 846 | do { |
| 847 | lc->yysval = lc->yycur; |
| 848 | } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur)); |
| 849 | return cur; |
| 850 | } |
| 851 | |
| 852 | static int |
| 853 | (struct scanner * lc) |
| 854 | { |
| 855 | int cur; |
| 856 | int prev = 0; |
| 857 | int started = lc->started; |
| 858 | int depth = 1; |
| 859 | |
| 860 | lc->started = 1; |
| 861 | while (depth > 0 && (cur = scanner_getc(lc)) != EOF) { |
| 862 | if (prev == '*' && cur == '/') |
| 863 | depth--; |
| 864 | else if (prev == '/' && cur == '*') { |
| 865 | /* block comments can nest */ |
| 866 | cur = 0; /* prevent slash-star-slash from matching */ |
| 867 | depth++; |
| 868 | } |
| 869 | prev = cur; |
| 870 | } |
| 871 | lc->yysval = lc->yycur; |
| 872 | lc->started = started; |
| 873 | /* a comment is equivalent to a newline */ |
| 874 | return cur == EOF ? cur : '\n'; |
| 875 | } |
| 876 | |
| 877 | static int |
| 878 | (struct scanner * lc) |
| 879 | { |
| 880 | int cur; |
| 881 | int started = lc->started; |
| 882 | |
| 883 | lc->started = 1; |
| 884 | while ((cur = scanner_getc(lc)) != EOF && (cur != '\n')) |
| 885 | ; |
| 886 | lc->yysval = lc->yycur; |
| 887 | lc->started = started; |
| 888 | /* a comment is equivalent to a newline */ |
| 889 | return cur; |
| 890 | } |
| 891 | |
/* forward declaration: scanner_symbol() calls tokenize() after skipping
 * a comment, while tokenize() itself is defined after scanner_symbol() */
static int tokenize(mvc * lc, int cur);
| 893 | |
/* Scan a numeric literal.
 *
 * cur is the first character of the literal as passed by the caller:
 * a digit, or '.' when scanner_symbol() saw a '.' followed by a digit.
 *
 * The token chosen is
 *   sqlINT       digits only (also "0x" with no hex digit after it, in
 *                which case only the "0" is taken)
 *   HEXADECIMAL  "0x" followed by at least one hex digit
 *   OIDNUM       digits followed by '@' (and optionally a '0')
 *   INTNUM       a '.' with optional digits after it
 *   APPROXNUM    an 'e'/'E' exponent
 * and is returned through scanner_token().
 *
 * Returns EOF when the input ended and the stream buffer is NULL, or
 * LEX_ERROR, after calling sql_error(), when an exponent sign is not
 * followed by a digit.
 */
static int
number(mvc * c, int cur)
{
	struct scanner *lc = &c->scanner;
	int token = sqlINT;
	int before_cur = EOF;	/* extra character to hand back, if any */

	lc->started = 1;
	if (cur == '0' && (cur = scanner_getc(lc)) == 'x') {
		/* the loop body only runs for hex digits, so token changes
		 * only when at least one hex digit follows the 'x' */
		while ((cur = scanner_getc(lc)) != EOF &&
		       (iswdigit(cur) ||
			(cur >= 'A' && cur <= 'F') ||
			(cur >= 'a' && cur <= 'f')))
			token = HEXADECIMAL;
		/* no hex digit seen: the 'x' has to be handed back too */
		if (token == sqlINT)
			before_cur = 'x';
	} else {
		/* when the literal started with '0', cur is now the
		 * character after that '0' */
		if (iswdigit(cur))
			while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur))
				;
		if (cur == '@') {
			token = OIDNUM;
			cur = scanner_getc(lc);
			if (cur == '0')
				cur = scanner_getc(lc);
		}

		if (cur == '.') {
			token = INTNUM;

			while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur))
				;
		}
		if (cur == 'e' || cur == 'E') {
			token = APPROXNUM;
			cur = scanner_getc(lc);
			/* after a sign the token is invalid until at least
			 * one digit has been read by the loop below */
			if (cur == '-' || cur == '+')
				token = 0;
			while ((cur = scanner_getc(lc)) != EOF && iswdigit(cur))
				token = APPROXNUM;
		}
	}

	if (cur == EOF && lc->rs->buf == NULL) /* malloc failure */
		return EOF;

	if (token) {
		/* hand back the character that ended the literal, and
		 * after it the 'x' of a "0x" that had no hex digits */
		if (cur != EOF)
			utf8_putchar(lc, cur);
		if (before_cur != EOF)
			utf8_putchar(lc, before_cur);
		return scanner_token(lc, token);
	} else {
		(void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc" , (wint_t) cur);
		return LEX_ERROR;
	}
}
| 951 | |
/* Tokenize input that starts with the character cur, which tokenize()
 * passes when iswpunct(cur) is true.
 *
 * Comments (slash-star, '#' and "--") are skipped, after which
 * tokenize() is re-entered with the character returned by the skip
 * function.  Digits and a '.' followed by a digit go to number(),
 * quotes to scanner_string(), '{' to scanner_body().  Operators made of
 * several characters are recognized by reading ahead with
 * scanner_getc(); characters that turn out not to belong to the
 * operator are handed back with utf8_putchar().
 *
 * Returns the token code (a single-character operator is returned as
 * the character itself), EOF when the input ends inside a comment, or
 * LEX_ERROR, after calling sql_error(), for a character that has no
 * case below.
 */
static
int scanner_symbol(mvc * c, int cur)
{
	struct scanner *lc = &c->scanner;
	int next = 0;
	int started = lc->started;	/* restored when a comment is skipped */

	switch (cur) {
	case '/':
		lc->started = 1;
		next = scanner_getc(lc);
		if (next == '*') {
			lc->started = started;
			cur = skip_c_comment(lc);
			if (cur < 0)
				return EOF;
			return tokenize(c, cur);
		} else {
			utf8_putchar(lc, next);
			return scanner_token(lc, cur);
		}
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
		return number(c, cur);
	case '#':
		if ((cur = skip_sql_comment(lc)) == EOF)
			return cur;
		return tokenize(c, cur);
	case '\'':
	case '"':
		/* backslash escapes are honored in '...' strings only */
		return scanner_string(c, cur,
#if 0
				      false
#else
				      cur == '\''
#endif
			);
	case '{':
		return scanner_body(c);
	case '-':
		lc->started = 1;
		next = scanner_getc(lc);
		if (next == '-') {
			lc->started = started;
			if ((cur = skip_sql_comment(lc)) == EOF)
				return cur;
			return tokenize(c, cur);
		}
		lc->started = 1;
		utf8_putchar(lc, next);
		return scanner_token(lc, cur);
	case '~': /* binary not */
		lc->started = 1;
		next = scanner_getc(lc);
		if (next == '=')
			return scanner_token(lc, GEOM_MBR_EQUAL);
		utf8_putchar(lc, next);
		return scanner_token(lc, cur);
	case '^': /* binary xor */
	case '*':
	case '?':
	case '%':
	case '+':
	case '(':
	case ')':
	case ',':
	case '=':
	case '[':
	case ']':
		lc->started = 1;
		return scanner_token(lc, cur);
	case '&':
		lc->started = 1;
		cur = scanner_getc(lc);
		if(cur == '<') {
			next = scanner_getc(lc);
			if(next == '|') {
				return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
			} else {
				utf8_putchar(lc, next); //put the char back
				return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
			}
		} else if(cur == '>')
			return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
		else if(cur == '&')
			return scanner_token(lc, GEOM_OVERLAP);
		else {/* binary and */
			utf8_putchar(lc, cur); //put the char back
			return scanner_token(lc, '&');
		}
	case '@':
		lc->started = 1;
		return scanner_token(lc, AT);
	case ';':
		/* the only case that clears lc->started */
		lc->started = 0;
		return scanner_token(lc, SCOLON);
	case '<':
		lc->started = 1;
		cur = scanner_getc(lc);
		if (cur == '=') {
			return scanner_token( lc, COMPARISON);
		} else if (cur == '>') {
			return scanner_token( lc, COMPARISON);
		} else if (cur == '<') {
			next = scanner_getc(lc);
			if (next == '=') {
				return scanner_token( lc, LEFT_SHIFT_ASSIGN);
			} else if (next == '|') {
				return scanner_token(lc, GEOM_BELOW);
			} else {
				utf8_putchar(lc, next); //put the char back
				return scanner_token( lc, LEFT_SHIFT);
			}
		} else if(cur == '-') {
			next = scanner_getc(lc);
			if(next == '>') {
				return scanner_token(lc, GEOM_DIST);
			} else {
				//put the characters back and fall in the next possible case
				utf8_putchar(lc, next);
				utf8_putchar(lc, cur);
				return scanner_token( lc, COMPARISON);
			}
		} else {
			utf8_putchar(lc, cur);
			return scanner_token( lc, COMPARISON);
		}
	case '>':
		lc->started = 1;
		cur = scanner_getc(lc);
		if (cur == '>') {
			cur = scanner_getc(lc);
			if (cur == '=')
				return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
			utf8_putchar(lc, cur);
			return scanner_token( lc, RIGHT_SHIFT);
		} else if (cur != '=') {
			utf8_putchar(lc, cur);
			return scanner_token( lc, COMPARISON);
		} else {
			return scanner_token( lc, COMPARISON);
		}
	case '.':
		lc->started = 1;
		cur = scanner_getc(lc);
		if (!iswdigit(cur)) {
			utf8_putchar(lc, cur);
			return scanner_token( lc, '.');
		} else {
			/* ".<digit>": hand the digit back and let number()
			 * scan the literal starting from the '.' */
			utf8_putchar(lc, cur);
			cur = '.';
			return number(c, cur);
		}
	case '|': /* binary or or string concat */
		lc->started = 1;
		cur = scanner_getc(lc);
		if (cur == '|') {
			return scanner_token(lc, CONCATSTRING);
		} else if (cur == '&') {
			next = scanner_getc(lc);
			if(next == '>') {
				return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
			} else {
				utf8_putchar(lc, next); //put the char back
				utf8_putchar(lc, cur); //put the char back
				return scanner_token(lc, '|');
			}
		} else if (cur == '>') {
			next = scanner_getc(lc);
			if(next == '>') {
				return scanner_token(lc, GEOM_ABOVE);
			} else {
				utf8_putchar(lc, next); //put the char back
				utf8_putchar(lc, cur); //put the char back
				return scanner_token(lc, '|');
			}
		} else {
			utf8_putchar(lc, cur);
			return scanner_token(lc, '|');
		}
	}
	/* no case matched */
	(void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)" , (wint_t) cur);
	return LEX_ERROR;
}
| 1144 | |
| 1145 | static int |
| 1146 | tokenize(mvc * c, int cur) |
| 1147 | { |
| 1148 | struct scanner *lc = &c->scanner; |
| 1149 | while (1) { |
| 1150 | if (cur == 0xFEFF) { |
| 1151 | /* on Linux at least, iswpunct returns TRUE |
| 1152 | * for U+FEFF, but we don't want that, we just |
| 1153 | * want to go to the scanner_error case |
| 1154 | * below */ |
| 1155 | ; |
| 1156 | } else if (iswspace(cur)) { |
| 1157 | if ((cur = skip_white_space(lc)) == EOF) |
| 1158 | return cur; |
| 1159 | continue; /* try again */ |
| 1160 | } else if (iswdigit(cur)) { |
| 1161 | return number(c, cur); |
| 1162 | } else if (iswalpha(cur) || cur == '_') { |
| 1163 | if ((cur == 'E' || cur == 'e') && |
| 1164 | lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') { |
| 1165 | return scanner_string(c, scanner_getc(lc), true); |
| 1166 | } |
| 1167 | if ((cur == 'X' || cur == 'x') && |
| 1168 | lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') { |
| 1169 | return scanner_string(c, scanner_getc(lc), true); |
| 1170 | } |
| 1171 | if ((cur == 'U' || cur == 'u') && |
| 1172 | lc->rs->buf[lc->rs->pos + lc->yycur] == '&' && |
| 1173 | (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' || |
| 1174 | lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) { |
| 1175 | cur = scanner_getc(lc); /* '&' */ |
| 1176 | return scanner_string(c, scanner_getc(lc), false); |
| 1177 | } |
| 1178 | return keyword_or_ident(c, cur); |
| 1179 | } else if (iswpunct(cur)) { |
| 1180 | return scanner_symbol(c, cur); |
| 1181 | } |
| 1182 | if (cur == EOF) { |
| 1183 | if (lc->mode == LINE_1 || !lc->started ) |
| 1184 | return cur; |
| 1185 | return scanner_error(c, cur); |
| 1186 | } |
| 1187 | /* none of the above: error */ |
| 1188 | return scanner_error(c, cur); |
| 1189 | } |
| 1190 | } |
| 1191 | |
/* SQL 'quoted' idents consist of a set of any character of
 * the source language character set other than a 'quote'
 *
 * MonetDB has 2 restrictions:
 * 	1 we disallow '%' as the first character.
 * 	2 the length is limited: at most 1023 bytes are accepted, so the
 * 	  result fits in 1024 bytes including the terminating NUL
 *
 * s is copied to dst, with every doubled '"' collapsed into a single
 * one.  Returns true when the identifier is acceptable, in which case
 * dst is NUL terminated.  Returns false otherwise; dst may then hold a
 * partial copy without a terminator.
 */
static bool
valid_ident(const char *restrict s, char *restrict dst)
{
	size_t n = 0;

	if (s[0] == '%')
		return false;

	for (const char *src = s; *src != '\0'; ) {
		const char ch = *src++;

		dst[n++] = ch;
		/* a doubled quote stands for one quote: skip the second */
		if (ch == '"' && *src == '"')
			src++;
		if (n >= 1024)
			return false;
	}
	dst[n] = '\0';
	return true;
}
| 1216 | |
/* Produce the next token and its semantic value.
 *
 * yylval: receives the token text in yylval->sval
 * parm:   the mvc context, passed as void pointer
 *
 * Returns EOF when the stream buffer is NULL or no character can be
 * read; a token that was stashed in lc->yynext when one is pending;
 * otherwise the token found by tokenize(), with KW_ALIAS and KW_TYPE
 * mapped to ALIAS and aTYPE and quoted strings mapped as described
 * below.  Returns LEX_ERROR for an invalid double-quoted identifier.
 *
 * For identifier-like tokens and COMPARISON, yylval->sval is a copy of
 * the token text made with sa_strndup().  For STRING tokens it is a new
 * buffer from sa_alloc() holding the processed string contents.  For
 * all other tokens it points into the scanner buffer at the start of
 * the token.
 *
 * NOTE(review): the results of sa_alloc() and sa_strndup() are used
 * without a NULL check; confirm whether they can fail.
 */
static inline int
sql_get_next_token(YYSTYPE *yylval, void *parm)
{
	mvc *c = (mvc*)parm;
	struct scanner *lc = &c->scanner;
	int token = 0, cur = 0;

	if (lc->rs->buf == NULL) /* malloc failure */
		return EOF;

	/* a token read ahead by sqllex() is delivered first */
	if (lc->yynext) {
		int next = lc->yynext;

		lc->yynext = 0;
		return(next);
	}

	/* restore the byte that scanner_token() overwrote with NUL when
	 * it ended the previous token */
	if (lc->yybak) {
		lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
		lc->yybak = 0;
	}

	lc->yysval = lc->yycur;		/* start of the new token */
	lc->yylast = lc->yyval;		/* remember the previous token */
	cur = scanner_getc(lc);
	if (cur < 0)
		return EOF;
	token = tokenize(c, cur);

	yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);

	/* This is needed as ALIAS and aTYPE get defined too late, see
	   sql_keyword.h */
	if (token == KW_ALIAS)
		token = ALIAS;

	if (token == KW_TYPE)
		token = aTYPE;

	if (token == IDENT || token == COMPARISON || token == FILTER_FUNC ||
	    token == AGGR || token == AGGR2 || token == RANK ||
	    token == aTYPE || token == ALIAS)
		yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
	else if (token == STRING) {
		/* the first character of the token text tells which kind
		 * of quoted string this is */
		char quote = *yylval->sval;
		char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
		assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x');

		/* temporarily replace the last byte of the token (the
		 * closing quote) with NUL so the contents can be handled
		 * as a C string */
		lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
		if (quote == '"') {
			/* "..." is an identifier, not a string */
			if (valid_ident(yylval->sval+1,str)) {
				token = IDENT;
			} else {
				/* NOTE(review): this return leaves the NUL
				 * written above in the buffer; the "reset
				 * original" step below is not reached */
				sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'" , yylval->sval+1);
				return LEX_ERROR;
			}
		} else if (quote == 'E' || quote == 'e') {
			/* E'...': contents converted by GDKstrFromStr()
			 * (presumably backslash-escape decoding -- TODO
			 * confirm against its definition) */
			assert(yylval->sval[1] == '\'');
			GDKstrFromStr((unsigned char *) str,
				      (unsigned char *) yylval->sval + 2,
				      lc->yycur-lc->yysval - 2);
			quote = '\'';
		} else if (quote == 'U' || quote == 'u') {
			/* U&'...' or U&"...": contents copied unchanged;
			 * the token becomes USTRING or UIDENT */
			assert(yylval->sval[1] == '&');
			assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
			strcpy(str, yylval->sval + 3);
			token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
			quote = yylval->sval[2];
		} else if (quote == 'X' || quote == 'x') {
			/* X'...': copy the contents, collapsing each
			 * doubled single quote into one */
			assert(yylval->sval[1] == '\'');
			char *dst = str;
			for (char *src = yylval->sval + 2; *src; dst++)
				if ((*dst = *src++) == '\'' && *src == '\'')
					src++;
			*dst = 0;
			quote = '\'';
			token = XSTRING;
		} else {
			/* plain '...' string */
#if 0
			char *dst = str;
			for (char *src = yylval->sval + 1; *src; dst++)
				if ((*dst = *src++) == '\'' && *src == '\'')
					src++;
			*dst = 0;
#else
			GDKstrFromStr((unsigned char *) str,
				      (unsigned char *) yylval->sval + 1,
				      lc->yycur-lc->yysval - 1);
#endif
		}
		yylval->sval = str;

		/* reset original */
		lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
	}

	return(token);
}
| 1315 | |
/* also see sql_parser.y */
extern int sqllex( YYSTYPE *yylval, void *m );

/* Lexer entry point: return the next token, combining some token pairs
 * into one.
 *
 * yylval: receives the semantic value of the token
 * parm:   the mvc context, passed as void pointer
 *
 * On top of sql_get_next_token() this function
 *  - turns NOT followed by BETWEEN, IN, LIKE or ILIKE into NOT_BETWEEN,
 *    NOT_IN, NOT_LIKE or NOT_ILIKE; drops both tokens of NOT NOT and
 *    returns the token after them; any other token read after NOT is
 *    stashed in lc->yynext for the next call;
 *  - turns UNION followed by JOIN into UNIONJOIN, stashing any other
 *    token read after UNION in lc->yynext;
 *  - skips semicolons that directly follow a semicolon;
 *  - writes the scanned text to lc->log when that is set;
 *  - folds the token into lc->key with XOR, except for the literal
 *    tokens listed below;
 *  - increments lc->started for every token other than EOF.
 */
int
sqllex(YYSTYPE * yylval, void *parm)
{
	int token;
	mvc *c = (mvc *) parm;
	struct scanner *lc = &c->scanner;
	size_t pos;

	/* store position for when view's query ends */
	pos = lc->rs->pos + lc->yycur;

	token = sql_get_next_token(yylval, parm);

	if (token == NOT) {
		/* look one token ahead; the recursive call applies the
		 * same combining rules to what it reads */
		int next = sqllex(yylval, parm);

		if (next == NOT) {
			return sqllex(yylval, parm);
		} else if (next == BETWEEN) {
			token = NOT_BETWEEN;
		} else if (next == sqlIN) {
			token = NOT_IN;
		} else if (next == LIKE) {
			token = NOT_LIKE;
		} else if (next == ILIKE) {
			token = NOT_ILIKE;
		} else {
			/* not a pair: deliver `next` on the next call */
			lc->yynext = next;
		}
	} else if (token == UNION) {
		int next = sqllex(yylval, parm);

		if (next == JOIN) {
			token = UNIONJOIN;
		} else {
			lc->yynext = next;
		}
	} else if (token == SCOLON) {
		/* ignore semi-colon(s) following a semi-colon */
		if (lc->yylast == SCOLON) {
			/* prev tracks the position just after the last
			 * semicolon that was skipped */
			size_t prev = lc->yycur;
			while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
				prev = lc->yycur;

			/* skip the skipped stuff also in the buffer */
			lc->rs->pos += prev;
			lc->yycur -= prev;
		}
	}

	/* log the text scanned since the position saved on entry */
	if (lc->log)
		mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);

	/* Don't include literals in the calculation of the key */
	if (token != STRING && token != USTRING && token != sqlINT && token != OIDNUM && token != INTNUM && token != APPROXNUM && token != sqlNULL)
		lc->key ^= token;
	lc->started += (token != EOF);
	return token;
}
| 1378 | |