/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - raw_parser
 * - base_yylex
 *--------------------------------------------------------------------
 */

/*-------------------------------------------------------------------------
 *
 * parser.c
 *    Main entry point/driver for PostgreSQL grammar
 *
 * Note that the grammar is not allowed to perform any table access
 * (since we need to be able to do basic parsing even while inside an
 * aborted transaction).  Therefore, the data structures returned by
 * the grammar are "raw" parsetrees that still need to be analyzed by
 * analyze.c and related files.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/parser/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "pg_functions.hpp"

#include "parser/gramparse.hpp"
#include "parser/parser.hpp"


/*
 * raw_parser
 *    Given a query in string form, do lexical and grammatical analysis.
 *
 * Returns a list of raw (un-analyzed) parse trees.  The immediate elements
 * of the list are always PGRawStmt nodes.
 */
PGList *
raw_parser(const char *str)
{
    core_yyscan_t yyscanner;
    base_yy_extra_type yyextra;
    int yyresult;

    /* initialize the flex scanner */
    yyscanner = scanner_init(str, &yyextra.core_yy_extra,
                             ScanKeywords, NumScanKeywords);

    /* base_yylex() only needs this much initialization */
    yyextra.have_lookahead = false;

    /* initialize the bison parser */
    parser_init(&yyextra);

    /* Parse! */
    yyresult = base_yyparse(yyscanner);

    /* Clean up (release memory) */
    scanner_finish(yyscanner);

    if (yyresult)               /* error */
        return NIL;

    return yyextra.parsetree;
}

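/*
 * Minimal usage sketch for raw_parser() (illustrative only, hence kept
 * un-compiled).  The query text and the way the result is consumed are
 * assumptions made for this example, not part of this file's API.
 */
#if 0
static void
raw_parser_example(void)
{
    PGList *parsetree_list;

    /* lex and parse; each list element is a PGRawStmt wrapping one statement */
    parsetree_list = raw_parser("SELECT 1; SELECT 2;");

    if (parsetree_list == NIL)
    {
        /* parse failed (or the input contained no statements) */
        return;
    }

    /* the raw trees still need semantic analysis (analyze.c and friends) */
}
#endif
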
/*
 * Intermediate filter between parser and core lexer (core_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
 *
 * The filter also provides a convenient place to translate between
 * the core_YYSTYPE and YYSTYPE representations (which are really the
 * same thing anyway, but notationally they're different).
 */
int
base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
{
    base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
    int cur_token;
    int next_token;
    int cur_token_length;
    YYLTYPE cur_yylloc;

    /* Get next token --- we might already have it */
    if (yyextra->have_lookahead)
    {
        cur_token = yyextra->lookahead_token;
        lvalp->core_yystype = yyextra->lookahead_yylval;
        *llocp = yyextra->lookahead_yylloc;
        *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
        yyextra->have_lookahead = false;
    }
    else
        cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);

    /*
     * If this token isn't one that requires lookahead, just return it.  If it
     * does, determine the token length.  (We could get that via strlen(), but
     * since we have such a small set of possibilities, hardwiring seems
     * feasible and more efficient.)
     */
    switch (cur_token)
    {
        case NOT:
            cur_token_length = 3;
            break;
        case NULLS_P:
            cur_token_length = 5;
            break;
        case WITH:
            cur_token_length = 4;
            break;
        default:
            return cur_token;
    }

    /*
     * Identify end+1 of current token.  core_yylex() has temporarily stored a
     * '\0' here, and will undo that when we call it again.  We need to redo
     * it to fully revert the lookahead call for error reporting purposes.
     */
    yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
        *llocp + cur_token_length;
    Assert(*(yyextra->lookahead_end) == '\0');

    /*
     * Save and restore *llocp around the call.  It might look like we could
     * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
     * does not work because flex actually holds onto the last-passed pointer
     * internally, and will use that for error reporting.  We need any error
     * reports to point to the current token, not the next one.
     */
    cur_yylloc = *llocp;

    /* Get next token, saving outputs into lookahead variables */
    next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
    yyextra->lookahead_token = next_token;
    yyextra->lookahead_yylloc = *llocp;

    *llocp = cur_yylloc;

    /* Now revert the un-truncation of the current token */
    yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
    *(yyextra->lookahead_end) = '\0';

    yyextra->have_lookahead = true;

    /* Replace cur_token if needed, based on lookahead */
    switch (cur_token)
    {
        case NOT:
            /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
            switch (next_token)
            {
                case BETWEEN:
                case IN_P:
                case LIKE:
                case ILIKE:
                case SIMILAR:
                    cur_token = NOT_LA;
                    break;
            }
            break;

        case NULLS_P:
            /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
            switch (next_token)
            {
                case FIRST_P:
                case LAST_P:
                    cur_token = NULLS_LA;
                    break;
            }
            break;

        case WITH:
            /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
            switch (next_token)
            {
                case TIME:
                case ORDINALITY:
                    cur_token = WITH_LA;
                    break;
            }
            break;
    }

    return cur_token;
}
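/*
 * Worked example for the lookahead filter above (illustrative only).  For
 * the input "x NOT LIKE y" the core lexer emits the token stream
 *
 *    IDENT  NOT  LIKE  IDENT
 *
 * base_yylex() sees NOT, peeks one token ahead, finds LIKE, and therefore
 * hands the grammar
 *
 *    IDENT  NOT_LA  LIKE  IDENT
 *
 * which lets the LALR(1) grammar tell "NOT LIKE" apart from a plain boolean
 * NOT using a single token of lookahead.  The same mechanism turns NULLS_P
 * into NULLS_LA before FIRST_P or LAST_P, and WITH into WITH_LA before TIME
 * or ORDINALITY.
 */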