| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * parser.c |
| 4 | * Main entry point/driver for PostgreSQL grammar |
| 5 | * |
| 6 | * Note that the grammar is not allowed to perform any table access |
| 7 | * (since we need to be able to do basic parsing even while inside an |
| 8 | * aborted transaction). Therefore, the data structures returned by |
| 9 | * the grammar are "raw" parsetrees that still need to be analyzed by |
| 10 | * analyze.c and related files. |
| 11 | * |
| 12 | * |
| 13 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 14 | * Portions Copyright (c) 1994, Regents of the University of California |
| 15 | * |
| 16 | * IDENTIFICATION |
| 17 | * src/backend/parser/parser.c |
| 18 | * |
| 19 | *------------------------------------------------------------------------- |
| 20 | */ |
| 21 | |
| 22 | #include "postgres.h" |
| 23 | |
| 24 | #include "parser/gramparse.h" |
| 25 | #include "parser/parser.h" |
| 26 | |
| 27 | |
| 28 | /* |
| 29 | * raw_parser |
| 30 | * Given a query in string form, do lexical and grammatical analysis. |
| 31 | * |
| 32 | * Returns a list of raw (un-analyzed) parse trees. The immediate elements |
| 33 | * of the list are always RawStmt nodes. |
| 34 | */ |
| 35 | List * |
| 36 | raw_parser(const char *str) |
| 37 | { |
| 38 | core_yyscan_t yyscanner; |
| 39 | base_yy_extra_type ; |
| 40 | int yyresult; |
| 41 | |
| 42 | /* initialize the flex scanner */ |
| 43 | yyscanner = scanner_init(str, &yyextra.core_yy_extra, |
| 44 | &ScanKeywords, ScanKeywordTokens); |
| 45 | |
| 46 | /* base_yylex() only needs this much initialization */ |
| 47 | yyextra.have_lookahead = false; |
| 48 | |
| 49 | /* initialize the bison parser */ |
| 50 | parser_init(&yyextra); |
| 51 | |
| 52 | /* Parse! */ |
| 53 | yyresult = base_yyparse(yyscanner); |
| 54 | |
| 55 | /* Clean up (release memory) */ |
| 56 | scanner_finish(yyscanner); |
| 57 | |
| 58 | if (yyresult) /* error */ |
| 59 | return NIL; |
| 60 | |
| 61 | return yyextra.parsetree; |
| 62 | } |
| 63 | |
| 64 | |
| 65 | /* |
| 66 | * Intermediate filter between parser and core lexer (core_yylex in scan.l). |
| 67 | * |
| 68 | * This filter is needed because in some cases the standard SQL grammar |
| 69 | * requires more than one token lookahead. We reduce these cases to one-token |
| 70 | * lookahead by replacing tokens here, in order to keep the grammar LALR(1). |
| 71 | * |
| 72 | * Using a filter is simpler than trying to recognize multiword tokens |
| 73 | * directly in scan.l, because we'd have to allow for comments between the |
| 74 | * words. Furthermore it's not clear how to do that without re-introducing |
| 75 | * scanner backtrack, which would cost more performance than this filter |
| 76 | * layer does. |
| 77 | * |
| 78 | * The filter also provides a convenient place to translate between |
| 79 | * the core_YYSTYPE and YYSTYPE representations (which are really the |
| 80 | * same thing anyway, but notationally they're different). |
| 81 | */ |
| 82 | int |
| 83 | base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) |
| 84 | { |
| 85 | base_yy_extra_type * = pg_yyget_extra(yyscanner); |
| 86 | int cur_token; |
| 87 | int next_token; |
| 88 | int cur_token_length; |
| 89 | YYLTYPE cur_yylloc; |
| 90 | |
| 91 | /* Get next token --- we might already have it */ |
| 92 | if (yyextra->have_lookahead) |
| 93 | { |
| 94 | cur_token = yyextra->lookahead_token; |
| 95 | lvalp->core_yystype = yyextra->lookahead_yylval; |
| 96 | *llocp = yyextra->lookahead_yylloc; |
| 97 | *(yyextra->lookahead_end) = yyextra->lookahead_hold_char; |
| 98 | yyextra->have_lookahead = false; |
| 99 | } |
| 100 | else |
| 101 | cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); |
| 102 | |
| 103 | /* |
| 104 | * If this token isn't one that requires lookahead, just return it. If it |
| 105 | * does, determine the token length. (We could get that via strlen(), but |
| 106 | * since we have such a small set of possibilities, hardwiring seems |
| 107 | * feasible and more efficient.) |
| 108 | */ |
| 109 | switch (cur_token) |
| 110 | { |
| 111 | case NOT: |
| 112 | cur_token_length = 3; |
| 113 | break; |
| 114 | case NULLS_P: |
| 115 | cur_token_length = 5; |
| 116 | break; |
| 117 | case WITH: |
| 118 | cur_token_length = 4; |
| 119 | break; |
| 120 | default: |
| 121 | return cur_token; |
| 122 | } |
| 123 | |
| 124 | /* |
| 125 | * Identify end+1 of current token. core_yylex() has temporarily stored a |
| 126 | * '\0' here, and will undo that when we call it again. We need to redo |
| 127 | * it to fully revert the lookahead call for error reporting purposes. |
| 128 | */ |
| 129 | yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf + |
| 130 | *llocp + cur_token_length; |
| 131 | Assert(*(yyextra->lookahead_end) == '\0'); |
| 132 | |
| 133 | /* |
| 134 | * Save and restore *llocp around the call. It might look like we could |
| 135 | * avoid this by just passing &lookahead_yylloc to core_yylex(), but that |
| 136 | * does not work because flex actually holds onto the last-passed pointer |
| 137 | * internally, and will use that for error reporting. We need any error |
| 138 | * reports to point to the current token, not the next one. |
| 139 | */ |
| 140 | cur_yylloc = *llocp; |
| 141 | |
| 142 | /* Get next token, saving outputs into lookahead variables */ |
| 143 | next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner); |
| 144 | yyextra->lookahead_token = next_token; |
| 145 | yyextra->lookahead_yylloc = *llocp; |
| 146 | |
| 147 | *llocp = cur_yylloc; |
| 148 | |
| 149 | /* Now revert the un-truncation of the current token */ |
| 150 | yyextra->lookahead_hold_char = *(yyextra->lookahead_end); |
| 151 | *(yyextra->lookahead_end) = '\0'; |
| 152 | |
| 153 | yyextra->have_lookahead = true; |
| 154 | |
| 155 | /* Replace cur_token if needed, based on lookahead */ |
| 156 | switch (cur_token) |
| 157 | { |
| 158 | case NOT: |
| 159 | /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */ |
| 160 | switch (next_token) |
| 161 | { |
| 162 | case BETWEEN: |
| 163 | case IN_P: |
| 164 | case LIKE: |
| 165 | case ILIKE: |
| 166 | case SIMILAR: |
| 167 | cur_token = NOT_LA; |
| 168 | break; |
| 169 | } |
| 170 | break; |
| 171 | |
| 172 | case NULLS_P: |
| 173 | /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */ |
| 174 | switch (next_token) |
| 175 | { |
| 176 | case FIRST_P: |
| 177 | case LAST_P: |
| 178 | cur_token = NULLS_LA; |
| 179 | break; |
| 180 | } |
| 181 | break; |
| 182 | |
| 183 | case WITH: |
| 184 | /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */ |
| 185 | switch (next_token) |
| 186 | { |
| 187 | case TIME: |
| 188 | case ORDINALITY: |
| 189 | cur_token = WITH_LA; |
| 190 | break; |
| 191 | } |
| 192 | break; |
| 193 | } |
| 194 | |
| 195 | return cur_token; |
| 196 | } |
| 197 | |