1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * parser.c |
4 | * Main entry point/driver for PostgreSQL grammar |
5 | * |
6 | * Note that the grammar is not allowed to perform any table access |
7 | * (since we need to be able to do basic parsing even while inside an |
8 | * aborted transaction). Therefore, the data structures returned by |
9 | * the grammar are "raw" parsetrees that still need to be analyzed by |
10 | * analyze.c and related files. |
11 | * |
12 | * |
13 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
14 | * Portions Copyright (c) 1994, Regents of the University of California |
15 | * |
16 | * IDENTIFICATION |
17 | * src/backend/parser/parser.c |
18 | * |
19 | *------------------------------------------------------------------------- |
20 | */ |
21 | |
22 | #include "postgres.h" |
23 | |
24 | #include "parser/gramparse.h" |
25 | #include "parser/parser.h" |
26 | |
27 | |
28 | /* |
29 | * raw_parser |
30 | * Given a query in string form, do lexical and grammatical analysis. |
31 | * |
32 | * Returns a list of raw (un-analyzed) parse trees. The immediate elements |
33 | * of the list are always RawStmt nodes. |
34 | */ |
35 | List * |
36 | raw_parser(const char *str) |
37 | { |
38 | core_yyscan_t yyscanner; |
39 | base_yy_extra_type ; |
40 | int yyresult; |
41 | |
42 | /* initialize the flex scanner */ |
43 | yyscanner = scanner_init(str, &yyextra.core_yy_extra, |
44 | &ScanKeywords, ScanKeywordTokens); |
45 | |
46 | /* base_yylex() only needs this much initialization */ |
47 | yyextra.have_lookahead = false; |
48 | |
49 | /* initialize the bison parser */ |
50 | parser_init(&yyextra); |
51 | |
52 | /* Parse! */ |
53 | yyresult = base_yyparse(yyscanner); |
54 | |
55 | /* Clean up (release memory) */ |
56 | scanner_finish(yyscanner); |
57 | |
58 | if (yyresult) /* error */ |
59 | return NIL; |
60 | |
61 | return yyextra.parsetree; |
62 | } |
63 | |
64 | |
65 | /* |
66 | * Intermediate filter between parser and core lexer (core_yylex in scan.l). |
67 | * |
68 | * This filter is needed because in some cases the standard SQL grammar |
69 | * requires more than one token lookahead. We reduce these cases to one-token |
70 | * lookahead by replacing tokens here, in order to keep the grammar LALR(1). |
71 | * |
72 | * Using a filter is simpler than trying to recognize multiword tokens |
73 | * directly in scan.l, because we'd have to allow for comments between the |
74 | * words. Furthermore it's not clear how to do that without re-introducing |
75 | * scanner backtrack, which would cost more performance than this filter |
76 | * layer does. |
77 | * |
78 | * The filter also provides a convenient place to translate between |
79 | * the core_YYSTYPE and YYSTYPE representations (which are really the |
80 | * same thing anyway, but notationally they're different). |
81 | */ |
82 | int |
83 | base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) |
84 | { |
85 | base_yy_extra_type * = pg_yyget_extra(yyscanner); |
86 | int cur_token; |
87 | int next_token; |
88 | int cur_token_length; |
89 | YYLTYPE cur_yylloc; |
90 | |
91 | /* Get next token --- we might already have it */ |
92 | if (yyextra->have_lookahead) |
93 | { |
94 | cur_token = yyextra->lookahead_token; |
95 | lvalp->core_yystype = yyextra->lookahead_yylval; |
96 | *llocp = yyextra->lookahead_yylloc; |
97 | *(yyextra->lookahead_end) = yyextra->lookahead_hold_char; |
98 | yyextra->have_lookahead = false; |
99 | } |
100 | else |
101 | cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); |
102 | |
103 | /* |
104 | * If this token isn't one that requires lookahead, just return it. If it |
105 | * does, determine the token length. (We could get that via strlen(), but |
106 | * since we have such a small set of possibilities, hardwiring seems |
107 | * feasible and more efficient.) |
108 | */ |
109 | switch (cur_token) |
110 | { |
111 | case NOT: |
112 | cur_token_length = 3; |
113 | break; |
114 | case NULLS_P: |
115 | cur_token_length = 5; |
116 | break; |
117 | case WITH: |
118 | cur_token_length = 4; |
119 | break; |
120 | default: |
121 | return cur_token; |
122 | } |
123 | |
124 | /* |
125 | * Identify end+1 of current token. core_yylex() has temporarily stored a |
126 | * '\0' here, and will undo that when we call it again. We need to redo |
127 | * it to fully revert the lookahead call for error reporting purposes. |
128 | */ |
129 | yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf + |
130 | *llocp + cur_token_length; |
131 | Assert(*(yyextra->lookahead_end) == '\0'); |
132 | |
133 | /* |
134 | * Save and restore *llocp around the call. It might look like we could |
135 | * avoid this by just passing &lookahead_yylloc to core_yylex(), but that |
136 | * does not work because flex actually holds onto the last-passed pointer |
137 | * internally, and will use that for error reporting. We need any error |
138 | * reports to point to the current token, not the next one. |
139 | */ |
140 | cur_yylloc = *llocp; |
141 | |
142 | /* Get next token, saving outputs into lookahead variables */ |
143 | next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner); |
144 | yyextra->lookahead_token = next_token; |
145 | yyextra->lookahead_yylloc = *llocp; |
146 | |
147 | *llocp = cur_yylloc; |
148 | |
149 | /* Now revert the un-truncation of the current token */ |
150 | yyextra->lookahead_hold_char = *(yyextra->lookahead_end); |
151 | *(yyextra->lookahead_end) = '\0'; |
152 | |
153 | yyextra->have_lookahead = true; |
154 | |
155 | /* Replace cur_token if needed, based on lookahead */ |
156 | switch (cur_token) |
157 | { |
158 | case NOT: |
159 | /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */ |
160 | switch (next_token) |
161 | { |
162 | case BETWEEN: |
163 | case IN_P: |
164 | case LIKE: |
165 | case ILIKE: |
166 | case SIMILAR: |
167 | cur_token = NOT_LA; |
168 | break; |
169 | } |
170 | break; |
171 | |
172 | case NULLS_P: |
173 | /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */ |
174 | switch (next_token) |
175 | { |
176 | case FIRST_P: |
177 | case LAST_P: |
178 | cur_token = NULLS_LA; |
179 | break; |
180 | } |
181 | break; |
182 | |
183 | case WITH: |
184 | /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */ |
185 | switch (next_token) |
186 | { |
187 | case TIME: |
188 | case ORDINALITY: |
189 | cur_token = WITH_LA; |
190 | break; |
191 | } |
192 | break; |
193 | } |
194 | |
195 | return cur_token; |
196 | } |
197 | |