1 | /*-------------------------------------------------------------------- |
2 | * Symbols referenced in this file: |
3 | * - raw_parser |
4 | * - base_yylex |
6 | *-------------------------------------------------------------------- |
7 | */ |
8 | |
9 | /*------------------------------------------------------------------------- |
10 | * |
11 | * parser.c |
12 | * Main entry point/driver for PostgreSQL grammar |
13 | * |
14 | * Note that the grammar is not allowed to perform any table access |
15 | * (since we need to be able to do basic parsing even while inside an |
16 | * aborted transaction). Therefore, the data structures returned by |
17 | * the grammar are "raw" parsetrees that still need to be analyzed by |
18 | * analyze.c and related files. |
19 | * |
20 | * |
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
22 | * Portions Copyright (c) 1994, Regents of the University of California |
23 | * |
24 | * IDENTIFICATION |
25 | * src/backend/parser/parser.c |
26 | * |
27 | *------------------------------------------------------------------------- |
28 | */ |
29 | |
30 | #include "pg_functions.hpp" |
31 | |
32 | #include "parser/gramparse.hpp" |
33 | #include "parser/parser.hpp" |
34 | |
35 | |
36 | /* |
37 | * raw_parser |
38 | * Given a query in string form, do lexical and grammatical analysis. |
39 | * |
40 | * Returns a list of raw (un-analyzed) parse trees. The immediate elements |
41 | * of the list are always PGRawStmt nodes. |
42 | */ |
43 | PGList * |
44 | raw_parser(const char *str) |
45 | { |
46 | core_yyscan_t yyscanner; |
	base_yy_extra_type yyextra;
48 | int yyresult; |
49 | |
50 | /* initialize the flex scanner */ |
51 | yyscanner = scanner_init(str, &yyextra.core_yy_extra, |
52 | ScanKeywords, NumScanKeywords); |
53 | |
54 | /* base_yylex() only needs this much initialization */ |
55 | yyextra.have_lookahead = false; |
56 | |
57 | /* initialize the bison parser */ |
58 | parser_init(&yyextra); |
59 | |
60 | /* Parse! */ |
61 | yyresult = base_yyparse(yyscanner); |
62 | |
63 | /* Clean up (release memory) */ |
64 | scanner_finish(yyscanner); |
65 | |
66 | if (yyresult) /* error */ |
67 | return NIL; |
68 | |
69 | return yyextra.parsetree; |
70 | } |
71 | |
72 | |
73 | /* |
74 | * Intermediate filter between parser and core lexer (core_yylex in scan.l). |
75 | * |
76 | * This filter is needed because in some cases the standard SQL grammar |
77 | * requires more than one token lookahead. We reduce these cases to one-token |
78 | * lookahead by replacing tokens here, in order to keep the grammar LALR(1). |
79 | * |
80 | * Using a filter is simpler than trying to recognize multiword tokens |
81 | * directly in scan.l, because we'd have to allow for comments between the |
82 | * words. Furthermore it's not clear how to do that without re-introducing |
83 | * scanner backtrack, which would cost more performance than this filter |
84 | * layer does. |
85 | * |
86 | * The filter also provides a convenient place to translate between |
87 | * the core_YYSTYPE and YYSTYPE representations (which are really the |
88 | * same thing anyway, but notationally they're different). |
89 | */ |
90 | int |
91 | base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) |
92 | { |
	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
94 | int cur_token; |
95 | int next_token; |
96 | int cur_token_length; |
97 | YYLTYPE cur_yylloc; |
98 | |
99 | /* Get next token --- we might already have it */ |
100 | if (yyextra->have_lookahead) |
101 | { |
102 | cur_token = yyextra->lookahead_token; |
103 | lvalp->core_yystype = yyextra->lookahead_yylval; |
104 | *llocp = yyextra->lookahead_yylloc; |
105 | *(yyextra->lookahead_end) = yyextra->lookahead_hold_char; |
106 | yyextra->have_lookahead = false; |
107 | } |
108 | else |
109 | cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); |
110 | |
111 | /* |
112 | * If this token isn't one that requires lookahead, just return it. If it |
113 | * does, determine the token length. (We could get that via strlen(), but |
114 | * since we have such a small set of possibilities, hardwiring seems |
115 | * feasible and more efficient.) |
116 | */ |
117 | switch (cur_token) |
118 | { |
119 | case NOT: |
120 | cur_token_length = 3; |
121 | break; |
122 | case NULLS_P: |
123 | cur_token_length = 5; |
124 | break; |
125 | case WITH: |
126 | cur_token_length = 4; |
127 | break; |
128 | default: |
129 | return cur_token; |
130 | } |
131 | |
132 | /* |
133 | * Identify end+1 of current token. core_yylex() has temporarily stored a |
134 | * '\0' here, and will undo that when we call it again. We need to redo |
135 | * it to fully revert the lookahead call for error reporting purposes. |
136 | */ |
137 | yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf + |
138 | *llocp + cur_token_length; |
139 | Assert(*(yyextra->lookahead_end) == '\0'); |
140 | |
141 | /* |
142 | * Save and restore *llocp around the call. It might look like we could |
143 | * avoid this by just passing &lookahead_yylloc to core_yylex(), but that |
144 | * does not work because flex actually holds onto the last-passed pointer |
145 | * internally, and will use that for error reporting. We need any error |
146 | * reports to point to the current token, not the next one. |
147 | */ |
148 | cur_yylloc = *llocp; |
149 | |
150 | /* Get next token, saving outputs into lookahead variables */ |
151 | next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner); |
152 | yyextra->lookahead_token = next_token; |
153 | yyextra->lookahead_yylloc = *llocp; |
154 | |
155 | *llocp = cur_yylloc; |
156 | |
157 | /* Now revert the un-truncation of the current token */ |
158 | yyextra->lookahead_hold_char = *(yyextra->lookahead_end); |
159 | *(yyextra->lookahead_end) = '\0'; |
160 | |
161 | yyextra->have_lookahead = true; |
162 | |
163 | /* Replace cur_token if needed, based on lookahead */ |
164 | switch (cur_token) |
165 | { |
166 | case NOT: |
167 | /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */ |
168 | switch (next_token) |
169 | { |
170 | case BETWEEN: |
171 | case IN_P: |
172 | case LIKE: |
173 | case ILIKE: |
174 | case SIMILAR: |
175 | cur_token = NOT_LA; |
176 | break; |
177 | } |
178 | break; |
179 | |
180 | case NULLS_P: |
181 | /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */ |
182 | switch (next_token) |
183 | { |
184 | case FIRST_P: |
185 | case LAST_P: |
186 | cur_token = NULLS_LA; |
187 | break; |
188 | } |
189 | break; |
190 | |
191 | case WITH: |
192 | /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */ |
193 | switch (next_token) |
194 | { |
195 | case TIME: |
196 | case ORDINALITY: |
197 | cur_token = WITH_LA; |
198 | break; |
199 | } |
200 | break; |
201 | } |
202 | |
203 | return cur_token; |
204 | } |
205 | |