1/*-------------------------------------------------------------------------
2 *
3 * parser.c
4 * Main entry point/driver for PostgreSQL grammar
5 *
6 * Note that the grammar is not allowed to perform any table access
7 * (since we need to be able to do basic parsing even while inside an
8 * aborted transaction). Therefore, the data structures returned by
9 * the grammar are "raw" parsetrees that still need to be analyzed by
10 * analyze.c and related files.
11 *
12 *
13 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
14 * Portions Copyright (c) 1994, Regents of the University of California
15 *
16 * IDENTIFICATION
17 * src/backend/parser/parser.c
18 *
19 *-------------------------------------------------------------------------
20 */
21
22#include "postgres.h"
23
24#include "parser/gramparse.h"
25#include "parser/parser.h"
26
27
28/*
29 * raw_parser
30 * Given a query in string form, do lexical and grammatical analysis.
31 *
32 * Returns a list of raw (un-analyzed) parse trees. The immediate elements
33 * of the list are always RawStmt nodes.
34 */
35List *
36raw_parser(const char *str)
37{
38 core_yyscan_t yyscanner;
39 base_yy_extra_type yyextra;
40 int yyresult;
41
42 /* initialize the flex scanner */
43 yyscanner = scanner_init(str, &yyextra.core_yy_extra,
44 &ScanKeywords, ScanKeywordTokens);
45
46 /* base_yylex() only needs this much initialization */
47 yyextra.have_lookahead = false;
48
49 /* initialize the bison parser */
50 parser_init(&yyextra);
51
52 /* Parse! */
53 yyresult = base_yyparse(yyscanner);
54
55 /* Clean up (release memory) */
56 scanner_finish(yyscanner);
57
58 if (yyresult) /* error */
59 return NIL;
60
61 return yyextra.parsetree;
62}
63
64
65/*
66 * Intermediate filter between parser and core lexer (core_yylex in scan.l).
67 *
68 * This filter is needed because in some cases the standard SQL grammar
69 * requires more than one token lookahead. We reduce these cases to one-token
70 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
71 *
72 * Using a filter is simpler than trying to recognize multiword tokens
73 * directly in scan.l, because we'd have to allow for comments between the
74 * words. Furthermore it's not clear how to do that without re-introducing
75 * scanner backtrack, which would cost more performance than this filter
76 * layer does.
77 *
78 * The filter also provides a convenient place to translate between
79 * the core_YYSTYPE and YYSTYPE representations (which are really the
80 * same thing anyway, but notationally they're different).
81 */
82int
83base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
84{
85 base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
86 int cur_token;
87 int next_token;
88 int cur_token_length;
89 YYLTYPE cur_yylloc;
90
91 /* Get next token --- we might already have it */
92 if (yyextra->have_lookahead)
93 {
94 cur_token = yyextra->lookahead_token;
95 lvalp->core_yystype = yyextra->lookahead_yylval;
96 *llocp = yyextra->lookahead_yylloc;
97 *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
98 yyextra->have_lookahead = false;
99 }
100 else
101 cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
102
103 /*
104 * If this token isn't one that requires lookahead, just return it. If it
105 * does, determine the token length. (We could get that via strlen(), but
106 * since we have such a small set of possibilities, hardwiring seems
107 * feasible and more efficient.)
108 */
109 switch (cur_token)
110 {
111 case NOT:
112 cur_token_length = 3;
113 break;
114 case NULLS_P:
115 cur_token_length = 5;
116 break;
117 case WITH:
118 cur_token_length = 4;
119 break;
120 default:
121 return cur_token;
122 }
123
124 /*
125 * Identify end+1 of current token. core_yylex() has temporarily stored a
126 * '\0' here, and will undo that when we call it again. We need to redo
127 * it to fully revert the lookahead call for error reporting purposes.
128 */
129 yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
130 *llocp + cur_token_length;
131 Assert(*(yyextra->lookahead_end) == '\0');
132
133 /*
134 * Save and restore *llocp around the call. It might look like we could
135 * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
136 * does not work because flex actually holds onto the last-passed pointer
137 * internally, and will use that for error reporting. We need any error
138 * reports to point to the current token, not the next one.
139 */
140 cur_yylloc = *llocp;
141
142 /* Get next token, saving outputs into lookahead variables */
143 next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
144 yyextra->lookahead_token = next_token;
145 yyextra->lookahead_yylloc = *llocp;
146
147 *llocp = cur_yylloc;
148
149 /* Now revert the un-truncation of the current token */
150 yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
151 *(yyextra->lookahead_end) = '\0';
152
153 yyextra->have_lookahead = true;
154
155 /* Replace cur_token if needed, based on lookahead */
156 switch (cur_token)
157 {
158 case NOT:
159 /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
160 switch (next_token)
161 {
162 case BETWEEN:
163 case IN_P:
164 case LIKE:
165 case ILIKE:
166 case SIMILAR:
167 cur_token = NOT_LA;
168 break;
169 }
170 break;
171
172 case NULLS_P:
173 /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
174 switch (next_token)
175 {
176 case FIRST_P:
177 case LAST_P:
178 cur_token = NULLS_LA;
179 break;
180 }
181 break;
182
183 case WITH:
184 /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
185 switch (next_token)
186 {
187 case TIME:
188 case ORDINALITY:
189 cur_token = WITH_LA;
190 break;
191 }
192 break;
193 }
194
195 return cur_token;
196}
197