1/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - raw_parser
 * - base_yylex
6 *--------------------------------------------------------------------
7 */
8
9/*-------------------------------------------------------------------------
10 *
11 * parser.c
12 * Main entry point/driver for PostgreSQL grammar
13 *
14 * Note that the grammar is not allowed to perform any table access
15 * (since we need to be able to do basic parsing even while inside an
16 * aborted transaction). Therefore, the data structures returned by
17 * the grammar are "raw" parsetrees that still need to be analyzed by
18 * analyze.c and related files.
19 *
20 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
22 * Portions Copyright (c) 1994, Regents of the University of California
23 *
24 * IDENTIFICATION
25 * src/backend/parser/parser.c
26 *
27 *-------------------------------------------------------------------------
28 */
29
30#include "pg_functions.hpp"
31
32#include "parser/gramparse.hpp"
33#include "parser/parser.hpp"
34
35
36/*
37 * raw_parser
38 * Given a query in string form, do lexical and grammatical analysis.
39 *
40 * Returns a list of raw (un-analyzed) parse trees. The immediate elements
41 * of the list are always PGRawStmt nodes.
42 */
43PGList *
44raw_parser(const char *str)
45{
46 core_yyscan_t yyscanner;
47 base_yy_extra_type yyextra;
48 int yyresult;
49
50 /* initialize the flex scanner */
51 yyscanner = scanner_init(str, &yyextra.core_yy_extra,
52 ScanKeywords, NumScanKeywords);
53
54 /* base_yylex() only needs this much initialization */
55 yyextra.have_lookahead = false;
56
57 /* initialize the bison parser */
58 parser_init(&yyextra);
59
60 /* Parse! */
61 yyresult = base_yyparse(yyscanner);
62
63 /* Clean up (release memory) */
64 scanner_finish(yyscanner);
65
66 if (yyresult) /* error */
67 return NIL;
68
69 return yyextra.parsetree;
70}
71
72
73/*
74 * Intermediate filter between parser and core lexer (core_yylex in scan.l).
75 *
76 * This filter is needed because in some cases the standard SQL grammar
77 * requires more than one token lookahead. We reduce these cases to one-token
78 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
79 *
80 * Using a filter is simpler than trying to recognize multiword tokens
81 * directly in scan.l, because we'd have to allow for comments between the
82 * words. Furthermore it's not clear how to do that without re-introducing
83 * scanner backtrack, which would cost more performance than this filter
84 * layer does.
85 *
86 * The filter also provides a convenient place to translate between
87 * the core_YYSTYPE and YYSTYPE representations (which are really the
88 * same thing anyway, but notationally they're different).
89 */
90int
91base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
92{
93 base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
94 int cur_token;
95 int next_token;
96 int cur_token_length;
97 YYLTYPE cur_yylloc;
98
99 /* Get next token --- we might already have it */
100 if (yyextra->have_lookahead)
101 {
102 cur_token = yyextra->lookahead_token;
103 lvalp->core_yystype = yyextra->lookahead_yylval;
104 *llocp = yyextra->lookahead_yylloc;
105 *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
106 yyextra->have_lookahead = false;
107 }
108 else
109 cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
110
111 /*
112 * If this token isn't one that requires lookahead, just return it. If it
113 * does, determine the token length. (We could get that via strlen(), but
114 * since we have such a small set of possibilities, hardwiring seems
115 * feasible and more efficient.)
116 */
117 switch (cur_token)
118 {
119 case NOT:
120 cur_token_length = 3;
121 break;
122 case NULLS_P:
123 cur_token_length = 5;
124 break;
125 case WITH:
126 cur_token_length = 4;
127 break;
128 default:
129 return cur_token;
130 }
131
132 /*
133 * Identify end+1 of current token. core_yylex() has temporarily stored a
134 * '\0' here, and will undo that when we call it again. We need to redo
135 * it to fully revert the lookahead call for error reporting purposes.
136 */
137 yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
138 *llocp + cur_token_length;
139 Assert(*(yyextra->lookahead_end) == '\0');
140
141 /*
142 * Save and restore *llocp around the call. It might look like we could
143 * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
144 * does not work because flex actually holds onto the last-passed pointer
145 * internally, and will use that for error reporting. We need any error
146 * reports to point to the current token, not the next one.
147 */
148 cur_yylloc = *llocp;
149
150 /* Get next token, saving outputs into lookahead variables */
151 next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
152 yyextra->lookahead_token = next_token;
153 yyextra->lookahead_yylloc = *llocp;
154
155 *llocp = cur_yylloc;
156
157 /* Now revert the un-truncation of the current token */
158 yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
159 *(yyextra->lookahead_end) = '\0';
160
161 yyextra->have_lookahead = true;
162
163 /* Replace cur_token if needed, based on lookahead */
164 switch (cur_token)
165 {
166 case NOT:
167 /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
168 switch (next_token)
169 {
170 case BETWEEN:
171 case IN_P:
172 case LIKE:
173 case ILIKE:
174 case SIMILAR:
175 cur_token = NOT_LA;
176 break;
177 }
178 break;
179
180 case NULLS_P:
181 /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
182 switch (next_token)
183 {
184 case FIRST_P:
185 case LAST_P:
186 cur_token = NULLS_LA;
187 break;
188 }
189 break;
190
191 case WITH:
192 /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
193 switch (next_token)
194 {
195 case TIME:
196 case ORDINALITY:
197 cur_token = WITH_LA;
198 break;
199 }
200 break;
201 }
202
203 return cur_token;
204}
205