/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - raw_parser
 * - base_yylex
 *--------------------------------------------------------------------
 */

/*-------------------------------------------------------------------------
 *
 * parser.c
 *    Main entry point/driver for PostgreSQL grammar
 *
 * Note that the grammar is not allowed to perform any table access
 * (since we need to be able to do basic parsing even while inside an
 * aborted transaction).  Therefore, the data structures returned by
 * the grammar are "raw" parsetrees that still need to be analyzed by
 * analyze.c and related files.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/parser/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "pg_functions.hpp"

#include "parser/gramparse.hpp"
#include "parser/parser.hpp"


/*
 * raw_parser
 *    Given a query in string form, do lexical and grammatical analysis.
 *
 * Returns a list of raw (un-analyzed) parse trees.  The immediate elements
 * of the list are always PGRawStmt nodes.
 */
PGList *
raw_parser(const char *str)
{
    core_yyscan_t yyscanner;
    base_yy_extra_type yyextra;
    int yyresult;

    /* initialize the flex scanner */
    yyscanner = scanner_init(str, &yyextra.core_yy_extra,
                             ScanKeywords, NumScanKeywords);

    /* base_yylex() only needs this much initialization */
    yyextra.have_lookahead = false;

    /* initialize the bison parser */
    parser_init(&yyextra);

    /* Parse! */
    yyresult = base_yyparse(yyscanner);

    /* Clean up (release memory) */
    scanner_finish(yyscanner);

    if (yyresult)               /* error */
        return NIL;

    return yyextra.parsetree;
}

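/*
 * Minimal usage sketch for raw_parser() (illustrative only, hence kept
 * un-compiled).  The query text and the way the result is consumed are
 * assumptions made for this example, not part of this file's API.
 */
#if 0
static void
raw_parser_example(void)
{
    PGList *parsetree_list;

    /* lex and parse; each list element is a PGRawStmt wrapping one statement */
    parsetree_list = raw_parser("SELECT 1; SELECT 2;");

    if (parsetree_list == NIL)
    {
        /* parse failed (or the input contained no statements) */
        return;
    }

    /* the raw trees still need semantic analysis (analyze.c and friends) */
}
#endif
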
/*
 * Intermediate filter between parser and core lexer (core_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
 *
 * The filter also provides a convenient place to translate between
 * the core_YYSTYPE and YYSTYPE representations (which are really the
 * same thing anyway, but notationally they're different).
 */
int
base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
{
    base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
    int cur_token;
    int next_token;
    int cur_token_length;
    YYLTYPE cur_yylloc;

    /* Get next token --- we might already have it */
    if (yyextra->have_lookahead)
    {
        cur_token = yyextra->lookahead_token;
        lvalp->core_yystype = yyextra->lookahead_yylval;
        *llocp = yyextra->lookahead_yylloc;
        *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
        yyextra->have_lookahead = false;
    }
    else
        cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);

    /*
     * If this token isn't one that requires lookahead, just return it.  If it
     * does, determine the token length.  (We could get that via strlen(), but
     * since we have such a small set of possibilities, hardwiring seems
     * feasible and more efficient.)
     */
    switch (cur_token)
    {
        case NOT:
            cur_token_length = 3;
            break;
        case NULLS_P:
            cur_token_length = 5;
            break;
        case WITH:
            cur_token_length = 4;
            break;
        default:
            return cur_token;
    }

    /*
     * Identify end+1 of current token.  core_yylex() has temporarily stored a
     * '\0' here, and will undo that when we call it again.  We need to redo
     * it to fully revert the lookahead call for error reporting purposes.
     */
    yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
        *llocp + cur_token_length;
    Assert(*(yyextra->lookahead_end) == '\0');

    /*
     * Save and restore *llocp around the call.  It might look like we could
     * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
     * does not work because flex actually holds onto the last-passed pointer
     * internally, and will use that for error reporting.  We need any error
     * reports to point to the current token, not the next one.
     */
    cur_yylloc = *llocp;

    /* Get next token, saving outputs into lookahead variables */
    next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
    yyextra->lookahead_token = next_token;
    yyextra->lookahead_yylloc = *llocp;

    *llocp = cur_yylloc;

    /* Now revert the un-truncation of the current token */
    yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
    *(yyextra->lookahead_end) = '\0';

    yyextra->have_lookahead = true;

    /* Replace cur_token if needed, based on lookahead */
    switch (cur_token)
    {
        case NOT:
            /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
            switch (next_token)
            {
                case BETWEEN:
                case IN_P:
                case LIKE:
                case ILIKE:
                case SIMILAR:
                    cur_token = NOT_LA;
                    break;
            }
            break;

        case NULLS_P:
            /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
            switch (next_token)
            {
                case FIRST_P:
                case LAST_P:
                    cur_token = NULLS_LA;
                    break;
            }
            break;

        case WITH:
            /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
            switch (next_token)
            {
                case TIME:
                case ORDINALITY:
                    cur_token = WITH_LA;
                    break;
            }
            break;
    }

    return cur_token;
}
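/*
 * Worked example for the lookahead filter above (illustrative only).  For
 * the input "x NOT LIKE y" the core lexer emits the token stream
 *
 *    IDENT  NOT  LIKE  IDENT
 *
 * base_yylex() sees NOT, peeks one token ahead, finds LIKE, and therefore
 * hands the grammar
 *
 *    IDENT  NOT_LA  LIKE  IDENT
 *
 * which lets the LALR(1) grammar tell "NOT LIKE" apart from a plain boolean
 * NOT using a single token of lookahead.  The same mechanism turns NULLS_P
 * into NULLS_LA before FIRST_P or LAST_P, and WITH into WITH_LA before TIME
 * or ORDINALITY.
 */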