parser.c source code [PostgreSQL/src/backend/parser/parser.c]

/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * Note that the grammar is not allowed to perform any table access
 * (since we need to be able to do basic parsing even while inside an
 * aborted transaction).  Therefore, the data structures returned by
 * the grammar are "raw" parsetrees that still need to be analyzed by
 * analyze.c and related files.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/parser/parser.c
 *
 *-------------------------------------------------------------------------
 */
21
22	#include "postgres.h"
23
24	#include "parser/gramparse.h"
25	#include "parser/parser.h"
26
27
28	/*
29	* raw_parser
30	* Given a query in string form, do lexical and grammatical analysis.
31	*
32	* Returns a list of raw (un-analyzed) parse trees. The immediate elements
33	* of the list are always RawStmt nodes.
34	*/
35	List *
36	raw_parser(const char *str)
37	{
38	core_yyscan_t yyscanner;
39	base_yy_extra_type yyextra;
40	int yyresult;
41
42	/ initialize the flex scanner /
43	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
44	&ScanKeywords, ScanKeywordTokens);
45
46	/ base_yylex() only needs this much initialization /
47	yyextra.have_lookahead = false;
48
49	/ initialize the bison parser /
50	parser_init(&yyextra);
51
52	/ Parse! /
53	yyresult = base_yyparse(yyscanner);
54
55	/ Clean up (release memory) /
56	scanner_finish(yyscanner);
57
58	if (yyresult) / error /
59	return NIL;
60
61	return yyextra.parsetree;
62	}
63
64
65	/*
66	* Intermediate filter between parser and core lexer (core_yylex in scan.l).
67	*
68	* This filter is needed because in some cases the standard SQL grammar
69	* requires more than one token lookahead. We reduce these cases to one-token
70	* lookahead by replacing tokens here, in order to keep the grammar LALR(1).
71	*
72	* Using a filter is simpler than trying to recognize multiword tokens
73	* directly in scan.l, because we'd have to allow for comments between the
74	* words. Furthermore it's not clear how to do that without re-introducing
75	* scanner backtrack, which would cost more performance than this filter
76	* layer does.
77	*
78	* The filter also provides a convenient place to translate between
79	* the core_YYSTYPE and YYSTYPE representations (which are really the
80	* same thing anyway, but notationally they're different).
81	*/
82	int
83	base_yylex(YYSTYPE lvalp, YYLTYPE llocp, core_yyscan_t yyscanner)
84	{
85	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
86	int cur_token;
87	int next_token;
88	int cur_token_length;
89	YYLTYPE cur_yylloc;
90
91	/ Get next token --- we might already have it /
92	if (yyextra->have_lookahead)
93	{
94	cur_token = yyextra->lookahead_token;
95	lvalp->core_yystype = yyextra->lookahead_yylval;
96	*llocp = yyextra->lookahead_yylloc;
97	*(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
98	yyextra->have_lookahead = false;
99	}
100	else
101	cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
102
103	/*
104	* If this token isn't one that requires lookahead, just return it. If it
105	* does, determine the token length. (We could get that via strlen(), but
106	* since we have such a small set of possibilities, hardwiring seems
107	* feasible and more efficient.)
108	*/
109	switch (cur_token)
110	{
111	case NOT:
112	cur_token_length = `3`;
113	break;
114	case NULLS_P:
115	cur_token_length = `5`;
116	break;
117	case WITH:
118	cur_token_length = `4`;
119	break;
120	default:
121	return cur_token;
122	}
123
124	/*
125	* Identify end+1 of current token. core_yylex() has temporarily stored a
126	* '\0' here, and will undo that when we call it again. We need to redo
127	* it to fully revert the lookahead call for error reporting purposes.
128	*/
129	yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
130	*llocp + cur_token_length;
131	Assert(*(yyextra->lookahead_end) == `'\0'`);
132
133	/*
134	* Save and restore *llocp around the call. It might look like we could
135	* avoid this by just passing &lookahead_yylloc to core_yylex(), but that
136	* does not work because flex actually holds onto the last-passed pointer
137	* internally, and will use that for error reporting. We need any error
138	* reports to point to the current token, not the next one.
139	*/
140	cur_yylloc = *llocp;
141
142	/ Get next token, saving outputs into lookahead variables /
143	next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
144	yyextra->lookahead_token = next_token;
145	yyextra->lookahead_yylloc = *llocp;
146
147	*llocp = cur_yylloc;
148
149	/ Now revert the un-truncation of the current token /
150	yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
151	*(yyextra->lookahead_end) = `'\0'`;
152
153	yyextra->have_lookahead = true;
154
155	/ Replace cur_token if needed, based on lookahead /
156	switch (cur_token)
157	{
158	case NOT:
159	/ Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc /
160	switch (next_token)
161	{
162	case BETWEEN:
163	case IN_P:
164	case LIKE:
165	case ILIKE:
166	case SIMILAR:
167	cur_token = NOT_LA;
168	break;
169	}
170	break;
171
172	case NULLS_P:
173	/ Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST /
174	switch (next_token)
175	{
176	case FIRST_P:
177	case LAST_P:
178	cur_token = NULLS_LA;
179	break;
180	}
181	break;
182
183	case WITH:
184	/ Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY /
185	switch (next_token)
186	{
187	case TIME:
188	case ORDINALITY:
189	cur_token = WITH_LA;
190	break;
191	}
192	break;
193	}
194
195	return cur_token;
196	}
197

Browse the source code of PostgreSQL/src/backend/parser/parser.c