1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * parser.c |
4 | * Main entry point/driver for PostgreSQL grammar |
5 | * |
6 | * This should match src/backend/parser/parser.c, except that we do not |
7 | * need to bother with re-entrant interfaces. |
8 | * |
9 | * |
10 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
11 | * Portions Copyright (c) 1994, Regents of the University of California |
12 | * |
13 | * IDENTIFICATION |
14 | * src/interfaces/ecpg/preproc/parser.c |
15 | * |
16 | *------------------------------------------------------------------------- |
17 | */ |
18 | |
19 | #include "postgres_fe.h" |
20 | |
21 | #include "preproc_extern.h" |
22 | #include "preproc.h" |
23 | |
24 | |
/*
 * One-token lookahead state maintained by filtered_base_yylex().
 *
 * Everything except have_lookahead is written when the filter peeks at the
 * token following NOT, NULLS or WITH, and is read back on the next call
 * only if have_lookahead is true.
 */

/* true when the lookahead_* variables below hold a pending token */
static bool have_lookahead;
/* token code returned by the lookahead call to base_yylex() */
static int lookahead_token;
/* base_yylval as it was left by the lookahead call */
static YYSTYPE lookahead_yylval;
/* base_yylloc as it was left by the lookahead call */
static YYLTYPE lookahead_yylloc;
/* base_yytext as it was left by the lookahead call (text of lookahead token) */
static char *lookahead_yytext;
/* points one past the last character of the token that triggered lookahead */
static char *lookahead_end;
/* character overwritten with '\0' at *lookahead_end; put back on next call */
static char lookahead_hold_char;
32 | |
33 | |
34 | /* |
35 | * Intermediate filter between parser and base lexer (base_yylex in scan.l). |
36 | * |
37 | * This filter is needed because in some cases the standard SQL grammar |
38 | * requires more than one token lookahead. We reduce these cases to one-token |
39 | * lookahead by replacing tokens here, in order to keep the grammar LALR(1). |
40 | * |
41 | * Using a filter is simpler than trying to recognize multiword tokens |
42 | * directly in scan.l, because we'd have to allow for comments between the |
43 | * words. Furthermore it's not clear how to do that without re-introducing |
44 | * scanner backtrack, which would cost more performance than this filter |
45 | * layer does. |
46 | */ |
47 | int |
48 | filtered_base_yylex(void) |
49 | { |
50 | int cur_token; |
51 | int next_token; |
52 | int cur_token_length; |
53 | YYSTYPE cur_yylval; |
54 | YYLTYPE cur_yylloc; |
55 | char *cur_yytext; |
56 | |
57 | /* Get next token --- we might already have it */ |
58 | if (have_lookahead) |
59 | { |
60 | cur_token = lookahead_token; |
61 | base_yylval = lookahead_yylval; |
62 | base_yylloc = lookahead_yylloc; |
63 | base_yytext = lookahead_yytext; |
64 | *lookahead_end = lookahead_hold_char; |
65 | have_lookahead = false; |
66 | } |
67 | else |
68 | cur_token = base_yylex(); |
69 | |
70 | /* |
71 | * If this token isn't one that requires lookahead, just return it. If it |
72 | * does, determine the token length. (We could get that via strlen(), but |
73 | * since we have such a small set of possibilities, hardwiring seems |
74 | * feasible and more efficient.) |
75 | */ |
76 | switch (cur_token) |
77 | { |
78 | case NOT: |
79 | cur_token_length = 3; |
80 | break; |
81 | case NULLS_P: |
82 | cur_token_length = 5; |
83 | break; |
84 | case WITH: |
85 | cur_token_length = 4; |
86 | break; |
87 | default: |
88 | return cur_token; |
89 | } |
90 | |
91 | /* |
92 | * Identify end+1 of current token. base_yylex() has temporarily stored a |
93 | * '\0' here, and will undo that when we call it again. We need to redo |
94 | * it to fully revert the lookahead call for error reporting purposes. |
95 | */ |
96 | lookahead_end = base_yytext + cur_token_length; |
97 | Assert(*lookahead_end == '\0'); |
98 | |
99 | /* Save and restore lexer output variables around the call */ |
100 | cur_yylval = base_yylval; |
101 | cur_yylloc = base_yylloc; |
102 | cur_yytext = base_yytext; |
103 | |
104 | /* Get next token, saving outputs into lookahead variables */ |
105 | next_token = base_yylex(); |
106 | |
107 | lookahead_token = next_token; |
108 | lookahead_yylval = base_yylval; |
109 | lookahead_yylloc = base_yylloc; |
110 | lookahead_yytext = base_yytext; |
111 | |
112 | base_yylval = cur_yylval; |
113 | base_yylloc = cur_yylloc; |
114 | base_yytext = cur_yytext; |
115 | |
116 | /* Now revert the un-truncation of the current token */ |
117 | lookahead_hold_char = *lookahead_end; |
118 | *lookahead_end = '\0'; |
119 | |
120 | have_lookahead = true; |
121 | |
122 | /* Replace cur_token if needed, based on lookahead */ |
123 | switch (cur_token) |
124 | { |
125 | case NOT: |
126 | /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */ |
127 | switch (next_token) |
128 | { |
129 | case BETWEEN: |
130 | case IN_P: |
131 | case LIKE: |
132 | case ILIKE: |
133 | case SIMILAR: |
134 | cur_token = NOT_LA; |
135 | break; |
136 | } |
137 | break; |
138 | |
139 | case NULLS_P: |
140 | /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */ |
141 | switch (next_token) |
142 | { |
143 | case FIRST_P: |
144 | case LAST_P: |
145 | cur_token = NULLS_LA; |
146 | break; |
147 | } |
148 | break; |
149 | |
150 | case WITH: |
151 | /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */ |
152 | switch (next_token) |
153 | { |
154 | case TIME: |
155 | case ORDINALITY: |
156 | cur_token = WITH_LA; |
157 | break; |
158 | } |
159 | break; |
160 | } |
161 | |
162 | return cur_token; |
163 | } |
164 | |