| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * parser.c |
| 4 | * Main entry point/driver for PostgreSQL grammar |
| 5 | * |
| 6 | * This should match src/backend/parser/parser.c, except that we do not |
| 7 | * need to bother with re-entrant interfaces. |
| 8 | * |
| 9 | * |
| 10 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 11 | * Portions Copyright (c) 1994, Regents of the University of California |
| 12 | * |
| 13 | * IDENTIFICATION |
| 14 | * src/interfaces/ecpg/preproc/parser.c |
| 15 | * |
| 16 | *------------------------------------------------------------------------- |
| 17 | */ |
| 18 | |
| 19 | #include "postgres_fe.h" |
| 20 | |
| 21 | #include "preproc_extern.h" |
| 22 | #include "preproc.h" |
| 23 | |
| 24 | |
| 25 | static bool have_lookahead; /* is lookahead info valid? */ |
| 26 | static int lookahead_token; /* one-token lookahead */ |
| 27 | static YYSTYPE lookahead_yylval; /* yylval for lookahead token */ |
| 28 | static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ |
| 29 | static char *lookahead_yytext; /* start current token */ |
| 30 | static char *lookahead_end; /* end of current token */ |
| 31 | static char lookahead_hold_char; /* to be put back at *lookahead_end */ |
| 32 | |
| 33 | |
| 34 | /* |
| 35 | * Intermediate filter between parser and base lexer (base_yylex in scan.l). |
| 36 | * |
| 37 | * This filter is needed because in some cases the standard SQL grammar |
| 38 | * requires more than one token lookahead. We reduce these cases to one-token |
| 39 | * lookahead by replacing tokens here, in order to keep the grammar LALR(1). |
| 40 | * |
| 41 | * Using a filter is simpler than trying to recognize multiword tokens |
| 42 | * directly in scan.l, because we'd have to allow for comments between the |
| 43 | * words. Furthermore it's not clear how to do that without re-introducing |
| 44 | * scanner backtrack, which would cost more performance than this filter |
| 45 | * layer does. |
| 46 | */ |
| 47 | int |
| 48 | filtered_base_yylex(void) |
| 49 | { |
| 50 | int cur_token; |
| 51 | int next_token; |
| 52 | int cur_token_length; |
| 53 | YYSTYPE cur_yylval; |
| 54 | YYLTYPE cur_yylloc; |
| 55 | char *cur_yytext; |
| 56 | |
| 57 | /* Get next token --- we might already have it */ |
| 58 | if (have_lookahead) |
| 59 | { |
| 60 | cur_token = lookahead_token; |
| 61 | base_yylval = lookahead_yylval; |
| 62 | base_yylloc = lookahead_yylloc; |
| 63 | base_yytext = lookahead_yytext; |
| 64 | *lookahead_end = lookahead_hold_char; |
| 65 | have_lookahead = false; |
| 66 | } |
| 67 | else |
| 68 | cur_token = base_yylex(); |
| 69 | |
| 70 | /* |
| 71 | * If this token isn't one that requires lookahead, just return it. If it |
| 72 | * does, determine the token length. (We could get that via strlen(), but |
| 73 | * since we have such a small set of possibilities, hardwiring seems |
| 74 | * feasible and more efficient.) |
| 75 | */ |
| 76 | switch (cur_token) |
| 77 | { |
| 78 | case NOT: |
| 79 | cur_token_length = 3; |
| 80 | break; |
| 81 | case NULLS_P: |
| 82 | cur_token_length = 5; |
| 83 | break; |
| 84 | case WITH: |
| 85 | cur_token_length = 4; |
| 86 | break; |
| 87 | default: |
| 88 | return cur_token; |
| 89 | } |
| 90 | |
| 91 | /* |
| 92 | * Identify end+1 of current token. base_yylex() has temporarily stored a |
| 93 | * '\0' here, and will undo that when we call it again. We need to redo |
| 94 | * it to fully revert the lookahead call for error reporting purposes. |
| 95 | */ |
| 96 | lookahead_end = base_yytext + cur_token_length; |
| 97 | Assert(*lookahead_end == '\0'); |
| 98 | |
| 99 | /* Save and restore lexer output variables around the call */ |
| 100 | cur_yylval = base_yylval; |
| 101 | cur_yylloc = base_yylloc; |
| 102 | cur_yytext = base_yytext; |
| 103 | |
| 104 | /* Get next token, saving outputs into lookahead variables */ |
| 105 | next_token = base_yylex(); |
| 106 | |
| 107 | lookahead_token = next_token; |
| 108 | lookahead_yylval = base_yylval; |
| 109 | lookahead_yylloc = base_yylloc; |
| 110 | lookahead_yytext = base_yytext; |
| 111 | |
| 112 | base_yylval = cur_yylval; |
| 113 | base_yylloc = cur_yylloc; |
| 114 | base_yytext = cur_yytext; |
| 115 | |
| 116 | /* Now revert the un-truncation of the current token */ |
| 117 | lookahead_hold_char = *lookahead_end; |
| 118 | *lookahead_end = '\0'; |
| 119 | |
| 120 | have_lookahead = true; |
| 121 | |
| 122 | /* Replace cur_token if needed, based on lookahead */ |
| 123 | switch (cur_token) |
| 124 | { |
| 125 | case NOT: |
| 126 | /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */ |
| 127 | switch (next_token) |
| 128 | { |
| 129 | case BETWEEN: |
| 130 | case IN_P: |
| 131 | case LIKE: |
| 132 | case ILIKE: |
| 133 | case SIMILAR: |
| 134 | cur_token = NOT_LA; |
| 135 | break; |
| 136 | } |
| 137 | break; |
| 138 | |
| 139 | case NULLS_P: |
| 140 | /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */ |
| 141 | switch (next_token) |
| 142 | { |
| 143 | case FIRST_P: |
| 144 | case LAST_P: |
| 145 | cur_token = NULLS_LA; |
| 146 | break; |
| 147 | } |
| 148 | break; |
| 149 | |
| 150 | case WITH: |
| 151 | /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */ |
| 152 | switch (next_token) |
| 153 | { |
| 154 | case TIME: |
| 155 | case ORDINALITY: |
| 156 | cur_token = WITH_LA; |
| 157 | break; |
| 158 | } |
| 159 | break; |
| 160 | } |
| 161 | |
| 162 | return cur_token; |
| 163 | } |
| 164 | |