1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * psqlscan_int.h |
4 | * lexical scanner internal declarations |
5 | * |
6 | * This file declares the PsqlScanStateData structure used by psqlscan.l |
7 | * and shared by other lexers compatible with it, such as psqlscanslash.l. |
8 | * |
9 | * One difficult aspect of this code is that we need to work in multibyte |
10 | * encodings that are not ASCII-safe. A "safe" encoding is one in which each |
11 | * byte of a multibyte character has the high bit set (it's >= 0x80). Since |
12 | * all our lexing rules treat all high-bit-set characters alike, we don't |
13 | * really need to care whether such a byte is part of a sequence or not. |
14 | * In an "unsafe" encoding, we still expect the first byte of a multibyte |
15 | * sequence to be >= 0x80, but later bytes might not be. If we scan such |
16 | * a sequence as-is, the lexing rules could easily be fooled into matching |
17 | * such bytes to ordinary ASCII characters. Our solution for this is to |
18 | * substitute 0xFF for each non-first byte within the data presented to flex. |
19 | * The flex rules will then pass the FF's through unmolested. The |
20 | * psqlscan_emit() subroutine is responsible for looking back to the original |
21 | * string and replacing FF's with the corresponding original bytes. |
22 | * |
23 | * Another interesting thing we do here is scan different parts of the same |
24 | * input with physically separate flex lexers (ie, lexers written in separate |
25 | * .l files). We can get away with this because the only part of the |
26 | * persistent state of a flex lexer that depends on its parsing rule tables |
27 | * is the start state number, which is easy enough to manage --- usually, |
28 | * in fact, we just need to set it to INITIAL when changing lexers. But to |
29 | * make that work at all, we must use re-entrant lexers, so that all the |
 * relevant state is in the yyscan_t attached to the PsqlScanState;
31 | * if we were using lexers with separate static state we would soon end up |
32 | * with dangling buffer pointers in one or the other. Also note that this |
33 | * is unlikely to work very nicely if the lexers aren't all built with the |
34 | * same flex version, or if they don't use the same flex options. |
35 | * |
36 | * |
37 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
38 | * Portions Copyright (c) 1994, Regents of the University of California |
39 | * |
40 | * src/include/fe_utils/psqlscan_int.h |
41 | * |
42 | *------------------------------------------------------------------------- |
43 | */ |
44 | #ifndef PSQLSCAN_INT_H |
45 | #define PSQLSCAN_INT_H |
46 | |
47 | #include "fe_utils/psqlscan.h" |
48 | |
/*
 * Fallback declarations of the flex handle types.
 *
 * In real use this header is included from the body of a flex file, where
 * these symbols are already defined.  The declarations below are present
 * only so that the header can also be compiled on its own, e.g. for header
 * validity checking.
 */
#if !defined(YY_TYPEDEF_YY_BUFFER_STATE)
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif

#if !defined(YY_TYPEDEF_YY_SCANNER_T)
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
#endif
62 | |
63 | /* |
64 | * We use a stack of flex buffers to handle substitution of psql variables. |
65 | * Each stacked buffer contains the as-yet-unread text from one psql variable. |
66 | * When we pop the stack all the way, we resume reading from the outer buffer |
67 | * identified by scanbufhandle. |
68 | */ |
69 | typedef struct StackElem |
70 | { |
71 | YY_BUFFER_STATE buf; /* flex input control structure */ |
72 | char *bufstring; /* data actually being scanned by flex */ |
73 | char *origstring; /* copy of original data, if needed */ |
74 | char *varname; /* name of variable providing data, or NULL */ |
75 | struct StackElem *next; |
76 | } StackElem; |
77 | |
78 | /* |
79 | * All working state of the lexer must be stored in PsqlScanStateData |
80 | * between calls. This allows us to have multiple open lexer operations, |
81 | * which is needed for nested include files. The lexer itself is not |
82 | * recursive, but it must be re-entrant. |
83 | */ |
84 | typedef struct PsqlScanStateData |
85 | { |
86 | yyscan_t scanner; /* Flex's state for this PsqlScanState */ |
87 | |
88 | PQExpBuffer output_buf; /* current output buffer */ |
89 | |
90 | StackElem *buffer_stack; /* stack of variable expansion buffers */ |
91 | |
92 | /* |
93 | * These variables always refer to the outer buffer, never to any stacked |
94 | * variable-expansion buffer. |
95 | */ |
96 | YY_BUFFER_STATE scanbufhandle; |
97 | char *scanbuf; /* start of outer-level input buffer */ |
98 | const char *scanline; /* current input line at outer level */ |
99 | |
100 | /* safe_encoding, curline, refline are used by emit() to replace FFs */ |
101 | int encoding; /* encoding being used now */ |
102 | bool safe_encoding; /* is current encoding "safe"? */ |
103 | bool std_strings; /* are string literals standard? */ |
104 | const char *curline; /* actual flex input string for cur buf */ |
105 | const char *refline; /* original data for cur buffer */ |
106 | |
107 | /* |
108 | * All this state lives across successive input lines, until explicitly |
109 | * reset by psql_scan_reset. start_state is adopted by yylex() on entry, |
110 | * and updated with its finishing state on exit. |
111 | */ |
112 | int start_state; /* yylex's starting/finishing state */ |
113 | int paren_depth; /* depth of nesting in parentheses */ |
114 | int xcdepth; /* depth of nesting in slash-star comments */ |
115 | char *dolqstart; /* current $foo$ quote start string */ |
116 | |
117 | /* |
118 | * Callback functions provided by the program making use of the lexer, |
119 | * plus a void* callback passthrough argument. |
120 | */ |
121 | const PsqlScanCallbacks *callbacks; |
122 | void *cb_passthrough; |
123 | } PsqlScanStateData; |
124 | |
125 | |
126 | /* |
127 | * Functions exported by psqlscan.l, but only meant for use within |
128 | * compatible lexers. |
129 | */ |
130 | extern void psqlscan_push_new_buffer(PsqlScanState state, |
131 | const char *newstr, const char *varname); |
132 | extern void psqlscan_pop_buffer_stack(PsqlScanState state); |
133 | extern void psqlscan_select_top_buffer(PsqlScanState state); |
134 | extern bool psqlscan_var_is_current_source(PsqlScanState state, |
135 | const char *varname); |
136 | extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, |
137 | const char *txt, int len, |
138 | char **txtcopy); |
139 | extern void psqlscan_emit(PsqlScanState state, const char *txt, int len); |
140 | extern char *(PsqlScanState state, |
141 | const char *txt, int len); |
142 | extern void psqlscan_escape_variable(PsqlScanState state, |
143 | const char *txt, int len, |
144 | PsqlScanQuoteType quote); |
145 | extern void psqlscan_test_variable(PsqlScanState state, |
146 | const char *txt, int len); |
147 | |
148 | #endif /* PSQLSCAN_INT_H */ |
149 | |