/*-------------------------------------------------------------------------
 *
 * scanner.h
 *		API for the core scanner (flex machine)
 *
 * The core scanner is also used by PL/pgSQL, so we provide a public API
 * for it.  However, the rest of the backend is only expected to use the
 * higher-level API provided by parser.h.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/parser/scanner.h
 *
 *-------------------------------------------------------------------------
 */

#ifndef SCANNER_H
#define SCANNER_H

#include "common/keywords.h"

/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 *
 * Being a union, only the member matching the token just returned is
 * meaningful; the members share storage.
 */
typedef union core_YYSTYPE
{
	int			ival;			/* for integer literals */
	char	   *str;			/* for identifiers and non-integer literals */
	const char *keyword;		/* canonical spelling of keywords */
} core_YYSTYPE;

/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 *
 * This has to remain a preprocessor macro (not a typedef), since that is
 * the form in which bison looks for a user-supplied location type.
 */
#define YYLTYPE  int

/*
 * Another important component of the scanner's API is the token code numbers.
 * However, those are not defined in this file, because bison insists on
 * defining them for itself.  The token codes used by the core scanner are
 * the ASCII characters plus these:
 *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
 *	%token <ival>	ICONST PARAM
 *	%token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 *	%token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 * The above token definitions *must* be the first ones declared in any
 * bison parser built atop this scanner, so that they will have consistent
 * numbers assigned to them (specifically, IDENT = 258 and so on).
 */

60 | /* |
61 | * The YY_EXTRA data that a flex scanner allows us to pass around. |
62 | * Private state needed by the core scanner goes here. Note that the actual |
63 | * yy_extra struct may be larger and have this as its first component, thus |
64 | * allowing the calling parser to keep some fields of its own in YY_EXTRA. |
65 | */ |
66 | typedef struct |
67 | { |
68 | /* |
69 | * The string the scanner is physically scanning. We keep this mainly so |
70 | * that we can cheaply compute the offset of the current token (yytext). |
71 | */ |
72 | char *; |
73 | Size ; |
74 | |
75 | /* |
76 | * The keyword list to use, and the associated grammar token codes. |
77 | */ |
78 | const ScanKeywordList *; |
79 | const uint16 *; |
80 | |
81 | /* |
82 | * Scanner settings to use. These are initialized from the corresponding |
83 | * GUC variables by scanner_init(). Callers can modify them after |
84 | * scanner_init() if they don't want the scanner's behavior to follow the |
85 | * prevailing GUC settings. |
86 | */ |
87 | int ; |
88 | bool ; |
89 | bool standard_conforming_strings; |
90 | |
91 | /* |
92 | * literalbuf is used to accumulate literal values when multiple rules are |
93 | * needed to parse a single literal. Call startlit() to reset buffer to |
94 | * empty, addlit() to add text. NOTE: the string in literalbuf is NOT |
95 | * necessarily null-terminated, but there always IS room to add a trailing |
96 | * null at offset literallen. We store a null only when we need it. |
97 | */ |
98 | char *; /* palloc'd expandable buffer */ |
99 | int ; /* actual current string length */ |
100 | int ; /* current allocated buffer size */ |
101 | |
102 | int ; /* depth of nesting in slash-star comments */ |
103 | char *; /* current $foo$ quote start string */ |
104 | |
105 | /* first part of UTF16 surrogate pair for Unicode escapes */ |
106 | int32 ; |
107 | |
108 | /* state variables for literal-lexing warnings */ |
109 | bool ; |
110 | bool ; |
111 | } ; |
112 | |
/*
 * The type of yyscanner is opaque outside scan.l.
 *
 * Callers just pass the handle back to the scanner entry points and must
 * not dereference it.
 */
typedef void *core_yyscan_t;


119 | /* Constant data exported from parser/scan.l */ |
120 | extern PGDLLIMPORT const uint16 ScanKeywordTokens[]; |
121 | |
122 | /* Entry points in parser/scan.l */ |
123 | extern core_yyscan_t scanner_init(const char *str, |
124 | core_yy_extra_type *yyext, |
125 | const ScanKeywordList *keywordlist, |
126 | const uint16 *keyword_tokens); |
127 | extern void scanner_finish(core_yyscan_t yyscanner); |
128 | extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, |
129 | core_yyscan_t yyscanner); |
130 | extern int scanner_errposition(int location, core_yyscan_t yyscanner); |
131 | extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn(); |
132 | |
#endif							/* SCANNER_H */