1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * scanner.h |
4 | * API for the core scanner (flex machine) |
5 | * |
6 | * The core scanner is also used by PL/pgSQL, so we provide a public API |
7 | * for it. However, the rest of the backend is only expected to use the |
8 | * higher-level API provided by parser.h. |
9 | * |
10 | * |
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
12 | * Portions Copyright (c) 1994, Regents of the University of California |
13 | * |
14 | * src/include/parser/scanner.h |
15 | * |
16 | *------------------------------------------------------------------------- |
17 | */ |
18 | |
19 | #pragma once |
20 | |
21 | #include <cstdint> |
22 | |
23 | #include "common/keywords.hpp" |
24 | #include "pg_definitions.hpp" |
25 | |
26 | namespace duckdb_libpgquery { |
27 | |
28 | /* |
29 | * The scanner returns extra data about scanned tokens in this union type. |
30 | * Note that this is a subset of the fields used in YYSTYPE of the bison |
31 | * parsers built atop the scanner. |
32 | */ |
typedef union core_YYSTYPE {
	/* value carried by an integer literal */
	int ival;

	/* text of an identifier or of a non-integer literal */
	char *str;

	/* canonical spelling of a keyword */
	const char *keyword;
} core_YYSTYPE;
38 | |
39 | /* |
40 | * We track token locations in terms of byte offsets from the start of the |
41 | * source string, not the column number/line number representation that |
42 | * bison uses by default. Also, to minimize overhead we track only one |
43 | * location (usually the first token location) for each construct, not |
44 | * the beginning and ending locations as bison does by default. It's |
45 | * therefore sufficient to make YYLTYPE an int. |
46 | */ |
/* Token location type: a plain int byte offset, not bison's default line/column form. */
#define YYLTYPE int
48 | |
49 | /* |
50 | * Another important component of the scanner's API is the token code numbers. |
51 | * However, those are not defined in this file, because bison insists on |
52 | * defining them for itself. The token codes used by the core scanner are |
53 | * the ASCII characters plus these: |
54 | * %token <str> IDENT FCONST SCONST BCONST XCONST Op |
55 | * %token <ival> ICONST PARAM |
56 | * %token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER LAMBDA_ARROW |
57 | * %token LESS_EQUALS GREATER_EQUALS NOT_EQUALS |
58 | * The above token definitions *must* be the first ones declared in any |
59 | * bison parser built atop this scanner, so that they will have consistent |
60 | * numbers assigned to them (specifically, IDENT = 258 and so on). |
61 | */ |
62 | |
63 | /* |
64 | * The YY_EXTRA data that a flex scanner allows us to pass around. |
65 | * Private state needed by the core scanner goes here. Note that the actual |
66 | * yy_extra struct may be larger and have this as its first component, thus |
67 | * allowing the calling parser to keep some fields of its own in YY_EXTRA. |
68 | */ |
69 | typedef struct { |
70 | /* |
71 | * The string the scanner is physically scanning. We keep this mainly so |
72 | * that we can cheaply compute the offset of the current token (yytext). |
73 | */ |
74 | char *; |
75 | PGSize ; |
76 | |
77 | /* |
78 | * The keyword list to use. |
79 | */ |
80 | const PGScanKeyword *; |
81 | int ; |
82 | |
83 | /* |
84 | * Scanner settings to use. These are initialized from the corresponding |
85 | * GUC variables by scanner_init(). Callers can modify them after |
86 | * scanner_init() if they don't want the scanner's behavior to follow the |
87 | * prevailing GUC settings. |
88 | */ |
89 | int ; |
90 | bool ; |
91 | bool standard_conforming_strings; |
92 | |
93 | /* |
94 | * literalbuf is used to accumulate literal values when multiple rules are |
95 | * needed to parse a single literal. Call startlit() to reset buffer to |
96 | * empty, addlit() to add text. NOTE: the string in literalbuf is NOT |
97 | * necessarily null-terminated, but there always IS room to add a trailing |
98 | * null at offset literallen. We store a null only when we need it. |
99 | */ |
100 | char *; /* palloc'd expandable buffer */ |
101 | int ; /* actual current string length */ |
102 | int ; /* current allocated buffer size */ |
103 | |
104 | int ; /* depth of nesting in slash-star comments */ |
105 | char *; /* current $foo$ quote start string */ |
106 | |
107 | /* first part of UTF16 surrogate pair for Unicode escapes */ |
108 | int32_t ; |
109 | |
110 | /* state variables for literal-lexing warnings */ |
111 | bool ; |
112 | bool ; |
113 | } ; |
114 | |
115 | /* |
116 | * The type of yyscanner is opaque outside scan.l. |
117 | */ |
118 | typedef void *core_yyscan_t; |
119 | |
120 | /* Entry points in parser/scan.l */ |
121 | core_yyscan_t (const char *str, core_yy_extra_type *yyext, const PGScanKeyword *keywords, int num_keywords); |
122 | void scanner_finish(core_yyscan_t yyscanner); |
123 | int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner); |
124 | int scanner_errposition(int location, core_yyscan_t yyscanner); |
125 | void scanner_yyerror(const char *message, core_yyscan_t yyscanner); |
126 | |
127 | } |