/*-------------------------------------------------------------------------
 *
 * scanner.h
 *		API for the core scanner (flex machine)
 *
 * The core scanner is also used by PL/pgSQL, so we provide a public API
 * for it.  However, the rest of the backend is only expected to use the
 * higher-level API provided by parser.h.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/parser/scanner.h
 *
 *-------------------------------------------------------------------------
 */
| 18 | |
| 19 | #ifndef SCANNER_H |
| 20 | #define SCANNER_H |
| 21 | |
| 22 | #include "common/keywords.h" |
| 23 | |
/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 */
typedef union core_YYSTYPE
{
	int			ival;			/* for integer literals */
	char	   *str;			/* for identifiers and non-integer literals */
	const char *keyword;		/* canonical spelling of keywords */
} core_YYSTYPE;
| 35 | |
/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 */
#define YYLTYPE  int
| 45 | |
/*
 * Another important component of the scanner's API is the token code numbers.
 * However, those are not defined in this file, because bison insists on
 * defining them for itself.  The token codes used by the core scanner are
 * the ASCII characters plus these:
 *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
 *	%token <ival>	ICONST PARAM
 *	%token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 *	%token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 * The above token definitions *must* be the first ones declared in any
 * bison parser built atop this scanner, so that they will have consistent
 * numbers assigned to them (specifically, IDENT = 258 and so on).
 */
| 59 | |
| 60 | /* |
| 61 | * The YY_EXTRA data that a flex scanner allows us to pass around. |
| 62 | * Private state needed by the core scanner goes here. Note that the actual |
| 63 | * yy_extra struct may be larger and have this as its first component, thus |
| 64 | * allowing the calling parser to keep some fields of its own in YY_EXTRA. |
| 65 | */ |
| 66 | typedef struct |
| 67 | { |
| 68 | /* |
| 69 | * The string the scanner is physically scanning. We keep this mainly so |
| 70 | * that we can cheaply compute the offset of the current token (yytext). |
| 71 | */ |
| 72 | char *; |
| 73 | Size ; |
| 74 | |
| 75 | /* |
| 76 | * The keyword list to use, and the associated grammar token codes. |
| 77 | */ |
| 78 | const ScanKeywordList *; |
| 79 | const uint16 *; |
| 80 | |
| 81 | /* |
| 82 | * Scanner settings to use. These are initialized from the corresponding |
| 83 | * GUC variables by scanner_init(). Callers can modify them after |
| 84 | * scanner_init() if they don't want the scanner's behavior to follow the |
| 85 | * prevailing GUC settings. |
| 86 | */ |
| 87 | int ; |
| 88 | bool ; |
| 89 | bool standard_conforming_strings; |
| 90 | |
| 91 | /* |
| 92 | * literalbuf is used to accumulate literal values when multiple rules are |
| 93 | * needed to parse a single literal. Call startlit() to reset buffer to |
| 94 | * empty, addlit() to add text. NOTE: the string in literalbuf is NOT |
| 95 | * necessarily null-terminated, but there always IS room to add a trailing |
| 96 | * null at offset literallen. We store a null only when we need it. |
| 97 | */ |
| 98 | char *; /* palloc'd expandable buffer */ |
| 99 | int ; /* actual current string length */ |
| 100 | int ; /* current allocated buffer size */ |
| 101 | |
| 102 | int ; /* depth of nesting in slash-star comments */ |
| 103 | char *; /* current $foo$ quote start string */ |
| 104 | |
| 105 | /* first part of UTF16 surrogate pair for Unicode escapes */ |
| 106 | int32 ; |
| 107 | |
| 108 | /* state variables for literal-lexing warnings */ |
| 109 | bool ; |
| 110 | bool ; |
| 111 | } ; |
| 112 | |
/*
 * The type of yyscanner is opaque outside scan.l.
 */
typedef void *core_yyscan_t;
| 117 | |
| 118 | |
| 119 | /* Constant data exported from parser/scan.l */ |
| 120 | extern PGDLLIMPORT const uint16 ScanKeywordTokens[]; |
| 121 | |
| 122 | /* Entry points in parser/scan.l */ |
| 123 | extern core_yyscan_t scanner_init(const char *str, |
| 124 | core_yy_extra_type *yyext, |
| 125 | const ScanKeywordList *keywordlist, |
| 126 | const uint16 *keyword_tokens); |
| 127 | extern void scanner_finish(core_yyscan_t yyscanner); |
| 128 | extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, |
| 129 | core_yyscan_t yyscanner); |
| 130 | extern int scanner_errposition(int location, core_yyscan_t yyscanner); |
| 131 | extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn(); |
| 132 | |
| 133 | #endif /* SCANNER_H */ |
| 134 | |