/*-------------------------------------------------------------------------
 *
 * scanner.h
 *		API for the core scanner (flex machine)
 *
 * The core scanner is also used by PL/pgSQL, so we provide a public API
 * for it.  However, the rest of the backend is only expected to use the
 * higher-level API provided by parser.h.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/parser/scanner.h
 *
 *-------------------------------------------------------------------------
 */

#ifndef SCANNER_H
#define SCANNER_H

#include "common/keywords.h"

/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 *
 * Being a union, only one member holds a meaningful value for any given
 * token.
 */
typedef union core_YYSTYPE
{
	int			ival;			/* for integer literals */
	char	   *str;			/* for identifiers and non-integer literals */
	const char *keyword;		/* canonical spelling of keywords */
} core_YYSTYPE;

/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 */
#define YYLTYPE  int

/*
 * Another important component of the scanner's API is the token code numbers.
 * However, those are not defined in this file, because bison insists on
 * defining them for itself.  The token codes used by the core scanner are
 * the ASCII characters plus these:
 *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
 *	%token <ival>	ICONST PARAM
 *	%token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 *	%token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 * The above token definitions *must* be the first ones declared in any
 * bison parser built atop this scanner, so that they will have consistent
 * numbers assigned to them (specifically, IDENT = 258 and so on).
 */

60/*
61 * The YY_EXTRA data that a flex scanner allows us to pass around.
62 * Private state needed by the core scanner goes here. Note that the actual
63 * yy_extra struct may be larger and have this as its first component, thus
64 * allowing the calling parser to keep some fields of its own in YY_EXTRA.
65 */
66typedef struct core_yy_extra_type
67{
68 /*
69 * The string the scanner is physically scanning. We keep this mainly so
70 * that we can cheaply compute the offset of the current token (yytext).
71 */
72 char *scanbuf;
73 Size scanbuflen;
74
75 /*
76 * The keyword list to use, and the associated grammar token codes.
77 */
78 const ScanKeywordList *keywordlist;
79 const uint16 *keyword_tokens;
80
81 /*
82 * Scanner settings to use. These are initialized from the corresponding
83 * GUC variables by scanner_init(). Callers can modify them after
84 * scanner_init() if they don't want the scanner's behavior to follow the
85 * prevailing GUC settings.
86 */
87 int backslash_quote;
88 bool escape_string_warning;
89 bool standard_conforming_strings;
90
91 /*
92 * literalbuf is used to accumulate literal values when multiple rules are
93 * needed to parse a single literal. Call startlit() to reset buffer to
94 * empty, addlit() to add text. NOTE: the string in literalbuf is NOT
95 * necessarily null-terminated, but there always IS room to add a trailing
96 * null at offset literallen. We store a null only when we need it.
97 */
98 char *literalbuf; /* palloc'd expandable buffer */
99 int literallen; /* actual current string length */
100 int literalalloc; /* current allocated buffer size */
101
102 int xcdepth; /* depth of nesting in slash-star comments */
103 char *dolqstart; /* current $foo$ quote start string */
104
105 /* first part of UTF16 surrogate pair for Unicode escapes */
106 int32 utf16_first_part;
107
108 /* state variables for literal-lexing warnings */
109 bool warn_on_first_escape;
110 bool saw_non_ascii;
111} core_yy_extra_type;
112
/*
 * The type of yyscanner is opaque outside scan.l.
 *
 * Callers should treat values of this type purely as handles: obtain one
 * from scanner_init() and pass it back to the other scanner entry points.
 */
typedef void *core_yyscan_t;


119/* Constant data exported from parser/scan.l */
120extern PGDLLIMPORT const uint16 ScanKeywordTokens[];
121
122/* Entry points in parser/scan.l */
123extern core_yyscan_t scanner_init(const char *str,
124 core_yy_extra_type *yyext,
125 const ScanKeywordList *keywordlist,
126 const uint16 *keyword_tokens);
127extern void scanner_finish(core_yyscan_t yyscanner);
128extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
129 core_yyscan_t yyscanner);
130extern int scanner_errposition(int location, core_yyscan_t yyscanner);
131extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn();
132
#endif							/* SCANNER_H */