/*-------------------------------------------------------------------------
 *
 * scanner.h
 *		API for the core scanner (flex machine)
 *
 * The core scanner is also used by PL/pgSQL, so we provide a public API
 * for it.  However, the rest of the backend is only expected to use the
 * higher-level API provided by parser.h.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/parser/scanner.h
 *
 *-------------------------------------------------------------------------
 */

#pragma once

#include <cstdint>

#include "common/keywords.hpp"
#include "pg_definitions.hpp"

/*
 * The scanner returns extra data about scanned tokens in this union type.
 * Note that this is a subset of the fields used in YYSTYPE of the bison
 * parsers built atop the scanner.
 *
 * Being a union, only one member is meaningful for any given token.  Which
 * one applies follows the <ival>/<str> tags in the %token list documented
 * further down in this file.
 */
typedef union core_YYSTYPE
{
	int			ival;		/* for integer literals */
	char	   *str;		/* for identifiers and non-integer literals */
	const char *keyword;	/* canonical spelling of keywords */
} core_YYSTYPE;

/*
 * We track token locations in terms of byte offsets from the start of the
 * source string, not the column number/line number representation that
 * bison uses by default.  Also, to minimize overhead we track only one
 * location (usually the first token location) for each construct, not
 * the beginning and ending locations as bison does by default.  It's
 * therefore sufficient to make YYLTYPE an int.
 *
 * This is a macro rather than a typedef because YYLTYPE is the name bison
 * itself looks for when a parser overrides its default location type.
 */
#define YYLTYPE  int

/*
 * Another important component of the scanner's API is the token code numbers.
 * However, those are not defined in this file, because bison insists on
 * defining them for itself.  The token codes used by the core scanner are
 * the ASCII characters plus these:
 *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
 *	%token <ival>	ICONST PARAM
 *	%token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 *	%token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 * The above token definitions *must* be the first ones declared in any
 * bison parser built atop this scanner, so that they will have consistent
 * numbers assigned to them (specifically, IDENT = 258 and so on).
 */

62/*
63 * The YY_EXTRA data that a flex scanner allows us to pass around.
64 * Private state needed by the core scanner goes here. Note that the actual
65 * yy_extra struct may be larger and have this as its first component, thus
66 * allowing the calling parser to keep some fields of its own in YY_EXTRA.
67 */
68typedef struct core_yy_extra_type
69{
70 /*
71 * The string the scanner is physically scanning. We keep this mainly so
72 * that we can cheaply compute the offset of the current token (yytext).
73 */
74 char *scanbuf;
75 PGSize scanbuflen;
76
77 /*
78 * The keyword list to use.
79 */
80 const PGScanKeyword *keywords;
81 int num_keywords;
82
83 /*
84 * Scanner settings to use. These are initialized from the corresponding
85 * GUC variables by scanner_init(). Callers can modify them after
86 * scanner_init() if they don't want the scanner's behavior to follow the
87 * prevailing GUC settings.
88 */
89 int backslash_quote;
90 bool escape_string_warning;
91 bool standard_conforming_strings;
92
93 /*
94 * literalbuf is used to accumulate literal values when multiple rules are
95 * needed to parse a single literal. Call startlit() to reset buffer to
96 * empty, addlit() to add text. NOTE: the string in literalbuf is NOT
97 * necessarily null-terminated, but there always IS room to add a trailing
98 * null at offset literallen. We store a null only when we need it.
99 */
100 char *literalbuf; /* palloc'd expandable buffer */
101 int literallen; /* actual current string length */
102 int literalalloc; /* current allocated buffer size */
103
104 int xcdepth; /* depth of nesting in slash-star comments */
105 char *dolqstart; /* current $foo$ quote start string */
106
107 /* first part of UTF16 surrogate pair for Unicode escapes */
108 int32_t utf16_first_part;
109
110 /* state variables for literal-lexing warnings */
111 bool warn_on_first_escape;
112 bool saw_non_ascii;
113} core_yy_extra_type;
114
/*
 * The type of yyscanner is opaque outside scan.l.
 *
 * Callers only receive this handle from scanner_init() and pass it back to
 * the other entry points declared in this header.
 */
typedef void *core_yyscan_t;


121/* Entry points in parser/scan.l */
122extern core_yyscan_t scanner_init(const char *str,
123 core_yy_extra_type *yyext,
124 const PGScanKeyword *keywords,
125 int num_keywords);
126extern void scanner_finish(core_yyscan_t yyscanner);
127extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
128 core_yyscan_t yyscanner);
129extern int scanner_errposition(int location, core_yyscan_t yyscanner);
130extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner);
131