src_backend_parser_scansup.cpp source code [Velox/build/_deps/duckdb-src/third_party/libpg_query/src_backend_parser_scansup.cpp]

/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - truncate_identifier
 * - downcase_truncate_identifier
 * - downcase_identifier
 * - scanner_isspace
 *--------------------------------------------------------------------
 */
9
/*-------------------------------------------------------------------------
 *
 * scansup.c
 *	  support routines for the lex/flex scanner, used by both the normal
 *	  backend as well as the bootstrap backend
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/parser/scansup.c
 *
 *-------------------------------------------------------------------------
 */
25	#include "pg_functions.hpp"
26	#include <string.h>
27
28	#include <ctype.h>
29
30	#include "parser/scansup.hpp"
31	#include "mb/pg_wchar.hpp"
32
33	namespace duckdb_libpgquery {
34
/* ----------------
 *		scanstr
 *
 * if the string passed in has escaped codes, map the escape codes to actual
 * chars
 *
 * the string returned is palloc'd and should eventually be pfree'd by the
 * caller!
 * ----------------
 */
45
46	/*
47	* downcase_truncate_identifier() --- do appropriate downcasing and
48	* truncation of an unquoted identifier. Optionally warn of truncation.
49	*
50	* Returns a palloc'd string containing the adjusted identifier.
51	*
52	* Note: in some usages the passed string is not null-terminated.
53	*
54	* Note: the API of this function is designed to allow for downcasing
55	* transformations that increase the string length, but we don't yet
56	* support that. If you want to implement it, you'll need to fix
57	* SplitIdentifierString() in utils/adt/varlena.c.
58	*/
59	char downcase_truncate_identifier(const* char ident, int* len, bool warn) {
60	return downcase_identifier(ident, len, warn, truncate: true);
61	}
62
/*
 * Per-thread switch controlling identifier case handling.  When true,
 * downcase_identifier() copies identifiers without changing their case.
 * Defaults to false (identifiers are downcased).
 */
static __thread bool pg_preserve_identifier_case = false;

/*
 * set_preserve_identifier_case() --- set the calling thread's
 * case-preservation switch to the given value.
 */
void set_preserve_identifier_case(bool preserve) {
	pg_preserve_identifier_case = preserve;
}

/*
 * get_preserve_identifier_case() --- return the calling thread's current
 * case-preservation switch.
 */
bool get_preserve_identifier_case() {
	return pg_preserve_identifier_case;
}
72
73	/*
74	* a workhorse for downcase_truncate_identifier
75	*/
76	char downcase_identifier(const* char ident, int* len, bool warn, bool truncate) {
77	char *result;
78	int i;
79	bool enc_is_single_byte;
80
81	result = (char *)palloc(n: len + `1`);
82	enc_is_single_byte = pg_database_encoding_max_length() == `1`;
83
84	/*
85	* SQL99 specifies Unicode-aware case normalization, which we don't yet
86	* have the infrastructure for. Instead we use tolower() to provide a
87	* locale-aware translation. However, there are some locales where this
88	* is not right either (eg, Turkish may do strange things with 'i' and
89	* 'I'). Our current compromise is to use tolower() for characters with
90	* the high bit set, as long as they aren't part of a multi-byte
91	* character, and use an ASCII-only downcasing for 7-bit characters.
92	*/
93	for (i = `0`; i < len; i++) {
94	unsigned char ch = (unsigned char)ident[i];
95
96	if (!get_preserve_identifier_case()) {
97	if (ch >= `'A'` && ch <= `'Z'`)
98	ch += `'a'` - `'A'`;
99	else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
100	ch = tolower(c: ch);
101	}
102	result[i] = (char)ch;
103	}
104	result[i] = `'\0'`;
105
106	return result;
107	}
108
/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * This should be used instead of the potentially locale-dependent isspace()
 * function when it's important to match the lexer's behavior.
 *
 * Exactly five characters count as whitespace: space, tab, newline,
 * carriage return and form feed.  Anything else (including vertical tab,
 * which isspace() accepts) yields false.
 *
 * In principle we might need similar functions for isalnum etc, but for the
 * moment only isspace seems needed.
 */
bool scanner_isspace(char ch) {
	/* This must match scan.l's list of {space} characters */
	if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f')
		return true;
	return false;
}
124	}

Browse the source code of Velox/build/_deps/duckdb-src/third_party/libpg_query/src_backend_parser_scansup.cpp