src_backend_parser_scansup.cpp source code [DuckDB/third_party/libpg_query/src_backend_parser_scansup.cpp]

/*--------------------------------------------------------------------
 * Symbols referenced in this file:
 * - truncate_identifier
 * - downcase_truncate_identifier
 * - downcase_identifier
 * - scanner_isspace
 *--------------------------------------------------------------------
 */
9
/*-------------------------------------------------------------------------
 *
 * scansup.c
 *	  support routines for the lex/flex scanner, used by both the normal
 *	  backend as well as the bootstrap backend
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/parser/scansup.c
 *
 *-------------------------------------------------------------------------
 */
25	#include "pg_functions.hpp"
26	#include <string.h>
27
28	#include <ctype.h>
29
30	#include "parser/scansup.hpp"
31	#include "mb/pg_wchar.hpp"
32
33
/* ----------------
 *		scanstr
 *
 * if the string passed in has escaped codes, map the escape codes to actual
 * chars
 *
 * the string returned is palloc'd and should eventually be pfree'd by the
 * caller!
 * ----------------
 */
44
45
46
47
/*
 * downcase_truncate_identifier() --- do appropriate downcasing and
 * truncation of an unquoted identifier.  Optionally warn of truncation.
 *
 * ident: bytes of the identifier to adjust
 * len:   number of bytes of ident to process
 * warn:  forwarded to downcase_identifier(); requests a notice when the
 *        identifier has to be truncated
 *
 * Returns a palloc'd string containing the adjusted identifier.
 *
 * Note: in some usages the passed string is not null-terminated, which is
 * why the length is passed explicitly.
 *
 * Note: the API of this function is designed to allow for downcasing
 * transformations that increase the string length, but we don't yet
 * support that.  If you want to implement it, you'll need to fix
 * SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	/* Thin wrapper: same as downcase_identifier() with truncation enabled. */
	return downcase_identifier(ident, len, warn, true);
}
66
67	/*
68	* a workhorse for downcase_truncate_identifier
69	*/
70	char *
71	downcase_identifier(const char ident, int* len, bool warn, bool truncate)
72	{
73	char *result;
74	int i;
75	bool enc_is_single_byte;
76
77	result = (char*) palloc(len + `1`);
78	enc_is_single_byte = pg_database_encoding_max_length() == `1`;
79
80	/*
81	* SQL99 specifies Unicode-aware case normalization, which we don't yet
82	* have the infrastructure for. Instead we use tolower() to provide a
83	* locale-aware translation. However, there are some locales where this
84	* is not right either (eg, Turkish may do strange things with 'i' and
85	* 'I'). Our current compromise is to use tolower() for characters with
86	* the high bit set, as long as they aren't part of a multi-byte
87	* character, and use an ASCII-only downcasing for 7-bit characters.
88	*/
89	for (i = `0`; i < len; i++)
90	{
91	unsigned char ch = (unsigned char) ident[i];
92
93	if (ch >= `'A'` && ch <= `'Z'`)
94	ch += `'a'` - `'A'`;
95	else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
96	ch = tolower(ch);
97	result[i] = (char) ch;
98	}
99	result[i] = `'\0'`;
100
101	if (i >= NAMEDATALEN && truncate)
102	truncate_identifier(result, i, warn);
103
104	return result;
105	}
106
107
108	/*
109	* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
110	*
111	* The given string is modified in-place, if necessary. A warning is
112	* issued if requested.
113	*
114	* We require the caller to pass in the string length since this saves a
115	* strlen() call in some common usages.
116	*/
117	void
118	truncate_identifier(char ident, int* len, bool warn)
119	{
120	if (len >= NAMEDATALEN)
121	{
122	len = pg_mbcliplen(ident, len, NAMEDATALEN - `1`);
123	if (warn)
124	{
125	/*
126	* We avoid using %.*s here because it can misbehave if the data
127	* is not valid in what libc thinks is the prevailing encoding.
128	*/
129	char buf[NAMEDATALEN];
130
131	memcpy(buf, ident, len);
132	buf[len] = `'\0'`;
133	ereport(PGNOTICE,
134	(errcode(ERRCODE_NAME_TOO_LONG),
135	errmsg("identifier \"%s\" will be truncated to \"%s\"",
136	ident, buf)));
137	}
138	ident[len] = `'\0'`;
139	}
140	}
141
/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * This should be used instead of the potentially locale-dependent isspace()
 * function when it's important to match the lexer's behavior.
 *
 * Exactly five characters are accepted: space, tab, newline, carriage
 * return and form feed.  Anything else (including vertical tab) yields
 * false.
 *
 * In principle we might need similar functions for isalnum etc, but for the
 * moment only isspace seems needed.
 */
bool
scanner_isspace(char ch)
{
	/* This must match scan.l's list of {space} characters */
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
			return true;
		default:
			return false;
	}
}
163

Browse the source code of DuckDB/third_party/libpg_query/src_backend_parser_scansup.cpp