gen_lex_token.cc source code [MariaDB/sql/gen_lex_token.cc]

1	/*
2	Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
3
4	This program is free software; you can redistribute it and/or modify
5	it under the terms of the GNU General Public License as published by
6	the Free Software Foundation; version 2 of the License.
7
8	This program is distributed in the hope that it will be useful,
9	but WITHOUT ANY WARRANTY; without even the implied warranty of
10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11	GNU General Public License for more details.
12
13	You should have received a copy of the GNU General Public License
14	along with this program; if not, write to the Free Software Foundation,
15	51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
16
17	#include "mariadb.h"
18	#include <string.h>
19
20	/* We only need the tokens here */
21	#define YYSTYPE_IS_DECLARED
22	#include <sql_yacc.h>
23	#include <lex.h>
24
25	#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
26
27	/*
28	This is a tool used during build only,
29	so MY_MAX_TOKEN does not need to be exact,
30	only big enough to hold:
31	- 256 character terminal tokens
32	- YYNTOKENS named terminal tokens
33	from bison.
34	See also YYMAXUTOK.
35	*/
36	#define MY_MAX_TOKEN 1000
/**
  Generated token.
  One entry of the token table computed by this tool.
*/
struct gen_lex_token_string
{
  /** Text of the token. */
  const char *m_token_string;
  /** Length of m_token_string, in characters. */
  int m_token_length;
  /** Whitespace flag; cleared only for tokens such as '@'. */
  bool m_append_space;
  /** True for tokens that are followed by an expression. */
  bool m_start_expr;
};
45
46	gen_lex_token_string compiled_token_array[MY_MAX_TOKEN];
47	int max_token_seen= `0`;
48
49	char char_tokens[`256`];
50
51	int tok_generic_value= `0`;
52	int tok_generic_value_list= `0`;
53	int tok_row_single_value= `0`;
54	int tok_row_single_value_list= `0`;
55	int tok_row_multiple_value= `0`;
56	int tok_row_multiple_value_list= `0`;
57	int tok_ident= `0`;
58	int tok_unused= `0`;
59
60	void set_token(int tok, const char *str)
61	{
62	if (tok <= `0`)
63	{
64	fprintf(stderr, "Bad token found\n");
65	exit(`1`);
66	}
67
68	if (tok > max_token_seen)
69	{
70	max_token_seen= tok;
71	}
72
73	if (max_token_seen >= MY_MAX_TOKEN)
74	{
75	fprintf(stderr, "Added that many new keywords ? Increase MY_MAX_TOKEN\n");
76	exit(`1`);
77	}
78
79	compiled_token_array[tok].m_token_string= str;
80	compiled_token_array[tok].m_token_length= (int)strlen(str);
81	compiled_token_array[tok].m_append_space= true;
82	compiled_token_array[tok].m_start_expr= false;
83	}
84
85	void set_start_expr_token(int tok)
86	{
87	compiled_token_array[tok].m_start_expr= true;
88	}
89
90	void compute_tokens()
91	{
92	int tok;
93	unsigned int i;
94	char *str;
95
96	/*
97	Default value.
98	*/
99	for (tok= `0`; tok < MY_MAX_TOKEN; tok++)
100	{
101	compiled_token_array[tok].m_token_string= "(unknown)";
102	compiled_token_array[tok].m_token_length= `9`;
103	compiled_token_array[tok].m_append_space= true;
104	compiled_token_array[tok].m_start_expr= false;
105	}
106
107	/*
108	Tokens made of just one terminal character
109	*/
110	for (tok=`0`; tok < `256`; tok++)
111	{
112	str= & char_tokens[tok];
113	str[`0`]= (char) tok;
114	compiled_token_array[tok].m_token_string= str;
115	compiled_token_array[tok].m_token_length= `1`;
116	compiled_token_array[tok].m_append_space= true;
117	}
118
119	max_token_seen= `255`;
120
121	/*
122	String terminal tokens, used in sql_yacc.yy
123	*/
124	set_token(NEG, "~");
125	set_token(TABLE_REF_PRIORITY, "TABLE_REF_PRIORITY");
126
127	/*
128	Tokens hard coded in sql_lex.cc
129	*/
130
131	set_token(WITH_CUBE_SYM, "WITH CUBE");
132	set_token(WITH_ROLLUP_SYM, "WITH ROLLUP");
133	set_token(WITH_SYSTEM_SYM, "WITH SYSTEM");
134	set_token(FOR_SYSTEM_TIME_SYM, "FOR SYSTEM_TIME");
135	set_token(VALUES_IN_SYM, "VALUES IN");
136	set_token(VALUES_LESS_SYM, "VALUES LESS");
137	set_token(NOT2_SYM, "!");
138	set_token(OR2_SYM, "\|");
139	set_token(PARAM_MARKER, "?");
140	set_token(SET_VAR, ":=");
141	set_token(UNDERSCORE_CHARSET, "(_charset)");
142	set_token(END_OF_INPUT, "");
143
144	/*
145	Values.
146	These tokens are all normalized later,
147	so this strings will never be displayed.
148	*/
149	set_token(BIN_NUM, "(bin)");
150	set_token(DECIMAL_NUM, "(decimal)");
151	set_token(FLOAT_NUM, "(float)");
152	set_token(HEX_NUM, "(hex)");
153	set_token(LEX_HOSTNAME, "(hostname)");
154	set_token(LONG_NUM, "(long)");
155	set_token(NUM, "(num)");
156	set_token(TEXT_STRING, "(text)");
157	set_token(NCHAR_STRING, "(nchar)");
158	set_token(ULONGLONG_NUM, "(ulonglong)");
159
160	/*
161	Identifiers.
162	*/
163	set_token(IDENT, "(id)");
164	set_token(IDENT_QUOTED, "(id_quoted)");
165
166	/*
167	Unused tokens
168	*/
169	set_token(LOCATOR_SYM, "LOCATOR");
170	set_token(SERVER_OPTIONS, "SERVER_OPTIONS");
171	set_token(UDF_RETURNS_SYM, "UDF_RETURNS");
172
173	/*
174	See symbols[] in sql/lex.h
175	*/
176	for (i= `0`; i< sizeof(symbols)/sizeof(symbols[`0`]); i++)
177	{
178	set_token(symbols[i].tok, symbols[i].name);
179	}
180
181	/*
182	See sql_functions[] in sql/lex.h
183	*/
184	for (i= `0`; i< sizeof(sql_functions)/sizeof(sql_functions[`0`]); i++)
185	{
186	set_token(sql_functions[i].tok, sql_functions[i].name);
187	}
188
189	/*
190	Additional FAKE tokens,
191	used internally to normalize a digest text.
192	*/
193
194	max_token_seen++;
195	tok_generic_value= max_token_seen;
196	set_token(tok_generic_value, "?");
197
198	max_token_seen++;
199	tok_generic_value_list= max_token_seen;
200	set_token(tok_generic_value_list, "?, ...");
201
202	max_token_seen++;
203	tok_row_single_value= max_token_seen;
204	set_token(tok_row_single_value, "(?)");
205
206	max_token_seen++;
207	tok_row_single_value_list= max_token_seen;
208	set_token(tok_row_single_value_list, "(?) /* , ... */");
209
210	max_token_seen++;
211	tok_row_multiple_value= max_token_seen;
212	set_token(tok_row_multiple_value, "(...)");
213
214	max_token_seen++;
215	tok_row_multiple_value_list= max_token_seen;
216	set_token(tok_row_multiple_value_list, "(...) /* , ... */");
217
218	max_token_seen++;
219	tok_ident= max_token_seen;
220	set_token(tok_ident, "(tok_id)");
221
222	max_token_seen++;
223	tok_unused= max_token_seen;
224	set_token(tok_unused, "UNUSED");
225
226	/*
227	Fix whitespace for some special tokens.
228	*/
229
230	/*
231	The lexer parses "@@variable" as '@', '@', 'variable',
232	returning a token for '@' alone.
233
234	This is incorrect, '@' is not really a token,
235	because the syntax "@ @ variable" (with spaces) is not accepted:
236	The lexer keeps some internal state after the '@' fake token.
237
238	To work around this, digest text are printed as "@@variable".
239	*/
240	compiled_token_array[(int) `'@'`].m_append_space= false;
241
242	/*
243	Define additional properties for tokens.
244
245	List all the token that are followed by an expression.
246	This is needed to differentiate unary from binary
247	'+' and '-' operators, because we want to:
248	- reduce <unary +> <NUM> to <?>,
249	- preserve <...> <binary +> <NUM> as is.
250	*/
251	set_start_expr_token(`'('`);
252	set_start_expr_token(`','`);
253	set_start_expr_token(EVERY_SYM);
254	set_start_expr_token(AT_SYM);
255	set_start_expr_token(STARTS_SYM);
256	set_start_expr_token(ENDS_SYM);
257	set_start_expr_token(DEFAULT);
258	set_start_expr_token(RETURN_SYM);
259	set_start_expr_token(IF_SYM);
260	set_start_expr_token(ELSEIF_SYM);
261	set_start_expr_token(CASE_SYM);
262	set_start_expr_token(WHEN_SYM);
263	set_start_expr_token(WHILE_SYM);
264	set_start_expr_token(UNTIL_SYM);
265	set_start_expr_token(SELECT_SYM);
266
267	set_start_expr_token(OR_SYM);
268	set_start_expr_token(OR2_SYM);
269	set_start_expr_token(XOR);
270	set_start_expr_token(AND_SYM);
271	set_start_expr_token(AND_AND_SYM);
272	set_start_expr_token(NOT_SYM);
273	set_start_expr_token(BETWEEN_SYM);
274	set_start_expr_token(LIKE);
275	set_start_expr_token(REGEXP);
276
277	set_start_expr_token(`'\|'`);
278	set_start_expr_token(`'&'`);
279	set_start_expr_token(SHIFT_LEFT);
280	set_start_expr_token(SHIFT_RIGHT);
281	set_start_expr_token(`'+'`);
282	set_start_expr_token(`'-'`);
283	set_start_expr_token(INTERVAL_SYM);
284	set_start_expr_token(`'*'`);
285	set_start_expr_token(`'/'`);
286	set_start_expr_token(`'%'`);
287	set_start_expr_token(DIV_SYM);
288	set_start_expr_token(MOD_SYM);
289	set_start_expr_token(`'^'`);
290	}
291
292	void print_tokens()
293	{
294	int tok;
295
296	printf("#ifdef LEX_TOKEN_WITH_DEFINITION\n");
297	printf("lex_token_string lex_token_array[]=\n");
298	printf("{\n");
299	printf("/* PART 1: character tokens. */\n");
300
301	for (tok= `0`; tok<`256`; tok++)
302	{
303	printf("/* %03d */ { \"\\x%02x\", 1, %s, %s},\n",
304	tok,
305	tok,
306	compiled_token_array[tok].m_append_space ? "true" : "false",
307	compiled_token_array[tok].m_start_expr ? "true" : "false");
308	}
309
310	printf("/* PART 2: named tokens. */\n");
311
312	for (tok= `256`; tok<= max_token_seen; tok++)
313	{
314	printf("/* %03d */ { \"%s\", %d, %s, %s},\n",
315	tok,
316	compiled_token_array[tok].m_token_string,
317	compiled_token_array[tok].m_token_length,
318	compiled_token_array[tok].m_append_space ? "true" : "false",
319	compiled_token_array[tok].m_start_expr ? "true" : "false");
320	}
321
322	printf("/* DUMMY */ { \"\", 0, false, false}\n");
323	printf("};\n");
324	printf("#endif /* LEX_TOKEN_WITH_DEFINITION */\n");
325
326	printf("/* DIGEST specific tokens. */\n");
327	printf("#define TOK_GENERIC_VALUE %d\n", tok_generic_value);
328	printf("#define TOK_GENERIC_VALUE_LIST %d\n", tok_generic_value_list);
329	printf("#define TOK_ROW_SINGLE_VALUE %d\n", tok_row_single_value);
330	printf("#define TOK_ROW_SINGLE_VALUE_LIST %d\n", tok_row_single_value_list);
331	printf("#define TOK_ROW_MULTIPLE_VALUE %d\n", tok_row_multiple_value);
332	printf("#define TOK_ROW_MULTIPLE_VALUE_LIST %d\n", tok_row_multiple_value_list);
333	printf("#define TOK_IDENT %d\n", tok_ident);
334	printf("#define TOK_UNUSED %d\n", tok_unused);
335	}
336
337	int main(int argc,char **argv)
338	{
339	puts("/*");
340	puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
341	puts("*/");
342
343	printf("/*\n");
344	printf(" This file is generated, do not edit.\n");
345	printf(" See file sql/gen_lex_token.cc.\n");
346	printf("*/\n");
347	printf("struct lex_token_string\n");
348	printf("{\n");
349	printf(" const char *m_token_string;\n");
350	printf(" int m_token_length;\n");
351	printf(" bool m_append_space;\n");
352	printf(" bool m_start_expr;\n");
353	printf("};\n");
354	printf("typedef struct lex_token_string lex_token_string;\n");
355
356	compute_tokens();
357	print_tokens();
358
359	return `0`;
360	}
361
362

Browse the source code of MariaDB/sql/gen_lex_token.cc