1/*
2 Copyright (c) 2000, 2012, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17/**
18 @file
19
20 @details
21@verbatim
22The idea of presented algorithm see in
23"The Art of Computer Programming" by Donald E. Knuth
24Volume 3 "Sorting and searching"
25(chapter 6.3 "Digital searching" - name and number of chapter
26 is back translation from Russian edition :))
27
28as illustration of data structures, imagine next table:
29
30static SYMBOL symbols[] = {
31 { "ADD", SYM(ADD),0,0},
32 { "AND", SYM(AND),0,0},
33 { "DAY", SYM(DAY_SYM),0,0},
34};
35
36for this structure, presented program generate next searching-structure:
37
38+-----------+-+-+-+
39| len |1|2|3|
40+-----------+-+-+-+
41|first_char |0|0|a|
42|last_char |0|0|d|
43|link |0|0|+|
44 |
45 V
46 +----------+-+-+-+--+
47 | 1 char|a|b|c|d |
48 +----------+-+-+-+--+
49 |first_char|b|0|0|0 |
50 |last_char |n|0|0|-1|
51 |link |+|0|0|+ |
52 | |
53 | V
54 | symbols[2] ( "DAY" )
55 V
56+----------+--+-+-+-+-+-+-+-+-+-+--+
57| 2 char|d |e|f|j|h|i|j|k|l|m|n |
58+----------+--+-+-+-+-+-+-+-+-+-+--+
59|first_char|0 |0|0|0|0|0|0|0|0|0|0 |
60|last_char |-1|0|0|0|0|0|0|0|0|0|-1|
61|link |+ |0|0|0|0|0|0|0|0|0|+ |
62 | |
63 V V
64 symbols[0] ( "ADD" ) symbols[1] ( "AND" )
65
66for optimization, link is the 16-bit index in 'symbols' or 'sql_functions'
67or search-array..
68
69So, we can read full search-structure as 32-bit word
70@endverbatim
71
72@todo
73 use instead to_upper_lex, special array
74 (substitute chars) without skip codes..
75@todo
76 try use reverse order of comparing..
77
78*/
79
80#define NO_YACC_SYMBOLS
81#undef CHECK_UNLIKELY
82#include "mariadb.h"
83#include "mysql_version.h"
84#include "lex.h"
85#include <string.h>
86
87#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
88
89struct hash_lex_struct
90{
91 int first_char;
92 char last_char;
93 union{
94 hash_lex_struct *char_tails;
95 int iresult;
96 };
97 int ithis;
98};
99
100hash_lex_struct *get_hash_struct_by_len(hash_lex_struct **root_by_len,
101 int len, int *max_len)
102{
103 if (*max_len<len){
104 *root_by_len= (hash_lex_struct *)realloc((char*)*root_by_len,
105 sizeof(hash_lex_struct)*len);
106 hash_lex_struct *cur, *end= *root_by_len + len;
107 for (cur= *root_by_len + *max_len; cur<end; cur++)
108 cur->first_char= 0;
109 *max_len= len;
110 }
111 return (*root_by_len)+(len-1);
112}
113
114void insert_into_hash(hash_lex_struct *root, const char *name,
115 int len_from_begin, int index, int function)
116{
117 hash_lex_struct *end, *cur, *tails;
118
119 if (!root->first_char)
120 {
121 root->first_char= -1;
122 root->iresult= index;
123 return;
124 }
125
126 if (root->first_char == -1)
127 {
128 int index2= root->iresult;
129 const char *name2= (index2 < 0 ? sql_functions[-index2-1] :
130 symbols[index2]).name + len_from_begin;
131 root->first_char= (int) (uchar) name2[0];
132 root->last_char= (char) root->first_char;
133 tails= (hash_lex_struct*)malloc(sizeof(hash_lex_struct));
134 root->char_tails= tails;
135 tails->first_char= -1;
136 tails->iresult= index2;
137 }
138
139 size_t real_size= (root->last_char-root->first_char+1);
140
141 if (root->first_char>(*name))
142 {
143 size_t new_size= root->last_char-(*name)+1;
144 if (unlikely(new_size<real_size))
145 printf("error!!!!\n");
146 tails= root->char_tails;
147 tails= (hash_lex_struct*)realloc((char*)tails,
148 sizeof(hash_lex_struct)*new_size);
149 root->char_tails= tails;
150 memmove(tails+(new_size-real_size),tails,real_size*sizeof(hash_lex_struct));
151 end= tails + new_size - real_size;
152 for (cur= tails; cur<end; cur++)
153 cur->first_char= 0;
154 root->first_char= (int) (uchar) *name;
155 }
156
157 if (root->last_char<(*name))
158 {
159 size_t new_size= (*name)-root->first_char+1;
160 if (unlikely(new_size<real_size))
161 printf("error!!!!\n");
162 tails= root->char_tails;
163 tails= (hash_lex_struct*)realloc((char*)tails,
164 sizeof(hash_lex_struct)*new_size);
165 root->char_tails= tails;
166 end= tails + new_size;
167 for (cur= tails+real_size; cur<end; cur++)
168 cur->first_char= 0;
169 root->last_char= (*name);
170 }
171
172 insert_into_hash(root->char_tails+(*name)-root->first_char,
173 name+1,len_from_begin+1,index,function);
174}
175
176
177hash_lex_struct *root_by_len= 0;
178int max_len=0;
179
180hash_lex_struct *root_by_len2= 0;
181int max_len2=0;
182
183void insert_symbols()
184{
185 size_t i= 0;
186 SYMBOL *cur;
187 for (cur= symbols; i<array_elements(symbols); cur++, i++){
188 hash_lex_struct *root=
189 get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
190 insert_into_hash(root,cur->name,0,(uint) i,0);
191 }
192}
193
194void insert_sql_functions()
195{
196 int i= 0;
197 SYMBOL *cur;
198 for (cur= sql_functions; i < (int) array_elements(sql_functions); cur++, i++)
199 {
200 hash_lex_struct *root=
201 get_hash_struct_by_len(&root_by_len,cur->length,&max_len);
202 insert_into_hash(root,cur->name,0,-i-1,1);
203 }
204}
205
206void calc_length()
207{
208 SYMBOL *cur, *end= symbols + array_elements(symbols);
209 for (cur= symbols; cur < end; cur++)
210 cur->length=(uchar) strlen(cur->name);
211 end= sql_functions + array_elements(sql_functions);
212 for (cur= sql_functions; cur<end; cur++)
213 cur->length=(uchar) strlen(cur->name);
214}
215
216void generate_find_structs()
217{
218 root_by_len= 0;
219 max_len=0;
220
221 insert_symbols();
222
223 root_by_len2= root_by_len;
224 max_len2= max_len;
225
226 root_by_len= 0;
227 max_len= 0;
228
229 insert_symbols();
230 insert_sql_functions();
231}
232
233char *hash_map= 0;
234int size_hash_map= 0;
235
236void add_struct_to_map(hash_lex_struct *st)
237{
238 st->ithis= size_hash_map/4;
239 size_hash_map+= 4;
240 hash_map= (char*)realloc((char*)hash_map,size_hash_map);
241 hash_map[size_hash_map-4]= (char) (st->first_char == -1 ? 0 :
242 st->first_char);
243 hash_map[size_hash_map-3]= (char) (st->first_char == -1 ||
244 st->first_char == 0 ? 0 : st->last_char);
245 if (st->first_char == -1)
246 {
247 hash_map[size_hash_map-2]= ((unsigned int)(int16)st->iresult)&255;
248 hash_map[size_hash_map-1]= ((unsigned int)(int16)st->iresult)>>8;
249 }
250 else if (st->first_char == 0)
251 {
252 hash_map[size_hash_map-2]= ((unsigned int)(int16)array_elements(symbols))&255;
253 hash_map[size_hash_map-1]= ((unsigned int)(int16)array_elements(symbols))>>8;
254 }
255}
256
257
258void add_structs_to_map(hash_lex_struct *st, int len)
259{
260 hash_lex_struct *cur, *end= st+len;
261 for (cur= st; cur<end; cur++)
262 add_struct_to_map(cur);
263 for (cur= st; cur<end; cur++)
264 {
265 if (cur->first_char && cur->first_char != -1)
266 add_structs_to_map(cur->char_tails,cur->last_char-cur->first_char+1);
267 }
268}
269
270void set_links(hash_lex_struct *st, int len)
271{
272 hash_lex_struct *cur, *end= st+len;
273 for (cur= st; cur<end; cur++)
274 {
275 if (cur->first_char != 0 && cur->first_char != -1)
276 {
277 int ilink= cur->char_tails->ithis;
278 hash_map[cur->ithis*4+2]= ilink%256;
279 hash_map[cur->ithis*4+3]= ilink/256;
280 set_links(cur->char_tails,cur->last_char-cur->first_char+1);
281 }
282 }
283}
284
285
286void print_hash_map(const char *name)
287{
288 char *cur;
289 int i;
290
291 printf("static uchar %s[%d]= {\n",name,size_hash_map);
292 for (i=0, cur= hash_map; i<size_hash_map; i++, cur++)
293 {
294 switch(i%4){
295 case 0: case 1:
296 if (!*cur)
297 printf("0, ");
298 else
299 printf("\'%c\', ",*cur);
300 break;
301 case 2: printf("%u, ",(uint)(uchar)*cur); break;
302 case 3: printf("%u,\n",(uint)(uchar)*cur); break;
303 }
304 }
305 printf("};\n");
306}
307
308
309void print_find_structs()
310{
311 add_structs_to_map(root_by_len,max_len);
312 set_links(root_by_len,max_len);
313 print_hash_map("sql_functions_map");
314 free(hash_map);
315
316 hash_map= 0;
317 size_hash_map= 0;
318
319 printf("\n");
320
321 add_structs_to_map(root_by_len2,max_len2);
322 set_links(root_by_len2,max_len2);
323 print_hash_map("symbols_map");
324 free(hash_map);
325}
326
327
328int check_dup_symbols(SYMBOL *s1, SYMBOL *s2)
329{
330 if (s1->length!=s2->length || strncmp(s1->name,s2->name,s1->length))
331 return 0;
332
333 const char *err_tmpl= "\ngen_lex_hash fatal error : \
334Unfortunately gen_lex_hash can not generate a hash,\n since \
335your lex.h has duplicate definition for a symbol \"%s\"\n\n";
336 printf (err_tmpl,s1->name);
337 fprintf (stderr,err_tmpl,s1->name);
338
339 return 1;
340}
341
342
343int check_duplicates()
344{
345 SYMBOL *cur1, *cur2, *s_end, *f_end;
346
347 s_end= symbols + array_elements(symbols);
348 f_end= sql_functions + array_elements(sql_functions);
349
350 for (cur1= symbols; cur1<s_end; cur1++)
351 {
352 for (cur2= cur1+1; cur2<s_end; cur2++)
353 {
354 if (check_dup_symbols(cur1,cur2))
355 return 1;
356 }
357 for (cur2= sql_functions; cur2<f_end; cur2++)
358 {
359 if (check_dup_symbols(cur1,cur2))
360 return 1;
361 }
362 }
363
364 for (cur1= sql_functions; cur1<f_end; cur1++)
365 {
366 for (cur2= cur1+1; cur2< f_end; cur2++)
367 {
368 if (check_dup_symbols(cur1,cur2))
369 return 1;
370 }
371 }
372 return 0;
373}
374
375
376int main(int argc,char **argv)
377{
378
379
380 /* Broken up to indicate that it's not advice to you, gentle reader. */
381 printf("/*\n\n Do " "not " "edit " "this " "file " "directly!\n\n*/\n");
382
383 puts("/*");
384 puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000"));
385 puts("*/");
386
387 /* Broken up to indicate that it's not advice to you, gentle reader. */
388 printf("/* Do " "not " "edit " "this " "file! This is generated by "
389 "gen_lex_hash.cc\nthat seeks for a perfect hash function */\n\n");
390 printf("#include \"lex.h\"\n\n");
391
392 calc_length();
393
394 if (check_duplicates())
395 exit(1);
396
397 generate_find_structs();
398 print_find_structs();
399
400 printf("\nstatic unsigned int sql_functions_max_len=%d;\n", max_len);
401 printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2);
402
403 printf("\
404static SYMBOL *get_hash_symbol(const char *s,\n\
405 unsigned int len,bool function)\n\
406{\n\
407 uchar *hash_map;\n\
408 const char *cur_str= s;\n\
409\n\
410 if (len == 0) {\n\
411 DBUG_PRINT(\"warning\", (\"get_hash_symbol() received a request for a zero-length symbol, which is probably a mistake.\"));\
412 return(NULL);\n\
413 }\n"
414);
415
416 printf("\
417 if (function){\n\
418 if (len>sql_functions_max_len) return 0;\n\
419 hash_map= sql_functions_map;\n\
420 uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
421\n\
422 for (;;){\n\
423 uchar first_char= (uchar)cur_struct;\n\
424\n\
425 if (first_char == 0)\n\
426 {\n\
427 int16 ires= (int16)(cur_struct>>16);\n\
428 if (ires==array_elements(symbols)) return 0;\n\
429 SYMBOL *res;\n\
430 if (ires>=0) \n\
431 res= symbols+ires;\n\
432 else\n\
433 res= sql_functions-ires-1;\n\
434 uint count= (uint) (cur_str - s);\n\
435 return lex_casecmp(cur_str,res->name+count,len-count) ? 0 : res;\n\
436 }\n\
437\n\
438 uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
439 if (cur_char<first_char) return 0;\n\
440 cur_struct>>=8;\n\
441 if (cur_char>(uchar)cur_struct) return 0;\n\
442\n\
443 cur_struct>>=8;\n\
444 cur_struct= uint4korr(hash_map+\n\
445 (((uint16)cur_struct + cur_char - first_char)*4));\n\
446 cur_str++;\n\
447 }\n"
448);
449
450 printf("\
451 }else{\n\
452 if (len>symbols_max_len) return 0;\n\
453 hash_map= symbols_map;\n\
454 uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\
455\n\
456 for (;;){\n\
457 uchar first_char= (uchar)cur_struct;\n\
458\n\
459 if (first_char==0) {\n\
460 int16 ires= (int16)(cur_struct>>16);\n\
461 if (ires==array_elements(symbols)) return 0;\n\
462 SYMBOL *res= symbols+ires;\n\
463 uint count= (uint) (cur_str - s);\n\
464 return lex_casecmp(cur_str,res->name+count,len-count)!=0 ? 0 : res;\n\
465 }\n\
466\n\
467 uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\
468 if (cur_char<first_char) return 0;\n\
469 cur_struct>>=8;\n\
470 if (cur_char>(uchar)cur_struct) return 0;\n\
471\n\
472 cur_struct>>=8;\n\
473 cur_struct= uint4korr(hash_map+\n\
474 (((uint16)cur_struct + cur_char - first_char)*4));\n\
475 cur_str++;\n\
476 }\n\
477 }\n\
478}\n"
479);
480 exit(0);
481}
482
483