1 | /* |
2 | ** Compile and run this standalone program in order to generate code that |
3 | ** implements a function that will translate alphabetic identifiers into |
4 | ** parser token codes. |
5 | */ |
6 | #include <stdio.h> |
7 | #include <string.h> |
8 | #include <stdlib.h> |
9 | #include <assert.h> |
10 | |
11 | /* |
12 | ** A header comment placed at the beginning of generated code. |
13 | */ |
14 | static const char zHdr[] = |
15 | "/***** This file contains automatically generated code ******\n" |
16 | "**\n" |
17 | "** The code in this file has been automatically generated by\n" |
18 | "**\n" |
19 | "** sqlite/tool/mkkeywordhash.c\n" |
20 | "**\n" |
21 | "** The code in this file implements a function that determines whether\n" |
22 | "** or not a given identifier is really an SQL keyword. The same thing\n" |
23 | "** might be implemented more directly using a hand-written hash table.\n" |
24 | "** But by using this automatically generated code, the size of the code\n" |
25 | "** is substantially reduced. This is important for embedded applications\n" |
26 | "** on platforms with limited memory.\n" |
27 | "*/\n" |
28 | ; |
29 | |
30 | /* |
31 | ** All the keywords of the SQL language are stored in a hash |
32 | ** table composed of instances of the following structure. |
33 | */ |
34 | typedef struct Keyword Keyword; |
35 | struct Keyword { |
36 | char *zName; /* The keyword name */ |
37 | char *zTokenType; /* Token value for this keyword */ |
38 | int mask; /* Code this keyword if non-zero */ |
39 | int priority; /* Put higher priorities earlier in the hash chain */ |
40 | int id; /* Unique ID for this record */ |
41 | int hash; /* Hash on the keyword */ |
42 | int offset; /* Offset to start of name string */ |
43 | int len; /* Length of this keyword, not counting final \000 */ |
44 | int prefix; /* Number of characters in prefix */ |
45 | int longestSuffix; /* Longest suffix that is a prefix on another word */ |
46 | int iNext; /* Index in aKeywordTable[] of next with same hash */ |
47 | int substrId; /* Id to another keyword this keyword is embedded in */ |
48 | int substrOffset; /* Offset into substrId for start of this keyword */ |
49 | char zOrigName[20]; /* Original keyword name before processing */ |
50 | }; |
51 | |
52 | /* |
53 | ** Define masks used to determine which keywords are allowed |
54 | */ |
55 | #if defined(SQLITE_OMIT_ALTERTABLE) || defined(SQLITE_OMIT_VIRTUALTABLE) |
56 | # define ALTER 0 |
57 | #else |
58 | # define ALTER 0x00000001 |
59 | #endif |
60 | #define ALWAYS 0x00000002 |
61 | #ifdef SQLITE_OMIT_ANALYZE |
62 | # define ANALYZE 0 |
63 | #else |
64 | # define ANALYZE 0x00000004 |
65 | #endif |
66 | #ifdef SQLITE_OMIT_ATTACH |
67 | # define ATTACH 0 |
68 | #else |
69 | # define ATTACH 0x00000008 |
70 | #endif |
71 | #ifdef SQLITE_OMIT_AUTOINCREMENT |
72 | # define AUTOINCR 0 |
73 | #else |
74 | # define AUTOINCR 0x00000010 |
75 | #endif |
76 | #ifdef SQLITE_OMIT_CAST |
77 | # define CAST 0 |
78 | #else |
79 | # define CAST 0x00000020 |
80 | #endif |
81 | #ifdef SQLITE_OMIT_COMPOUND_SELECT |
82 | # define COMPOUND 0 |
83 | #else |
84 | # define COMPOUND 0x00000040 |
85 | #endif |
86 | #ifdef SQLITE_OMIT_CONFLICT_CLAUSE |
87 | # define CONFLICT 0 |
88 | #else |
89 | # define CONFLICT 0x00000080 |
90 | #endif |
91 | #ifdef SQLITE_OMIT_EXPLAIN |
92 | # define EXPLAIN 0 |
93 | #else |
94 | # define EXPLAIN 0x00000100 |
95 | #endif |
96 | #ifdef SQLITE_OMIT_FOREIGN_KEY |
97 | # define FKEY 0 |
98 | #else |
99 | # define FKEY 0x00000200 |
100 | #endif |
101 | #ifdef SQLITE_OMIT_PRAGMA |
102 | # define PRAGMA 0 |
103 | #else |
104 | # define PRAGMA 0x00000400 |
105 | #endif |
106 | #ifdef SQLITE_OMIT_REINDEX |
107 | # define REINDEX 0 |
108 | #else |
109 | # define REINDEX 0x00000800 |
110 | #endif |
111 | #ifdef SQLITE_OMIT_SUBQUERY |
112 | # define SUBQUERY 0 |
113 | #else |
114 | # define SUBQUERY 0x00001000 |
115 | #endif |
116 | #ifdef SQLITE_OMIT_TRIGGER |
117 | # define TRIGGER 0 |
118 | #else |
119 | # define TRIGGER 0x00002000 |
120 | #endif |
121 | #if defined(SQLITE_OMIT_AUTOVACUUM) && \ |
122 | (defined(SQLITE_OMIT_VACUUM) || defined(SQLITE_OMIT_ATTACH)) |
123 | # define VACUUM 0 |
124 | #else |
125 | # define VACUUM 0x00004000 |
126 | #endif |
127 | #ifdef SQLITE_OMIT_VIEW |
128 | # define VIEW 0 |
129 | #else |
130 | # define VIEW 0x00008000 |
131 | #endif |
132 | #ifdef SQLITE_OMIT_VIRTUALTABLE |
133 | # define VTAB 0 |
134 | #else |
135 | # define VTAB 0x00010000 |
136 | #endif |
137 | #ifdef SQLITE_OMIT_AUTOVACUUM |
138 | # define AUTOVACUUM 0 |
139 | #else |
140 | # define AUTOVACUUM 0x00020000 |
141 | #endif |
142 | #ifdef SQLITE_OMIT_CTE |
143 | # define CTE 0 |
144 | #else |
145 | # define CTE 0x00040000 |
146 | #endif |
147 | #ifdef SQLITE_OMIT_UPSERT |
148 | # define UPSERT 0 |
149 | #else |
150 | # define UPSERT 0x00080000 |
151 | #endif |
152 | #ifdef SQLITE_OMIT_WINDOWFUNC |
153 | # define WINDOWFUNC 0 |
154 | #else |
155 | # define WINDOWFUNC 0x00100000 |
156 | #endif |
157 | #ifdef SQLITE_OMIT_GENERATED_COLUMNS |
158 | # define GENCOL 0 |
159 | #else |
160 | # define GENCOL 0x00200000 |
161 | #endif |
162 | #ifdef SQLITE_OMIT_RETURNING |
163 | # define RETURNING 0 |
164 | #else |
165 | # define RETURNING 0x00400000 |
166 | #endif |
167 | |
168 | |
169 | /* |
170 | ** These are the keywords |
171 | */ |
172 | static Keyword aKeywordTable[] = { |
173 | { "ABORT" , "TK_ABORT" , CONFLICT|TRIGGER, 0 }, |
174 | { "ACTION" , "TK_ACTION" , FKEY, 0 }, |
175 | { "ADD" , "TK_ADD" , ALTER, 1 }, |
176 | { "AFTER" , "TK_AFTER" , TRIGGER, 0 }, |
177 | { "ALL" , "TK_ALL" , ALWAYS, 0 }, |
178 | { "ALTER" , "TK_ALTER" , ALTER, 0 }, |
179 | { "ALWAYS" , "TK_ALWAYS" , GENCOL, 0 }, |
180 | { "ANALYZE" , "TK_ANALYZE" , ANALYZE, 0 }, |
181 | { "AND" , "TK_AND" , ALWAYS, 10 }, |
182 | { "AS" , "TK_AS" , ALWAYS, 10 }, |
183 | { "ASC" , "TK_ASC" , ALWAYS, 0 }, |
184 | { "ATTACH" , "TK_ATTACH" , ATTACH, 1 }, |
185 | { "AUTOINCREMENT" , "TK_AUTOINCR" , AUTOINCR, 0 }, |
186 | { "BEFORE" , "TK_BEFORE" , TRIGGER, 0 }, |
187 | { "BEGIN" , "TK_BEGIN" , ALWAYS, 1 }, |
188 | { "BETWEEN" , "TK_BETWEEN" , ALWAYS, 5 }, |
189 | { "BY" , "TK_BY" , ALWAYS, 10 }, |
190 | { "CASCADE" , "TK_CASCADE" , FKEY, 1 }, |
191 | { "CASE" , "TK_CASE" , ALWAYS, 5 }, |
192 | { "CAST" , "TK_CAST" , CAST, 5 }, |
193 | { "CHECK" , "TK_CHECK" , ALWAYS, 1 }, |
194 | { "COLLATE" , "TK_COLLATE" , ALWAYS, 1 }, |
195 | { "COLUMN" , "TK_COLUMNKW" , ALTER, 1 }, |
196 | { "COMMIT" , "TK_COMMIT" , ALWAYS, 1 }, |
197 | { "CONFLICT" , "TK_CONFLICT" , CONFLICT, 0 }, |
198 | { "CONSTRAINT" , "TK_CONSTRAINT" , ALWAYS, 1 }, |
199 | { "CREATE" , "TK_CREATE" , ALWAYS, 2 }, |
200 | { "CROSS" , "TK_JOIN_KW" , ALWAYS, 3 }, |
201 | { "CURRENT" , "TK_CURRENT" , WINDOWFUNC, 1 }, |
202 | { "CURRENT_DATE" , "TK_CTIME_KW" , ALWAYS, 1 }, |
203 | { "CURRENT_TIME" , "TK_CTIME_KW" , ALWAYS, 1 }, |
204 | { "CURRENT_TIMESTAMP" ,"TK_CTIME_KW" , ALWAYS, 1 }, |
205 | { "DATABASE" , "TK_DATABASE" , ATTACH, 0 }, |
206 | { "DEFAULT" , "TK_DEFAULT" , ALWAYS, 1 }, |
207 | { "DEFERRED" , "TK_DEFERRED" , ALWAYS, 1 }, |
208 | { "DEFERRABLE" , "TK_DEFERRABLE" , FKEY, 1 }, |
209 | { "DELETE" , "TK_DELETE" , ALWAYS, 10 }, |
210 | { "DESC" , "TK_DESC" , ALWAYS, 3 }, |
211 | { "DETACH" , "TK_DETACH" , ATTACH, 0 }, |
212 | { "DISTINCT" , "TK_DISTINCT" , ALWAYS, 5 }, |
213 | { "DO" , "TK_DO" , UPSERT, 2 }, |
214 | { "DROP" , "TK_DROP" , ALWAYS, 1 }, |
215 | { "END" , "TK_END" , ALWAYS, 1 }, |
216 | { "EACH" , "TK_EACH" , TRIGGER, 1 }, |
217 | { "ELSE" , "TK_ELSE" , ALWAYS, 2 }, |
218 | { "ESCAPE" , "TK_ESCAPE" , ALWAYS, 4 }, |
219 | { "EXCEPT" , "TK_EXCEPT" , COMPOUND, 4 }, |
220 | { "EXCLUSIVE" , "TK_EXCLUSIVE" , ALWAYS, 1 }, |
221 | { "EXCLUDE" , "TK_EXCLUDE" , WINDOWFUNC, 1 }, |
222 | { "EXISTS" , "TK_EXISTS" , ALWAYS, 4 }, |
223 | { "EXPLAIN" , "TK_EXPLAIN" , EXPLAIN, 1 }, |
224 | { "FAIL" , "TK_FAIL" , CONFLICT|TRIGGER, 1 }, |
225 | { "FILTER" , "TK_FILTER" , WINDOWFUNC, 4 }, |
226 | { "FIRST" , "TK_FIRST" , ALWAYS, 4 }, |
227 | { "FOLLOWING" , "TK_FOLLOWING" , WINDOWFUNC, 4 }, |
228 | { "FOR" , "TK_FOR" , TRIGGER, 2 }, |
229 | { "FOREIGN" , "TK_FOREIGN" , FKEY, 1 }, |
230 | { "FROM" , "TK_FROM" , ALWAYS, 10 }, |
231 | { "FULL" , "TK_JOIN_KW" , ALWAYS, 3 }, |
232 | { "GENERATED" , "TK_GENERATED" , ALWAYS, 1 }, |
233 | { "GLOB" , "TK_LIKE_KW" , ALWAYS, 3 }, |
234 | { "GROUP" , "TK_GROUP" , ALWAYS, 5 }, |
235 | { "GROUPS" , "TK_GROUPS" , WINDOWFUNC, 2 }, |
236 | { "HAVING" , "TK_HAVING" , ALWAYS, 5 }, |
237 | { "IF" , "TK_IF" , ALWAYS, 2 }, |
238 | { "IGNORE" , "TK_IGNORE" , CONFLICT|TRIGGER, 1 }, |
239 | { "IMMEDIATE" , "TK_IMMEDIATE" , ALWAYS, 1 }, |
240 | { "IN" , "TK_IN" , ALWAYS, 10 }, |
241 | { "INDEX" , "TK_INDEX" , ALWAYS, 1 }, |
242 | { "INDEXED" , "TK_INDEXED" , ALWAYS, 0 }, |
243 | { "INITIALLY" , "TK_INITIALLY" , FKEY, 1 }, |
244 | { "INNER" , "TK_JOIN_KW" , ALWAYS, 1 }, |
245 | { "INSERT" , "TK_INSERT" , ALWAYS, 10 }, |
246 | { "INSTEAD" , "TK_INSTEAD" , TRIGGER, 1 }, |
247 | { "INTERSECT" , "TK_INTERSECT" , COMPOUND, 5 }, |
248 | { "INTO" , "TK_INTO" , ALWAYS, 10 }, |
249 | { "IS" , "TK_IS" , ALWAYS, 5 }, |
250 | { "ISNULL" , "TK_ISNULL" , ALWAYS, 5 }, |
251 | { "JOIN" , "TK_JOIN" , ALWAYS, 5 }, |
252 | { "KEY" , "TK_KEY" , ALWAYS, 1 }, |
253 | { "LAST" , "TK_LAST" , ALWAYS, 4 }, |
254 | { "LEFT" , "TK_JOIN_KW" , ALWAYS, 5 }, |
255 | { "LIKE" , "TK_LIKE_KW" , ALWAYS, 5 }, |
256 | { "LIMIT" , "TK_LIMIT" , ALWAYS, 3 }, |
257 | { "MATCH" , "TK_MATCH" , ALWAYS, 2 }, |
258 | { "MATERIALIZED" , "TK_MATERIALIZED" , CTE, 12 }, |
259 | { "NATURAL" , "TK_JOIN_KW" , ALWAYS, 3 }, |
260 | { "NO" , "TK_NO" , FKEY|WINDOWFUNC, 2 }, |
261 | { "NOT" , "TK_NOT" , ALWAYS, 10 }, |
262 | { "NOTHING" , "TK_NOTHING" , UPSERT, 1 }, |
263 | { "NOTNULL" , "TK_NOTNULL" , ALWAYS, 3 }, |
264 | { "NULL" , "TK_NULL" , ALWAYS, 10 }, |
265 | { "NULLS" , "TK_NULLS" , ALWAYS, 3 }, |
266 | { "OF" , "TK_OF" , ALWAYS, 3 }, |
267 | { "OFFSET" , "TK_OFFSET" , ALWAYS, 1 }, |
268 | { "ON" , "TK_ON" , ALWAYS, 1 }, |
269 | { "OR" , "TK_OR" , ALWAYS, 9 }, |
270 | { "ORDER" , "TK_ORDER" , ALWAYS, 10 }, |
271 | { "OTHERS" , "TK_OTHERS" , WINDOWFUNC, 3 }, |
272 | { "OUTER" , "TK_JOIN_KW" , ALWAYS, 5 }, |
273 | { "OVER" , "TK_OVER" , WINDOWFUNC, 3 }, |
274 | { "PARTITION" , "TK_PARTITION" , WINDOWFUNC, 3 }, |
275 | { "PLAN" , "TK_PLAN" , EXPLAIN, 0 }, |
276 | { "PRAGMA" , "TK_PRAGMA" , PRAGMA, 0 }, |
277 | { "PRECEDING" , "TK_PRECEDING" , WINDOWFUNC, 3 }, |
278 | { "PRIMARY" , "TK_PRIMARY" , ALWAYS, 1 }, |
279 | { "QUERY" , "TK_QUERY" , EXPLAIN, 0 }, |
280 | { "RAISE" , "TK_RAISE" , TRIGGER, 1 }, |
281 | { "RANGE" , "TK_RANGE" , WINDOWFUNC, 3 }, |
282 | { "RECURSIVE" , "TK_RECURSIVE" , CTE, 3 }, |
283 | { "REFERENCES" , "TK_REFERENCES" , FKEY, 1 }, |
284 | { "REGEXP" , "TK_LIKE_KW" , ALWAYS, 3 }, |
285 | { "REINDEX" , "TK_REINDEX" , REINDEX, 1 }, |
286 | { "RELEASE" , "TK_RELEASE" , ALWAYS, 1 }, |
287 | { "RENAME" , "TK_RENAME" , ALTER, 1 }, |
288 | { "REPLACE" , "TK_REPLACE" , CONFLICT, 10 }, |
289 | { "RESTRICT" , "TK_RESTRICT" , FKEY, 1 }, |
290 | { "RETURNING" , "TK_RETURNING" , RETURNING, 10 }, |
291 | { "RIGHT" , "TK_JOIN_KW" , ALWAYS, 0 }, |
292 | { "ROLLBACK" , "TK_ROLLBACK" , ALWAYS, 1 }, |
293 | { "ROW" , "TK_ROW" , TRIGGER, 1 }, |
294 | { "ROWS" , "TK_ROWS" , ALWAYS, 1 }, |
295 | { "SAVEPOINT" , "TK_SAVEPOINT" , ALWAYS, 1 }, |
296 | { "SELECT" , "TK_SELECT" , ALWAYS, 10 }, |
297 | { "SET" , "TK_SET" , ALWAYS, 10 }, |
298 | { "TABLE" , "TK_TABLE" , ALWAYS, 1 }, |
299 | { "TEMP" , "TK_TEMP" , ALWAYS, 1 }, |
300 | { "TEMPORARY" , "TK_TEMP" , ALWAYS, 1 }, |
301 | { "THEN" , "TK_THEN" , ALWAYS, 3 }, |
302 | { "TIES" , "TK_TIES" , WINDOWFUNC, 3 }, |
303 | { "TO" , "TK_TO" , ALWAYS, 3 }, |
304 | { "TRANSACTION" , "TK_TRANSACTION" , ALWAYS, 1 }, |
305 | { "TRIGGER" , "TK_TRIGGER" , TRIGGER, 1 }, |
306 | { "UNBOUNDED" , "TK_UNBOUNDED" , WINDOWFUNC, 3 }, |
307 | { "UNION" , "TK_UNION" , COMPOUND, 3 }, |
308 | { "UNIQUE" , "TK_UNIQUE" , ALWAYS, 1 }, |
309 | { "UPDATE" , "TK_UPDATE" , ALWAYS, 10 }, |
310 | { "USING" , "TK_USING" , ALWAYS, 8 }, |
311 | { "VACUUM" , "TK_VACUUM" , VACUUM, 1 }, |
312 | { "VALUES" , "TK_VALUES" , ALWAYS, 10 }, |
313 | { "VIEW" , "TK_VIEW" , VIEW, 1 }, |
314 | { "VIRTUAL" , "TK_VIRTUAL" , VTAB, 1 }, |
315 | { "WHEN" , "TK_WHEN" , ALWAYS, 1 }, |
316 | { "WHERE" , "TK_WHERE" , ALWAYS, 10 }, |
317 | { "WINDOW" , "TK_WINDOW" , WINDOWFUNC, 3 }, |
318 | { "WITH" , "TK_WITH" , CTE, 4 }, |
319 | { "WITHOUT" , "TK_WITHOUT" , ALWAYS, 1 }, |
320 | }; |
321 | |
322 | /* Number of keywords */ |
323 | static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); |
324 | |
325 | /* Map all alphabetic characters into lower-case for hashing. This is |
326 | ** only valid for alphabetics. In particular it does not work for '_' |
327 | ** and so the hash cannot be on a keyword position that might be an '_'. |
328 | */ |
329 | #define charMap(X) (0x20|(X)) |
330 | |
331 | /* |
332 | ** Comparision function for two Keyword records |
333 | */ |
334 | static int keywordCompare1(const void *a, const void *b){ |
335 | const Keyword *pA = (Keyword*)a; |
336 | const Keyword *pB = (Keyword*)b; |
337 | int n = pA->len - pB->len; |
338 | if( n==0 ){ |
339 | n = strcmp(pA->zName, pB->zName); |
340 | } |
341 | assert( n!=0 ); |
342 | return n; |
343 | } |
344 | static int keywordCompare2(const void *a, const void *b){ |
345 | const Keyword *pA = (Keyword*)a; |
346 | const Keyword *pB = (Keyword*)b; |
347 | int n = pB->longestSuffix - pA->longestSuffix; |
348 | if( n==0 ){ |
349 | n = strcmp(pA->zName, pB->zName); |
350 | } |
351 | assert( n!=0 ); |
352 | return n; |
353 | } |
354 | static int keywordCompare3(const void *a, const void *b){ |
355 | const Keyword *pA = (Keyword*)a; |
356 | const Keyword *pB = (Keyword*)b; |
357 | int n = pA->offset - pB->offset; |
358 | if( n==0 ) n = pB->id - pA->id; |
359 | assert( n!=0 ); |
360 | return n; |
361 | } |
362 | |
363 | /* |
364 | ** Return a KeywordTable entry with the given id |
365 | */ |
366 | static Keyword *findById(int id){ |
367 | int i; |
368 | for(i=0; i<nKeyword; i++){ |
369 | if( aKeywordTable[i].id==id ) break; |
370 | } |
371 | return &aKeywordTable[i]; |
372 | } |
373 | |
374 | /* |
375 | ** If aKeyword[*pFrom-1].iNext has a higher priority that aKeyword[*pFrom-1] |
376 | ** itself, then swap them. |
377 | */ |
378 | static void reorder(int *pFrom){ |
379 | int i = *pFrom - 1; |
380 | int j; |
381 | if( i<0 ) return; |
382 | j = aKeywordTable[i].iNext; |
383 | if( j==0 ) return; |
384 | j--; |
385 | if( aKeywordTable[i].priority >= aKeywordTable[j].priority ) return; |
386 | aKeywordTable[i].iNext = aKeywordTable[j].iNext; |
387 | aKeywordTable[j].iNext = i+1; |
388 | *pFrom = j+1; |
389 | reorder(&aKeywordTable[i].iNext); |
390 | } |
391 | |
392 | /* Parameter to the hash function |
393 | */ |
394 | #define HASH_OP ^ |
395 | #define HASH_CC '^' |
396 | #define HASH_C0 4 |
397 | #define HASH_C1 3 |
398 | #define HASH_C2 1 |
399 | |
400 | /* |
401 | ** This routine does the work. The generated code is printed on standard |
402 | ** output. |
403 | */ |
404 | int main(int argc, char **argv){ |
405 | int i, j, k, h; |
406 | int bestSize, bestCount; |
407 | int count; |
408 | int nChar; |
409 | int totalLen = 0; |
410 | int aKWHash[1000]; /* 1000 is much bigger than nKeyword */ |
411 | char zKWText[2000]; |
412 | |
413 | /* Remove entries from the list of keywords that have mask==0 */ |
414 | for(i=j=0; i<nKeyword; i++){ |
415 | if( aKeywordTable[i].mask==0 ) continue; |
416 | if( j<i ){ |
417 | aKeywordTable[j] = aKeywordTable[i]; |
418 | } |
419 | j++; |
420 | } |
421 | nKeyword = j; |
422 | |
423 | /* Fill in the lengths of strings and hashes for all entries. */ |
424 | for(i=0; i<nKeyword; i++){ |
425 | Keyword *p = &aKeywordTable[i]; |
426 | p->len = (int)strlen(p->zName); |
427 | assert( p->len<sizeof(p->zOrigName) ); |
428 | memcpy(p->zOrigName, p->zName, p->len+1); |
429 | totalLen += p->len; |
430 | p->hash = (charMap(p->zName[0])*HASH_C0) HASH_OP |
431 | (charMap(p->zName[p->len-1])*HASH_C1) HASH_OP |
432 | (p->len*HASH_C2); |
433 | p->id = i+1; |
434 | } |
435 | |
436 | /* Sort the table from shortest to longest keyword */ |
437 | qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1); |
438 | |
439 | /* Look for short keywords embedded in longer keywords */ |
440 | for(i=nKeyword-2; i>=0; i--){ |
441 | Keyword *p = &aKeywordTable[i]; |
442 | for(j=nKeyword-1; j>i && p->substrId==0; j--){ |
443 | Keyword *pOther = &aKeywordTable[j]; |
444 | if( pOther->substrId ) continue; |
445 | if( pOther->len<=p->len ) continue; |
446 | for(k=0; k<=pOther->len-p->len; k++){ |
447 | if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){ |
448 | p->substrId = pOther->id; |
449 | p->substrOffset = k; |
450 | break; |
451 | } |
452 | } |
453 | } |
454 | } |
455 | |
456 | /* Compute the longestSuffix value for every word */ |
457 | for(i=0; i<nKeyword; i++){ |
458 | Keyword *p = &aKeywordTable[i]; |
459 | if( p->substrId ) continue; |
460 | for(j=0; j<nKeyword; j++){ |
461 | Keyword *pOther; |
462 | if( j==i ) continue; |
463 | pOther = &aKeywordTable[j]; |
464 | if( pOther->substrId ) continue; |
465 | for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){ |
466 | if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ |
467 | p->longestSuffix = k; |
468 | } |
469 | } |
470 | } |
471 | } |
472 | |
473 | /* Sort the table into reverse order by length */ |
474 | qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2); |
475 | |
476 | /* Fill in the offset for all entries */ |
477 | nChar = 0; |
478 | for(i=0; i<nKeyword; i++){ |
479 | Keyword *p = &aKeywordTable[i]; |
480 | if( p->offset>0 || p->substrId ) continue; |
481 | p->offset = nChar; |
482 | nChar += p->len; |
483 | for(k=p->len-1; k>=1; k--){ |
484 | for(j=i+1; j<nKeyword; j++){ |
485 | Keyword *pOther = &aKeywordTable[j]; |
486 | if( pOther->offset>0 || pOther->substrId ) continue; |
487 | if( pOther->len<=k ) continue; |
488 | if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ |
489 | p = pOther; |
490 | p->offset = nChar - k; |
491 | nChar = p->offset + p->len; |
492 | p->zName += k; |
493 | p->len -= k; |
494 | p->prefix = k; |
495 | j = i; |
496 | k = p->len; |
497 | } |
498 | } |
499 | } |
500 | } |
501 | for(i=0; i<nKeyword; i++){ |
502 | Keyword *p = &aKeywordTable[i]; |
503 | if( p->substrId ){ |
504 | p->offset = findById(p->substrId)->offset + p->substrOffset; |
505 | } |
506 | } |
507 | |
508 | /* Sort the table by offset */ |
509 | qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3); |
510 | |
511 | /* Figure out how big to make the hash table in order to minimize the |
512 | ** number of collisions */ |
513 | bestSize = nKeyword; |
514 | bestCount = nKeyword*nKeyword; |
515 | for(i=nKeyword/2; i<=2*nKeyword; i++){ |
516 | if( i<=0 ) continue; |
517 | for(j=0; j<i; j++) aKWHash[j] = 0; |
518 | for(j=0; j<nKeyword; j++){ |
519 | h = aKeywordTable[j].hash % i; |
520 | aKWHash[h] *= 2; |
521 | aKWHash[h]++; |
522 | } |
523 | for(j=count=0; j<i; j++) count += aKWHash[j]; |
524 | if( count<bestCount ){ |
525 | bestCount = count; |
526 | bestSize = i; |
527 | } |
528 | } |
529 | |
530 | /* Compute the hash */ |
531 | for(i=0; i<bestSize; i++) aKWHash[i] = 0; |
532 | for(i=0; i<nKeyword; i++){ |
533 | h = aKeywordTable[i].hash % bestSize; |
534 | aKeywordTable[i].iNext = aKWHash[h]; |
535 | aKWHash[h] = i+1; |
536 | reorder(&aKWHash[h]); |
537 | } |
538 | |
539 | /* Begin generating code */ |
540 | printf("%s" , zHdr); |
541 | printf("/* Hash score: %d */\n" , bestCount); |
542 | printf("/* zKWText[] encodes %d bytes of keyword text in %d bytes */\n" , |
543 | totalLen + nKeyword, nChar+1 ); |
544 | for(i=j=k=0; i<nKeyword; i++){ |
545 | Keyword *p = &aKeywordTable[i]; |
546 | if( p->substrId ) continue; |
547 | memcpy(&zKWText[k], p->zName, p->len); |
548 | k += p->len; |
549 | if( j+p->len>70 ){ |
550 | printf("%*s */\n" , 74-j, "" ); |
551 | j = 0; |
552 | } |
553 | if( j==0 ){ |
554 | printf("/* " ); |
555 | j = 8; |
556 | } |
557 | printf("%s" , p->zName); |
558 | j += p->len; |
559 | } |
560 | if( j>0 ){ |
561 | printf("%*s */\n" , 74-j, "" ); |
562 | } |
563 | printf("static const char zKWText[%d] = {\n" , nChar); |
564 | zKWText[nChar] = 0; |
565 | for(i=j=0; i<k; i++){ |
566 | if( j==0 ){ |
567 | printf(" " ); |
568 | } |
569 | if( zKWText[i]==0 ){ |
570 | printf("0" ); |
571 | }else{ |
572 | printf("'%c'," , zKWText[i]); |
573 | } |
574 | j += 4; |
575 | if( j>68 ){ |
576 | printf("\n" ); |
577 | j = 0; |
578 | } |
579 | } |
580 | if( j>0 ) printf("\n" ); |
581 | printf("};\n" ); |
582 | |
583 | printf("/* aKWHash[i] is the hash value for the i-th keyword */\n" ); |
584 | printf("static const unsigned char aKWHash[%d] = {\n" , bestSize); |
585 | for(i=j=0; i<bestSize; i++){ |
586 | if( j==0 ) printf(" " ); |
587 | printf(" %3d," , aKWHash[i]); |
588 | j++; |
589 | if( j>12 ){ |
590 | printf("\n" ); |
591 | j = 0; |
592 | } |
593 | } |
594 | printf("%s};\n" , j==0 ? "" : "\n" ); |
595 | |
596 | printf("/* aKWNext[] forms the hash collision chain. If aKWHash[i]==0\n" ); |
597 | printf("** then the i-th keyword has no more hash collisions. Otherwise,\n" ); |
598 | printf("** the next keyword with the same hash is aKWHash[i]-1. */\n" ); |
599 | printf("static const unsigned char aKWNext[%d] = {\n" , nKeyword); |
600 | for(i=j=0; i<nKeyword; i++){ |
601 | if( j==0 ) printf(" " ); |
602 | printf(" %3d," , aKeywordTable[i].iNext); |
603 | j++; |
604 | if( j>12 ){ |
605 | printf("\n" ); |
606 | j = 0; |
607 | } |
608 | } |
609 | printf("%s};\n" , j==0 ? "" : "\n" ); |
610 | |
611 | printf("/* aKWLen[i] is the length (in bytes) of the i-th keyword */\n" ); |
612 | printf("static const unsigned char aKWLen[%d] = {\n" , nKeyword); |
613 | for(i=j=0; i<nKeyword; i++){ |
614 | if( j==0 ) printf(" " ); |
615 | printf(" %3d," , aKeywordTable[i].len+aKeywordTable[i].prefix); |
616 | j++; |
617 | if( j>12 ){ |
618 | printf("\n" ); |
619 | j = 0; |
620 | } |
621 | } |
622 | printf("%s};\n" , j==0 ? "" : "\n" ); |
623 | |
624 | printf("/* aKWOffset[i] is the index into zKWText[] of the start of\n" ); |
625 | printf("** the text for the i-th keyword. */\n" ); |
626 | printf("static const unsigned short int aKWOffset[%d] = {\n" , nKeyword); |
627 | for(i=j=0; i<nKeyword; i++){ |
628 | if( j==0 ) printf(" " ); |
629 | printf(" %3d," , aKeywordTable[i].offset); |
630 | j++; |
631 | if( j>12 ){ |
632 | printf("\n" ); |
633 | j = 0; |
634 | } |
635 | } |
636 | printf("%s};\n" , j==0 ? "" : "\n" ); |
637 | |
638 | printf("/* aKWCode[i] is the parser symbol code for the i-th keyword */\n" ); |
639 | printf("static const unsigned char aKWCode[%d] = {\n" , nKeyword); |
640 | for(i=j=0; i<nKeyword; i++){ |
641 | char *zToken = aKeywordTable[i].zTokenType; |
642 | if( j==0 ) printf(" " ); |
643 | printf("%s,%*s" , zToken, (int)(14-strlen(zToken)), "" ); |
644 | j++; |
645 | if( j>=5 ){ |
646 | printf("\n" ); |
647 | j = 0; |
648 | } |
649 | } |
650 | printf("%s};\n" , j==0 ? "" : "\n" ); |
651 | printf("/* Hash table decoded:\n" ); |
652 | for(i=0; i<bestSize; i++){ |
653 | j = aKWHash[i]; |
654 | printf("** %3d:" , i); |
655 | while( j ){ |
656 | printf(" %s" , aKeywordTable[j-1].zOrigName); |
657 | j = aKeywordTable[j-1].iNext; |
658 | } |
659 | printf("\n" ); |
660 | } |
661 | printf("*/\n" ); |
662 | printf("/* Check to see if z[0..n-1] is a keyword. If it is, write the\n" ); |
663 | printf("** parser symbol code for that keyword into *pType. Always\n" ); |
664 | printf("** return the integer n (the length of the token). */\n" ); |
665 | printf("static int keywordCode(const char *z, int n, int *pType){\n" ); |
666 | printf(" int i, j;\n" ); |
667 | printf(" const char *zKW;\n" ); |
668 | printf(" if( n>=2 ){\n" ); |
669 | printf(" i = ((charMap(z[0])*%d) %c" , HASH_C0, HASH_CC); |
670 | printf(" (charMap(z[n-1])*%d) %c" , HASH_C1, HASH_CC); |
671 | printf(" n*%d) %% %d;\n" , HASH_C2, bestSize); |
672 | printf(" for(i=((int)aKWHash[i])-1; i>=0; i=((int)aKWNext[i])-1){\n" ); |
673 | printf(" if( aKWLen[i]!=n ) continue;\n" ); |
674 | printf(" zKW = &zKWText[aKWOffset[i]];\n" ); |
675 | printf("#ifdef SQLITE_ASCII\n" ); |
676 | printf(" if( (z[0]&~0x20)!=zKW[0] ) continue;\n" ); |
677 | printf(" if( (z[1]&~0x20)!=zKW[1] ) continue;\n" ); |
678 | printf(" j = 2;\n" ); |
679 | printf(" while( j<n && (z[j]&~0x20)==zKW[j] ){ j++; }\n" ); |
680 | printf("#endif\n" ); |
681 | printf("#ifdef SQLITE_EBCDIC\n" ); |
682 | printf(" if( toupper(z[0])!=zKW[0] ) continue;\n" ); |
683 | printf(" if( toupper(z[1])!=zKW[1] ) continue;\n" ); |
684 | printf(" j = 2;\n" ); |
685 | printf(" while( j<n && toupper(z[j])==zKW[j] ){ j++; }\n" ); |
686 | printf("#endif\n" ); |
687 | printf(" if( j<n ) continue;\n" ); |
688 | for(i=0; i<nKeyword; i++){ |
689 | printf(" testcase( i==%d ); /* %s */\n" , |
690 | i, aKeywordTable[i].zOrigName); |
691 | } |
692 | printf(" *pType = aKWCode[i];\n" ); |
693 | printf(" break;\n" ); |
694 | printf(" }\n" ); |
695 | printf(" }\n" ); |
696 | printf(" return n;\n" ); |
697 | printf("}\n" ); |
698 | printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n" ); |
699 | printf(" int id = TK_ID;\n" ); |
700 | printf(" keywordCode((char*)z, n, &id);\n" ); |
701 | printf(" return id;\n" ); |
702 | printf("}\n" ); |
703 | printf("#define SQLITE_N_KEYWORD %d\n" , nKeyword); |
704 | printf("int sqlite3_keyword_name(int i,const char **pzName,int *pnName){\n" ); |
705 | printf(" if( i<0 || i>=SQLITE_N_KEYWORD ) return SQLITE_ERROR;\n" ); |
706 | printf(" *pzName = zKWText + aKWOffset[i];\n" ); |
707 | printf(" *pnName = aKWLen[i];\n" ); |
708 | printf(" return SQLITE_OK;\n" ); |
709 | printf("}\n" ); |
710 | printf("int sqlite3_keyword_count(void){ return SQLITE_N_KEYWORD; }\n" ); |
711 | printf("int sqlite3_keyword_check(const char *zName, int nName){\n" ); |
712 | printf(" return TK_ID!=sqlite3KeywordCode((const u8*)zName, nName);\n" ); |
713 | printf("}\n" ); |
714 | |
715 | return 0; |
716 | } |
717 | |