1 | /* |
2 | ** This file contains all sources (including headers) to the LEMON |
3 | ** LALR(1) parser generator. The sources have been combined into a |
4 | ** single file to make it easy to include LEMON in the source tree |
5 | ** and Makefile of another program. |
6 | ** |
7 | ** The author of this program disclaims copyright. |
8 | */ |
9 | #include <stdio.h> |
10 | #include <stdarg.h> |
11 | #include <string.h> |
12 | #include <ctype.h> |
13 | #include <stdlib.h> |
14 | #include <assert.h> |
15 | |
16 | #define ISSPACE(X) isspace((unsigned char)(X)) |
17 | #define ISDIGIT(X) isdigit((unsigned char)(X)) |
18 | #define ISALNUM(X) isalnum((unsigned char)(X)) |
19 | #define ISALPHA(X) isalpha((unsigned char)(X)) |
20 | #define ISUPPER(X) isupper((unsigned char)(X)) |
21 | #define ISLOWER(X) islower((unsigned char)(X)) |
22 | |
23 | |
24 | #ifndef __WIN32__ |
25 | # if defined(_WIN32) || defined(WIN32) |
26 | # define __WIN32__ |
27 | # endif |
28 | #endif |
29 | |
30 | #ifdef __WIN32__ |
31 | #ifdef __cplusplus |
32 | extern "C" { |
33 | #endif |
34 | extern int access(const char *path, int mode); |
35 | #ifdef __cplusplus |
36 | } |
37 | #endif |
38 | #else |
39 | #include <unistd.h> |
40 | #endif |
41 | |
42 | /* #define PRIVATE static */ |
43 | #define PRIVATE |
44 | |
45 | #ifdef TEST |
46 | #define MAXRHS 5 /* Set low to exercise exception code */ |
47 | #else |
48 | #define MAXRHS 1000 |
49 | #endif |
50 | |
51 | extern void memory_error(); |
52 | static int showPrecedenceConflict = 0; |
53 | static char *msort(char*,char**,int(*)(const char*,const char*)); |
54 | |
55 | /* |
56 | ** Compilers are getting increasingly pedantic about type conversions |
57 | ** as C evolves ever closer to Ada.... To work around the latest problems |
58 | ** we have to define the following variant of strlen(). |
59 | */ |
60 | #define lemonStrlen(X) ((int)strlen(X)) |
61 | |
62 | /* |
63 | ** Compilers are starting to complain about the use of sprintf() and strcpy(), |
64 | ** saying they are unsafe. So we define our own versions of those routines too. |
65 | ** |
66 | ** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and |
67 | ** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). |
68 | ** The third is a helper routine for vsnprintf() that adds texts to the end of a |
69 | ** buffer, making sure the buffer is always zero-terminated. |
70 | ** |
71 | ** The string formatter is a minimal subset of stdlib sprintf() supporting only |
72 | ** a few simply conversions: |
73 | ** |
74 | ** %d |
75 | ** %s |
76 | ** %.*s |
77 | ** |
78 | */ |
79 | static void lemon_addtext( |
80 | char *zBuf, /* The buffer to which text is added */ |
81 | int *pnUsed, /* Slots of the buffer used so far */ |
82 | const char *zIn, /* Text to add */ |
83 | int nIn, /* Bytes of text to add. -1 to use strlen() */ |
84 | int iWidth /* Field width. Negative to left justify */ |
85 | ){ |
86 | if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} |
87 | while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } |
88 | if( nIn==0 ) return; |
89 | memcpy(&zBuf[*pnUsed], zIn, nIn); |
90 | *pnUsed += nIn; |
91 | while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } |
92 | zBuf[*pnUsed] = 0; |
93 | } |
94 | static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ |
95 | int i, j, k, c; |
96 | int nUsed = 0; |
97 | const char *z; |
98 | char zTemp[50]; |
99 | str[0] = 0; |
100 | for(i=j=0; (c = zFormat[i])!=0; i++){ |
101 | if( c=='%' ){ |
102 | int iWidth = 0; |
103 | lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); |
104 | c = zFormat[++i]; |
105 | if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){ |
106 | if( c=='-' ) i++; |
107 | while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; |
108 | if( c=='-' ) iWidth = -iWidth; |
109 | c = zFormat[i]; |
110 | } |
111 | if( c=='d' ){ |
112 | int v = va_arg(ap, int); |
113 | if( v<0 ){ |
114 | lemon_addtext(str, &nUsed, "-" , 1, iWidth); |
115 | v = -v; |
116 | }else if( v==0 ){ |
117 | lemon_addtext(str, &nUsed, "0" , 1, iWidth); |
118 | } |
119 | k = 0; |
120 | while( v>0 ){ |
121 | k++; |
122 | zTemp[sizeof(zTemp)-k] = (v%10) + '0'; |
123 | v /= 10; |
124 | } |
125 | lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); |
126 | }else if( c=='s' ){ |
127 | z = va_arg(ap, const char*); |
128 | lemon_addtext(str, &nUsed, z, -1, iWidth); |
129 | }else if( c=='.' && memcmp(&zFormat[i], ".*s" , 3)==0 ){ |
130 | i += 2; |
131 | k = va_arg(ap, int); |
132 | z = va_arg(ap, const char*); |
133 | lemon_addtext(str, &nUsed, z, k, iWidth); |
134 | }else if( c=='%' ){ |
135 | lemon_addtext(str, &nUsed, "%" , 1, 0); |
136 | }else{ |
137 | fprintf(stderr, "illegal format\n" ); |
138 | exit(1); |
139 | } |
140 | j = i+1; |
141 | } |
142 | } |
143 | lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); |
144 | return nUsed; |
145 | } |
146 | static int lemon_sprintf(char *str, const char *format, ...){ |
147 | va_list ap; |
148 | int rc; |
149 | va_start(ap, format); |
150 | rc = lemon_vsprintf(str, format, ap); |
151 | va_end(ap); |
152 | return rc; |
153 | } |
154 | static void lemon_strcpy(char *dest, const char *src){ |
155 | while( (*(dest++) = *(src++))!=0 ){} |
156 | } |
157 | static void lemon_strcat(char *dest, const char *src){ |
158 | while( *dest ) dest++; |
159 | lemon_strcpy(dest, src); |
160 | } |
161 | |
162 | |
163 | /* a few forward declarations... */ |
164 | struct rule; |
165 | struct lemon; |
166 | struct action; |
167 | |
168 | static struct action *Action_new(void); |
169 | static struct action *Action_sort(struct action *); |
170 | |
171 | /********** From the file "build.h" ************************************/ |
172 | void FindRulePrecedences(struct lemon*); |
173 | void FindFirstSets(struct lemon*); |
174 | void FindStates(struct lemon*); |
175 | void FindLinks(struct lemon*); |
176 | void FindFollowSets(struct lemon*); |
177 | void FindActions(struct lemon*); |
178 | |
179 | /********* From the file "configlist.h" *********************************/ |
180 | void Configlist_init(void); |
181 | struct config *Configlist_add(struct rule *, int); |
182 | struct config *Configlist_addbasis(struct rule *, int); |
183 | void Configlist_closure(struct lemon *); |
184 | void Configlist_sort(void); |
185 | void Configlist_sortbasis(void); |
186 | struct config *Configlist_return(void); |
187 | struct config *Configlist_basis(void); |
188 | void Configlist_eat(struct config *); |
189 | void Configlist_reset(void); |
190 | |
191 | /********* From the file "error.h" ***************************************/ |
192 | void ErrorMsg(const char *, int,const char *, ...); |
193 | |
194 | /****** From the file "option.h" ******************************************/ |
195 | enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, |
196 | OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; |
197 | struct s_options { |
198 | enum option_type type; |
199 | const char *label; |
200 | char *arg; |
201 | const char *message; |
202 | }; |
203 | int OptInit(char**,struct s_options*,FILE*); |
204 | int OptNArgs(void); |
205 | char *OptArg(int); |
206 | void OptErr(int); |
207 | void OptPrint(void); |
208 | |
209 | /******** From the file "parse.h" *****************************************/ |
210 | void Parse(struct lemon *lemp); |
211 | |
212 | /********* From the file "plink.h" ***************************************/ |
213 | struct plink *Plink_new(void); |
214 | void Plink_add(struct plink **, struct config *); |
215 | void Plink_copy(struct plink **, struct plink *); |
216 | void Plink_delete(struct plink *); |
217 | |
218 | /********** From the file "report.h" *************************************/ |
219 | void Reprint(struct lemon *); |
220 | void ReportOutput(struct lemon *); |
221 | void ReportTable(struct lemon *, int, int); |
222 | void ReportHeader(struct lemon *); |
223 | void CompressTables(struct lemon *); |
224 | void ResortStates(struct lemon *); |
225 | |
226 | /********** From the file "set.h" ****************************************/ |
227 | void SetSize(int); /* All sets will be of size N */ |
228 | char *SetNew(void); /* A new set for element 0..N */ |
229 | void SetFree(char*); /* Deallocate a set */ |
230 | int SetAdd(char*,int); /* Add element to a set */ |
231 | int SetUnion(char *,char *); /* A <- A U B, thru element N */ |
232 | #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ |
233 | |
234 | /********** From the file "struct.h" *************************************/ |
235 | /* |
236 | ** Principal data structures for the LEMON parser generator. |
237 | */ |
238 | |
239 | typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; |
240 | |
241 | /* Symbols (terminals and nonterminals) of the grammar are stored |
242 | ** in the following: */ |
243 | enum symbol_type { |
244 | TERMINAL, |
245 | NONTERMINAL, |
246 | MULTITERMINAL |
247 | }; |
248 | enum e_assoc { |
249 | LEFT, |
250 | RIGHT, |
251 | NONE, |
252 | UNK |
253 | }; |
254 | struct symbol { |
255 | const char *name; /* Name of the symbol */ |
256 | int index; /* Index number for this symbol */ |
257 | enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ |
258 | struct rule *rule; /* Linked list of rules of this (if an NT) */ |
259 | struct symbol *fallback; /* fallback token in case this token doesn't parse */ |
260 | int prec; /* Precedence if defined (-1 otherwise) */ |
261 | enum e_assoc assoc; /* Associativity if precedence is defined */ |
262 | char *firstset; /* First-set for all rules of this symbol */ |
263 | Boolean lambda; /* True if NT and can generate an empty string */ |
264 | int useCnt; /* Number of times used */ |
265 | char *destructor; /* Code which executes whenever this symbol is |
266 | ** popped from the stack during error processing */ |
267 | int destLineno; /* Line number for start of destructor. Set to |
268 | ** -1 for duplicate destructors. */ |
269 | char *datatype; /* The data type of information held by this |
270 | ** object. Only used if type==NONTERMINAL */ |
271 | int dtnum; /* The data type number. In the parser, the value |
272 | ** stack is a union. The .yy%d element of this |
273 | ** union is the correct data type for this object */ |
274 | int bContent; /* True if this symbol ever carries content - if |
275 | ** it is ever more than just syntax */ |
276 | /* The following fields are used by MULTITERMINALs only */ |
277 | int nsubsym; /* Number of constituent symbols in the MULTI */ |
278 | struct symbol **subsym; /* Array of constituent symbols */ |
279 | }; |
280 | |
281 | /* Each production rule in the grammar is stored in the following |
282 | ** structure. */ |
283 | struct rule { |
284 | struct symbol *lhs; /* Left-hand side of the rule */ |
285 | const char *lhsalias; /* Alias for the LHS (NULL if none) */ |
286 | int lhsStart; /* True if left-hand side is the start symbol */ |
287 | int ruleline; /* Line number for the rule */ |
288 | int nrhs; /* Number of RHS symbols */ |
289 | struct symbol **rhs; /* The RHS symbols */ |
290 | const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ |
291 | int line; /* Line number at which code begins */ |
292 | const char *code; /* The code executed when this rule is reduced */ |
293 | const char *codePrefix; /* Setup code before code[] above */ |
294 | const char *codeSuffix; /* Breakdown code after code[] above */ |
295 | struct symbol *precsym; /* Precedence symbol for this rule */ |
296 | int index; /* An index number for this rule */ |
297 | int iRule; /* Rule number as used in the generated tables */ |
298 | Boolean noCode; /* True if this rule has no associated C code */ |
299 | Boolean codeEmitted; /* True if the code has been emitted already */ |
300 | Boolean canReduce; /* True if this rule is ever reduced */ |
301 | Boolean doesReduce; /* Reduce actions occur after optimization */ |
302 | Boolean neverReduce; /* Reduce is theoretically possible, but prevented |
303 | ** by actions or other outside implementation */ |
304 | struct rule *nextlhs; /* Next rule with the same LHS */ |
305 | struct rule *next; /* Next rule in the global list */ |
306 | }; |
307 | |
308 | /* A configuration is a production rule of the grammar together with |
309 | ** a mark (dot) showing how much of that rule has been processed so far. |
310 | ** Configurations also contain a follow-set which is a list of terminal |
311 | ** symbols which are allowed to immediately follow the end of the rule. |
312 | ** Every configuration is recorded as an instance of the following: */ |
313 | enum cfgstatus { |
314 | COMPLETE, |
315 | INCOMPLETE |
316 | }; |
317 | struct config { |
318 | struct rule *rp; /* The rule upon which the configuration is based */ |
319 | int dot; /* The parse point */ |
320 | char *fws; /* Follow-set for this configuration only */ |
321 | struct plink *fplp; /* Follow-set forward propagation links */ |
322 | struct plink *bplp; /* Follow-set backwards propagation links */ |
323 | struct state *stp; /* Pointer to state which contains this */ |
324 | enum cfgstatus status; /* used during followset and shift computations */ |
325 | struct config *next; /* Next configuration in the state */ |
326 | struct config *bp; /* The next basis configuration */ |
327 | }; |
328 | |
329 | enum e_action { |
330 | SHIFT, |
331 | ACCEPT, |
332 | REDUCE, |
333 | ERROR, |
334 | SSCONFLICT, /* A shift/shift conflict */ |
335 | SRCONFLICT, /* Was a reduce, but part of a conflict */ |
336 | RRCONFLICT, /* Was a reduce, but part of a conflict */ |
337 | SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ |
338 | RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ |
339 | NOT_USED, /* Deleted by compression */ |
340 | SHIFTREDUCE /* Shift first, then reduce */ |
341 | }; |
342 | |
343 | /* Every shift or reduce operation is stored as one of the following */ |
344 | struct action { |
345 | struct symbol *sp; /* The look-ahead symbol */ |
346 | enum e_action type; |
347 | union { |
348 | struct state *stp; /* The new state, if a shift */ |
349 | struct rule *rp; /* The rule, if a reduce */ |
350 | } x; |
351 | struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ |
352 | struct action *next; /* Next action for this state */ |
353 | struct action *collide; /* Next action with the same hash */ |
354 | }; |
355 | |
356 | /* Each state of the generated parser's finite state machine |
357 | ** is encoded as an instance of the following structure. */ |
358 | struct state { |
359 | struct config *bp; /* The basis configurations for this state */ |
360 | struct config *cfp; /* All configurations in this set */ |
361 | int statenum; /* Sequential number for this state */ |
362 | struct action *ap; /* List of actions for this state */ |
363 | int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ |
364 | int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ |
365 | int iDfltReduce; /* Default action is to REDUCE by this rule */ |
366 | struct rule *pDfltReduce;/* The default REDUCE rule. */ |
367 | int autoReduce; /* True if this is an auto-reduce state */ |
368 | }; |
369 | #define NO_OFFSET (-2147483647) |
370 | |
371 | /* A followset propagation link indicates that the contents of one |
372 | ** configuration followset should be propagated to another whenever |
373 | ** the first changes. */ |
374 | struct plink { |
375 | struct config *cfp; /* The configuration to which linked */ |
376 | struct plink *next; /* The next propagate link */ |
377 | }; |
378 | |
379 | /* The state vector for the entire parser generator is recorded as |
380 | ** follows. (LEMON uses no global variables and makes little use of |
381 | ** static variables. Fields in the following structure can be thought |
382 | ** of as begin global variables in the program.) */ |
383 | struct lemon { |
384 | struct state **sorted; /* Table of states sorted by state number */ |
385 | struct rule *rule; /* List of all rules */ |
386 | struct rule *startRule; /* First rule */ |
387 | int nstate; /* Number of states */ |
388 | int nxstate; /* nstate with tail degenerate states removed */ |
389 | int nrule; /* Number of rules */ |
390 | int nruleWithAction; /* Number of rules with actions */ |
391 | int nsymbol; /* Number of terminal and nonterminal symbols */ |
392 | int nterminal; /* Number of terminal symbols */ |
393 | int minShiftReduce; /* Minimum shift-reduce action value */ |
394 | int errAction; /* Error action value */ |
395 | int accAction; /* Accept action value */ |
396 | int noAction; /* No-op action value */ |
397 | int minReduce; /* Minimum reduce action */ |
398 | int maxAction; /* Maximum action value of any kind */ |
399 | struct symbol **symbols; /* Sorted array of pointers to symbols */ |
400 | int errorcnt; /* Number of errors */ |
401 | struct symbol *errsym; /* The error symbol */ |
402 | struct symbol *wildcard; /* Token that matches anything */ |
403 | char *name; /* Name of the generated parser */ |
404 | char *arg; /* Declaration of the 3rd argument to parser */ |
405 | char *ctx; /* Declaration of 2nd argument to constructor */ |
406 | char *tokentype; /* Type of terminal symbols in the parser stack */ |
407 | char *vartype; /* The default type of non-terminal symbols */ |
408 | char *start; /* Name of the start symbol for the grammar */ |
409 | char *stacksize; /* Size of the parser stack */ |
410 | char *include; /* Code to put at the start of the C file */ |
411 | char *error; /* Code to execute when an error is seen */ |
412 | char *overflow; /* Code to execute on a stack overflow */ |
413 | char *failure; /* Code to execute on parser failure */ |
414 | char *accept; /* Code to execute when the parser excepts */ |
415 | char *; /* Code appended to the generated file */ |
416 | char *tokendest; /* Code to execute to destroy token data */ |
417 | char *vardest; /* Code for the default non-terminal destructor */ |
418 | char *filename; /* Name of the input file */ |
419 | char *outname; /* Name of the current output file */ |
420 | char *tokenprefix; /* A prefix added to token names in the .h file */ |
421 | int nconflict; /* Number of parsing conflicts */ |
422 | int nactiontab; /* Number of entries in the yy_action[] table */ |
423 | int nlookaheadtab; /* Number of entries in yy_lookahead[] */ |
424 | int tablesize; /* Total table size of all tables in bytes */ |
425 | int basisflag; /* Print only basis configurations */ |
426 | int printPreprocessed; /* Show preprocessor output on stdout */ |
427 | int has_fallback; /* True if any %fallback is seen in the grammar */ |
428 | int nolinenosflag; /* True if #line statements should not be printed */ |
429 | char *argv0; /* Name of the program */ |
430 | }; |
431 | |
432 | #define MemoryCheck(X) if((X)==0){ \ |
433 | extern void memory_error(); \ |
434 | memory_error(); \ |
435 | } |
436 | |
437 | /**************** From the file "table.h" *********************************/ |
438 | /* |
439 | ** All code in this file has been automatically generated |
440 | ** from a specification in the file |
441 | ** "table.q" |
442 | ** by the associative array code building program "aagen". |
443 | ** Do not edit this file! Instead, edit the specification |
444 | ** file, then rerun aagen. |
445 | */ |
446 | /* |
447 | ** Code for processing tables in the LEMON parser generator. |
448 | */ |
449 | /* Routines for handling a strings */ |
450 | |
451 | const char *Strsafe(const char *); |
452 | |
453 | void Strsafe_init(void); |
454 | int Strsafe_insert(const char *); |
455 | const char *Strsafe_find(const char *); |
456 | |
457 | /* Routines for handling symbols of the grammar */ |
458 | |
459 | struct symbol *Symbol_new(const char *); |
460 | int Symbolcmpp(const void *, const void *); |
461 | void Symbol_init(void); |
462 | int Symbol_insert(struct symbol *, const char *); |
463 | struct symbol *Symbol_find(const char *); |
464 | struct symbol *Symbol_Nth(int); |
465 | int Symbol_count(void); |
466 | struct symbol **Symbol_arrayof(void); |
467 | |
468 | /* Routines to manage the state table */ |
469 | |
470 | int Configcmp(const char *, const char *); |
471 | struct state *State_new(void); |
472 | void State_init(void); |
473 | int State_insert(struct state *, struct config *); |
474 | struct state *State_find(struct config *); |
475 | struct state **State_arrayof(void); |
476 | |
477 | /* Routines used for efficiency in Configlist_add */ |
478 | |
479 | void Configtable_init(void); |
480 | int Configtable_insert(struct config *); |
481 | struct config *Configtable_find(struct config *); |
482 | void Configtable_clear(int(*)(struct config *)); |
483 | |
484 | /****************** From the file "action.c" *******************************/ |
485 | /* |
486 | ** Routines processing parser actions in the LEMON parser generator. |
487 | */ |
488 | |
489 | /* Allocate a new parser action */ |
490 | static struct action *Action_new(void){ |
491 | static struct action *actionfreelist = 0; |
492 | struct action *newaction; |
493 | |
494 | if( actionfreelist==0 ){ |
495 | int i; |
496 | int amt = 100; |
497 | actionfreelist = (struct action *)calloc(amt, sizeof(struct action)); |
498 | if( actionfreelist==0 ){ |
499 | fprintf(stderr,"Unable to allocate memory for a new parser action." ); |
500 | exit(1); |
501 | } |
502 | for(i=0; i<amt-1; i++) actionfreelist[i].next = &actionfreelist[i+1]; |
503 | actionfreelist[amt-1].next = 0; |
504 | } |
505 | newaction = actionfreelist; |
506 | actionfreelist = actionfreelist->next; |
507 | return newaction; |
508 | } |
509 | |
510 | /* Compare two actions for sorting purposes. Return negative, zero, or |
511 | ** positive if the first action is less than, equal to, or greater than |
512 | ** the first |
513 | */ |
514 | static int actioncmp( |
515 | struct action *ap1, |
516 | struct action *ap2 |
517 | ){ |
518 | int rc; |
519 | rc = ap1->sp->index - ap2->sp->index; |
520 | if( rc==0 ){ |
521 | rc = (int)ap1->type - (int)ap2->type; |
522 | } |
523 | if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ |
524 | rc = ap1->x.rp->index - ap2->x.rp->index; |
525 | } |
526 | if( rc==0 ){ |
527 | rc = (int) (ap2 - ap1); |
528 | } |
529 | return rc; |
530 | } |
531 | |
532 | /* Sort parser actions */ |
533 | static struct action *Action_sort( |
534 | struct action *ap |
535 | ){ |
536 | ap = (struct action *)msort((char *)ap,(char **)&ap->next, |
537 | (int(*)(const char*,const char*))actioncmp); |
538 | return ap; |
539 | } |
540 | |
541 | void Action_add( |
542 | struct action **app, |
543 | enum e_action type, |
544 | struct symbol *sp, |
545 | char *arg |
546 | ){ |
547 | struct action *newaction; |
548 | newaction = Action_new(); |
549 | newaction->next = *app; |
550 | *app = newaction; |
551 | newaction->type = type; |
552 | newaction->sp = sp; |
553 | newaction->spOpt = 0; |
554 | if( type==SHIFT ){ |
555 | newaction->x.stp = (struct state *)arg; |
556 | }else{ |
557 | newaction->x.rp = (struct rule *)arg; |
558 | } |
559 | } |
560 | /********************** New code to implement the "acttab" module ***********/ |
561 | /* |
562 | ** This module implements routines use to construct the yy_action[] table. |
563 | */ |
564 | |
565 | /* |
566 | ** The state of the yy_action table under construction is an instance of |
567 | ** the following structure. |
568 | ** |
569 | ** The yy_action table maps the pair (state_number, lookahead) into an |
570 | ** action_number. The table is an array of integers pairs. The state_number |
571 | ** determines an initial offset into the yy_action array. The lookahead |
572 | ** value is then added to this initial offset to get an index X into the |
573 | ** yy_action array. If the aAction[X].lookahead equals the value of the |
574 | ** of the lookahead input, then the value of the action_number output is |
575 | ** aAction[X].action. If the lookaheads do not match then the |
576 | ** default action for the state_number is returned. |
577 | ** |
578 | ** All actions associated with a single state_number are first entered |
579 | ** into aLookahead[] using multiple calls to acttab_action(). Then the |
580 | ** actions for that single state_number are placed into the aAction[] |
581 | ** array with a single call to acttab_insert(). The acttab_insert() call |
582 | ** also resets the aLookahead[] array in preparation for the next |
583 | ** state number. |
584 | */ |
585 | struct lookahead_action { |
586 | int lookahead; /* Value of the lookahead token */ |
587 | int action; /* Action to take on the given lookahead */ |
588 | }; |
589 | typedef struct acttab acttab; |
590 | struct acttab { |
591 | int nAction; /* Number of used slots in aAction[] */ |
592 | int nActionAlloc; /* Slots allocated for aAction[] */ |
593 | struct lookahead_action |
594 | *aAction, /* The yy_action[] table under construction */ |
595 | *aLookahead; /* A single new transaction set */ |
596 | int mnLookahead; /* Minimum aLookahead[].lookahead */ |
597 | int mnAction; /* Action associated with mnLookahead */ |
598 | int mxLookahead; /* Maximum aLookahead[].lookahead */ |
599 | int nLookahead; /* Used slots in aLookahead[] */ |
600 | int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ |
601 | int nterminal; /* Number of terminal symbols */ |
602 | int nsymbol; /* total number of symbols */ |
603 | }; |
604 | |
605 | /* Return the number of entries in the yy_action table */ |
606 | #define acttab_lookahead_size(X) ((X)->nAction) |
607 | |
608 | /* The value for the N-th entry in yy_action */ |
609 | #define acttab_yyaction(X,N) ((X)->aAction[N].action) |
610 | |
611 | /* The value for the N-th entry in yy_lookahead */ |
612 | #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) |
613 | |
614 | /* Free all memory associated with the given acttab */ |
615 | void acttab_free(acttab *p){ |
616 | free( p->aAction ); |
617 | free( p->aLookahead ); |
618 | free( p ); |
619 | } |
620 | |
621 | /* Allocate a new acttab structure */ |
622 | acttab *acttab_alloc(int nsymbol, int nterminal){ |
623 | acttab *p = (acttab *) calloc( 1, sizeof(*p) ); |
624 | if( p==0 ){ |
625 | fprintf(stderr,"Unable to allocate memory for a new acttab." ); |
626 | exit(1); |
627 | } |
628 | memset(p, 0, sizeof(*p)); |
629 | p->nsymbol = nsymbol; |
630 | p->nterminal = nterminal; |
631 | return p; |
632 | } |
633 | |
634 | /* Add a new action to the current transaction set. |
635 | ** |
636 | ** This routine is called once for each lookahead for a particular |
637 | ** state. |
638 | */ |
639 | void acttab_action(acttab *p, int lookahead, int action){ |
640 | if( p->nLookahead>=p->nLookaheadAlloc ){ |
641 | p->nLookaheadAlloc += 25; |
642 | p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, |
643 | sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); |
644 | if( p->aLookahead==0 ){ |
645 | fprintf(stderr,"malloc failed\n" ); |
646 | exit(1); |
647 | } |
648 | } |
649 | if( p->nLookahead==0 ){ |
650 | p->mxLookahead = lookahead; |
651 | p->mnLookahead = lookahead; |
652 | p->mnAction = action; |
653 | }else{ |
654 | if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead; |
655 | if( p->mnLookahead>lookahead ){ |
656 | p->mnLookahead = lookahead; |
657 | p->mnAction = action; |
658 | } |
659 | } |
660 | p->aLookahead[p->nLookahead].lookahead = lookahead; |
661 | p->aLookahead[p->nLookahead].action = action; |
662 | p->nLookahead++; |
663 | } |
664 | |
665 | /* |
666 | ** Add the transaction set built up with prior calls to acttab_action() |
667 | ** into the current action table. Then reset the transaction set back |
668 | ** to an empty set in preparation for a new round of acttab_action() calls. |
669 | ** |
670 | ** Return the offset into the action table of the new transaction. |
671 | ** |
672 | ** If the makeItSafe parameter is true, then the offset is chosen so that |
673 | ** it is impossible to overread the yy_lookaside[] table regardless of |
674 | ** the lookaside token. This is done for the terminal symbols, as they |
675 | ** come from external inputs and can contain syntax errors. When makeItSafe |
676 | ** is false, there is more flexibility in selecting offsets, resulting in |
677 | ** a smaller table. For non-terminal symbols, which are never syntax errors, |
678 | ** makeItSafe can be false. |
679 | */ |
680 | int acttab_insert(acttab *p, int makeItSafe){ |
681 | int i, j, k, n, end; |
682 | assert( p->nLookahead>0 ); |
683 | |
684 | /* Make sure we have enough space to hold the expanded action table |
685 | ** in the worst case. The worst case occurs if the transaction set |
686 | ** must be appended to the current action table |
687 | */ |
688 | n = p->nsymbol + 1; |
689 | if( p->nAction + n >= p->nActionAlloc ){ |
690 | int oldAlloc = p->nActionAlloc; |
691 | p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; |
692 | p->aAction = (struct lookahead_action *) realloc( p->aAction, |
693 | sizeof(p->aAction[0])*p->nActionAlloc); |
694 | if( p->aAction==0 ){ |
695 | fprintf(stderr,"malloc failed\n" ); |
696 | exit(1); |
697 | } |
698 | for(i=oldAlloc; i<p->nActionAlloc; i++){ |
699 | p->aAction[i].lookahead = -1; |
700 | p->aAction[i].action = -1; |
701 | } |
702 | } |
703 | |
704 | /* Scan the existing action table looking for an offset that is a |
705 | ** duplicate of the current transaction set. Fall out of the loop |
706 | ** if and when the duplicate is found. |
707 | ** |
708 | ** i is the index in p->aAction[] where p->mnLookahead is inserted. |
709 | */ |
710 | end = makeItSafe ? p->mnLookahead : 0; |
711 | for(i=p->nAction-1; i>=end; i--){ |
712 | if( p->aAction[i].lookahead==p->mnLookahead ){ |
713 | /* All lookaheads and actions in the aLookahead[] transaction |
714 | ** must match against the candidate aAction[i] entry. */ |
715 | if( p->aAction[i].action!=p->mnAction ) continue; |
716 | for(j=0; j<p->nLookahead; j++){ |
717 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; |
718 | if( k<0 || k>=p->nAction ) break; |
719 | if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; |
720 | if( p->aLookahead[j].action!=p->aAction[k].action ) break; |
721 | } |
722 | if( j<p->nLookahead ) continue; |
723 | |
724 | /* No possible lookahead value that is not in the aLookahead[] |
725 | ** transaction is allowed to match aAction[i] */ |
726 | n = 0; |
727 | for(j=0; j<p->nAction; j++){ |
728 | if( p->aAction[j].lookahead<0 ) continue; |
729 | if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; |
730 | } |
731 | if( n==p->nLookahead ){ |
732 | break; /* An exact match is found at offset i */ |
733 | } |
734 | } |
735 | } |
736 | |
737 | /* If no existing offsets exactly match the current transaction, find an |
738 | ** an empty offset in the aAction[] table in which we can add the |
739 | ** aLookahead[] transaction. |
740 | */ |
741 | if( i<end ){ |
742 | /* Look for holes in the aAction[] table that fit the current |
743 | ** aLookahead[] transaction. Leave i set to the offset of the hole. |
744 | ** If no holes are found, i is left at p->nAction, which means the |
745 | ** transaction will be appended. */ |
746 | i = makeItSafe ? p->mnLookahead : 0; |
747 | for(; i<p->nActionAlloc - p->mxLookahead; i++){ |
748 | if( p->aAction[i].lookahead<0 ){ |
749 | for(j=0; j<p->nLookahead; j++){ |
750 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; |
751 | if( k<0 ) break; |
752 | if( p->aAction[k].lookahead>=0 ) break; |
753 | } |
754 | if( j<p->nLookahead ) continue; |
755 | for(j=0; j<p->nAction; j++){ |
756 | if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; |
757 | } |
758 | if( j==p->nAction ){ |
759 | break; /* Fits in empty slots */ |
760 | } |
761 | } |
762 | } |
763 | } |
764 | /* Insert transaction set at index i. */ |
765 | #if 0 |
766 | printf("Acttab:" ); |
767 | for(j=0; j<p->nLookahead; j++){ |
768 | printf(" %d" , p->aLookahead[j].lookahead); |
769 | } |
770 | printf(" inserted at %d\n" , i); |
771 | #endif |
772 | for(j=0; j<p->nLookahead; j++){ |
773 | k = p->aLookahead[j].lookahead - p->mnLookahead + i; |
774 | p->aAction[k] = p->aLookahead[j]; |
775 | if( k>=p->nAction ) p->nAction = k+1; |
776 | } |
777 | if( makeItSafe && i+p->nterminal>=p->nAction ) p->nAction = i+p->nterminal+1; |
778 | p->nLookahead = 0; |
779 | |
780 | /* Return the offset that is added to the lookahead in order to get the |
781 | ** index into yy_action of the action */ |
782 | return i - p->mnLookahead; |
783 | } |
784 | |
785 | /* |
786 | ** Return the size of the action table without the trailing syntax error |
787 | ** entries. |
788 | */ |
789 | int acttab_action_size(acttab *p){ |
790 | int n = p->nAction; |
791 | while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } |
792 | return n; |
793 | } |
794 | |
795 | /********************** From the file "build.c" *****************************/ |
796 | /* |
797 | ** Routines to construction the finite state machine for the LEMON |
798 | ** parser generator. |
799 | */ |
800 | |
801 | /* Find a precedence symbol of every rule in the grammar. |
802 | ** |
803 | ** Those rules which have a precedence symbol coded in the input |
804 | ** grammar using the "[symbol]" construct will already have the |
805 | ** rp->precsym field filled. Other rules take as their precedence |
806 | ** symbol the first RHS symbol with a defined precedence. If there |
807 | ** are not RHS symbols with a defined precedence, the precedence |
808 | ** symbol field is left blank. |
809 | */ |
810 | void FindRulePrecedences(struct lemon *xp) |
811 | { |
812 | struct rule *rp; |
813 | for(rp=xp->rule; rp; rp=rp->next){ |
814 | if( rp->precsym==0 ){ |
815 | int i, j; |
816 | for(i=0; i<rp->nrhs && rp->precsym==0; i++){ |
817 | struct symbol *sp = rp->rhs[i]; |
818 | if( sp->type==MULTITERMINAL ){ |
819 | for(j=0; j<sp->nsubsym; j++){ |
820 | if( sp->subsym[j]->prec>=0 ){ |
821 | rp->precsym = sp->subsym[j]; |
822 | break; |
823 | } |
824 | } |
825 | }else if( sp->prec>=0 ){ |
826 | rp->precsym = rp->rhs[i]; |
827 | } |
828 | } |
829 | } |
830 | } |
831 | return; |
832 | } |
833 | |
834 | /* Find all nonterminals which will generate the empty string. |
835 | ** Then go back and compute the first sets of every nonterminal. |
836 | ** The first set is the set of all terminal symbols which can begin |
837 | ** a string generated by that nonterminal. |
838 | */ |
839 | void FindFirstSets(struct lemon *lemp) |
840 | { |
841 | int i, j; |
842 | struct rule *rp; |
843 | int progress; |
844 | |
845 | for(i=0; i<lemp->nsymbol; i++){ |
846 | lemp->symbols[i]->lambda = LEMON_FALSE; |
847 | } |
848 | for(i=lemp->nterminal; i<lemp->nsymbol; i++){ |
849 | lemp->symbols[i]->firstset = SetNew(); |
850 | } |
851 | |
852 | /* First compute all lambdas */ |
853 | do{ |
854 | progress = 0; |
855 | for(rp=lemp->rule; rp; rp=rp->next){ |
856 | if( rp->lhs->lambda ) continue; |
857 | for(i=0; i<rp->nrhs; i++){ |
858 | struct symbol *sp = rp->rhs[i]; |
859 | assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); |
860 | if( sp->lambda==LEMON_FALSE ) break; |
861 | } |
862 | if( i==rp->nrhs ){ |
863 | rp->lhs->lambda = LEMON_TRUE; |
864 | progress = 1; |
865 | } |
866 | } |
867 | }while( progress ); |
868 | |
869 | /* Now compute all first sets */ |
870 | do{ |
871 | struct symbol *s1, *s2; |
872 | progress = 0; |
873 | for(rp=lemp->rule; rp; rp=rp->next){ |
874 | s1 = rp->lhs; |
875 | for(i=0; i<rp->nrhs; i++){ |
876 | s2 = rp->rhs[i]; |
877 | if( s2->type==TERMINAL ){ |
878 | progress += SetAdd(s1->firstset,s2->index); |
879 | break; |
880 | }else if( s2->type==MULTITERMINAL ){ |
881 | for(j=0; j<s2->nsubsym; j++){ |
882 | progress += SetAdd(s1->firstset,s2->subsym[j]->index); |
883 | } |
884 | break; |
885 | }else if( s1==s2 ){ |
886 | if( s1->lambda==LEMON_FALSE ) break; |
887 | }else{ |
888 | progress += SetUnion(s1->firstset,s2->firstset); |
889 | if( s2->lambda==LEMON_FALSE ) break; |
890 | } |
891 | } |
892 | } |
893 | }while( progress ); |
894 | return; |
895 | } |
896 | |
897 | /* Compute all LR(0) states for the grammar. Links |
898 | ** are added to between some states so that the LR(1) follow sets |
899 | ** can be computed later. |
900 | */ |
901 | PRIVATE struct state *getstate(struct lemon *); /* forward reference */ |
902 | void FindStates(struct lemon *lemp) |
903 | { |
904 | struct symbol *sp; |
905 | struct rule *rp; |
906 | |
907 | Configlist_init(); |
908 | |
909 | /* Find the start symbol */ |
910 | if( lemp->start ){ |
911 | sp = Symbol_find(lemp->start); |
912 | if( sp==0 ){ |
913 | ErrorMsg(lemp->filename,0, |
914 | "The specified start symbol \"%s\" is not " |
915 | "in a nonterminal of the grammar. \"%s\" will be used as the start " |
916 | "symbol instead." ,lemp->start,lemp->startRule->lhs->name); |
917 | lemp->errorcnt++; |
918 | sp = lemp->startRule->lhs; |
919 | } |
920 | }else if( lemp->startRule ){ |
921 | sp = lemp->startRule->lhs; |
922 | }else{ |
923 | ErrorMsg(lemp->filename,0,"Internal error - no start rule\n" ); |
924 | exit(1); |
925 | } |
926 | |
927 | /* Make sure the start symbol doesn't occur on the right-hand side of |
928 | ** any rule. Report an error if it does. (YACC would generate a new |
929 | ** start symbol in this case.) */ |
930 | for(rp=lemp->rule; rp; rp=rp->next){ |
931 | int i; |
932 | for(i=0; i<rp->nrhs; i++){ |
933 | if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ |
934 | ErrorMsg(lemp->filename,0, |
935 | "The start symbol \"%s\" occurs on the " |
936 | "right-hand side of a rule. This will result in a parser which " |
937 | "does not work properly." ,sp->name); |
938 | lemp->errorcnt++; |
939 | } |
940 | } |
941 | } |
942 | |
943 | /* The basis configuration set for the first state |
944 | ** is all rules which have the start symbol as their |
945 | ** left-hand side */ |
946 | for(rp=sp->rule; rp; rp=rp->nextlhs){ |
947 | struct config *newcfp; |
948 | rp->lhsStart = 1; |
949 | newcfp = Configlist_addbasis(rp,0); |
950 | SetAdd(newcfp->fws,0); |
951 | } |
952 | |
953 | /* Compute the first state. All other states will be |
954 | ** computed automatically during the computation of the first one. |
955 | ** The returned pointer to the first state is not used. */ |
956 | (void)getstate(lemp); |
957 | return; |
958 | } |
959 | |
960 | /* Return a pointer to a state which is described by the configuration |
961 | ** list which has been built from calls to Configlist_add. |
962 | */ |
963 | PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ |
964 | PRIVATE struct state *getstate(struct lemon *lemp) |
965 | { |
966 | struct config *cfp, *bp; |
967 | struct state *stp; |
968 | |
969 | /* Extract the sorted basis of the new state. The basis was constructed |
970 | ** by prior calls to "Configlist_addbasis()". */ |
971 | Configlist_sortbasis(); |
972 | bp = Configlist_basis(); |
973 | |
974 | /* Get a state with the same basis */ |
975 | stp = State_find(bp); |
976 | if( stp ){ |
977 | /* A state with the same basis already exists! Copy all the follow-set |
978 | ** propagation links from the state under construction into the |
979 | ** preexisting state, then return a pointer to the preexisting state */ |
980 | struct config *x, *y; |
981 | for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ |
982 | Plink_copy(&y->bplp,x->bplp); |
983 | Plink_delete(x->fplp); |
984 | x->fplp = x->bplp = 0; |
985 | } |
986 | cfp = Configlist_return(); |
987 | Configlist_eat(cfp); |
988 | }else{ |
989 | /* This really is a new state. Construct all the details */ |
990 | Configlist_closure(lemp); /* Compute the configuration closure */ |
991 | Configlist_sort(); /* Sort the configuration closure */ |
992 | cfp = Configlist_return(); /* Get a pointer to the config list */ |
993 | stp = State_new(); /* A new state structure */ |
994 | MemoryCheck(stp); |
995 | stp->bp = bp; /* Remember the configuration basis */ |
996 | stp->cfp = cfp; /* Remember the configuration closure */ |
997 | stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ |
998 | stp->ap = 0; /* No actions, yet. */ |
999 | State_insert(stp,stp->bp); /* Add to the state table */ |
1000 | buildshifts(lemp,stp); /* Recursively compute successor states */ |
1001 | } |
1002 | return stp; |
1003 | } |
1004 | |
1005 | /* |
1006 | ** Return true if two symbols are the same. |
1007 | */ |
1008 | int same_symbol(struct symbol *a, struct symbol *b) |
1009 | { |
1010 | int i; |
1011 | if( a==b ) return 1; |
1012 | if( a->type!=MULTITERMINAL ) return 0; |
1013 | if( b->type!=MULTITERMINAL ) return 0; |
1014 | if( a->nsubsym!=b->nsubsym ) return 0; |
1015 | for(i=0; i<a->nsubsym; i++){ |
1016 | if( a->subsym[i]!=b->subsym[i] ) return 0; |
1017 | } |
1018 | return 1; |
1019 | } |
1020 | |
1021 | /* Construct all successor states to the given state. A "successor" |
1022 | ** state is any state which can be reached by a shift action. |
1023 | */ |
1024 | PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) |
1025 | { |
1026 | struct config *cfp; /* For looping thru the config closure of "stp" */ |
1027 | struct config *bcfp; /* For the inner loop on config closure of "stp" */ |
1028 | struct config *newcfg; /* */ |
1029 | struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ |
1030 | struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ |
1031 | struct state *newstp; /* A pointer to a successor state */ |
1032 | |
1033 | /* Each configuration becomes complete after it contributes to a successor |
1034 | ** state. Initially, all configurations are incomplete */ |
1035 | for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; |
1036 | |
1037 | /* Loop through all configurations of the state "stp" */ |
1038 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ |
1039 | if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ |
1040 | if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ |
1041 | Configlist_reset(); /* Reset the new config set */ |
1042 | sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ |
1043 | |
1044 | /* For every configuration in the state "stp" which has the symbol "sp" |
1045 | ** following its dot, add the same configuration to the basis set under |
1046 | ** construction but with the dot shifted one symbol to the right. */ |
1047 | for(bcfp=cfp; bcfp; bcfp=bcfp->next){ |
1048 | if( bcfp->status==COMPLETE ) continue; /* Already used */ |
1049 | if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ |
1050 | bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ |
1051 | if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ |
1052 | bcfp->status = COMPLETE; /* Mark this config as used */ |
1053 | newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); |
1054 | Plink_add(&newcfg->bplp,bcfp); |
1055 | } |
1056 | |
1057 | /* Get a pointer to the state described by the basis configuration set |
1058 | ** constructed in the preceding loop */ |
1059 | newstp = getstate(lemp); |
1060 | |
1061 | /* The state "newstp" is reached from the state "stp" by a shift action |
1062 | ** on the symbol "sp" */ |
1063 | if( sp->type==MULTITERMINAL ){ |
1064 | int i; |
1065 | for(i=0; i<sp->nsubsym; i++){ |
1066 | Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); |
1067 | } |
1068 | }else{ |
1069 | Action_add(&stp->ap,SHIFT,sp,(char *)newstp); |
1070 | } |
1071 | } |
1072 | } |
1073 | |
1074 | /* |
1075 | ** Construct the propagation links |
1076 | */ |
1077 | void FindLinks(struct lemon *lemp) |
1078 | { |
1079 | int i; |
1080 | struct config *cfp, *other; |
1081 | struct state *stp; |
1082 | struct plink *plp; |
1083 | |
1084 | /* Housekeeping detail: |
1085 | ** Add to every propagate link a pointer back to the state to |
1086 | ** which the link is attached. */ |
1087 | for(i=0; i<lemp->nstate; i++){ |
1088 | stp = lemp->sorted[i]; |
1089 | for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ |
1090 | cfp->stp = stp; |
1091 | } |
1092 | } |
1093 | |
1094 | /* Convert all backlinks into forward links. Only the forward |
1095 | ** links are used in the follow-set computation. */ |
1096 | for(i=0; i<lemp->nstate; i++){ |
1097 | stp = lemp->sorted[i]; |
1098 | for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ |
1099 | for(plp=cfp->bplp; plp; plp=plp->next){ |
1100 | other = plp->cfp; |
1101 | Plink_add(&other->fplp,cfp); |
1102 | } |
1103 | } |
1104 | } |
1105 | } |
1106 | |
1107 | /* Compute all followsets. |
1108 | ** |
1109 | ** A followset is the set of all symbols which can come immediately |
1110 | ** after a configuration. |
1111 | */ |
1112 | void FindFollowSets(struct lemon *lemp) |
1113 | { |
1114 | int i; |
1115 | struct config *cfp; |
1116 | struct plink *plp; |
1117 | int progress; |
1118 | int change; |
1119 | |
1120 | for(i=0; i<lemp->nstate; i++){ |
1121 | assert( lemp->sorted[i]!=0 ); |
1122 | for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ |
1123 | cfp->status = INCOMPLETE; |
1124 | } |
1125 | } |
1126 | |
1127 | do{ |
1128 | progress = 0; |
1129 | for(i=0; i<lemp->nstate; i++){ |
1130 | assert( lemp->sorted[i]!=0 ); |
1131 | for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ |
1132 | if( cfp->status==COMPLETE ) continue; |
1133 | for(plp=cfp->fplp; plp; plp=plp->next){ |
1134 | change = SetUnion(plp->cfp->fws,cfp->fws); |
1135 | if( change ){ |
1136 | plp->cfp->status = INCOMPLETE; |
1137 | progress = 1; |
1138 | } |
1139 | } |
1140 | cfp->status = COMPLETE; |
1141 | } |
1142 | } |
1143 | }while( progress ); |
1144 | } |
1145 | |
1146 | static int resolve_conflict(struct action *,struct action *); |
1147 | |
1148 | /* Compute the reduce actions, and resolve conflicts. |
1149 | */ |
1150 | void FindActions(struct lemon *lemp) |
1151 | { |
1152 | int i,j; |
1153 | struct config *cfp; |
1154 | struct state *stp; |
1155 | struct symbol *sp; |
1156 | struct rule *rp; |
1157 | |
1158 | /* Add all of the reduce actions |
1159 | ** A reduce action is added for each element of the followset of |
1160 | ** a configuration which has its dot at the extreme right. |
1161 | */ |
1162 | for(i=0; i<lemp->nstate; i++){ /* Loop over all states */ |
1163 | stp = lemp->sorted[i]; |
1164 | for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ |
1165 | if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ |
1166 | for(j=0; j<lemp->nterminal; j++){ |
1167 | if( SetFind(cfp->fws,j) ){ |
1168 | /* Add a reduce action to the state "stp" which will reduce by the |
1169 | ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ |
1170 | Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); |
1171 | } |
1172 | } |
1173 | } |
1174 | } |
1175 | } |
1176 | |
1177 | /* Add the accepting token */ |
1178 | if( lemp->start ){ |
1179 | sp = Symbol_find(lemp->start); |
1180 | if( sp==0 ){ |
1181 | if( lemp->startRule==0 ){ |
1182 | fprintf(stderr, "internal error on source line %d: no start rule\n" , |
1183 | __LINE__); |
1184 | exit(1); |
1185 | } |
1186 | sp = lemp->startRule->lhs; |
1187 | } |
1188 | }else{ |
1189 | sp = lemp->startRule->lhs; |
1190 | } |
1191 | /* Add to the first state (which is always the starting state of the |
1192 | ** finite state machine) an action to ACCEPT if the lookahead is the |
1193 | ** start nonterminal. */ |
1194 | Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); |
1195 | |
1196 | /* Resolve conflicts */ |
1197 | for(i=0; i<lemp->nstate; i++){ |
1198 | struct action *ap, *nap; |
1199 | stp = lemp->sorted[i]; |
1200 | /* assert( stp->ap ); */ |
1201 | stp->ap = Action_sort(stp->ap); |
1202 | for(ap=stp->ap; ap && ap->next; ap=ap->next){ |
1203 | for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ |
1204 | /* The two actions "ap" and "nap" have the same lookahead. |
1205 | ** Figure out which one should be used */ |
1206 | lemp->nconflict += resolve_conflict(ap,nap); |
1207 | } |
1208 | } |
1209 | } |
1210 | |
1211 | /* Report an error for each rule that can never be reduced. */ |
1212 | for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; |
1213 | for(i=0; i<lemp->nstate; i++){ |
1214 | struct action *ap; |
1215 | for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ |
1216 | if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; |
1217 | } |
1218 | } |
1219 | for(rp=lemp->rule; rp; rp=rp->next){ |
1220 | if( rp->canReduce ) continue; |
1221 | ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n" ); |
1222 | lemp->errorcnt++; |
1223 | } |
1224 | } |
1225 | |
1226 | /* Resolve a conflict between the two given actions. If the |
1227 | ** conflict can't be resolved, return non-zero. |
1228 | ** |
1229 | ** NO LONGER TRUE: |
1230 | ** To resolve a conflict, first look to see if either action |
1231 | ** is on an error rule. In that case, take the action which |
1232 | ** is not associated with the error rule. If neither or both |
1233 | ** actions are associated with an error rule, then try to |
1234 | ** use precedence to resolve the conflict. |
1235 | ** |
1236 | ** If either action is a SHIFT, then it must be apx. This |
1237 | ** function won't work if apx->type==REDUCE and apy->type==SHIFT. |
1238 | */ |
1239 | static int resolve_conflict( |
1240 | struct action *apx, |
1241 | struct action *apy |
1242 | ){ |
1243 | struct symbol *spx, *spy; |
1244 | int errcnt = 0; |
1245 | assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ |
1246 | if( apx->type==SHIFT && apy->type==SHIFT ){ |
1247 | apy->type = SSCONFLICT; |
1248 | errcnt++; |
1249 | } |
1250 | if( apx->type==SHIFT && apy->type==REDUCE ){ |
1251 | spx = apx->sp; |
1252 | spy = apy->x.rp->precsym; |
1253 | if( spy==0 || spx->prec<0 || spy->prec<0 ){ |
1254 | /* Not enough precedence information. */ |
1255 | apy->type = SRCONFLICT; |
1256 | errcnt++; |
1257 | }else if( spx->prec>spy->prec ){ /* higher precedence wins */ |
1258 | apy->type = RD_RESOLVED; |
1259 | }else if( spx->prec<spy->prec ){ |
1260 | apx->type = SH_RESOLVED; |
1261 | }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ |
1262 | apy->type = RD_RESOLVED; /* associativity */ |
1263 | }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ |
1264 | apx->type = SH_RESOLVED; |
1265 | }else{ |
1266 | assert( spx->prec==spy->prec && spx->assoc==NONE ); |
1267 | apx->type = ERROR; |
1268 | } |
1269 | }else if( apx->type==REDUCE && apy->type==REDUCE ){ |
1270 | spx = apx->x.rp->precsym; |
1271 | spy = apy->x.rp->precsym; |
1272 | if( spx==0 || spy==0 || spx->prec<0 || |
1273 | spy->prec<0 || spx->prec==spy->prec ){ |
1274 | apy->type = RRCONFLICT; |
1275 | errcnt++; |
1276 | }else if( spx->prec>spy->prec ){ |
1277 | apy->type = RD_RESOLVED; |
1278 | }else if( spx->prec<spy->prec ){ |
1279 | apx->type = RD_RESOLVED; |
1280 | } |
1281 | }else{ |
1282 | assert( |
1283 | apx->type==SH_RESOLVED || |
1284 | apx->type==RD_RESOLVED || |
1285 | apx->type==SSCONFLICT || |
1286 | apx->type==SRCONFLICT || |
1287 | apx->type==RRCONFLICT || |
1288 | apy->type==SH_RESOLVED || |
1289 | apy->type==RD_RESOLVED || |
1290 | apy->type==SSCONFLICT || |
1291 | apy->type==SRCONFLICT || |
1292 | apy->type==RRCONFLICT |
1293 | ); |
1294 | /* The REDUCE/SHIFT case cannot happen because SHIFTs come before |
1295 | ** REDUCEs on the list. If we reach this point it must be because |
1296 | ** the parser conflict had already been resolved. */ |
1297 | } |
1298 | return errcnt; |
1299 | } |
1300 | /********************* From the file "configlist.c" *************************/ |
1301 | /* |
1302 | ** Routines to processing a configuration list and building a state |
1303 | ** in the LEMON parser generator. |
1304 | */ |
1305 | |
1306 | static struct config *freelist = 0; /* List of free configurations */ |
1307 | static struct config *current = 0; /* Top of list of configurations */ |
1308 | static struct config **currentend = 0; /* Last on list of configs */ |
1309 | static struct config *basis = 0; /* Top of list of basis configs */ |
1310 | static struct config **basisend = 0; /* End of list of basis configs */ |
1311 | |
1312 | /* Return a pointer to a new configuration */ |
1313 | PRIVATE struct config *newconfig(void){ |
1314 | return (struct config*)calloc(1, sizeof(struct config)); |
1315 | } |
1316 | |
1317 | /* The configuration "old" is no longer used */ |
1318 | PRIVATE void deleteconfig(struct config *old) |
1319 | { |
1320 | old->next = freelist; |
1321 | freelist = old; |
1322 | } |
1323 | |
1324 | /* Initialized the configuration list builder */ |
1325 | void Configlist_init(void){ |
1326 | current = 0; |
1327 | currentend = ¤t; |
1328 | basis = 0; |
1329 | basisend = &basis; |
1330 | Configtable_init(); |
1331 | return; |
1332 | } |
1333 | |
1334 | /* Initialized the configuration list builder */ |
1335 | void Configlist_reset(void){ |
1336 | current = 0; |
1337 | currentend = ¤t; |
1338 | basis = 0; |
1339 | basisend = &basis; |
1340 | Configtable_clear(0); |
1341 | return; |
1342 | } |
1343 | |
1344 | /* Add another configuration to the configuration list */ |
1345 | struct config *Configlist_add( |
1346 | struct rule *rp, /* The rule */ |
1347 | int dot /* Index into the RHS of the rule where the dot goes */ |
1348 | ){ |
1349 | struct config *cfp, model; |
1350 | |
1351 | assert( currentend!=0 ); |
1352 | model.rp = rp; |
1353 | model.dot = dot; |
1354 | cfp = Configtable_find(&model); |
1355 | if( cfp==0 ){ |
1356 | cfp = newconfig(); |
1357 | cfp->rp = rp; |
1358 | cfp->dot = dot; |
1359 | cfp->fws = SetNew(); |
1360 | cfp->stp = 0; |
1361 | cfp->fplp = cfp->bplp = 0; |
1362 | cfp->next = 0; |
1363 | cfp->bp = 0; |
1364 | *currentend = cfp; |
1365 | currentend = &cfp->next; |
1366 | Configtable_insert(cfp); |
1367 | } |
1368 | return cfp; |
1369 | } |
1370 | |
1371 | /* Add a basis configuration to the configuration list */ |
1372 | struct config *Configlist_addbasis(struct rule *rp, int dot) |
1373 | { |
1374 | struct config *cfp, model; |
1375 | |
1376 | assert( basisend!=0 ); |
1377 | assert( currentend!=0 ); |
1378 | model.rp = rp; |
1379 | model.dot = dot; |
1380 | cfp = Configtable_find(&model); |
1381 | if( cfp==0 ){ |
1382 | cfp = newconfig(); |
1383 | cfp->rp = rp; |
1384 | cfp->dot = dot; |
1385 | cfp->fws = SetNew(); |
1386 | cfp->stp = 0; |
1387 | cfp->fplp = cfp->bplp = 0; |
1388 | cfp->next = 0; |
1389 | cfp->bp = 0; |
1390 | *currentend = cfp; |
1391 | currentend = &cfp->next; |
1392 | *basisend = cfp; |
1393 | basisend = &cfp->bp; |
1394 | Configtable_insert(cfp); |
1395 | } |
1396 | return cfp; |
1397 | } |
1398 | |
1399 | /* Compute the closure of the configuration list */ |
1400 | void Configlist_closure(struct lemon *lemp) |
1401 | { |
1402 | struct config *cfp, *newcfp; |
1403 | struct rule *rp, *newrp; |
1404 | struct symbol *sp, *xsp; |
1405 | int i, dot; |
1406 | |
1407 | assert( currentend!=0 ); |
1408 | for(cfp=current; cfp; cfp=cfp->next){ |
1409 | rp = cfp->rp; |
1410 | dot = cfp->dot; |
1411 | if( dot>=rp->nrhs ) continue; |
1412 | sp = rp->rhs[dot]; |
1413 | if( sp->type==NONTERMINAL ){ |
1414 | if( sp->rule==0 && sp!=lemp->errsym ){ |
1415 | ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules." , |
1416 | sp->name); |
1417 | lemp->errorcnt++; |
1418 | } |
1419 | for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ |
1420 | newcfp = Configlist_add(newrp,0); |
1421 | for(i=dot+1; i<rp->nrhs; i++){ |
1422 | xsp = rp->rhs[i]; |
1423 | if( xsp->type==TERMINAL ){ |
1424 | SetAdd(newcfp->fws,xsp->index); |
1425 | break; |
1426 | }else if( xsp->type==MULTITERMINAL ){ |
1427 | int k; |
1428 | for(k=0; k<xsp->nsubsym; k++){ |
1429 | SetAdd(newcfp->fws, xsp->subsym[k]->index); |
1430 | } |
1431 | break; |
1432 | }else{ |
1433 | SetUnion(newcfp->fws,xsp->firstset); |
1434 | if( xsp->lambda==LEMON_FALSE ) break; |
1435 | } |
1436 | } |
1437 | if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); |
1438 | } |
1439 | } |
1440 | } |
1441 | return; |
1442 | } |
1443 | |
1444 | /* Sort the configuration list */ |
1445 | void Configlist_sort(void){ |
1446 | current = (struct config*)msort((char*)current,(char**)&(current->next), |
1447 | Configcmp); |
1448 | currentend = 0; |
1449 | return; |
1450 | } |
1451 | |
1452 | /* Sort the basis configuration list */ |
1453 | void Configlist_sortbasis(void){ |
1454 | basis = (struct config*)msort((char*)current,(char**)&(current->bp), |
1455 | Configcmp); |
1456 | basisend = 0; |
1457 | return; |
1458 | } |
1459 | |
1460 | /* Return a pointer to the head of the configuration list and |
1461 | ** reset the list */ |
1462 | struct config *Configlist_return(void){ |
1463 | struct config *old; |
1464 | old = current; |
1465 | current = 0; |
1466 | currentend = 0; |
1467 | return old; |
1468 | } |
1469 | |
1470 | /* Return a pointer to the head of the configuration list and |
1471 | ** reset the list */ |
1472 | struct config *Configlist_basis(void){ |
1473 | struct config *old; |
1474 | old = basis; |
1475 | basis = 0; |
1476 | basisend = 0; |
1477 | return old; |
1478 | } |
1479 | |
1480 | /* Free all elements of the given configuration list */ |
1481 | void Configlist_eat(struct config *cfp) |
1482 | { |
1483 | struct config *nextcfp; |
1484 | for(; cfp; cfp=nextcfp){ |
1485 | nextcfp = cfp->next; |
1486 | assert( cfp->fplp==0 ); |
1487 | assert( cfp->bplp==0 ); |
1488 | if( cfp->fws ) SetFree(cfp->fws); |
1489 | deleteconfig(cfp); |
1490 | } |
1491 | return; |
1492 | } |
1493 | /***************** From the file "error.c" *********************************/ |
1494 | /* |
1495 | ** Code for printing error message. |
1496 | */ |
1497 | |
1498 | void ErrorMsg(const char *filename, int lineno, const char *format, ...){ |
1499 | va_list ap; |
1500 | fprintf(stderr, "%s:%d: " , filename, lineno); |
1501 | va_start(ap, format); |
1502 | vfprintf(stderr,format,ap); |
1503 | va_end(ap); |
1504 | fprintf(stderr, "\n" ); |
1505 | } |
1506 | /**************** From the file "main.c" ************************************/ |
1507 | /* |
1508 | ** Main program file for the LEMON parser generator. |
1509 | */ |
1510 | |
1511 | /* Report an out-of-memory condition and abort. This function |
1512 | ** is used mostly by the "MemoryCheck" macro in struct.h |
1513 | */ |
1514 | void memory_error(void){ |
1515 | fprintf(stderr,"Out of memory. Aborting...\n" ); |
1516 | exit(1); |
1517 | } |
1518 | |
1519 | static int nDefine = 0; /* Number of -D options on the command line */ |
1520 | static char **azDefine = 0; /* Name of the -D macros */ |
1521 | |
1522 | /* This routine is called with the argument to each -D command-line option. |
1523 | ** Add the macro defined to the azDefine array. |
1524 | */ |
1525 | static void handle_D_option(char *z){ |
1526 | char **paz; |
1527 | nDefine++; |
1528 | azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); |
1529 | if( azDefine==0 ){ |
1530 | fprintf(stderr,"out of memory\n" ); |
1531 | exit(1); |
1532 | } |
1533 | paz = &azDefine[nDefine-1]; |
1534 | *paz = (char *) malloc( lemonStrlen(z)+1 ); |
1535 | if( *paz==0 ){ |
1536 | fprintf(stderr,"out of memory\n" ); |
1537 | exit(1); |
1538 | } |
1539 | lemon_strcpy(*paz, z); |
1540 | for(z=*paz; *z && *z!='='; z++){} |
1541 | *z = 0; |
1542 | } |
1543 | |
1544 | /* Rember the name of the output directory |
1545 | */ |
1546 | static char *outputDir = NULL; |
1547 | static void handle_d_option(char *z){ |
1548 | outputDir = (char *) malloc( lemonStrlen(z)+1 ); |
1549 | if( outputDir==0 ){ |
1550 | fprintf(stderr,"out of memory\n" ); |
1551 | exit(1); |
1552 | } |
1553 | lemon_strcpy(outputDir, z); |
1554 | } |
1555 | |
1556 | static char *user_templatename = NULL; |
1557 | static void handle_T_option(char *z){ |
1558 | user_templatename = (char *) malloc( lemonStrlen(z)+1 ); |
1559 | if( user_templatename==0 ){ |
1560 | memory_error(); |
1561 | } |
1562 | lemon_strcpy(user_templatename, z); |
1563 | } |
1564 | |
1565 | /* Merge together to lists of rules ordered by rule.iRule */ |
1566 | static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ |
1567 | struct rule *pFirst = 0; |
1568 | struct rule **ppPrev = &pFirst; |
1569 | while( pA && pB ){ |
1570 | if( pA->iRule<pB->iRule ){ |
1571 | *ppPrev = pA; |
1572 | ppPrev = &pA->next; |
1573 | pA = pA->next; |
1574 | }else{ |
1575 | *ppPrev = pB; |
1576 | ppPrev = &pB->next; |
1577 | pB = pB->next; |
1578 | } |
1579 | } |
1580 | if( pA ){ |
1581 | *ppPrev = pA; |
1582 | }else{ |
1583 | *ppPrev = pB; |
1584 | } |
1585 | return pFirst; |
1586 | } |
1587 | |
1588 | /* |
1589 | ** Sort a list of rules in order of increasing iRule value |
1590 | */ |
1591 | static struct rule *Rule_sort(struct rule *rp){ |
1592 | unsigned int i; |
1593 | struct rule *pNext; |
1594 | struct rule *x[32]; |
1595 | memset(x, 0, sizeof(x)); |
1596 | while( rp ){ |
1597 | pNext = rp->next; |
1598 | rp->next = 0; |
1599 | for(i=0; i<sizeof(x)/sizeof(x[0])-1 && x[i]; i++){ |
1600 | rp = Rule_merge(x[i], rp); |
1601 | x[i] = 0; |
1602 | } |
1603 | x[i] = rp; |
1604 | rp = pNext; |
1605 | } |
1606 | rp = 0; |
1607 | for(i=0; i<sizeof(x)/sizeof(x[0]); i++){ |
1608 | rp = Rule_merge(x[i], rp); |
1609 | } |
1610 | return rp; |
1611 | } |
1612 | |
1613 | /* forward reference */ |
1614 | static const char *minimum_size_type(int lwr, int upr, int *pnByte); |
1615 | |
1616 | /* Print a single line of the "Parser Stats" output |
1617 | */ |
1618 | static void stats_line(const char *zLabel, int iValue){ |
1619 | int nLabel = lemonStrlen(zLabel); |
1620 | printf(" %s%.*s %5d\n" , zLabel, |
1621 | 35-nLabel, "................................" , |
1622 | iValue); |
1623 | } |
1624 | |
1625 | /* The main program. Parse the command line and do it... */ |
1626 | int main(int argc, char **argv){ |
1627 | static int version = 0; |
1628 | static int rpflag = 0; |
1629 | static int basisflag = 0; |
1630 | static int compress = 0; |
1631 | static int quiet = 0; |
1632 | static int statistics = 0; |
1633 | static int mhflag = 0; |
1634 | static int nolinenosflag = 0; |
1635 | static int noResort = 0; |
1636 | static int sqlFlag = 0; |
1637 | static int printPP = 0; |
1638 | |
1639 | static struct s_options options[] = { |
1640 | {OPT_FLAG, "b" , (char*)&basisflag, "Print only the basis in report." }, |
1641 | {OPT_FLAG, "c" , (char*)&compress, "Don't compress the action table." }, |
1642 | {OPT_FSTR, "d" , (char*)&handle_d_option, "Output directory. Default '.'" }, |
1643 | {OPT_FSTR, "D" , (char*)handle_D_option, "Define an %ifdef macro." }, |
1644 | {OPT_FLAG, "E" , (char*)&printPP, "Print input file after preprocessing." }, |
1645 | {OPT_FSTR, "f" , 0, "Ignored. (Placeholder for -f compiler options.)" }, |
1646 | {OPT_FLAG, "g" , (char*)&rpflag, "Print grammar without actions." }, |
1647 | {OPT_FSTR, "I" , 0, "Ignored. (Placeholder for '-I' compiler options.)" }, |
1648 | {OPT_FLAG, "m" , (char*)&mhflag, "Output a makeheaders compatible file." }, |
1649 | {OPT_FLAG, "l" , (char*)&nolinenosflag, "Do not print #line statements." }, |
1650 | {OPT_FSTR, "O" , 0, "Ignored. (Placeholder for '-O' compiler options.)" }, |
1651 | {OPT_FLAG, "p" , (char*)&showPrecedenceConflict, |
1652 | "Show conflicts resolved by precedence rules" }, |
1653 | {OPT_FLAG, "q" , (char*)&quiet, "(Quiet) Don't print the report file." }, |
1654 | {OPT_FLAG, "r" , (char*)&noResort, "Do not sort or renumber states" }, |
1655 | {OPT_FLAG, "s" , (char*)&statistics, |
1656 | "Print parser stats to standard output." }, |
1657 | {OPT_FLAG, "S" , (char*)&sqlFlag, |
1658 | "Generate the *.sql file describing the parser tables." }, |
1659 | {OPT_FLAG, "x" , (char*)&version, "Print the version number." }, |
1660 | {OPT_FSTR, "T" , (char*)handle_T_option, "Specify a template file." }, |
1661 | {OPT_FSTR, "W" , 0, "Ignored. (Placeholder for '-W' compiler options.)" }, |
1662 | {OPT_FLAG,0,0,0} |
1663 | }; |
1664 | int i; |
1665 | int exitcode; |
1666 | struct lemon lem; |
1667 | struct rule *rp; |
1668 | |
1669 | (void)argc; |
1670 | OptInit(argv,options,stderr); |
1671 | if( version ){ |
1672 | printf("Lemon version 1.0\n" ); |
1673 | exit(0); |
1674 | } |
1675 | if( OptNArgs()!=1 ){ |
1676 | fprintf(stderr,"Exactly one filename argument is required.\n" ); |
1677 | exit(1); |
1678 | } |
1679 | memset(&lem, 0, sizeof(lem)); |
1680 | lem.errorcnt = 0; |
1681 | |
1682 | /* Initialize the machine */ |
1683 | Strsafe_init(); |
1684 | Symbol_init(); |
1685 | State_init(); |
1686 | lem.argv0 = argv[0]; |
1687 | lem.filename = OptArg(0); |
1688 | lem.basisflag = basisflag; |
1689 | lem.nolinenosflag = nolinenosflag; |
1690 | lem.printPreprocessed = printPP; |
1691 | Symbol_new("$" ); |
1692 | |
1693 | /* Parse the input file */ |
1694 | Parse(&lem); |
1695 | if( lem.printPreprocessed || lem.errorcnt ) exit(lem.errorcnt); |
1696 | if( lem.nrule==0 ){ |
1697 | fprintf(stderr,"Empty grammar.\n" ); |
1698 | exit(1); |
1699 | } |
1700 | lem.errsym = Symbol_find("error" ); |
1701 | |
1702 | /* Count and index the symbols of the grammar */ |
1703 | Symbol_new("{default}" ); |
1704 | lem.nsymbol = Symbol_count(); |
1705 | lem.symbols = Symbol_arrayof(); |
1706 | for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; |
1707 | qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); |
1708 | for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; |
1709 | while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } |
1710 | assert( strcmp(lem.symbols[i-1]->name,"{default}" )==0 ); |
1711 | lem.nsymbol = i - 1; |
1712 | for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); |
1713 | lem.nterminal = i; |
1714 | |
1715 | /* Assign sequential rule numbers. Start with 0. Put rules that have no |
1716 | ** reduce action C-code associated with them last, so that the switch() |
1717 | ** statement that selects reduction actions will have a smaller jump table. |
1718 | */ |
1719 | for(i=0, rp=lem.rule; rp; rp=rp->next){ |
1720 | rp->iRule = rp->code ? i++ : -1; |
1721 | } |
1722 | lem.nruleWithAction = i; |
1723 | for(rp=lem.rule; rp; rp=rp->next){ |
1724 | if( rp->iRule<0 ) rp->iRule = i++; |
1725 | } |
1726 | lem.startRule = lem.rule; |
1727 | lem.rule = Rule_sort(lem.rule); |
1728 | |
1729 | /* Generate a reprint of the grammar, if requested on the command line */ |
1730 | if( rpflag ){ |
1731 | Reprint(&lem); |
1732 | }else{ |
1733 | /* Initialize the size for all follow and first sets */ |
1734 | SetSize(lem.nterminal+1); |
1735 | |
1736 | /* Find the precedence for every production rule (that has one) */ |
1737 | FindRulePrecedences(&lem); |
1738 | |
1739 | /* Compute the lambda-nonterminals and the first-sets for every |
1740 | ** nonterminal */ |
1741 | FindFirstSets(&lem); |
1742 | |
1743 | /* Compute all LR(0) states. Also record follow-set propagation |
1744 | ** links so that the follow-set can be computed later */ |
1745 | lem.nstate = 0; |
1746 | FindStates(&lem); |
1747 | lem.sorted = State_arrayof(); |
1748 | |
1749 | /* Tie up loose ends on the propagation links */ |
1750 | FindLinks(&lem); |
1751 | |
1752 | /* Compute the follow set of every reducible configuration */ |
1753 | FindFollowSets(&lem); |
1754 | |
1755 | /* Compute the action tables */ |
1756 | FindActions(&lem); |
1757 | |
1758 | /* Compress the action tables */ |
1759 | if( compress==0 ) CompressTables(&lem); |
1760 | |
1761 | /* Reorder and renumber the states so that states with fewer choices |
1762 | ** occur at the end. This is an optimization that helps make the |
1763 | ** generated parser tables smaller. */ |
1764 | if( noResort==0 ) ResortStates(&lem); |
1765 | |
1766 | /* Generate a report of the parser generated. (the "y.output" file) */ |
1767 | if( !quiet ) ReportOutput(&lem); |
1768 | |
1769 | /* Generate the source code for the parser */ |
1770 | ReportTable(&lem, mhflag, sqlFlag); |
1771 | |
1772 | /* Produce a header file for use by the scanner. (This step is |
1773 | ** omitted if the "-m" option is used because makeheaders will |
1774 | ** generate the file for us.) */ |
1775 | if( !mhflag ) ReportHeader(&lem); |
1776 | } |
1777 | if( statistics ){ |
1778 | printf("Parser statistics:\n" ); |
1779 | stats_line("terminal symbols" , lem.nterminal); |
1780 | stats_line("non-terminal symbols" , lem.nsymbol - lem.nterminal); |
1781 | stats_line("total symbols" , lem.nsymbol); |
1782 | stats_line("rules" , lem.nrule); |
1783 | stats_line("states" , lem.nxstate); |
1784 | stats_line("conflicts" , lem.nconflict); |
1785 | stats_line("action table entries" , lem.nactiontab); |
1786 | stats_line("lookahead table entries" , lem.nlookaheadtab); |
1787 | stats_line("total table size (bytes)" , lem.tablesize); |
1788 | } |
1789 | if( lem.nconflict > 0 ){ |
1790 | fprintf(stderr,"%d parsing conflicts.\n" ,lem.nconflict); |
1791 | } |
1792 | |
1793 | /* return 0 on success, 1 on failure. */ |
1794 | exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; |
1795 | exit(exitcode); |
1796 | return (exitcode); |
1797 | } |
1798 | /******************** From the file "msort.c" *******************************/ |
1799 | /* |
1800 | ** A generic merge-sort program. |
1801 | ** |
1802 | ** USAGE: |
1803 | ** Let "ptr" be a pointer to some structure which is at the head of |
1804 | ** a null-terminated list. Then to sort the list call: |
1805 | ** |
1806 | ** ptr = msort(ptr,&(ptr->next),cmpfnc); |
1807 | ** |
1808 | ** In the above, "cmpfnc" is a pointer to a function which compares |
1809 | ** two instances of the structure and returns an integer, as in |
1810 | ** strcmp. The second argument is a pointer to the pointer to the |
1811 | ** second element of the linked list. This address is used to compute |
1812 | ** the offset to the "next" field within the structure. The offset to |
1813 | ** the "next" field must be constant for all structures in the list. |
1814 | ** |
1815 | ** The function returns a new pointer which is the head of the list |
1816 | ** after sorting. |
1817 | ** |
1818 | ** ALGORITHM: |
1819 | ** Merge-sort. |
1820 | */ |
1821 | |
1822 | /* |
1823 | ** Return a pointer to the next structure in the linked list. |
1824 | */ |
1825 | #define NEXT(A) (*(char**)(((char*)A)+offset)) |
1826 | |
1827 | /* |
1828 | ** Inputs: |
1829 | ** a: A sorted, null-terminated linked list. (May be null). |
1830 | ** b: A sorted, null-terminated linked list. (May be null). |
1831 | ** cmp: A pointer to the comparison function. |
1832 | ** offset: Offset in the structure to the "next" field. |
1833 | ** |
1834 | ** Return Value: |
1835 | ** A pointer to the head of a sorted list containing the elements |
1836 | ** of both a and b. |
1837 | ** |
1838 | ** Side effects: |
1839 | ** The "next" pointers for elements in the lists a and b are |
1840 | ** changed. |
1841 | */ |
1842 | static char *merge( |
1843 | char *a, |
1844 | char *b, |
1845 | int (*cmp)(const char*,const char*), |
1846 | int offset |
1847 | ){ |
1848 | char *ptr, *head; |
1849 | |
1850 | if( a==0 ){ |
1851 | head = b; |
1852 | }else if( b==0 ){ |
1853 | head = a; |
1854 | }else{ |
1855 | if( (*cmp)(a,b)<=0 ){ |
1856 | ptr = a; |
1857 | a = NEXT(a); |
1858 | }else{ |
1859 | ptr = b; |
1860 | b = NEXT(b); |
1861 | } |
1862 | head = ptr; |
1863 | while( a && b ){ |
1864 | if( (*cmp)(a,b)<=0 ){ |
1865 | NEXT(ptr) = a; |
1866 | ptr = a; |
1867 | a = NEXT(a); |
1868 | }else{ |
1869 | NEXT(ptr) = b; |
1870 | ptr = b; |
1871 | b = NEXT(b); |
1872 | } |
1873 | } |
1874 | if( a ) NEXT(ptr) = a; |
1875 | else NEXT(ptr) = b; |
1876 | } |
1877 | return head; |
1878 | } |
1879 | |
1880 | /* |
1881 | ** Inputs: |
1882 | ** list: Pointer to a singly-linked list of structures. |
1883 | ** next: Pointer to pointer to the second element of the list. |
1884 | ** cmp: A comparison function. |
1885 | ** |
1886 | ** Return Value: |
1887 | ** A pointer to the head of a sorted list containing the elements |
1888 | ** originally in list. |
1889 | ** |
1890 | ** Side effects: |
1891 | ** The "next" pointers for elements in list are changed. |
1892 | */ |
1893 | #define LISTSIZE 30 |
1894 | static char *msort( |
1895 | char *list, |
1896 | char **next, |
1897 | int (*cmp)(const char*,const char*) |
1898 | ){ |
1899 | unsigned long offset; |
1900 | char *ep; |
1901 | char *set[LISTSIZE]; |
1902 | int i; |
1903 | offset = (unsigned long)((char*)next - (char*)list); |
1904 | for(i=0; i<LISTSIZE; i++) set[i] = 0; |
1905 | while( list ){ |
1906 | ep = list; |
1907 | list = NEXT(list); |
1908 | NEXT(ep) = 0; |
1909 | for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){ |
1910 | ep = merge(ep,set[i],cmp,offset); |
1911 | set[i] = 0; |
1912 | } |
1913 | set[i] = ep; |
1914 | } |
1915 | ep = 0; |
1916 | for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(set[i],ep,cmp,offset); |
1917 | return ep; |
1918 | } |
1919 | /************************ From the file "option.c" **************************/ |
1920 | static char **g_argv; |
1921 | static struct s_options *op; |
1922 | static FILE *errstream; |
1923 | |
1924 | #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0) |
1925 | |
1926 | /* |
1927 | ** Print the command line with a carrot pointing to the k-th character |
1928 | ** of the n-th field. |
1929 | */ |
1930 | static void errline(int n, int k, FILE *err) |
1931 | { |
1932 | int spcnt, i; |
1933 | if( g_argv[0] ){ |
1934 | fprintf(err,"%s" ,g_argv[0]); |
1935 | spcnt = lemonStrlen(g_argv[0]) + 1; |
1936 | }else{ |
1937 | spcnt = 0; |
1938 | } |
1939 | for(i=1; i<n && g_argv[i]; i++){ |
1940 | fprintf(err," %s" ,g_argv[i]); |
1941 | spcnt += lemonStrlen(g_argv[i])+1; |
1942 | } |
1943 | spcnt += k; |
1944 | for(; g_argv[i]; i++) fprintf(err," %s" ,g_argv[i]); |
1945 | if( spcnt<20 ){ |
1946 | fprintf(err,"\n%*s^-- here\n" ,spcnt,"" ); |
1947 | }else{ |
1948 | fprintf(err,"\n%*shere --^\n" ,spcnt-7,"" ); |
1949 | } |
1950 | } |
1951 | |
1952 | /* |
1953 | ** Return the index of the N-th non-switch argument. Return -1 |
1954 | ** if N is out of range. |
1955 | */ |
1956 | static int argindex(int n) |
1957 | { |
1958 | int i; |
1959 | int dashdash = 0; |
1960 | if( g_argv!=0 && *g_argv!=0 ){ |
1961 | for(i=1; g_argv[i]; i++){ |
1962 | if( dashdash || !ISOPT(g_argv[i]) ){ |
1963 | if( n==0 ) return i; |
1964 | n--; |
1965 | } |
1966 | if( strcmp(g_argv[i],"--" )==0 ) dashdash = 1; |
1967 | } |
1968 | } |
1969 | return -1; |
1970 | } |
1971 | |
1972 | static char emsg[] = "Command line syntax error: " ; |
1973 | |
1974 | /* |
1975 | ** Process a flag command line argument. |
1976 | */ |
1977 | static int handleflags(int i, FILE *err) |
1978 | { |
1979 | int v; |
1980 | int errcnt = 0; |
1981 | int j; |
1982 | for(j=0; op[j].label; j++){ |
1983 | if( strncmp(&g_argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break; |
1984 | } |
1985 | v = g_argv[i][0]=='-' ? 1 : 0; |
1986 | if( op[j].label==0 ){ |
1987 | if( err ){ |
1988 | fprintf(err,"%sundefined option.\n" ,emsg); |
1989 | errline(i,1,err); |
1990 | } |
1991 | errcnt++; |
1992 | }else if( op[j].arg==0 ){ |
1993 | /* Ignore this option */ |
1994 | }else if( op[j].type==OPT_FLAG ){ |
1995 | *((int*)op[j].arg) = v; |
1996 | }else if( op[j].type==OPT_FFLAG ){ |
1997 | (*(void(*)(int))(op[j].arg))(v); |
1998 | }else if( op[j].type==OPT_FSTR ){ |
1999 | (*(void(*)(char *))(op[j].arg))(&g_argv[i][2]); |
2000 | }else{ |
2001 | if( err ){ |
2002 | fprintf(err,"%smissing argument on switch.\n" ,emsg); |
2003 | errline(i,1,err); |
2004 | } |
2005 | errcnt++; |
2006 | } |
2007 | return errcnt; |
2008 | } |
2009 | |
2010 | /* |
2011 | ** Process a command line switch which has an argument. |
2012 | */ |
2013 | static int handleswitch(int i, FILE *err) |
2014 | { |
2015 | int lv = 0; |
2016 | double dv = 0.0; |
2017 | char *sv = 0, *end; |
2018 | char *cp; |
2019 | int j; |
2020 | int errcnt = 0; |
2021 | cp = strchr(g_argv[i],'='); |
2022 | assert( cp!=0 ); |
2023 | *cp = 0; |
2024 | for(j=0; op[j].label; j++){ |
2025 | if( strcmp(g_argv[i],op[j].label)==0 ) break; |
2026 | } |
2027 | *cp = '='; |
2028 | if( op[j].label==0 ){ |
2029 | if( err ){ |
2030 | fprintf(err,"%sundefined option.\n" ,emsg); |
2031 | errline(i,0,err); |
2032 | } |
2033 | errcnt++; |
2034 | }else{ |
2035 | cp++; |
2036 | switch( op[j].type ){ |
2037 | case OPT_FLAG: |
2038 | case OPT_FFLAG: |
2039 | if( err ){ |
2040 | fprintf(err,"%soption requires an argument.\n" ,emsg); |
2041 | errline(i,0,err); |
2042 | } |
2043 | errcnt++; |
2044 | break; |
2045 | case OPT_DBL: |
2046 | case OPT_FDBL: |
2047 | dv = strtod(cp,&end); |
2048 | if( *end ){ |
2049 | if( err ){ |
2050 | fprintf(err, |
2051 | "%sillegal character in floating-point argument.\n" ,emsg); |
2052 | errline(i,(int)((char*)end-(char*)g_argv[i]),err); |
2053 | } |
2054 | errcnt++; |
2055 | } |
2056 | break; |
2057 | case OPT_INT: |
2058 | case OPT_FINT: |
2059 | lv = strtol(cp,&end,0); |
2060 | if( *end ){ |
2061 | if( err ){ |
2062 | fprintf(err,"%sillegal character in integer argument.\n" ,emsg); |
2063 | errline(i,(int)((char*)end-(char*)g_argv[i]),err); |
2064 | } |
2065 | errcnt++; |
2066 | } |
2067 | break; |
2068 | case OPT_STR: |
2069 | case OPT_FSTR: |
2070 | sv = cp; |
2071 | break; |
2072 | } |
2073 | switch( op[j].type ){ |
2074 | case OPT_FLAG: |
2075 | case OPT_FFLAG: |
2076 | break; |
2077 | case OPT_DBL: |
2078 | *(double*)(op[j].arg) = dv; |
2079 | break; |
2080 | case OPT_FDBL: |
2081 | (*(void(*)(double))(op[j].arg))(dv); |
2082 | break; |
2083 | case OPT_INT: |
2084 | *(int*)(op[j].arg) = lv; |
2085 | break; |
2086 | case OPT_FINT: |
2087 | (*(void(*)(int))(op[j].arg))((int)lv); |
2088 | break; |
2089 | case OPT_STR: |
2090 | *(char**)(op[j].arg) = sv; |
2091 | break; |
2092 | case OPT_FSTR: |
2093 | (*(void(*)(char *))(op[j].arg))(sv); |
2094 | break; |
2095 | } |
2096 | } |
2097 | return errcnt; |
2098 | } |
2099 | |
2100 | int OptInit(char **a, struct s_options *o, FILE *err) |
2101 | { |
2102 | int errcnt = 0; |
2103 | g_argv = a; |
2104 | op = o; |
2105 | errstream = err; |
2106 | if( g_argv && *g_argv && op ){ |
2107 | int i; |
2108 | for(i=1; g_argv[i]; i++){ |
2109 | if( g_argv[i][0]=='+' || g_argv[i][0]=='-' ){ |
2110 | errcnt += handleflags(i,err); |
2111 | }else if( strchr(g_argv[i],'=') ){ |
2112 | errcnt += handleswitch(i,err); |
2113 | } |
2114 | } |
2115 | } |
2116 | if( errcnt>0 ){ |
2117 | fprintf(err,"Valid command line options for \"%s\" are:\n" ,*a); |
2118 | OptPrint(); |
2119 | exit(1); |
2120 | } |
2121 | return 0; |
2122 | } |
2123 | |
2124 | int OptNArgs(void){ |
2125 | int cnt = 0; |
2126 | int dashdash = 0; |
2127 | int i; |
2128 | if( g_argv!=0 && g_argv[0]!=0 ){ |
2129 | for(i=1; g_argv[i]; i++){ |
2130 | if( dashdash || !ISOPT(g_argv[i]) ) cnt++; |
2131 | if( strcmp(g_argv[i],"--" )==0 ) dashdash = 1; |
2132 | } |
2133 | } |
2134 | return cnt; |
2135 | } |
2136 | |
2137 | char *OptArg(int n) |
2138 | { |
2139 | int i; |
2140 | i = argindex(n); |
2141 | return i>=0 ? g_argv[i] : 0; |
2142 | } |
2143 | |
2144 | void OptErr(int n) |
2145 | { |
2146 | int i; |
2147 | i = argindex(n); |
2148 | if( i>=0 ) errline(i,0,errstream); |
2149 | } |
2150 | |
2151 | void OptPrint(void){ |
2152 | int i; |
2153 | int max, len; |
2154 | max = 0; |
2155 | for(i=0; op[i].label; i++){ |
2156 | len = lemonStrlen(op[i].label) + 1; |
2157 | switch( op[i].type ){ |
2158 | case OPT_FLAG: |
2159 | case OPT_FFLAG: |
2160 | break; |
2161 | case OPT_INT: |
2162 | case OPT_FINT: |
2163 | len += 9; /* length of "<integer>" */ |
2164 | break; |
2165 | case OPT_DBL: |
2166 | case OPT_FDBL: |
2167 | len += 6; /* length of "<real>" */ |
2168 | break; |
2169 | case OPT_STR: |
2170 | case OPT_FSTR: |
2171 | len += 8; /* length of "<string>" */ |
2172 | break; |
2173 | } |
2174 | if( len>max ) max = len; |
2175 | } |
2176 | for(i=0; op[i].label; i++){ |
2177 | switch( op[i].type ){ |
2178 | case OPT_FLAG: |
2179 | case OPT_FFLAG: |
2180 | fprintf(errstream," -%-*s %s\n" ,max,op[i].label,op[i].message); |
2181 | break; |
2182 | case OPT_INT: |
2183 | case OPT_FINT: |
2184 | fprintf(errstream," -%s<integer>%*s %s\n" ,op[i].label, |
2185 | (int)(max-lemonStrlen(op[i].label)-9),"" ,op[i].message); |
2186 | break; |
2187 | case OPT_DBL: |
2188 | case OPT_FDBL: |
2189 | fprintf(errstream," -%s<real>%*s %s\n" ,op[i].label, |
2190 | (int)(max-lemonStrlen(op[i].label)-6),"" ,op[i].message); |
2191 | break; |
2192 | case OPT_STR: |
2193 | case OPT_FSTR: |
2194 | fprintf(errstream," -%s<string>%*s %s\n" ,op[i].label, |
2195 | (int)(max-lemonStrlen(op[i].label)-8),"" ,op[i].message); |
2196 | break; |
2197 | } |
2198 | } |
2199 | } |
2200 | /*********************** From the file "parse.c" ****************************/ |
2201 | /* |
2202 | ** Input file parser for the LEMON parser generator. |
2203 | */ |
2204 | |
2205 | /* The state of the parser */ |
2206 | enum e_state { |
2207 | INITIALIZE, |
2208 | WAITING_FOR_DECL_OR_RULE, |
2209 | WAITING_FOR_DECL_KEYWORD, |
2210 | WAITING_FOR_DECL_ARG, |
2211 | WAITING_FOR_PRECEDENCE_SYMBOL, |
2212 | WAITING_FOR_ARROW, |
2213 | IN_RHS, |
2214 | LHS_ALIAS_1, |
2215 | LHS_ALIAS_2, |
2216 | LHS_ALIAS_3, |
2217 | RHS_ALIAS_1, |
2218 | RHS_ALIAS_2, |
2219 | PRECEDENCE_MARK_1, |
2220 | PRECEDENCE_MARK_2, |
2221 | RESYNC_AFTER_RULE_ERROR, |
2222 | RESYNC_AFTER_DECL_ERROR, |
2223 | WAITING_FOR_DESTRUCTOR_SYMBOL, |
2224 | WAITING_FOR_DATATYPE_SYMBOL, |
2225 | WAITING_FOR_FALLBACK_ID, |
2226 | WAITING_FOR_WILDCARD_ID, |
2227 | WAITING_FOR_CLASS_ID, |
2228 | WAITING_FOR_CLASS_TOKEN, |
2229 | WAITING_FOR_TOKEN_NAME |
2230 | }; |
2231 | struct pstate { |
2232 | char *filename; /* Name of the input file */ |
2233 | int tokenlineno; /* Linenumber at which current token starts */ |
2234 | int errorcnt; /* Number of errors so far */ |
2235 | char *tokenstart; /* Text of current token */ |
2236 | struct lemon *gp; /* Global state vector */ |
2237 | enum e_state state; /* The state of the parser */ |
2238 | struct symbol *fallback; /* The fallback token */ |
2239 | struct symbol *tkclass; /* Token class symbol */ |
2240 | struct symbol *lhs; /* Left-hand side of current rule */ |
2241 | const char *lhsalias; /* Alias for the LHS */ |
2242 | int nrhs; /* Number of right-hand side symbols seen */ |
2243 | struct symbol *rhs[MAXRHS]; /* RHS symbols */ |
2244 | const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ |
2245 | struct rule *prevrule; /* Previous rule parsed */ |
2246 | const char *declkeyword; /* Keyword of a declaration */ |
2247 | char **declargslot; /* Where the declaration argument should be put */ |
2248 | int insertLineMacro; /* Add #line before declaration insert */ |
2249 | int *decllinenoslot; /* Where to write declaration line number */ |
2250 | enum e_assoc declassoc; /* Assign this association to decl arguments */ |
2251 | int preccounter; /* Assign this precedence to decl arguments */ |
2252 | struct rule *firstrule; /* Pointer to first rule in the grammar */ |
2253 | struct rule *lastrule; /* Pointer to the most recently parsed rule */ |
2254 | }; |
2255 | |
2256 | /* Parse a single token */ |
2257 | static void parseonetoken(struct pstate *psp) |
2258 | { |
2259 | const char *x; |
2260 | x = Strsafe(psp->tokenstart); /* Save the token permanently */ |
2261 | #if 0 |
2262 | printf("%s:%d: Token=[%s] state=%d\n" ,psp->filename,psp->tokenlineno, |
2263 | x,psp->state); |
2264 | #endif |
2265 | switch( psp->state ){ |
2266 | case INITIALIZE: |
2267 | psp->prevrule = 0; |
2268 | psp->preccounter = 0; |
2269 | psp->firstrule = psp->lastrule = 0; |
2270 | psp->gp->nrule = 0; |
2271 | /* fall through */ |
2272 | case WAITING_FOR_DECL_OR_RULE: |
2273 | if( x[0]=='%' ){ |
2274 | psp->state = WAITING_FOR_DECL_KEYWORD; |
2275 | }else if( ISLOWER(x[0]) ){ |
2276 | psp->lhs = Symbol_new(x); |
2277 | psp->nrhs = 0; |
2278 | psp->lhsalias = 0; |
2279 | psp->state = WAITING_FOR_ARROW; |
2280 | }else if( x[0]=='{' ){ |
2281 | if( psp->prevrule==0 ){ |
2282 | ErrorMsg(psp->filename,psp->tokenlineno, |
2283 | "There is no prior rule upon which to attach the code " |
2284 | "fragment which begins on this line." ); |
2285 | psp->errorcnt++; |
2286 | }else if( psp->prevrule->code!=0 ){ |
2287 | ErrorMsg(psp->filename,psp->tokenlineno, |
2288 | "Code fragment beginning on this line is not the first " |
2289 | "to follow the previous rule." ); |
2290 | psp->errorcnt++; |
2291 | }else if( strcmp(x, "{NEVER-REDUCE" )==0 ){ |
2292 | psp->prevrule->neverReduce = 1; |
2293 | }else{ |
2294 | psp->prevrule->line = psp->tokenlineno; |
2295 | psp->prevrule->code = &x[1]; |
2296 | psp->prevrule->noCode = 0; |
2297 | } |
2298 | }else if( x[0]=='[' ){ |
2299 | psp->state = PRECEDENCE_MARK_1; |
2300 | }else{ |
2301 | ErrorMsg(psp->filename,psp->tokenlineno, |
2302 | "Token \"%s\" should be either \"%%\" or a nonterminal name." , |
2303 | x); |
2304 | psp->errorcnt++; |
2305 | } |
2306 | break; |
2307 | case PRECEDENCE_MARK_1: |
2308 | if( !ISUPPER(x[0]) ){ |
2309 | ErrorMsg(psp->filename,psp->tokenlineno, |
2310 | "The precedence symbol must be a terminal." ); |
2311 | psp->errorcnt++; |
2312 | }else if( psp->prevrule==0 ){ |
2313 | ErrorMsg(psp->filename,psp->tokenlineno, |
2314 | "There is no prior rule to assign precedence \"[%s]\"." ,x); |
2315 | psp->errorcnt++; |
2316 | }else if( psp->prevrule->precsym!=0 ){ |
2317 | ErrorMsg(psp->filename,psp->tokenlineno, |
2318 | "Precedence mark on this line is not the first " |
2319 | "to follow the previous rule." ); |
2320 | psp->errorcnt++; |
2321 | }else{ |
2322 | psp->prevrule->precsym = Symbol_new(x); |
2323 | } |
2324 | psp->state = PRECEDENCE_MARK_2; |
2325 | break; |
2326 | case PRECEDENCE_MARK_2: |
2327 | if( x[0]!=']' ){ |
2328 | ErrorMsg(psp->filename,psp->tokenlineno, |
2329 | "Missing \"]\" on precedence mark." ); |
2330 | psp->errorcnt++; |
2331 | } |
2332 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2333 | break; |
2334 | case WAITING_FOR_ARROW: |
2335 | if( x[0]==':' && x[1]==':' && x[2]=='=' ){ |
2336 | psp->state = IN_RHS; |
2337 | }else if( x[0]=='(' ){ |
2338 | psp->state = LHS_ALIAS_1; |
2339 | }else{ |
2340 | ErrorMsg(psp->filename,psp->tokenlineno, |
2341 | "Expected to see a \":\" following the LHS symbol \"%s\"." , |
2342 | psp->lhs->name); |
2343 | psp->errorcnt++; |
2344 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2345 | } |
2346 | break; |
2347 | case LHS_ALIAS_1: |
2348 | if( ISALPHA(x[0]) ){ |
2349 | psp->lhsalias = x; |
2350 | psp->state = LHS_ALIAS_2; |
2351 | }else{ |
2352 | ErrorMsg(psp->filename,psp->tokenlineno, |
2353 | "\"%s\" is not a valid alias for the LHS \"%s\"\n" , |
2354 | x,psp->lhs->name); |
2355 | psp->errorcnt++; |
2356 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2357 | } |
2358 | break; |
2359 | case LHS_ALIAS_2: |
2360 | if( x[0]==')' ){ |
2361 | psp->state = LHS_ALIAS_3; |
2362 | }else{ |
2363 | ErrorMsg(psp->filename,psp->tokenlineno, |
2364 | "Missing \")\" following LHS alias name \"%s\"." ,psp->lhsalias); |
2365 | psp->errorcnt++; |
2366 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2367 | } |
2368 | break; |
2369 | case LHS_ALIAS_3: |
2370 | if( x[0]==':' && x[1]==':' && x[2]=='=' ){ |
2371 | psp->state = IN_RHS; |
2372 | }else{ |
2373 | ErrorMsg(psp->filename,psp->tokenlineno, |
2374 | "Missing \"->\" following: \"%s(%s)\"." , |
2375 | psp->lhs->name,psp->lhsalias); |
2376 | psp->errorcnt++; |
2377 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2378 | } |
2379 | break; |
2380 | case IN_RHS: |
2381 | if( x[0]=='.' ){ |
2382 | struct rule *rp; |
2383 | rp = (struct rule *)calloc( sizeof(struct rule) + |
2384 | sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); |
2385 | if( rp==0 ){ |
2386 | ErrorMsg(psp->filename,psp->tokenlineno, |
2387 | "Can't allocate enough memory for this rule." ); |
2388 | psp->errorcnt++; |
2389 | psp->prevrule = 0; |
2390 | }else{ |
2391 | int i; |
2392 | rp->ruleline = psp->tokenlineno; |
2393 | rp->rhs = (struct symbol**)&rp[1]; |
2394 | rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); |
2395 | for(i=0; i<psp->nrhs; i++){ |
2396 | rp->rhs[i] = psp->rhs[i]; |
2397 | rp->rhsalias[i] = psp->alias[i]; |
2398 | if( rp->rhsalias[i]!=0 ){ rp->rhs[i]->bContent = 1; } |
2399 | } |
2400 | rp->lhs = psp->lhs; |
2401 | rp->lhsalias = psp->lhsalias; |
2402 | rp->nrhs = psp->nrhs; |
2403 | rp->code = 0; |
2404 | rp->noCode = 1; |
2405 | rp->precsym = 0; |
2406 | rp->index = psp->gp->nrule++; |
2407 | rp->nextlhs = rp->lhs->rule; |
2408 | rp->lhs->rule = rp; |
2409 | rp->next = 0; |
2410 | if( psp->firstrule==0 ){ |
2411 | psp->firstrule = psp->lastrule = rp; |
2412 | }else{ |
2413 | psp->lastrule->next = rp; |
2414 | psp->lastrule = rp; |
2415 | } |
2416 | psp->prevrule = rp; |
2417 | } |
2418 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2419 | }else if( ISALPHA(x[0]) ){ |
2420 | if( psp->nrhs>=MAXRHS ){ |
2421 | ErrorMsg(psp->filename,psp->tokenlineno, |
2422 | "Too many symbols on RHS of rule beginning at \"%s\"." , |
2423 | x); |
2424 | psp->errorcnt++; |
2425 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2426 | }else{ |
2427 | psp->rhs[psp->nrhs] = Symbol_new(x); |
2428 | psp->alias[psp->nrhs] = 0; |
2429 | psp->nrhs++; |
2430 | } |
2431 | }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 && ISUPPER(x[1]) ){ |
2432 | struct symbol *msp = psp->rhs[psp->nrhs-1]; |
2433 | if( msp->type!=MULTITERMINAL ){ |
2434 | struct symbol *origsp = msp; |
2435 | msp = (struct symbol *) calloc(1,sizeof(*msp)); |
2436 | memset(msp, 0, sizeof(*msp)); |
2437 | msp->type = MULTITERMINAL; |
2438 | msp->nsubsym = 1; |
2439 | msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); |
2440 | msp->subsym[0] = origsp; |
2441 | msp->name = origsp->name; |
2442 | psp->rhs[psp->nrhs-1] = msp; |
2443 | } |
2444 | msp->nsubsym++; |
2445 | msp->subsym = (struct symbol **) realloc(msp->subsym, |
2446 | sizeof(struct symbol*)*msp->nsubsym); |
2447 | msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); |
2448 | if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ |
2449 | ErrorMsg(psp->filename,psp->tokenlineno, |
2450 | "Cannot form a compound containing a non-terminal" ); |
2451 | psp->errorcnt++; |
2452 | } |
2453 | }else if( x[0]=='(' && psp->nrhs>0 ){ |
2454 | psp->state = RHS_ALIAS_1; |
2455 | }else{ |
2456 | ErrorMsg(psp->filename,psp->tokenlineno, |
2457 | "Illegal character on RHS of rule: \"%s\"." ,x); |
2458 | psp->errorcnt++; |
2459 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2460 | } |
2461 | break; |
2462 | case RHS_ALIAS_1: |
2463 | if( ISALPHA(x[0]) ){ |
2464 | psp->alias[psp->nrhs-1] = x; |
2465 | psp->state = RHS_ALIAS_2; |
2466 | }else{ |
2467 | ErrorMsg(psp->filename,psp->tokenlineno, |
2468 | "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n" , |
2469 | x,psp->rhs[psp->nrhs-1]->name); |
2470 | psp->errorcnt++; |
2471 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2472 | } |
2473 | break; |
2474 | case RHS_ALIAS_2: |
2475 | if( x[0]==')' ){ |
2476 | psp->state = IN_RHS; |
2477 | }else{ |
2478 | ErrorMsg(psp->filename,psp->tokenlineno, |
2479 | "Missing \")\" following LHS alias name \"%s\"." ,psp->lhsalias); |
2480 | psp->errorcnt++; |
2481 | psp->state = RESYNC_AFTER_RULE_ERROR; |
2482 | } |
2483 | break; |
2484 | case WAITING_FOR_DECL_KEYWORD: |
2485 | if( ISALPHA(x[0]) ){ |
2486 | psp->declkeyword = x; |
2487 | psp->declargslot = 0; |
2488 | psp->decllinenoslot = 0; |
2489 | psp->insertLineMacro = 1; |
2490 | psp->state = WAITING_FOR_DECL_ARG; |
2491 | if( strcmp(x,"name" )==0 ){ |
2492 | psp->declargslot = &(psp->gp->name); |
2493 | psp->insertLineMacro = 0; |
2494 | }else if( strcmp(x,"include" )==0 ){ |
2495 | psp->declargslot = &(psp->gp->include); |
2496 | }else if( strcmp(x,"code" )==0 ){ |
2497 | psp->declargslot = &(psp->gp->extracode); |
2498 | }else if( strcmp(x,"token_destructor" )==0 ){ |
2499 | psp->declargslot = &psp->gp->tokendest; |
2500 | }else if( strcmp(x,"default_destructor" )==0 ){ |
2501 | psp->declargslot = &psp->gp->vardest; |
2502 | }else if( strcmp(x,"token_prefix" )==0 ){ |
2503 | psp->declargslot = &psp->gp->tokenprefix; |
2504 | psp->insertLineMacro = 0; |
2505 | }else if( strcmp(x,"syntax_error" )==0 ){ |
2506 | psp->declargslot = &(psp->gp->error); |
2507 | }else if( strcmp(x,"parse_accept" )==0 ){ |
2508 | psp->declargslot = &(psp->gp->accept); |
2509 | }else if( strcmp(x,"parse_failure" )==0 ){ |
2510 | psp->declargslot = &(psp->gp->failure); |
2511 | }else if( strcmp(x,"stack_overflow" )==0 ){ |
2512 | psp->declargslot = &(psp->gp->overflow); |
2513 | }else if( strcmp(x,"extra_argument" )==0 ){ |
2514 | psp->declargslot = &(psp->gp->arg); |
2515 | psp->insertLineMacro = 0; |
2516 | }else if( strcmp(x,"extra_context" )==0 ){ |
2517 | psp->declargslot = &(psp->gp->ctx); |
2518 | psp->insertLineMacro = 0; |
2519 | }else if( strcmp(x,"token_type" )==0 ){ |
2520 | psp->declargslot = &(psp->gp->tokentype); |
2521 | psp->insertLineMacro = 0; |
2522 | }else if( strcmp(x,"default_type" )==0 ){ |
2523 | psp->declargslot = &(psp->gp->vartype); |
2524 | psp->insertLineMacro = 0; |
2525 | }else if( strcmp(x,"stack_size" )==0 ){ |
2526 | psp->declargslot = &(psp->gp->stacksize); |
2527 | psp->insertLineMacro = 0; |
2528 | }else if( strcmp(x,"start_symbol" )==0 ){ |
2529 | psp->declargslot = &(psp->gp->start); |
2530 | psp->insertLineMacro = 0; |
2531 | }else if( strcmp(x,"left" )==0 ){ |
2532 | psp->preccounter++; |
2533 | psp->declassoc = LEFT; |
2534 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; |
2535 | }else if( strcmp(x,"right" )==0 ){ |
2536 | psp->preccounter++; |
2537 | psp->declassoc = RIGHT; |
2538 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; |
2539 | }else if( strcmp(x,"nonassoc" )==0 ){ |
2540 | psp->preccounter++; |
2541 | psp->declassoc = NONE; |
2542 | psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; |
2543 | }else if( strcmp(x,"destructor" )==0 ){ |
2544 | psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; |
2545 | }else if( strcmp(x,"type" )==0 ){ |
2546 | psp->state = WAITING_FOR_DATATYPE_SYMBOL; |
2547 | }else if( strcmp(x,"fallback" )==0 ){ |
2548 | psp->fallback = 0; |
2549 | psp->state = WAITING_FOR_FALLBACK_ID; |
2550 | }else if( strcmp(x,"token" )==0 ){ |
2551 | psp->state = WAITING_FOR_TOKEN_NAME; |
2552 | }else if( strcmp(x,"wildcard" )==0 ){ |
2553 | psp->state = WAITING_FOR_WILDCARD_ID; |
2554 | }else if( strcmp(x,"token_class" )==0 ){ |
2555 | psp->state = WAITING_FOR_CLASS_ID; |
2556 | }else{ |
2557 | ErrorMsg(psp->filename,psp->tokenlineno, |
2558 | "Unknown declaration keyword: \"%%%s\"." ,x); |
2559 | psp->errorcnt++; |
2560 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2561 | } |
2562 | }else{ |
2563 | ErrorMsg(psp->filename,psp->tokenlineno, |
2564 | "Illegal declaration keyword: \"%s\"." ,x); |
2565 | psp->errorcnt++; |
2566 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2567 | } |
2568 | break; |
2569 | case WAITING_FOR_DESTRUCTOR_SYMBOL: |
2570 | if( !ISALPHA(x[0]) ){ |
2571 | ErrorMsg(psp->filename,psp->tokenlineno, |
2572 | "Symbol name missing after %%destructor keyword" ); |
2573 | psp->errorcnt++; |
2574 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2575 | }else{ |
2576 | struct symbol *sp = Symbol_new(x); |
2577 | psp->declargslot = &sp->destructor; |
2578 | psp->decllinenoslot = &sp->destLineno; |
2579 | psp->insertLineMacro = 1; |
2580 | psp->state = WAITING_FOR_DECL_ARG; |
2581 | } |
2582 | break; |
2583 | case WAITING_FOR_DATATYPE_SYMBOL: |
2584 | if( !ISALPHA(x[0]) ){ |
2585 | ErrorMsg(psp->filename,psp->tokenlineno, |
2586 | "Symbol name missing after %%type keyword" ); |
2587 | psp->errorcnt++; |
2588 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2589 | }else{ |
2590 | struct symbol *sp = Symbol_find(x); |
2591 | if((sp) && (sp->datatype)){ |
2592 | ErrorMsg(psp->filename,psp->tokenlineno, |
2593 | "Symbol %%type \"%s\" already defined" , x); |
2594 | psp->errorcnt++; |
2595 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2596 | }else{ |
2597 | if (!sp){ |
2598 | sp = Symbol_new(x); |
2599 | } |
2600 | psp->declargslot = &sp->datatype; |
2601 | psp->insertLineMacro = 0; |
2602 | psp->state = WAITING_FOR_DECL_ARG; |
2603 | } |
2604 | } |
2605 | break; |
2606 | case WAITING_FOR_PRECEDENCE_SYMBOL: |
2607 | if( x[0]=='.' ){ |
2608 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2609 | }else if( ISUPPER(x[0]) ){ |
2610 | struct symbol *sp; |
2611 | sp = Symbol_new(x); |
2612 | if( sp->prec>=0 ){ |
2613 | ErrorMsg(psp->filename,psp->tokenlineno, |
2614 | "Symbol \"%s\" has already be given a precedence." ,x); |
2615 | psp->errorcnt++; |
2616 | }else{ |
2617 | sp->prec = psp->preccounter; |
2618 | sp->assoc = psp->declassoc; |
2619 | } |
2620 | }else{ |
2621 | ErrorMsg(psp->filename,psp->tokenlineno, |
2622 | "Can't assign a precedence to \"%s\"." ,x); |
2623 | psp->errorcnt++; |
2624 | } |
2625 | break; |
2626 | case WAITING_FOR_DECL_ARG: |
2627 | if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ |
2628 | const char *zOld, *zNew; |
2629 | char *zBuf, *z; |
2630 | int nOld, n, nLine = 0, nNew, nBack; |
2631 | int addLineMacro; |
2632 | char zLine[50]; |
2633 | zNew = x; |
2634 | if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; |
2635 | nNew = lemonStrlen(zNew); |
2636 | if( *psp->declargslot ){ |
2637 | zOld = *psp->declargslot; |
2638 | }else{ |
2639 | zOld = "" ; |
2640 | } |
2641 | nOld = lemonStrlen(zOld); |
2642 | n = nOld + nNew + 20; |
2643 | addLineMacro = !psp->gp->nolinenosflag |
2644 | && psp->insertLineMacro |
2645 | && psp->tokenlineno>1 |
2646 | && (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); |
2647 | if( addLineMacro ){ |
2648 | for(z=psp->filename, nBack=0; *z; z++){ |
2649 | if( *z=='\\' ) nBack++; |
2650 | } |
2651 | lemon_sprintf(zLine, "#line %d " , psp->tokenlineno); |
2652 | nLine = lemonStrlen(zLine); |
2653 | n += nLine + lemonStrlen(psp->filename) + nBack; |
2654 | } |
2655 | *psp->declargslot = (char *) realloc(*psp->declargslot, n); |
2656 | zBuf = *psp->declargslot + nOld; |
2657 | if( addLineMacro ){ |
2658 | if( nOld && zBuf[-1]!='\n' ){ |
2659 | *(zBuf++) = '\n'; |
2660 | } |
2661 | memcpy(zBuf, zLine, nLine); |
2662 | zBuf += nLine; |
2663 | *(zBuf++) = '"'; |
2664 | for(z=psp->filename; *z; z++){ |
2665 | if( *z=='\\' ){ |
2666 | *(zBuf++) = '\\'; |
2667 | } |
2668 | *(zBuf++) = *z; |
2669 | } |
2670 | *(zBuf++) = '"'; |
2671 | *(zBuf++) = '\n'; |
2672 | } |
2673 | if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ |
2674 | psp->decllinenoslot[0] = psp->tokenlineno; |
2675 | } |
2676 | memcpy(zBuf, zNew, nNew); |
2677 | zBuf += nNew; |
2678 | *zBuf = 0; |
2679 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2680 | }else{ |
2681 | ErrorMsg(psp->filename,psp->tokenlineno, |
2682 | "Illegal argument to %%%s: %s" ,psp->declkeyword,x); |
2683 | psp->errorcnt++; |
2684 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2685 | } |
2686 | break; |
2687 | case WAITING_FOR_FALLBACK_ID: |
2688 | if( x[0]=='.' ){ |
2689 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2690 | }else if( !ISUPPER(x[0]) ){ |
2691 | ErrorMsg(psp->filename, psp->tokenlineno, |
2692 | "%%fallback argument \"%s\" should be a token" , x); |
2693 | psp->errorcnt++; |
2694 | }else{ |
2695 | struct symbol *sp = Symbol_new(x); |
2696 | if( psp->fallback==0 ){ |
2697 | psp->fallback = sp; |
2698 | }else if( sp->fallback ){ |
2699 | ErrorMsg(psp->filename, psp->tokenlineno, |
2700 | "More than one fallback assigned to token %s" , x); |
2701 | psp->errorcnt++; |
2702 | }else{ |
2703 | sp->fallback = psp->fallback; |
2704 | psp->gp->has_fallback = 1; |
2705 | } |
2706 | } |
2707 | break; |
2708 | case WAITING_FOR_TOKEN_NAME: |
2709 | /* Tokens do not have to be declared before use. But they can be |
2710 | ** in order to control their assigned integer number. The number for |
2711 | ** each token is assigned when it is first seen. So by including |
2712 | ** |
2713 | ** %token ONE TWO THREE. |
2714 | ** |
2715 | ** early in the grammar file, that assigns small consecutive values |
2716 | ** to each of the tokens ONE TWO and THREE. |
2717 | */ |
2718 | if( x[0]=='.' ){ |
2719 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2720 | }else if( !ISUPPER(x[0]) ){ |
2721 | ErrorMsg(psp->filename, psp->tokenlineno, |
2722 | "%%token argument \"%s\" should be a token" , x); |
2723 | psp->errorcnt++; |
2724 | }else{ |
2725 | (void)Symbol_new(x); |
2726 | } |
2727 | break; |
2728 | case WAITING_FOR_WILDCARD_ID: |
2729 | if( x[0]=='.' ){ |
2730 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2731 | }else if( !ISUPPER(x[0]) ){ |
2732 | ErrorMsg(psp->filename, psp->tokenlineno, |
2733 | "%%wildcard argument \"%s\" should be a token" , x); |
2734 | psp->errorcnt++; |
2735 | }else{ |
2736 | struct symbol *sp = Symbol_new(x); |
2737 | if( psp->gp->wildcard==0 ){ |
2738 | psp->gp->wildcard = sp; |
2739 | }else{ |
2740 | ErrorMsg(psp->filename, psp->tokenlineno, |
2741 | "Extra wildcard to token: %s" , x); |
2742 | psp->errorcnt++; |
2743 | } |
2744 | } |
2745 | break; |
2746 | case WAITING_FOR_CLASS_ID: |
2747 | if( !ISLOWER(x[0]) ){ |
2748 | ErrorMsg(psp->filename, psp->tokenlineno, |
2749 | "%%token_class must be followed by an identifier: %s" , x); |
2750 | psp->errorcnt++; |
2751 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2752 | }else if( Symbol_find(x) ){ |
2753 | ErrorMsg(psp->filename, psp->tokenlineno, |
2754 | "Symbol \"%s\" already used" , x); |
2755 | psp->errorcnt++; |
2756 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2757 | }else{ |
2758 | psp->tkclass = Symbol_new(x); |
2759 | psp->tkclass->type = MULTITERMINAL; |
2760 | psp->state = WAITING_FOR_CLASS_TOKEN; |
2761 | } |
2762 | break; |
2763 | case WAITING_FOR_CLASS_TOKEN: |
2764 | if( x[0]=='.' ){ |
2765 | psp->state = WAITING_FOR_DECL_OR_RULE; |
2766 | }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ |
2767 | struct symbol *msp = psp->tkclass; |
2768 | msp->nsubsym++; |
2769 | msp->subsym = (struct symbol **) realloc(msp->subsym, |
2770 | sizeof(struct symbol*)*msp->nsubsym); |
2771 | if( !ISUPPER(x[0]) ) x++; |
2772 | msp->subsym[msp->nsubsym-1] = Symbol_new(x); |
2773 | }else{ |
2774 | ErrorMsg(psp->filename, psp->tokenlineno, |
2775 | "%%token_class argument \"%s\" should be a token" , x); |
2776 | psp->errorcnt++; |
2777 | psp->state = RESYNC_AFTER_DECL_ERROR; |
2778 | } |
2779 | break; |
2780 | case RESYNC_AFTER_RULE_ERROR: |
2781 | /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; |
2782 | ** break; */ |
2783 | case RESYNC_AFTER_DECL_ERROR: |
2784 | if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; |
2785 | if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; |
2786 | break; |
2787 | } |
2788 | } |
2789 | |
2790 | /* The text in the input is part of the argument to an %ifdef or %ifndef. |
2791 | ** Evaluate the text as a boolean expression. Return true or false. |
2792 | */ |
2793 | static int eval_preprocessor_boolean(char *z, int lineno){ |
2794 | int neg = 0; |
2795 | int res = 0; |
2796 | int okTerm = 1; |
2797 | int i; |
2798 | for(i=0; z[i]!=0; i++){ |
2799 | if( ISSPACE(z[i]) ) continue; |
2800 | if( z[i]=='!' ){ |
2801 | if( !okTerm ) goto pp_syntax_error; |
2802 | neg = !neg; |
2803 | continue; |
2804 | } |
2805 | if( z[i]=='|' && z[i+1]=='|' ){ |
2806 | if( okTerm ) goto pp_syntax_error; |
2807 | if( res ) return 1; |
2808 | i++; |
2809 | okTerm = 1; |
2810 | continue; |
2811 | } |
2812 | if( z[i]=='&' && z[i+1]=='&' ){ |
2813 | if( okTerm ) goto pp_syntax_error; |
2814 | if( !res ) return 0; |
2815 | i++; |
2816 | okTerm = 1; |
2817 | continue; |
2818 | } |
2819 | if( z[i]=='(' ){ |
2820 | int k; |
2821 | int n = 1; |
2822 | if( !okTerm ) goto pp_syntax_error; |
2823 | for(k=i+1; z[k]; k++){ |
2824 | if( z[k]==')' ){ |
2825 | n--; |
2826 | if( n==0 ){ |
2827 | z[k] = 0; |
2828 | res = eval_preprocessor_boolean(&z[i+1], -1); |
2829 | z[k] = ')'; |
2830 | if( res<0 ){ |
2831 | i = i-res; |
2832 | goto pp_syntax_error; |
2833 | } |
2834 | i = k; |
2835 | break; |
2836 | } |
2837 | }else if( z[k]=='(' ){ |
2838 | n++; |
2839 | }else if( z[k]==0 ){ |
2840 | i = k; |
2841 | goto pp_syntax_error; |
2842 | } |
2843 | } |
2844 | if( neg ){ |
2845 | res = !res; |
2846 | neg = 0; |
2847 | } |
2848 | okTerm = 0; |
2849 | continue; |
2850 | } |
2851 | if( ISALPHA(z[i]) ){ |
2852 | int j, k, n; |
2853 | if( !okTerm ) goto pp_syntax_error; |
2854 | for(k=i+1; ISALNUM(z[k]) || z[k]=='_'; k++){} |
2855 | n = k - i; |
2856 | res = 0; |
2857 | for(j=0; j<nDefine; j++){ |
2858 | if( strncmp(azDefine[j],&z[i],n)==0 && azDefine[j][n]==0 ){ |
2859 | res = 1; |
2860 | break; |
2861 | } |
2862 | } |
2863 | i = k-1; |
2864 | if( neg ){ |
2865 | res = !res; |
2866 | neg = 0; |
2867 | } |
2868 | okTerm = 0; |
2869 | continue; |
2870 | } |
2871 | goto pp_syntax_error; |
2872 | } |
2873 | return res; |
2874 | |
2875 | pp_syntax_error: |
2876 | if( lineno>0 ){ |
2877 | fprintf(stderr, "%%if syntax error on line %d.\n" , lineno); |
2878 | fprintf(stderr, " %.*s <-- syntax error here\n" , i+1, z); |
2879 | exit(1); |
2880 | }else{ |
2881 | return -(i+1); |
2882 | } |
2883 | } |
2884 | |
2885 | /* Run the preprocessor over the input file text. The global variables |
2886 | ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined |
2887 | ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and |
2888 | ** comments them out. Text in between is also commented out as appropriate. |
2889 | */ |
2890 | static void preprocess_input(char *z){ |
2891 | int i, j, k; |
2892 | int exclude = 0; |
2893 | int start = 0; |
2894 | int lineno = 1; |
2895 | int start_lineno = 1; |
2896 | for(i=0; z[i]; i++){ |
2897 | if( z[i]=='\n' ) lineno++; |
2898 | if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; |
2899 | if( strncmp(&z[i],"%endif" ,6)==0 && ISSPACE(z[i+6]) ){ |
2900 | if( exclude ){ |
2901 | exclude--; |
2902 | if( exclude==0 ){ |
2903 | for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' '; |
2904 | } |
2905 | } |
2906 | for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; |
2907 | }else if( strncmp(&z[i],"%else" ,5)==0 && ISSPACE(z[i+5]) ){ |
2908 | if( exclude==1){ |
2909 | exclude = 0; |
2910 | for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' '; |
2911 | }else if( exclude==0 ){ |
2912 | exclude = 1; |
2913 | start = i; |
2914 | start_lineno = lineno; |
2915 | } |
2916 | for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; |
2917 | }else if( strncmp(&z[i],"%ifdef " ,7)==0 |
2918 | || strncmp(&z[i],"%if " ,4)==0 |
2919 | || strncmp(&z[i],"%ifndef " ,8)==0 ){ |
2920 | if( exclude ){ |
2921 | exclude++; |
2922 | }else{ |
2923 | int isNot; |
2924 | int iBool; |
2925 | for(j=i; z[j] && !ISSPACE(z[j]); j++){} |
2926 | iBool = j; |
2927 | isNot = (j==i+7); |
2928 | while( z[j] && z[j]!='\n' ){ j++; } |
2929 | k = z[j]; |
2930 | z[j] = 0; |
2931 | exclude = eval_preprocessor_boolean(&z[iBool], lineno); |
2932 | z[j] = k; |
2933 | if( !isNot ) exclude = !exclude; |
2934 | if( exclude ){ |
2935 | start = i; |
2936 | start_lineno = lineno; |
2937 | } |
2938 | } |
2939 | for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; |
2940 | } |
2941 | } |
2942 | if( exclude ){ |
2943 | fprintf(stderr,"unterminated %%ifdef starting on line %d\n" , start_lineno); |
2944 | exit(1); |
2945 | } |
2946 | } |
2947 | |
2948 | /* In spite of its name, this function is really a scanner. It read |
2949 | ** in the entire input file (all at once) then tokenizes it. Each |
2950 | ** token is passed to the function "parseonetoken" which builds all |
2951 | ** the appropriate data structures in the global state vector "gp". |
2952 | */ |
2953 | void Parse(struct lemon *gp) |
2954 | { |
2955 | struct pstate ps; |
2956 | FILE *fp; |
2957 | char *filebuf; |
2958 | unsigned int filesize; |
2959 | int lineno; |
2960 | int c; |
2961 | char *cp, *nextcp; |
2962 | int startline = 0; |
2963 | |
2964 | memset(&ps, '\0', sizeof(ps)); |
2965 | ps.gp = gp; |
2966 | ps.filename = gp->filename; |
2967 | ps.errorcnt = 0; |
2968 | ps.state = INITIALIZE; |
2969 | |
2970 | /* Begin by reading the input file */ |
2971 | fp = fopen(ps.filename,"rb" ); |
2972 | if( fp==0 ){ |
2973 | ErrorMsg(ps.filename,0,"Can't open this file for reading." ); |
2974 | gp->errorcnt++; |
2975 | return; |
2976 | } |
2977 | fseek(fp,0,2); |
2978 | filesize = ftell(fp); |
2979 | rewind(fp); |
2980 | filebuf = (char *)malloc( filesize+1 ); |
2981 | if( filesize>100000000 || filebuf==0 ){ |
2982 | ErrorMsg(ps.filename,0,"Input file too large." ); |
2983 | free(filebuf); |
2984 | gp->errorcnt++; |
2985 | fclose(fp); |
2986 | return; |
2987 | } |
2988 | if( fread(filebuf,1,filesize,fp)!=filesize ){ |
2989 | ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file." , |
2990 | filesize); |
2991 | free(filebuf); |
2992 | gp->errorcnt++; |
2993 | fclose(fp); |
2994 | return; |
2995 | } |
2996 | fclose(fp); |
2997 | filebuf[filesize] = 0; |
2998 | |
2999 | /* Make an initial pass through the file to handle %ifdef and %ifndef */ |
3000 | preprocess_input(filebuf); |
3001 | if( gp->printPreprocessed ){ |
3002 | printf("%s\n" , filebuf); |
3003 | return; |
3004 | } |
3005 | |
3006 | /* Now scan the text of the input file */ |
3007 | lineno = 1; |
3008 | for(cp=filebuf; (c= *cp)!=0; ){ |
3009 | if( c=='\n' ) lineno++; /* Keep track of the line number */ |
3010 | if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ |
3011 | if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ |
3012 | cp+=2; |
3013 | while( (c= *cp)!=0 && c!='\n' ) cp++; |
3014 | continue; |
3015 | } |
3016 | if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ |
3017 | cp+=2; |
3018 | if( (*cp)=='/' ) cp++; |
3019 | while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ |
3020 | if( c=='\n' ) lineno++; |
3021 | cp++; |
3022 | } |
3023 | if( c ) cp++; |
3024 | continue; |
3025 | } |
3026 | ps.tokenstart = cp; /* Mark the beginning of the token */ |
3027 | ps.tokenlineno = lineno; /* Linenumber on which token begins */ |
3028 | if( c=='\"' ){ /* String literals */ |
3029 | cp++; |
3030 | while( (c= *cp)!=0 && c!='\"' ){ |
3031 | if( c=='\n' ) lineno++; |
3032 | cp++; |
3033 | } |
3034 | if( c==0 ){ |
3035 | ErrorMsg(ps.filename,startline, |
3036 | "String starting on this line is not terminated before " |
3037 | "the end of the file." ); |
3038 | ps.errorcnt++; |
3039 | nextcp = cp; |
3040 | }else{ |
3041 | nextcp = cp+1; |
3042 | } |
3043 | }else if( c=='{' ){ /* A block of C code */ |
3044 | int level; |
3045 | cp++; |
3046 | for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ |
3047 | if( c=='\n' ) lineno++; |
3048 | else if( c=='{' ) level++; |
3049 | else if( c=='}' ) level--; |
3050 | else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ |
3051 | int prevc; |
3052 | cp = &cp[2]; |
3053 | prevc = 0; |
3054 | while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ |
3055 | if( c=='\n' ) lineno++; |
3056 | prevc = c; |
3057 | cp++; |
3058 | } |
3059 | }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ |
3060 | cp = &cp[2]; |
3061 | while( (c= *cp)!=0 && c!='\n' ) cp++; |
3062 | if( c ) lineno++; |
3063 | }else if( c=='\'' || c=='\"' ){ /* String a character literals */ |
3064 | int startchar, prevc; |
3065 | startchar = c; |
3066 | prevc = 0; |
3067 | for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ |
3068 | if( c=='\n' ) lineno++; |
3069 | if( prevc=='\\' ) prevc = 0; |
3070 | else prevc = c; |
3071 | } |
3072 | } |
3073 | } |
3074 | if( c==0 ){ |
3075 | ErrorMsg(ps.filename,ps.tokenlineno, |
3076 | "C code starting on this line is not terminated before " |
3077 | "the end of the file." ); |
3078 | ps.errorcnt++; |
3079 | nextcp = cp; |
3080 | }else{ |
3081 | nextcp = cp+1; |
3082 | } |
3083 | }else if( ISALNUM(c) ){ /* Identifiers */ |
3084 | while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; |
3085 | nextcp = cp; |
3086 | }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ |
3087 | cp += 3; |
3088 | nextcp = cp; |
3089 | }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ |
3090 | cp += 2; |
3091 | while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; |
3092 | nextcp = cp; |
3093 | }else{ /* All other (one character) operators */ |
3094 | cp++; |
3095 | nextcp = cp; |
3096 | } |
3097 | c = *cp; |
3098 | *cp = 0; /* Null terminate the token */ |
3099 | parseonetoken(&ps); /* Parse the token */ |
3100 | *cp = (char)c; /* Restore the buffer */ |
3101 | cp = nextcp; |
3102 | } |
3103 | free(filebuf); /* Release the buffer after parsing */ |
3104 | gp->rule = ps.firstrule; |
3105 | gp->errorcnt = ps.errorcnt; |
3106 | } |
3107 | /*************************** From the file "plink.c" *********************/ |
3108 | /* |
3109 | ** Routines processing configuration follow-set propagation links |
3110 | ** in the LEMON parser generator. |
3111 | */ |
3112 | static struct plink *plink_freelist = 0; |
3113 | |
3114 | /* Allocate a new plink */ |
3115 | struct plink *Plink_new(void){ |
3116 | struct plink *newlink; |
3117 | |
3118 | if( plink_freelist==0 ){ |
3119 | int i; |
3120 | int amt = 100; |
3121 | plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); |
3122 | if( plink_freelist==0 ){ |
3123 | fprintf(stderr, |
3124 | "Unable to allocate memory for a new follow-set propagation link.\n" ); |
3125 | exit(1); |
3126 | } |
3127 | for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1]; |
3128 | plink_freelist[amt-1].next = 0; |
3129 | } |
3130 | newlink = plink_freelist; |
3131 | plink_freelist = plink_freelist->next; |
3132 | return newlink; |
3133 | } |
3134 | |
3135 | /* Add a plink to a plink list */ |
3136 | void Plink_add(struct plink **plpp, struct config *cfp) |
3137 | { |
3138 | struct plink *newlink; |
3139 | newlink = Plink_new(); |
3140 | newlink->next = *plpp; |
3141 | *plpp = newlink; |
3142 | newlink->cfp = cfp; |
3143 | } |
3144 | |
3145 | /* Transfer every plink on the list "from" to the list "to" */ |
3146 | void Plink_copy(struct plink **to, struct plink *from) |
3147 | { |
3148 | struct plink *nextpl; |
3149 | while( from ){ |
3150 | nextpl = from->next; |
3151 | from->next = *to; |
3152 | *to = from; |
3153 | from = nextpl; |
3154 | } |
3155 | } |
3156 | |
3157 | /* Delete every plink on the list */ |
3158 | void Plink_delete(struct plink *plp) |
3159 | { |
3160 | struct plink *nextpl; |
3161 | |
3162 | while( plp ){ |
3163 | nextpl = plp->next; |
3164 | plp->next = plink_freelist; |
3165 | plink_freelist = plp; |
3166 | plp = nextpl; |
3167 | } |
3168 | } |
3169 | /*********************** From the file "report.c" **************************/ |
3170 | /* |
3171 | ** Procedures for generating reports and tables in the LEMON parser generator. |
3172 | */ |
3173 | |
3174 | /* Generate a filename with the given suffix. Space to hold the |
3175 | ** name comes from malloc() and must be freed by the calling |
3176 | ** function. |
3177 | */ |
3178 | PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) |
3179 | { |
3180 | char *name; |
3181 | char *cp; |
3182 | char *filename = lemp->filename; |
3183 | int sz; |
3184 | |
3185 | if( outputDir ){ |
3186 | cp = strrchr(filename, '/'); |
3187 | if( cp ) filename = cp + 1; |
3188 | } |
3189 | sz = lemonStrlen(filename); |
3190 | sz += lemonStrlen(suffix); |
3191 | if( outputDir ) sz += lemonStrlen(outputDir) + 1; |
3192 | sz += 5; |
3193 | name = (char*)malloc( sz ); |
3194 | if( name==0 ){ |
3195 | fprintf(stderr,"Can't allocate space for a filename.\n" ); |
3196 | exit(1); |
3197 | } |
3198 | name[0] = 0; |
3199 | if( outputDir ){ |
3200 | lemon_strcpy(name, outputDir); |
3201 | lemon_strcat(name, "/" ); |
3202 | } |
3203 | lemon_strcat(name,filename); |
3204 | cp = strrchr(name,'.'); |
3205 | if( cp ) *cp = 0; |
3206 | lemon_strcat(name,suffix); |
3207 | return name; |
3208 | } |
3209 | |
3210 | /* Open a file with a name based on the name of the input file, |
3211 | ** but with a different (specified) suffix, and return a pointer |
3212 | ** to the stream */ |
3213 | PRIVATE FILE *file_open( |
3214 | struct lemon *lemp, |
3215 | const char *suffix, |
3216 | const char *mode |
3217 | ){ |
3218 | FILE *fp; |
3219 | |
3220 | if( lemp->outname ) free(lemp->outname); |
3221 | lemp->outname = file_makename(lemp, suffix); |
3222 | fp = fopen(lemp->outname,mode); |
3223 | if( fp==0 && *mode=='w' ){ |
3224 | fprintf(stderr,"Can't open file \"%s\".\n" ,lemp->outname); |
3225 | lemp->errorcnt++; |
3226 | return 0; |
3227 | } |
3228 | return fp; |
3229 | } |
3230 | |
3231 | /* Print the text of a rule |
3232 | */ |
3233 | void rule_print(FILE *out, struct rule *rp){ |
3234 | int i, j; |
3235 | fprintf(out, "%s" ,rp->lhs->name); |
3236 | /* if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */ |
3237 | fprintf(out," ::=" ); |
3238 | for(i=0; i<rp->nrhs; i++){ |
3239 | struct symbol *sp = rp->rhs[i]; |
3240 | if( sp->type==MULTITERMINAL ){ |
3241 | fprintf(out," %s" , sp->subsym[0]->name); |
3242 | for(j=1; j<sp->nsubsym; j++){ |
3243 | fprintf(out,"|%s" , sp->subsym[j]->name); |
3244 | } |
3245 | }else{ |
3246 | fprintf(out," %s" , sp->name); |
3247 | } |
3248 | /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */ |
3249 | } |
3250 | } |
3251 | |
3252 | /* Duplicate the input file without comments and without actions |
3253 | ** on rules */ |
3254 | void Reprint(struct lemon *lemp) |
3255 | { |
3256 | struct rule *rp; |
3257 | struct symbol *sp; |
3258 | int i, j, maxlen, len, ncolumns, skip; |
3259 | printf("// Reprint of input file \"%s\".\n// Symbols:\n" ,lemp->filename); |
3260 | maxlen = 10; |
3261 | for(i=0; i<lemp->nsymbol; i++){ |
3262 | sp = lemp->symbols[i]; |
3263 | len = lemonStrlen(sp->name); |
3264 | if( len>maxlen ) maxlen = len; |
3265 | } |
3266 | ncolumns = 76/(maxlen+5); |
3267 | if( ncolumns<1 ) ncolumns = 1; |
3268 | skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; |
3269 | for(i=0; i<skip; i++){ |
3270 | printf("//" ); |
3271 | for(j=i; j<lemp->nsymbol; j+=skip){ |
3272 | sp = lemp->symbols[j]; |
3273 | assert( sp->index==j ); |
3274 | printf(" %3d %-*.*s" ,j,maxlen,maxlen,sp->name); |
3275 | } |
3276 | printf("\n" ); |
3277 | } |
3278 | for(rp=lemp->rule; rp; rp=rp->next){ |
3279 | rule_print(stdout, rp); |
3280 | printf("." ); |
3281 | if( rp->precsym ) printf(" [%s]" ,rp->precsym->name); |
3282 | /* if( rp->code ) printf("\n %s",rp->code); */ |
3283 | printf("\n" ); |
3284 | } |
3285 | } |
3286 | |
3287 | /* Print a single rule. |
3288 | */ |
3289 | void RulePrint(FILE *fp, struct rule *rp, int iCursor){ |
3290 | struct symbol *sp; |
3291 | int i, j; |
3292 | fprintf(fp,"%s ::=" ,rp->lhs->name); |
3293 | for(i=0; i<=rp->nrhs; i++){ |
3294 | if( i==iCursor ) fprintf(fp," *" ); |
3295 | if( i==rp->nrhs ) break; |
3296 | sp = rp->rhs[i]; |
3297 | if( sp->type==MULTITERMINAL ){ |
3298 | fprintf(fp," %s" , sp->subsym[0]->name); |
3299 | for(j=1; j<sp->nsubsym; j++){ |
3300 | fprintf(fp,"|%s" ,sp->subsym[j]->name); |
3301 | } |
3302 | }else{ |
3303 | fprintf(fp," %s" , sp->name); |
3304 | } |
3305 | } |
3306 | } |
3307 | |
3308 | /* Print the rule for a configuration. |
3309 | */ |
3310 | void ConfigPrint(FILE *fp, struct config *cfp){ |
3311 | RulePrint(fp, cfp->rp, cfp->dot); |
3312 | } |
3313 | |
3314 | /* #define TEST */ |
3315 | #if 0 |
3316 | /* Print a set */ |
3317 | PRIVATE void SetPrint(out,set,lemp) |
3318 | FILE *out; |
3319 | char *set; |
3320 | struct lemon *lemp; |
3321 | { |
3322 | int i; |
3323 | char *spacer; |
3324 | spacer = "" ; |
3325 | fprintf(out,"%12s[" ,"" ); |
3326 | for(i=0; i<lemp->nterminal; i++){ |
3327 | if( SetFind(set,i) ){ |
3328 | fprintf(out,"%s%s" ,spacer,lemp->symbols[i]->name); |
3329 | spacer = " " ; |
3330 | } |
3331 | } |
3332 | fprintf(out,"]\n" ); |
3333 | } |
3334 | |
3335 | /* Print a plink chain */ |
3336 | PRIVATE void PlinkPrint(out,plp,tag) |
3337 | FILE *out; |
3338 | struct plink *plp; |
3339 | char *tag; |
3340 | { |
3341 | while( plp ){ |
3342 | fprintf(out,"%12s%s (state %2d) " ,"" ,tag,plp->cfp->stp->statenum); |
3343 | ConfigPrint(out,plp->cfp); |
3344 | fprintf(out,"\n" ); |
3345 | plp = plp->next; |
3346 | } |
3347 | } |
3348 | #endif |
3349 | |
3350 | /* Print an action to the given file descriptor. Return FALSE if |
3351 | ** nothing was actually printed. |
3352 | */ |
3353 | int PrintAction( |
3354 | struct action *ap, /* The action to print */ |
3355 | FILE *fp, /* Print the action here */ |
3356 | int indent /* Indent by this amount */ |
3357 | ){ |
3358 | int result = 1; |
3359 | switch( ap->type ){ |
3360 | case SHIFT: { |
3361 | struct state *stp = ap->x.stp; |
3362 | fprintf(fp,"%*s shift %-7d" ,indent,ap->sp->name,stp->statenum); |
3363 | break; |
3364 | } |
3365 | case REDUCE: { |
3366 | struct rule *rp = ap->x.rp; |
3367 | fprintf(fp,"%*s reduce %-7d" ,indent,ap->sp->name,rp->iRule); |
3368 | RulePrint(fp, rp, -1); |
3369 | break; |
3370 | } |
3371 | case SHIFTREDUCE: { |
3372 | struct rule *rp = ap->x.rp; |
3373 | fprintf(fp,"%*s shift-reduce %-7d" ,indent,ap->sp->name,rp->iRule); |
3374 | RulePrint(fp, rp, -1); |
3375 | break; |
3376 | } |
3377 | case ACCEPT: |
3378 | fprintf(fp,"%*s accept" ,indent,ap->sp->name); |
3379 | break; |
3380 | case ERROR: |
3381 | fprintf(fp,"%*s error" ,indent,ap->sp->name); |
3382 | break; |
3383 | case SRCONFLICT: |
3384 | case RRCONFLICT: |
3385 | fprintf(fp,"%*s reduce %-7d ** Parsing conflict **" , |
3386 | indent,ap->sp->name,ap->x.rp->iRule); |
3387 | break; |
3388 | case SSCONFLICT: |
3389 | fprintf(fp,"%*s shift %-7d ** Parsing conflict **" , |
3390 | indent,ap->sp->name,ap->x.stp->statenum); |
3391 | break; |
3392 | case SH_RESOLVED: |
3393 | if( showPrecedenceConflict ){ |
3394 | fprintf(fp,"%*s shift %-7d -- dropped by precedence" , |
3395 | indent,ap->sp->name,ap->x.stp->statenum); |
3396 | }else{ |
3397 | result = 0; |
3398 | } |
3399 | break; |
3400 | case RD_RESOLVED: |
3401 | if( showPrecedenceConflict ){ |
3402 | fprintf(fp,"%*s reduce %-7d -- dropped by precedence" , |
3403 | indent,ap->sp->name,ap->x.rp->iRule); |
3404 | }else{ |
3405 | result = 0; |
3406 | } |
3407 | break; |
3408 | case NOT_USED: |
3409 | result = 0; |
3410 | break; |
3411 | } |
3412 | if( result && ap->spOpt ){ |
3413 | fprintf(fp," /* because %s==%s */" , ap->sp->name, ap->spOpt->name); |
3414 | } |
3415 | return result; |
3416 | } |
3417 | |
3418 | /* Generate the "*.out" log file */ |
3419 | void ReportOutput(struct lemon *lemp) |
3420 | { |
3421 | int i, n; |
3422 | struct state *stp; |
3423 | struct config *cfp; |
3424 | struct action *ap; |
3425 | struct rule *rp; |
3426 | FILE *fp; |
3427 | |
3428 | fp = file_open(lemp,".out" ,"wb" ); |
3429 | if( fp==0 ) return; |
3430 | for(i=0; i<lemp->nxstate; i++){ |
3431 | stp = lemp->sorted[i]; |
3432 | fprintf(fp,"State %d:\n" ,stp->statenum); |
3433 | if( lemp->basisflag ) cfp=stp->bp; |
3434 | else cfp=stp->cfp; |
3435 | while( cfp ){ |
3436 | char buf[20]; |
3437 | if( cfp->dot==cfp->rp->nrhs ){ |
3438 | lemon_sprintf(buf,"(%d)" ,cfp->rp->iRule); |
3439 | fprintf(fp," %5s " ,buf); |
3440 | }else{ |
3441 | fprintf(fp," " ); |
3442 | } |
3443 | ConfigPrint(fp,cfp); |
3444 | fprintf(fp,"\n" ); |
3445 | #if 0 |
3446 | SetPrint(fp,cfp->fws,lemp); |
3447 | PlinkPrint(fp,cfp->fplp,"To " ); |
3448 | PlinkPrint(fp,cfp->bplp,"From" ); |
3449 | #endif |
3450 | if( lemp->basisflag ) cfp=cfp->bp; |
3451 | else cfp=cfp->next; |
3452 | } |
3453 | fprintf(fp,"\n" ); |
3454 | for(ap=stp->ap; ap; ap=ap->next){ |
3455 | if( PrintAction(ap,fp,30) ) fprintf(fp,"\n" ); |
3456 | } |
3457 | fprintf(fp,"\n" ); |
3458 | } |
3459 | fprintf(fp, "----------------------------------------------------\n" ); |
3460 | fprintf(fp, "Symbols:\n" ); |
3461 | fprintf(fp, "The first-set of non-terminals is shown after the name.\n\n" ); |
3462 | for(i=0; i<lemp->nsymbol; i++){ |
3463 | int j; |
3464 | struct symbol *sp; |
3465 | |
3466 | sp = lemp->symbols[i]; |
3467 | fprintf(fp, " %3d: %s" , i, sp->name); |
3468 | if( sp->type==NONTERMINAL ){ |
3469 | fprintf(fp, ":" ); |
3470 | if( sp->lambda ){ |
3471 | fprintf(fp, " <lambda>" ); |
3472 | } |
3473 | for(j=0; j<lemp->nterminal; j++){ |
3474 | if( sp->firstset && SetFind(sp->firstset, j) ){ |
3475 | fprintf(fp, " %s" , lemp->symbols[j]->name); |
3476 | } |
3477 | } |
3478 | } |
3479 | if( sp->prec>=0 ) fprintf(fp," (precedence=%d)" , sp->prec); |
3480 | fprintf(fp, "\n" ); |
3481 | } |
3482 | fprintf(fp, "----------------------------------------------------\n" ); |
3483 | fprintf(fp, "Syntax-only Symbols:\n" ); |
3484 | fprintf(fp, "The following symbols never carry semantic content.\n\n" ); |
3485 | for(i=n=0; i<lemp->nsymbol; i++){ |
3486 | int w; |
3487 | struct symbol *sp = lemp->symbols[i]; |
3488 | if( sp->bContent ) continue; |
3489 | w = (int)strlen(sp->name); |
3490 | if( n>0 && n+w>75 ){ |
3491 | fprintf(fp,"\n" ); |
3492 | n = 0; |
3493 | } |
3494 | if( n>0 ){ |
3495 | fprintf(fp, " " ); |
3496 | n++; |
3497 | } |
3498 | fprintf(fp, "%s" , sp->name); |
3499 | n += w; |
3500 | } |
3501 | if( n>0 ) fprintf(fp, "\n" ); |
3502 | fprintf(fp, "----------------------------------------------------\n" ); |
3503 | fprintf(fp, "Rules:\n" ); |
3504 | for(rp=lemp->rule; rp; rp=rp->next){ |
3505 | fprintf(fp, "%4d: " , rp->iRule); |
3506 | rule_print(fp, rp); |
3507 | fprintf(fp,"." ); |
3508 | if( rp->precsym ){ |
3509 | fprintf(fp," [%s precedence=%d]" , |
3510 | rp->precsym->name, rp->precsym->prec); |
3511 | } |
3512 | fprintf(fp,"\n" ); |
3513 | } |
3514 | fclose(fp); |
3515 | return; |
3516 | } |
3517 | |
3518 | /* Search for the file "name" which is in the same directory as |
3519 | ** the executable */ |
3520 | PRIVATE char *pathsearch(char *argv0, char *name, int modemask) |
3521 | { |
3522 | const char *pathlist; |
3523 | char *pathbufptr = 0; |
3524 | char *pathbuf = 0; |
3525 | char *path,*cp; |
3526 | char c; |
3527 | |
3528 | #ifdef __WIN32__ |
3529 | cp = strrchr(argv0,'\\'); |
3530 | #else |
3531 | cp = strrchr(argv0,'/'); |
3532 | #endif |
3533 | if( cp ){ |
3534 | c = *cp; |
3535 | *cp = 0; |
3536 | path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); |
3537 | if( path ) lemon_sprintf(path,"%s/%s" ,argv0,name); |
3538 | *cp = c; |
3539 | }else{ |
3540 | pathlist = getenv("PATH" ); |
3541 | if( pathlist==0 ) pathlist = ".:/bin:/usr/bin" ; |
3542 | pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); |
3543 | path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); |
3544 | if( (pathbuf != 0) && (path!=0) ){ |
3545 | pathbufptr = pathbuf; |
3546 | lemon_strcpy(pathbuf, pathlist); |
3547 | while( *pathbuf ){ |
3548 | cp = strchr(pathbuf,':'); |
3549 | if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; |
3550 | c = *cp; |
3551 | *cp = 0; |
3552 | lemon_sprintf(path,"%s/%s" ,pathbuf,name); |
3553 | *cp = c; |
3554 | if( c==0 ) pathbuf[0] = 0; |
3555 | else pathbuf = &cp[1]; |
3556 | if( access(path,modemask)==0 ) break; |
3557 | } |
3558 | } |
3559 | free(pathbufptr); |
3560 | } |
3561 | return path; |
3562 | } |
3563 | |
3564 | /* Given an action, compute the integer value for that action |
3565 | ** which is to be put in the action table of the generated machine. |
3566 | ** Return negative if no action should be generated. |
3567 | */ |
3568 | PRIVATE int compute_action(struct lemon *lemp, struct action *ap) |
3569 | { |
3570 | int act; |
3571 | switch( ap->type ){ |
3572 | case SHIFT: act = ap->x.stp->statenum; break; |
3573 | case SHIFTREDUCE: { |
3574 | /* Since a SHIFT is inherient after a prior REDUCE, convert any |
3575 | ** SHIFTREDUCE action with a nonterminal on the LHS into a simple |
3576 | ** REDUCE action: */ |
3577 | if( ap->sp->index>=lemp->nterminal |
3578 | && (lemp->errsym==0 || ap->sp->index!=lemp->errsym->index) |
3579 | ){ |
3580 | act = lemp->minReduce + ap->x.rp->iRule; |
3581 | }else{ |
3582 | act = lemp->minShiftReduce + ap->x.rp->iRule; |
3583 | } |
3584 | break; |
3585 | } |
3586 | case REDUCE: act = lemp->minReduce + ap->x.rp->iRule; break; |
3587 | case ERROR: act = lemp->errAction; break; |
3588 | case ACCEPT: act = lemp->accAction; break; |
3589 | default: act = -1; break; |
3590 | } |
3591 | return act; |
3592 | } |
3593 | |
3594 | #define LINESIZE 1000 |
3595 | /* The next cluster of routines are for reading the template file |
3596 | ** and writing the results to the generated parser */ |
3597 | /* The first function transfers data from "in" to "out" until |
3598 | ** a line is seen which begins with "%%". The line number is |
3599 | ** tracked. |
3600 | ** |
3601 | ** if name!=0, then any word that begin with "Parse" is changed to |
3602 | ** begin with *name instead. |
3603 | */ |
3604 | PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) |
3605 | { |
3606 | int i, iStart; |
3607 | char line[LINESIZE]; |
3608 | while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ |
3609 | (*lineno)++; |
3610 | iStart = 0; |
3611 | if( name ){ |
3612 | for(i=0; line[i]; i++){ |
3613 | if( line[i]=='P' && strncmp(&line[i],"Parse" ,5)==0 |
3614 | && (i==0 || !ISALPHA(line[i-1])) |
3615 | ){ |
3616 | if( i>iStart ) fprintf(out,"%.*s" ,i-iStart,&line[iStart]); |
3617 | fprintf(out,"%s" ,name); |
3618 | i += 4; |
3619 | iStart = i+1; |
3620 | } |
3621 | } |
3622 | } |
3623 | fprintf(out,"%s" ,&line[iStart]); |
3624 | } |
3625 | } |
3626 | |
3627 | /* Skip forward past the header of the template file to the first "%%" |
3628 | */ |
3629 | PRIVATE void (FILE *in, int *lineno) |
3630 | { |
3631 | char line[LINESIZE]; |
3632 | while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ |
3633 | (*lineno)++; |
3634 | } |
3635 | } |
3636 | |
3637 | /* The next function finds the template file and opens it, returning |
3638 | ** a pointer to the opened file. */ |
3639 | PRIVATE FILE *tplt_open(struct lemon *lemp) |
3640 | { |
3641 | static char templatename[] = "lempar.c" ; |
3642 | char buf[1000]; |
3643 | FILE *in; |
3644 | char *tpltname; |
3645 | char *toFree = 0; |
3646 | char *cp; |
3647 | |
3648 | /* first, see if user specified a template filename on the command line. */ |
3649 | if (user_templatename != 0) { |
3650 | if( access(user_templatename,004)==-1 ){ |
3651 | fprintf(stderr,"Can't find the parser driver template file \"%s\".\n" , |
3652 | user_templatename); |
3653 | lemp->errorcnt++; |
3654 | return 0; |
3655 | } |
3656 | in = fopen(user_templatename,"rb" ); |
3657 | if( in==0 ){ |
3658 | fprintf(stderr,"Can't open the template file \"%s\".\n" , |
3659 | user_templatename); |
3660 | lemp->errorcnt++; |
3661 | return 0; |
3662 | } |
3663 | return in; |
3664 | } |
3665 | |
3666 | cp = strrchr(lemp->filename,'.'); |
3667 | if( cp ){ |
3668 | lemon_sprintf(buf,"%.*s.lt" ,(int)(cp-lemp->filename),lemp->filename); |
3669 | }else{ |
3670 | lemon_sprintf(buf,"%s.lt" ,lemp->filename); |
3671 | } |
3672 | if( access(buf,004)==0 ){ |
3673 | tpltname = buf; |
3674 | }else if( access(templatename,004)==0 ){ |
3675 | tpltname = templatename; |
3676 | }else{ |
3677 | toFree = tpltname = pathsearch(lemp->argv0,templatename,0); |
3678 | } |
3679 | if( tpltname==0 ){ |
3680 | fprintf(stderr,"Can't find the parser driver template file \"%s\".\n" , |
3681 | templatename); |
3682 | lemp->errorcnt++; |
3683 | return 0; |
3684 | } |
3685 | in = fopen(tpltname,"rb" ); |
3686 | if( in==0 ){ |
3687 | fprintf(stderr,"Can't open the template file \"%s\".\n" ,tpltname); |
3688 | lemp->errorcnt++; |
3689 | } |
3690 | free(toFree); |
3691 | return in; |
3692 | } |
3693 | |
3694 | /* Print a #line directive line to the output file. */ |
3695 | PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) |
3696 | { |
3697 | fprintf(out,"#line %d \"" ,lineno); |
3698 | while( *filename ){ |
3699 | if( *filename == '\\' ) putc('\\',out); |
3700 | putc(*filename,out); |
3701 | filename++; |
3702 | } |
3703 | fprintf(out,"\"\n" ); |
3704 | } |
3705 | |
3706 | /* Print a string to the file and keep the linenumber up to date */ |
3707 | PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) |
3708 | { |
3709 | if( str==0 ) return; |
3710 | while( *str ){ |
3711 | putc(*str,out); |
3712 | if( *str=='\n' ) (*lineno)++; |
3713 | str++; |
3714 | } |
3715 | if( str[-1]!='\n' ){ |
3716 | putc('\n',out); |
3717 | (*lineno)++; |
3718 | } |
3719 | if (!lemp->nolinenosflag) { |
3720 | (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); |
3721 | } |
3722 | return; |
3723 | } |
3724 | |
3725 | /* |
3726 | ** The following routine emits code for the destructor for the |
3727 | ** symbol sp |
3728 | */ |
3729 | void emit_destructor_code( |
3730 | FILE *out, |
3731 | struct symbol *sp, |
3732 | struct lemon *lemp, |
3733 | int *lineno |
3734 | ){ |
3735 | char *cp = 0; |
3736 | |
3737 | if( sp->type==TERMINAL ){ |
3738 | cp = lemp->tokendest; |
3739 | if( cp==0 ) return; |
3740 | fprintf(out,"{\n" ); (*lineno)++; |
3741 | }else if( sp->destructor ){ |
3742 | cp = sp->destructor; |
3743 | fprintf(out,"{\n" ); (*lineno)++; |
3744 | if( !lemp->nolinenosflag ){ |
3745 | (*lineno)++; |
3746 | tplt_linedir(out,sp->destLineno,lemp->filename); |
3747 | } |
3748 | }else if( lemp->vardest ){ |
3749 | cp = lemp->vardest; |
3750 | if( cp==0 ) return; |
3751 | fprintf(out,"{\n" ); (*lineno)++; |
3752 | }else{ |
3753 | assert( 0 ); /* Cannot happen */ |
3754 | } |
3755 | for(; *cp; cp++){ |
3756 | if( *cp=='$' && cp[1]=='$' ){ |
3757 | fprintf(out,"(yypminor->yy%d)" ,sp->dtnum); |
3758 | cp++; |
3759 | continue; |
3760 | } |
3761 | if( *cp=='\n' ) (*lineno)++; |
3762 | fputc(*cp,out); |
3763 | } |
3764 | fprintf(out,"\n" ); (*lineno)++; |
3765 | if (!lemp->nolinenosflag) { |
3766 | (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); |
3767 | } |
3768 | fprintf(out,"}\n" ); (*lineno)++; |
3769 | return; |
3770 | } |
3771 | |
3772 | /* |
3773 | ** Return TRUE (non-zero) if the given symbol has a destructor. |
3774 | */ |
3775 | int has_destructor(struct symbol *sp, struct lemon *lemp) |
3776 | { |
3777 | int ret; |
3778 | if( sp->type==TERMINAL ){ |
3779 | ret = lemp->tokendest!=0; |
3780 | }else{ |
3781 | ret = lemp->vardest!=0 || sp->destructor!=0; |
3782 | } |
3783 | return ret; |
3784 | } |
3785 | |
3786 | /* |
3787 | ** Append text to a dynamically allocated string. If zText is 0 then |
3788 | ** reset the string to be empty again. Always return the complete text |
3789 | ** of the string (which is overwritten with each call). |
3790 | ** |
3791 | ** n bytes of zText are stored. If n==0 then all of zText up to the first |
3792 | ** \000 terminator is stored. zText can contain up to two instances of |
3793 | ** %d. The values of p1 and p2 are written into the first and second |
3794 | ** %d. |
3795 | ** |
3796 | ** If n==-1, then the previous character is overwritten. |
3797 | */ |
3798 | PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ |
3799 | static char empty[1] = { 0 }; |
3800 | static char *z = 0; |
3801 | static int alloced = 0; |
3802 | static int used = 0; |
3803 | int c; |
3804 | char zInt[40]; |
3805 | if( zText==0 ){ |
3806 | if( used==0 && z!=0 ) z[0] = 0; |
3807 | used = 0; |
3808 | return z; |
3809 | } |
3810 | if( n<=0 ){ |
3811 | if( n<0 ){ |
3812 | used += n; |
3813 | assert( used>=0 ); |
3814 | } |
3815 | n = lemonStrlen(zText); |
3816 | } |
3817 | if( (int) (n+sizeof(zInt)*2+used) >= alloced ){ |
3818 | alloced = n + sizeof(zInt)*2 + used + 200; |
3819 | z = (char *) realloc(z, alloced); |
3820 | } |
3821 | if( z==0 ) return empty; |
3822 | while( n-- > 0 ){ |
3823 | c = *(zText++); |
3824 | if( c=='%' && n>0 && zText[0]=='d' ){ |
3825 | lemon_sprintf(zInt, "%d" , p1); |
3826 | p1 = p2; |
3827 | lemon_strcpy(&z[used], zInt); |
3828 | used += lemonStrlen(&z[used]); |
3829 | zText++; |
3830 | n--; |
3831 | }else{ |
3832 | z[used++] = (char)c; |
3833 | } |
3834 | } |
3835 | z[used] = 0; |
3836 | return z; |
3837 | } |
3838 | |
3839 | /* |
3840 | ** Write and transform the rp->code string so that symbols are expanded. |
3841 | ** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate. |
3842 | ** |
3843 | ** Return 1 if the expanded code requires that "yylhsminor" local variable |
3844 | ** to be defined. |
3845 | */ |
3846 | PRIVATE int translate_code(struct lemon *lemp, struct rule *rp){ |
3847 | char *cp, *xp; |
3848 | int i; |
3849 | int rc = 0; /* True if yylhsminor is used */ |
3850 | int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ |
3851 | const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ |
3852 | char lhsused = 0; /* True if the LHS element has been used */ |
3853 | char lhsdirect; /* True if LHS writes directly into stack */ |
3854 | char used[MAXRHS]; /* True for each RHS element which is used */ |
3855 | char zLhs[50]; /* Convert the LHS symbol into this string */ |
3856 | char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ |
3857 | |
3858 | for(i=0; i<rp->nrhs; i++) used[i] = 0; |
3859 | lhsused = 0; |
3860 | |
3861 | if( rp->code==0 ){ |
3862 | static char newlinestr[2] = { '\n', '\0' }; |
3863 | rp->code = newlinestr; |
3864 | rp->line = rp->ruleline; |
3865 | rp->noCode = 1; |
3866 | }else{ |
3867 | rp->noCode = 0; |
3868 | } |
3869 | |
3870 | |
3871 | if( rp->nrhs==0 ){ |
3872 | /* If there are no RHS symbols, then writing directly to the LHS is ok */ |
3873 | lhsdirect = 1; |
3874 | }else if( rp->rhsalias[0]==0 ){ |
3875 | /* The left-most RHS symbol has no value. LHS direct is ok. But |
3876 | ** we have to call the destructor on the RHS symbol first. */ |
3877 | lhsdirect = 1; |
3878 | if( has_destructor(rp->rhs[0],lemp) ){ |
3879 | append_str(0,0,0,0); |
3880 | append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n" , 0, |
3881 | rp->rhs[0]->index,1-rp->nrhs); |
3882 | rp->codePrefix = Strsafe(append_str(0,0,0,0)); |
3883 | rp->noCode = 0; |
3884 | } |
3885 | }else if( rp->lhsalias==0 ){ |
3886 | /* There is no LHS value symbol. */ |
3887 | lhsdirect = 1; |
3888 | }else if( strcmp(rp->lhsalias,rp->rhsalias[0])==0 ){ |
3889 | /* The LHS symbol and the left-most RHS symbol are the same, so |
3890 | ** direct writing is allowed */ |
3891 | lhsdirect = 1; |
3892 | lhsused = 1; |
3893 | used[0] = 1; |
3894 | if( rp->lhs->dtnum!=rp->rhs[0]->dtnum ){ |
3895 | ErrorMsg(lemp->filename,rp->ruleline, |
3896 | "%s(%s) and %s(%s) share the same label but have " |
3897 | "different datatypes." , |
3898 | rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); |
3899 | lemp->errorcnt++; |
3900 | } |
3901 | }else{ |
3902 | lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/" , |
3903 | rp->lhsalias, rp->rhsalias[0]); |
3904 | zSkip = strstr(rp->code, zOvwrt); |
3905 | if( zSkip!=0 ){ |
3906 | /* The code contains a special comment that indicates that it is safe |
3907 | ** for the LHS label to overwrite left-most RHS label. */ |
3908 | lhsdirect = 1; |
3909 | }else{ |
3910 | lhsdirect = 0; |
3911 | } |
3912 | } |
3913 | if( lhsdirect ){ |
3914 | sprintf(zLhs, "yymsp[%d].minor.yy%d" ,1-rp->nrhs,rp->lhs->dtnum); |
3915 | }else{ |
3916 | rc = 1; |
3917 | sprintf(zLhs, "yylhsminor.yy%d" ,rp->lhs->dtnum); |
3918 | } |
3919 | |
3920 | append_str(0,0,0,0); |
3921 | |
3922 | /* This const cast is wrong but harmless, if we're careful. */ |
3923 | for(cp=(char *)rp->code; *cp; cp++){ |
3924 | if( cp==zSkip ){ |
3925 | append_str(zOvwrt,0,0,0); |
3926 | cp += lemonStrlen(zOvwrt)-1; |
3927 | dontUseRhs0 = 1; |
3928 | continue; |
3929 | } |
3930 | if( ISALPHA(*cp) && (cp==rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ){ |
3931 | char saved; |
3932 | for(xp= &cp[1]; ISALNUM(*xp) || *xp=='_'; xp++); |
3933 | saved = *xp; |
3934 | *xp = 0; |
3935 | if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ |
3936 | append_str(zLhs,0,0,0); |
3937 | cp = xp; |
3938 | lhsused = 1; |
3939 | }else{ |
3940 | for(i=0; i<rp->nrhs; i++){ |
3941 | if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ |
3942 | if( i==0 && dontUseRhs0 ){ |
3943 | ErrorMsg(lemp->filename,rp->ruleline, |
3944 | "Label %s used after '%s'." , |
3945 | rp->rhsalias[0], zOvwrt); |
3946 | lemp->errorcnt++; |
3947 | }else if( cp!=rp->code && cp[-1]=='@' ){ |
3948 | /* If the argument is of the form @X then substituted |
3949 | ** the token number of X, not the value of X */ |
3950 | append_str("yymsp[%d].major" ,-1,i-rp->nrhs+1,0); |
3951 | }else{ |
3952 | struct symbol *sp = rp->rhs[i]; |
3953 | int dtnum; |
3954 | if( sp->type==MULTITERMINAL ){ |
3955 | dtnum = sp->subsym[0]->dtnum; |
3956 | }else{ |
3957 | dtnum = sp->dtnum; |
3958 | } |
3959 | append_str("yymsp[%d].minor.yy%d" ,0,i-rp->nrhs+1, dtnum); |
3960 | } |
3961 | cp = xp; |
3962 | used[i] = 1; |
3963 | break; |
3964 | } |
3965 | } |
3966 | } |
3967 | *xp = saved; |
3968 | } |
3969 | append_str(cp, 1, 0, 0); |
3970 | } /* End loop */ |
3971 | |
3972 | /* Main code generation completed */ |
3973 | cp = append_str(0,0,0,0); |
3974 | if( cp && cp[0] ) rp->code = Strsafe(cp); |
3975 | append_str(0,0,0,0); |
3976 | |
3977 | /* Check to make sure the LHS has been used */ |
3978 | if( rp->lhsalias && !lhsused ){ |
3979 | ErrorMsg(lemp->filename,rp->ruleline, |
3980 | "Label \"%s\" for \"%s(%s)\" is never used." , |
3981 | rp->lhsalias,rp->lhs->name,rp->lhsalias); |
3982 | lemp->errorcnt++; |
3983 | } |
3984 | |
3985 | /* Generate destructor code for RHS minor values which are not referenced. |
3986 | ** Generate error messages for unused labels and duplicate labels. |
3987 | */ |
3988 | for(i=0; i<rp->nrhs; i++){ |
3989 | if( rp->rhsalias[i] ){ |
3990 | if( i>0 ){ |
3991 | int j; |
3992 | if( rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i])==0 ){ |
3993 | ErrorMsg(lemp->filename,rp->ruleline, |
3994 | "%s(%s) has the same label as the LHS but is not the left-most " |
3995 | "symbol on the RHS." , |
3996 | rp->rhs[i]->name, rp->rhsalias[i]); |
3997 | lemp->errorcnt++; |
3998 | } |
3999 | for(j=0; j<i; j++){ |
4000 | if( rp->rhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i])==0 ){ |
4001 | ErrorMsg(lemp->filename,rp->ruleline, |
4002 | "Label %s used for multiple symbols on the RHS of a rule." , |
4003 | rp->rhsalias[i]); |
4004 | lemp->errorcnt++; |
4005 | break; |
4006 | } |
4007 | } |
4008 | } |
4009 | if( !used[i] ){ |
4010 | ErrorMsg(lemp->filename,rp->ruleline, |
4011 | "Label %s for \"%s(%s)\" is never used." , |
4012 | rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); |
4013 | lemp->errorcnt++; |
4014 | } |
4015 | }else if( i>0 && has_destructor(rp->rhs[i],lemp) ){ |
4016 | append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n" , 0, |
4017 | rp->rhs[i]->index,i-rp->nrhs+1); |
4018 | } |
4019 | } |
4020 | |
4021 | /* If unable to write LHS values directly into the stack, write the |
4022 | ** saved LHS value now. */ |
4023 | if( lhsdirect==0 ){ |
4024 | append_str(" yymsp[%d].minor.yy%d = " , 0, 1-rp->nrhs, rp->lhs->dtnum); |
4025 | append_str(zLhs, 0, 0, 0); |
4026 | append_str(";\n" , 0, 0, 0); |
4027 | } |
4028 | |
4029 | /* Suffix code generation complete */ |
4030 | cp = append_str(0,0,0,0); |
4031 | if( cp && cp[0] ){ |
4032 | rp->codeSuffix = Strsafe(cp); |
4033 | rp->noCode = 0; |
4034 | } |
4035 | |
4036 | return rc; |
4037 | } |
4038 | |
4039 | /* |
4040 | ** Generate code which executes when the rule "rp" is reduced. Write |
4041 | ** the code to "out". Make sure lineno stays up-to-date. |
4042 | */ |
4043 | PRIVATE void emit_code( |
4044 | FILE *out, |
4045 | struct rule *rp, |
4046 | struct lemon *lemp, |
4047 | int *lineno |
4048 | ){ |
4049 | const char *cp; |
4050 | |
4051 | /* Setup code prior to the #line directive */ |
4052 | if( rp->codePrefix && rp->codePrefix[0] ){ |
4053 | fprintf(out, "{%s" , rp->codePrefix); |
4054 | for(cp=rp->codePrefix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } |
4055 | } |
4056 | |
4057 | /* Generate code to do the reduce action */ |
4058 | if( rp->code ){ |
4059 | if( !lemp->nolinenosflag ){ |
4060 | (*lineno)++; |
4061 | tplt_linedir(out,rp->line,lemp->filename); |
4062 | } |
4063 | fprintf(out,"{%s" ,rp->code); |
4064 | for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } |
4065 | fprintf(out,"}\n" ); (*lineno)++; |
4066 | if( !lemp->nolinenosflag ){ |
4067 | (*lineno)++; |
4068 | tplt_linedir(out,*lineno,lemp->outname); |
4069 | } |
4070 | } |
4071 | |
4072 | /* Generate breakdown code that occurs after the #line directive */ |
4073 | if( rp->codeSuffix && rp->codeSuffix[0] ){ |
4074 | fprintf(out, "%s" , rp->codeSuffix); |
4075 | for(cp=rp->codeSuffix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } |
4076 | } |
4077 | |
4078 | if( rp->codePrefix ){ |
4079 | fprintf(out, "}\n" ); (*lineno)++; |
4080 | } |
4081 | |
4082 | return; |
4083 | } |
4084 | |
4085 | /* |
4086 | ** Print the definition of the union used for the parser's data stack. |
4087 | ** This union contains fields for every possible data type for tokens |
4088 | ** and nonterminals. In the process of computing and printing this |
4089 | ** union, also set the ".dtnum" field of every terminal and nonterminal |
4090 | ** symbol. |
4091 | */ |
4092 | void print_stack_union( |
4093 | FILE *out, /* The output stream */ |
4094 | struct lemon *lemp, /* The main info structure for this parser */ |
4095 | int *plineno, /* Pointer to the line number */ |
4096 | int mhflag /* True if generating makeheaders output */ |
4097 | ){ |
4098 | int lineno; /* The line number of the output */ |
4099 | char **types; /* A hash table of datatypes */ |
4100 | int arraysize; /* Size of the "types" array */ |
4101 | int maxdtlength; /* Maximum length of any ".datatype" field. */ |
4102 | char *stddt; /* Standardized name for a datatype */ |
4103 | int i,j; /* Loop counters */ |
4104 | unsigned hash; /* For hashing the name of a type */ |
4105 | const char *name; /* Name of the parser */ |
4106 | |
4107 | /* Allocate and initialize types[] and allocate stddt[] */ |
4108 | arraysize = lemp->nsymbol * 2; |
4109 | types = (char**)calloc( arraysize, sizeof(char*) ); |
4110 | if( types==0 ){ |
4111 | fprintf(stderr,"Out of memory.\n" ); |
4112 | exit(1); |
4113 | } |
4114 | for(i=0; i<arraysize; i++) types[i] = 0; |
4115 | maxdtlength = 0; |
4116 | if( lemp->vartype ){ |
4117 | maxdtlength = lemonStrlen(lemp->vartype); |
4118 | } |
4119 | for(i=0; i<lemp->nsymbol; i++){ |
4120 | int len; |
4121 | struct symbol *sp = lemp->symbols[i]; |
4122 | if( sp->datatype==0 ) continue; |
4123 | len = lemonStrlen(sp->datatype); |
4124 | if( len>maxdtlength ) maxdtlength = len; |
4125 | } |
4126 | stddt = (char*)malloc( maxdtlength*2 + 1 ); |
4127 | if( stddt==0 ){ |
4128 | fprintf(stderr,"Out of memory.\n" ); |
4129 | exit(1); |
4130 | } |
4131 | |
4132 | /* Build a hash table of datatypes. The ".dtnum" field of each symbol |
4133 | ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is |
4134 | ** used for terminal symbols. If there is no %default_type defined then |
4135 | ** 0 is also used as the .dtnum value for nonterminals which do not specify |
4136 | ** a datatype using the %type directive. |
4137 | */ |
4138 | for(i=0; i<lemp->nsymbol; i++){ |
4139 | struct symbol *sp = lemp->symbols[i]; |
4140 | char *cp; |
4141 | if( sp==lemp->errsym ){ |
4142 | sp->dtnum = arraysize+1; |
4143 | continue; |
4144 | } |
4145 | if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ |
4146 | sp->dtnum = 0; |
4147 | continue; |
4148 | } |
4149 | cp = sp->datatype; |
4150 | if( cp==0 ) cp = lemp->vartype; |
4151 | j = 0; |
4152 | while( ISSPACE(*cp) ) cp++; |
4153 | while( *cp ) stddt[j++] = *cp++; |
4154 | while( j>0 && ISSPACE(stddt[j-1]) ) j--; |
4155 | stddt[j] = 0; |
4156 | if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ |
4157 | sp->dtnum = 0; |
4158 | continue; |
4159 | } |
4160 | hash = 0; |
4161 | for(j=0; stddt[j]; j++){ |
4162 | hash = hash*53 + stddt[j]; |
4163 | } |
4164 | hash = (hash & 0x7fffffff)%arraysize; |
4165 | while( types[hash] ){ |
4166 | if( strcmp(types[hash],stddt)==0 ){ |
4167 | sp->dtnum = hash + 1; |
4168 | break; |
4169 | } |
4170 | hash++; |
4171 | if( hash>=(unsigned)arraysize ) hash = 0; |
4172 | } |
4173 | if( types[hash]==0 ){ |
4174 | sp->dtnum = hash + 1; |
4175 | types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); |
4176 | if( types[hash]==0 ){ |
4177 | fprintf(stderr,"Out of memory.\n" ); |
4178 | exit(1); |
4179 | } |
4180 | lemon_strcpy(types[hash],stddt); |
4181 | } |
4182 | } |
4183 | |
4184 | /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ |
4185 | name = lemp->name ? lemp->name : "Parse" ; |
4186 | lineno = *plineno; |
4187 | if( mhflag ){ fprintf(out,"#if INTERFACE\n" ); lineno++; } |
4188 | fprintf(out,"#define %sTOKENTYPE %s\n" ,name, |
4189 | lemp->tokentype?lemp->tokentype:"void*" ); lineno++; |
4190 | if( mhflag ){ fprintf(out,"#endif\n" ); lineno++; } |
4191 | fprintf(out,"typedef union {\n" ); lineno++; |
4192 | fprintf(out," int yyinit;\n" ); lineno++; |
4193 | fprintf(out," %sTOKENTYPE yy0;\n" ,name); lineno++; |
4194 | for(i=0; i<arraysize; i++){ |
4195 | if( types[i]==0 ) continue; |
4196 | fprintf(out," %s yy%d;\n" ,types[i],i+1); lineno++; |
4197 | free(types[i]); |
4198 | } |
4199 | if( lemp->errsym && lemp->errsym->useCnt ){ |
4200 | fprintf(out," int yy%d;\n" ,lemp->errsym->dtnum); lineno++; |
4201 | } |
4202 | free(stddt); |
4203 | free(types); |
4204 | fprintf(out,"} YYMINORTYPE;\n" ); lineno++; |
4205 | *plineno = lineno; |
4206 | } |
4207 | |
4208 | /* |
4209 | ** Return the name of a C datatype able to represent values between |
4210 | ** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof |
4211 | ** for that type (1, 2, or 4) into *pnByte. |
4212 | */ |
4213 | static const char *minimum_size_type(int lwr, int upr, int *pnByte){ |
4214 | const char *zType = "int" ; |
4215 | int nByte = 4; |
4216 | if( lwr>=0 ){ |
4217 | if( upr<=255 ){ |
4218 | zType = "unsigned char" ; |
4219 | nByte = 1; |
4220 | }else if( upr<65535 ){ |
4221 | zType = "unsigned short int" ; |
4222 | nByte = 2; |
4223 | }else{ |
4224 | zType = "unsigned int" ; |
4225 | nByte = 4; |
4226 | } |
4227 | }else if( lwr>=-127 && upr<=127 ){ |
4228 | zType = "signed char" ; |
4229 | nByte = 1; |
4230 | }else if( lwr>=-32767 && upr<32767 ){ |
4231 | zType = "short" ; |
4232 | nByte = 2; |
4233 | } |
4234 | if( pnByte ) *pnByte = nByte; |
4235 | return zType; |
4236 | } |
4237 | |
4238 | /* |
4239 | ** Each state contains a set of token transaction and a set of |
4240 | ** nonterminal transactions. Each of these sets makes an instance |
4241 | ** of the following structure. An array of these structures is used |
4242 | ** to order the creation of entries in the yy_action[] table. |
4243 | */ |
4244 | struct axset { |
4245 | struct state *stp; /* A pointer to a state */ |
4246 | int isTkn; /* True to use tokens. False for non-terminals */ |
4247 | int nAction; /* Number of actions */ |
4248 | int iOrder; /* Original order of action sets */ |
4249 | }; |
4250 | |
4251 | /* |
4252 | ** Compare to axset structures for sorting purposes |
4253 | */ |
4254 | static int axset_compare(const void *a, const void *b){ |
4255 | struct axset *p1 = (struct axset*)a; |
4256 | struct axset *p2 = (struct axset*)b; |
4257 | int c; |
4258 | c = p2->nAction - p1->nAction; |
4259 | if( c==0 ){ |
4260 | c = p1->iOrder - p2->iOrder; |
4261 | } |
4262 | assert( c!=0 || p1==p2 ); |
4263 | return c; |
4264 | } |
4265 | |
4266 | /* |
4267 | ** Write text on "out" that describes the rule "rp". |
4268 | */ |
4269 | static void writeRuleText(FILE *out, struct rule *rp){ |
4270 | int j; |
4271 | fprintf(out,"%s ::=" , rp->lhs->name); |
4272 | for(j=0; j<rp->nrhs; j++){ |
4273 | struct symbol *sp = rp->rhs[j]; |
4274 | if( sp->type!=MULTITERMINAL ){ |
4275 | fprintf(out," %s" , sp->name); |
4276 | }else{ |
4277 | int k; |
4278 | fprintf(out," %s" , sp->subsym[0]->name); |
4279 | for(k=1; k<sp->nsubsym; k++){ |
4280 | fprintf(out,"|%s" ,sp->subsym[k]->name); |
4281 | } |
4282 | } |
4283 | } |
4284 | } |
4285 | |
4286 | |
4287 | /* Generate C source code for the parser */ |
4288 | void ReportTable( |
4289 | struct lemon *lemp, |
4290 | int mhflag, /* Output in makeheaders format if true */ |
4291 | int sqlFlag /* Generate the *.sql file too */ |
4292 | ){ |
4293 | FILE *out, *in, *sql; |
4294 | int lineno; |
4295 | struct state *stp; |
4296 | struct action *ap; |
4297 | struct rule *rp; |
4298 | struct acttab *pActtab; |
4299 | int i, j, n, sz; |
4300 | int nLookAhead; |
4301 | int szActionType; /* sizeof(YYACTIONTYPE) */ |
4302 | int szCodeType; /* sizeof(YYCODETYPE) */ |
4303 | const char *name; |
4304 | int mnTknOfst, mxTknOfst; |
4305 | int mnNtOfst, mxNtOfst; |
4306 | struct axset *ax; |
4307 | char *prefix; |
4308 | |
4309 | lemp->minShiftReduce = lemp->nstate; |
4310 | lemp->errAction = lemp->minShiftReduce + lemp->nrule; |
4311 | lemp->accAction = lemp->errAction + 1; |
4312 | lemp->noAction = lemp->accAction + 1; |
4313 | lemp->minReduce = lemp->noAction + 1; |
4314 | lemp->maxAction = lemp->minReduce + lemp->nrule; |
4315 | |
4316 | in = tplt_open(lemp); |
4317 | if( in==0 ) return; |
4318 | out = file_open(lemp,".c" ,"wb" ); |
4319 | if( out==0 ){ |
4320 | fclose(in); |
4321 | return; |
4322 | } |
4323 | if( sqlFlag==0 ){ |
4324 | sql = 0; |
4325 | }else{ |
4326 | sql = file_open(lemp, ".sql" , "wb" ); |
4327 | if( sql==0 ){ |
4328 | fclose(in); |
4329 | fclose(out); |
4330 | return; |
4331 | } |
4332 | fprintf(sql, |
4333 | "BEGIN;\n" |
4334 | "CREATE TABLE symbol(\n" |
4335 | " id INTEGER PRIMARY KEY,\n" |
4336 | " name TEXT NOT NULL,\n" |
4337 | " isTerminal BOOLEAN NOT NULL,\n" |
4338 | " fallback INTEGER REFERENCES symbol" |
4339 | " DEFERRABLE INITIALLY DEFERRED\n" |
4340 | ");\n" |
4341 | ); |
4342 | for(i=0; i<lemp->nsymbol; i++){ |
4343 | fprintf(sql, |
4344 | "INSERT INTO symbol(id,name,isTerminal,fallback)" |
4345 | "VALUES(%d,'%s',%s" , |
4346 | i, lemp->symbols[i]->name, |
4347 | i<lemp->nterminal ? "TRUE" : "FALSE" |
4348 | ); |
4349 | if( lemp->symbols[i]->fallback ){ |
4350 | fprintf(sql, ",%d);\n" , lemp->symbols[i]->fallback->index); |
4351 | }else{ |
4352 | fprintf(sql, ",NULL);\n" ); |
4353 | } |
4354 | } |
4355 | fprintf(sql, |
4356 | "CREATE TABLE rule(\n" |
4357 | " ruleid INTEGER PRIMARY KEY,\n" |
4358 | " lhs INTEGER REFERENCES symbol(id),\n" |
4359 | " txt TEXT\n" |
4360 | ");\n" |
4361 | "CREATE TABLE rulerhs(\n" |
4362 | " ruleid INTEGER REFERENCES rule(ruleid),\n" |
4363 | " pos INTEGER,\n" |
4364 | " sym INTEGER REFERENCES symbol(id)\n" |
4365 | ");\n" |
4366 | ); |
4367 | for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ |
4368 | assert( i==rp->iRule ); |
4369 | fprintf(sql, |
4370 | "INSERT INTO rule(ruleid,lhs,txt)VALUES(%d,%d,'" , |
4371 | rp->iRule, rp->lhs->index |
4372 | ); |
4373 | writeRuleText(sql, rp); |
4374 | fprintf(sql,"');\n" ); |
4375 | for(j=0; j<rp->nrhs; j++){ |
4376 | struct symbol *sp = rp->rhs[j]; |
4377 | if( sp->type!=MULTITERMINAL ){ |
4378 | fprintf(sql, |
4379 | "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n" , |
4380 | i,j,sp->index |
4381 | ); |
4382 | }else{ |
4383 | int k; |
4384 | for(k=0; k<sp->nsubsym; k++){ |
4385 | fprintf(sql, |
4386 | "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n" , |
4387 | i,j,sp->subsym[k]->index |
4388 | ); |
4389 | } |
4390 | } |
4391 | } |
4392 | } |
4393 | fprintf(sql, "COMMIT;\n" ); |
4394 | } |
4395 | lineno = 1; |
4396 | |
4397 | fprintf(out, |
4398 | "/* This file is automatically generated by Lemon from input grammar\n" |
4399 | "** source file \"%s\". */\n" , lemp->filename); lineno += 2; |
4400 | |
4401 | /* The first %include directive begins with a C-language comment, |
4402 | ** then skip over the header comment of the template file |
4403 | */ |
4404 | if( lemp->include==0 ) lemp->include = "" ; |
4405 | for(i=0; ISSPACE(lemp->include[i]); i++){ |
4406 | if( lemp->include[i]=='\n' ){ |
4407 | lemp->include += i+1; |
4408 | i = -1; |
4409 | } |
4410 | } |
4411 | if( lemp->include[0]=='/' ){ |
4412 | tplt_skip_header(in,&lineno); |
4413 | }else{ |
4414 | tplt_xfer(lemp->name,in,out,&lineno); |
4415 | } |
4416 | |
4417 | /* Generate the include code, if any */ |
4418 | tplt_print(out,lemp,lemp->include,&lineno); |
4419 | if( mhflag ){ |
4420 | char *incName = file_makename(lemp, ".h" ); |
4421 | fprintf(out,"#include \"%s\"\n" , incName); lineno++; |
4422 | free(incName); |
4423 | } |
4424 | tplt_xfer(lemp->name,in,out,&lineno); |
4425 | |
4426 | /* Generate #defines for all tokens */ |
4427 | if( lemp->tokenprefix ) prefix = lemp->tokenprefix; |
4428 | else prefix = "" ; |
4429 | if( mhflag ){ |
4430 | fprintf(out,"#if INTERFACE\n" ); lineno++; |
4431 | }else{ |
4432 | fprintf(out,"#ifndef %s%s\n" , prefix, lemp->symbols[1]->name); |
4433 | } |
4434 | for(i=1; i<lemp->nterminal; i++){ |
4435 | fprintf(out,"#define %s%-30s %2d\n" ,prefix,lemp->symbols[i]->name,i); |
4436 | lineno++; |
4437 | } |
4438 | fprintf(out,"#endif\n" ); lineno++; |
4439 | tplt_xfer(lemp->name,in,out,&lineno); |
4440 | |
4441 | /* Generate the defines */ |
4442 | fprintf(out,"#define YYCODETYPE %s\n" , |
4443 | minimum_size_type(0, lemp->nsymbol, &szCodeType)); lineno++; |
4444 | fprintf(out,"#define YYNOCODE %d\n" ,lemp->nsymbol); lineno++; |
4445 | fprintf(out,"#define YYACTIONTYPE %s\n" , |
4446 | minimum_size_type(0,lemp->maxAction,&szActionType)); lineno++; |
4447 | if( lemp->wildcard ){ |
4448 | fprintf(out,"#define YYWILDCARD %d\n" , |
4449 | lemp->wildcard->index); lineno++; |
4450 | } |
4451 | print_stack_union(out,lemp,&lineno,mhflag); |
4452 | fprintf(out, "#ifndef YYSTACKDEPTH\n" ); lineno++; |
4453 | if( lemp->stacksize ){ |
4454 | fprintf(out,"#define YYSTACKDEPTH %s\n" ,lemp->stacksize); lineno++; |
4455 | }else{ |
4456 | fprintf(out,"#define YYSTACKDEPTH 100\n" ); lineno++; |
4457 | } |
4458 | fprintf(out, "#endif\n" ); lineno++; |
4459 | if( mhflag ){ |
4460 | fprintf(out,"#if INTERFACE\n" ); lineno++; |
4461 | } |
4462 | name = lemp->name ? lemp->name : "Parse" ; |
4463 | if( lemp->arg && lemp->arg[0] ){ |
4464 | i = lemonStrlen(lemp->arg); |
4465 | while( i>=1 && ISSPACE(lemp->arg[i-1]) ) i--; |
4466 | while( i>=1 && (ISALNUM(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; |
4467 | fprintf(out,"#define %sARG_SDECL %s;\n" ,name,lemp->arg); lineno++; |
4468 | fprintf(out,"#define %sARG_PDECL ,%s\n" ,name,lemp->arg); lineno++; |
4469 | fprintf(out,"#define %sARG_PARAM ,%s\n" ,name,&lemp->arg[i]); lineno++; |
4470 | fprintf(out,"#define %sARG_FETCH %s=yypParser->%s;\n" , |
4471 | name,lemp->arg,&lemp->arg[i]); lineno++; |
4472 | fprintf(out,"#define %sARG_STORE yypParser->%s=%s;\n" , |
4473 | name,&lemp->arg[i],&lemp->arg[i]); lineno++; |
4474 | }else{ |
4475 | fprintf(out,"#define %sARG_SDECL\n" ,name); lineno++; |
4476 | fprintf(out,"#define %sARG_PDECL\n" ,name); lineno++; |
4477 | fprintf(out,"#define %sARG_PARAM\n" ,name); lineno++; |
4478 | fprintf(out,"#define %sARG_FETCH\n" ,name); lineno++; |
4479 | fprintf(out,"#define %sARG_STORE\n" ,name); lineno++; |
4480 | } |
4481 | if( lemp->ctx && lemp->ctx[0] ){ |
4482 | i = lemonStrlen(lemp->ctx); |
4483 | while( i>=1 && ISSPACE(lemp->ctx[i-1]) ) i--; |
4484 | while( i>=1 && (ISALNUM(lemp->ctx[i-1]) || lemp->ctx[i-1]=='_') ) i--; |
4485 | fprintf(out,"#define %sCTX_SDECL %s;\n" ,name,lemp->ctx); lineno++; |
4486 | fprintf(out,"#define %sCTX_PDECL ,%s\n" ,name,lemp->ctx); lineno++; |
4487 | fprintf(out,"#define %sCTX_PARAM ,%s\n" ,name,&lemp->ctx[i]); lineno++; |
4488 | fprintf(out,"#define %sCTX_FETCH %s=yypParser->%s;\n" , |
4489 | name,lemp->ctx,&lemp->ctx[i]); lineno++; |
4490 | fprintf(out,"#define %sCTX_STORE yypParser->%s=%s;\n" , |
4491 | name,&lemp->ctx[i],&lemp->ctx[i]); lineno++; |
4492 | }else{ |
4493 | fprintf(out,"#define %sCTX_SDECL\n" ,name); lineno++; |
4494 | fprintf(out,"#define %sCTX_PDECL\n" ,name); lineno++; |
4495 | fprintf(out,"#define %sCTX_PARAM\n" ,name); lineno++; |
4496 | fprintf(out,"#define %sCTX_FETCH\n" ,name); lineno++; |
4497 | fprintf(out,"#define %sCTX_STORE\n" ,name); lineno++; |
4498 | } |
4499 | if( mhflag ){ |
4500 | fprintf(out,"#endif\n" ); lineno++; |
4501 | } |
4502 | if( lemp->errsym && lemp->errsym->useCnt ){ |
4503 | fprintf(out,"#define YYERRORSYMBOL %d\n" ,lemp->errsym->index); lineno++; |
4504 | fprintf(out,"#define YYERRSYMDT yy%d\n" ,lemp->errsym->dtnum); lineno++; |
4505 | } |
4506 | if( lemp->has_fallback ){ |
4507 | fprintf(out,"#define YYFALLBACK 1\n" ); lineno++; |
4508 | } |
4509 | |
4510 | /* Compute the action table, but do not output it yet. The action |
4511 | ** table must be computed before generating the YYNSTATE macro because |
4512 | ** we need to know how many states can be eliminated. |
4513 | */ |
4514 | ax = (struct axset *) calloc(lemp->nxstate*2, sizeof(ax[0])); |
4515 | if( ax==0 ){ |
4516 | fprintf(stderr,"malloc failed\n" ); |
4517 | exit(1); |
4518 | } |
4519 | for(i=0; i<lemp->nxstate; i++){ |
4520 | stp = lemp->sorted[i]; |
4521 | ax[i*2].stp = stp; |
4522 | ax[i*2].isTkn = 1; |
4523 | ax[i*2].nAction = stp->nTknAct; |
4524 | ax[i*2+1].stp = stp; |
4525 | ax[i*2+1].isTkn = 0; |
4526 | ax[i*2+1].nAction = stp->nNtAct; |
4527 | } |
4528 | mxTknOfst = mnTknOfst = 0; |
4529 | mxNtOfst = mnNtOfst = 0; |
4530 | /* In an effort to minimize the action table size, use the heuristic |
4531 | ** of placing the largest action sets first */ |
4532 | for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i; |
4533 | qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); |
4534 | pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); |
4535 | for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){ |
4536 | stp = ax[i].stp; |
4537 | if( ax[i].isTkn ){ |
4538 | for(ap=stp->ap; ap; ap=ap->next){ |
4539 | int action; |
4540 | if( ap->sp->index>=lemp->nterminal ) continue; |
4541 | action = compute_action(lemp, ap); |
4542 | if( action<0 ) continue; |
4543 | acttab_action(pActtab, ap->sp->index, action); |
4544 | } |
4545 | stp->iTknOfst = acttab_insert(pActtab, 1); |
4546 | if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; |
4547 | if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; |
4548 | }else{ |
4549 | for(ap=stp->ap; ap; ap=ap->next){ |
4550 | int action; |
4551 | if( ap->sp->index<lemp->nterminal ) continue; |
4552 | if( ap->sp->index==lemp->nsymbol ) continue; |
4553 | action = compute_action(lemp, ap); |
4554 | if( action<0 ) continue; |
4555 | acttab_action(pActtab, ap->sp->index, action); |
4556 | } |
4557 | stp->iNtOfst = acttab_insert(pActtab, 0); |
4558 | if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; |
4559 | if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; |
4560 | } |
4561 | #if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ |
4562 | { int jj, nn; |
4563 | for(jj=nn=0; jj<pActtab->nAction; jj++){ |
4564 | if( pActtab->aAction[jj].action<0 ) nn++; |
4565 | } |
4566 | printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n" , |
4567 | i, stp->statenum, ax[i].isTkn ? "Token" : "Var " , |
4568 | ax[i].nAction, pActtab->nAction, nn); |
4569 | } |
4570 | #endif |
4571 | } |
4572 | free(ax); |
4573 | |
4574 | /* Mark rules that are actually used for reduce actions after all |
4575 | ** optimizations have been applied |
4576 | */ |
4577 | for(rp=lemp->rule; rp; rp=rp->next) rp->doesReduce = LEMON_FALSE; |
4578 | for(i=0; i<lemp->nxstate; i++){ |
4579 | for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ |
4580 | if( ap->type==REDUCE || ap->type==SHIFTREDUCE ){ |
4581 | ap->x.rp->doesReduce = 1; |
4582 | } |
4583 | } |
4584 | } |
4585 | |
4586 | /* Finish rendering the constants now that the action table has |
4587 | ** been computed */ |
4588 | fprintf(out,"#define YYNSTATE %d\n" ,lemp->nxstate); lineno++; |
4589 | fprintf(out,"#define YYNRULE %d\n" ,lemp->nrule); lineno++; |
4590 | fprintf(out,"#define YYNRULE_WITH_ACTION %d\n" ,lemp->nruleWithAction); |
4591 | lineno++; |
4592 | fprintf(out,"#define YYNTOKEN %d\n" ,lemp->nterminal); lineno++; |
4593 | fprintf(out,"#define YY_MAX_SHIFT %d\n" ,lemp->nxstate-1); lineno++; |
4594 | i = lemp->minShiftReduce; |
4595 | fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n" ,i); lineno++; |
4596 | i += lemp->nrule; |
4597 | fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n" , i-1); lineno++; |
4598 | fprintf(out,"#define YY_ERROR_ACTION %d\n" , lemp->errAction); lineno++; |
4599 | fprintf(out,"#define YY_ACCEPT_ACTION %d\n" , lemp->accAction); lineno++; |
4600 | fprintf(out,"#define YY_NO_ACTION %d\n" , lemp->noAction); lineno++; |
4601 | fprintf(out,"#define YY_MIN_REDUCE %d\n" , lemp->minReduce); lineno++; |
4602 | i = lemp->minReduce + lemp->nrule; |
4603 | fprintf(out,"#define YY_MAX_REDUCE %d\n" , i-1); lineno++; |
4604 | tplt_xfer(lemp->name,in,out,&lineno); |
4605 | |
4606 | /* Now output the action table and its associates: |
4607 | ** |
4608 | ** yy_action[] A single table containing all actions. |
4609 | ** yy_lookahead[] A table containing the lookahead for each entry in |
4610 | ** yy_action. Used to detect hash collisions. |
4611 | ** yy_shift_ofst[] For each state, the offset into yy_action for |
4612 | ** shifting terminals. |
4613 | ** yy_reduce_ofst[] For each state, the offset into yy_action for |
4614 | ** shifting non-terminals after a reduce. |
4615 | ** yy_default[] Default action for each state. |
4616 | */ |
4617 | |
4618 | /* Output the yy_action table */ |
4619 | lemp->nactiontab = n = acttab_action_size(pActtab); |
4620 | lemp->tablesize += n*szActionType; |
4621 | fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n" , n); lineno++; |
4622 | fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n" ); lineno++; |
4623 | for(i=j=0; i<n; i++){ |
4624 | int action = acttab_yyaction(pActtab, i); |
4625 | if( action<0 ) action = lemp->noAction; |
4626 | if( j==0 ) fprintf(out," /* %5d */ " , i); |
4627 | fprintf(out, " %4d," , action); |
4628 | if( j==9 || i==n-1 ){ |
4629 | fprintf(out, "\n" ); lineno++; |
4630 | j = 0; |
4631 | }else{ |
4632 | j++; |
4633 | } |
4634 | } |
4635 | fprintf(out, "};\n" ); lineno++; |
4636 | |
4637 | /* Output the yy_lookahead table */ |
4638 | lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); |
4639 | lemp->tablesize += n*szCodeType; |
4640 | fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n" ); lineno++; |
4641 | for(i=j=0; i<n; i++){ |
4642 | int la = acttab_yylookahead(pActtab, i); |
4643 | if( la<0 ) la = lemp->nsymbol; |
4644 | if( j==0 ) fprintf(out," /* %5d */ " , i); |
4645 | fprintf(out, " %4d," , la); |
4646 | if( j==9 ){ |
4647 | fprintf(out, "\n" ); lineno++; |
4648 | j = 0; |
4649 | }else{ |
4650 | j++; |
4651 | } |
4652 | } |
4653 | /* Add extra entries to the end of the yy_lookahead[] table so that |
4654 | ** yy_shift_ofst[]+iToken will always be a valid index into the array, |
4655 | ** even for the largest possible value of yy_shift_ofst[] and iToken. */ |
4656 | nLookAhead = lemp->nterminal + lemp->nactiontab; |
4657 | while( i<nLookAhead ){ |
4658 | if( j==0 ) fprintf(out," /* %5d */ " , i); |
4659 | fprintf(out, " %4d," , lemp->nterminal); |
4660 | if( j==9 ){ |
4661 | fprintf(out, "\n" ); lineno++; |
4662 | j = 0; |
4663 | }else{ |
4664 | j++; |
4665 | } |
4666 | i++; |
4667 | } |
4668 | if( j>0 ){ fprintf(out, "\n" ); lineno++; } |
4669 | fprintf(out, "};\n" ); lineno++; |
4670 | |
4671 | /* Output the yy_shift_ofst[] table */ |
4672 | n = lemp->nxstate; |
4673 | while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; |
4674 | fprintf(out, "#define YY_SHIFT_COUNT (%d)\n" , n-1); lineno++; |
4675 | fprintf(out, "#define YY_SHIFT_MIN (%d)\n" , mnTknOfst); lineno++; |
4676 | fprintf(out, "#define YY_SHIFT_MAX (%d)\n" , mxTknOfst); lineno++; |
4677 | fprintf(out, "static const %s yy_shift_ofst[] = {\n" , |
4678 | minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); |
4679 | lineno++; |
4680 | lemp->tablesize += n*sz; |
4681 | for(i=j=0; i<n; i++){ |
4682 | int ofst; |
4683 | stp = lemp->sorted[i]; |
4684 | ofst = stp->iTknOfst; |
4685 | if( ofst==NO_OFFSET ) ofst = lemp->nactiontab; |
4686 | if( j==0 ) fprintf(out," /* %5d */ " , i); |
4687 | fprintf(out, " %4d," , ofst); |
4688 | if( j==9 || i==n-1 ){ |
4689 | fprintf(out, "\n" ); lineno++; |
4690 | j = 0; |
4691 | }else{ |
4692 | j++; |
4693 | } |
4694 | } |
4695 | fprintf(out, "};\n" ); lineno++; |
4696 | |
4697 | /* Output the yy_reduce_ofst[] table */ |
4698 | n = lemp->nxstate; |
4699 | while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; |
4700 | fprintf(out, "#define YY_REDUCE_COUNT (%d)\n" , n-1); lineno++; |
4701 | fprintf(out, "#define YY_REDUCE_MIN (%d)\n" , mnNtOfst); lineno++; |
4702 | fprintf(out, "#define YY_REDUCE_MAX (%d)\n" , mxNtOfst); lineno++; |
4703 | fprintf(out, "static const %s yy_reduce_ofst[] = {\n" , |
4704 | minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; |
4705 | lemp->tablesize += n*sz; |
4706 | for(i=j=0; i<n; i++){ |
4707 | int ofst; |
4708 | stp = lemp->sorted[i]; |
4709 | ofst = stp->iNtOfst; |
4710 | if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; |
4711 | if( j==0 ) fprintf(out," /* %5d */ " , i); |
4712 | fprintf(out, " %4d," , ofst); |
4713 | if( j==9 || i==n-1 ){ |
4714 | fprintf(out, "\n" ); lineno++; |
4715 | j = 0; |
4716 | }else{ |
4717 | j++; |
4718 | } |
4719 | } |
4720 | fprintf(out, "};\n" ); lineno++; |
4721 | |
4722 | /* Output the default action table */ |
4723 | fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n" ); lineno++; |
4724 | n = lemp->nxstate; |
4725 | lemp->tablesize += n*szActionType; |
4726 | for(i=j=0; i<n; i++){ |
4727 | stp = lemp->sorted[i]; |
4728 | if( j==0 ) fprintf(out," /* %5d */ " , i); |
4729 | if( stp->iDfltReduce<0 ){ |
4730 | fprintf(out, " %4d," , lemp->errAction); |
4731 | }else{ |
4732 | fprintf(out, " %4d," , stp->iDfltReduce + lemp->minReduce); |
4733 | } |
4734 | if( j==9 || i==n-1 ){ |
4735 | fprintf(out, "\n" ); lineno++; |
4736 | j = 0; |
4737 | }else{ |
4738 | j++; |
4739 | } |
4740 | } |
4741 | fprintf(out, "};\n" ); lineno++; |
4742 | tplt_xfer(lemp->name,in,out,&lineno); |
4743 | |
4744 | /* Generate the table of fallback tokens. |
4745 | */ |
4746 | if( lemp->has_fallback ){ |
4747 | int mx = lemp->nterminal - 1; |
4748 | /* 2019-08-28: Generate fallback entries for every token to avoid |
4749 | ** having to do a range check on the index */ |
4750 | /* while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } */ |
4751 | lemp->tablesize += (mx+1)*szCodeType; |
4752 | for(i=0; i<=mx; i++){ |
4753 | struct symbol *p = lemp->symbols[i]; |
4754 | if( p->fallback==0 ){ |
4755 | fprintf(out, " 0, /* %10s => nothing */\n" , p->name); |
4756 | }else{ |
4757 | fprintf(out, " %3d, /* %10s => %s */\n" , p->fallback->index, |
4758 | p->name, p->fallback->name); |
4759 | } |
4760 | lineno++; |
4761 | } |
4762 | } |
4763 | tplt_xfer(lemp->name, in, out, &lineno); |
4764 | |
4765 | /* Generate a table containing the symbolic name of every symbol |
4766 | */ |
4767 | for(i=0; i<lemp->nsymbol; i++){ |
4768 | fprintf(out," /* %4d */ \"%s\",\n" ,i, lemp->symbols[i]->name); lineno++; |
4769 | } |
4770 | tplt_xfer(lemp->name,in,out,&lineno); |
4771 | |
4772 | /* Generate a table containing a text string that describes every |
4773 | ** rule in the rule set of the grammar. This information is used |
4774 | ** when tracing REDUCE actions. |
4775 | */ |
4776 | for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ |
4777 | assert( rp->iRule==i ); |
4778 | fprintf(out," /* %3d */ \"" , i); |
4779 | writeRuleText(out, rp); |
4780 | fprintf(out,"\",\n" ); lineno++; |
4781 | } |
4782 | tplt_xfer(lemp->name,in,out,&lineno); |
4783 | |
4784 | /* Generate code which executes every time a symbol is popped from |
4785 | ** the stack while processing errors or while destroying the parser. |
4786 | ** (In other words, generate the %destructor actions) |
4787 | */ |
4788 | if( lemp->tokendest ){ |
4789 | int once = 1; |
4790 | for(i=0; i<lemp->nsymbol; i++){ |
4791 | struct symbol *sp = lemp->symbols[i]; |
4792 | if( sp==0 || sp->type!=TERMINAL ) continue; |
4793 | if( once ){ |
4794 | fprintf(out, " /* TERMINAL Destructor */\n" ); lineno++; |
4795 | once = 0; |
4796 | } |
4797 | fprintf(out," case %d: /* %s */\n" , sp->index, sp->name); lineno++; |
4798 | } |
4799 | for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++); |
4800 | if( i<lemp->nsymbol ){ |
4801 | emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); |
4802 | fprintf(out," break;\n" ); lineno++; |
4803 | } |
4804 | } |
4805 | if( lemp->vardest ){ |
4806 | struct symbol *dflt_sp = 0; |
4807 | int once = 1; |
4808 | for(i=0; i<lemp->nsymbol; i++){ |
4809 | struct symbol *sp = lemp->symbols[i]; |
4810 | if( sp==0 || sp->type==TERMINAL || |
4811 | sp->index<=0 || sp->destructor!=0 ) continue; |
4812 | if( once ){ |
4813 | fprintf(out, " /* Default NON-TERMINAL Destructor */\n" );lineno++; |
4814 | once = 0; |
4815 | } |
4816 | fprintf(out," case %d: /* %s */\n" , sp->index, sp->name); lineno++; |
4817 | dflt_sp = sp; |
4818 | } |
4819 | if( dflt_sp!=0 ){ |
4820 | emit_destructor_code(out,dflt_sp,lemp,&lineno); |
4821 | } |
4822 | fprintf(out," break;\n" ); lineno++; |
4823 | } |
4824 | for(i=0; i<lemp->nsymbol; i++){ |
4825 | struct symbol *sp = lemp->symbols[i]; |
4826 | if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; |
4827 | if( sp->destLineno<0 ) continue; /* Already emitted */ |
4828 | fprintf(out," case %d: /* %s */\n" , sp->index, sp->name); lineno++; |
4829 | |
4830 | /* Combine duplicate destructors into a single case */ |
4831 | for(j=i+1; j<lemp->nsymbol; j++){ |
4832 | struct symbol *sp2 = lemp->symbols[j]; |
4833 | if( sp2 && sp2->type!=TERMINAL && sp2->destructor |
4834 | && sp2->dtnum==sp->dtnum |
4835 | && strcmp(sp->destructor,sp2->destructor)==0 ){ |
4836 | fprintf(out," case %d: /* %s */\n" , |
4837 | sp2->index, sp2->name); lineno++; |
4838 | sp2->destLineno = -1; /* Avoid emitting this destructor again */ |
4839 | } |
4840 | } |
4841 | |
4842 | emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); |
4843 | fprintf(out," break;\n" ); lineno++; |
4844 | } |
4845 | tplt_xfer(lemp->name,in,out,&lineno); |
4846 | |
4847 | /* Generate code which executes whenever the parser stack overflows */ |
4848 | tplt_print(out,lemp,lemp->overflow,&lineno); |
4849 | tplt_xfer(lemp->name,in,out,&lineno); |
4850 | |
4851 | /* Generate the tables of rule information. yyRuleInfoLhs[] and |
4852 | ** yyRuleInfoNRhs[]. |
4853 | ** |
4854 | ** Note: This code depends on the fact that rules are number |
4855 | ** sequentially beginning with 0. |
4856 | */ |
4857 | for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ |
4858 | fprintf(out," %4d, /* (%d) " , rp->lhs->index, i); |
4859 | rule_print(out, rp); |
4860 | fprintf(out," */\n" ); lineno++; |
4861 | } |
4862 | tplt_xfer(lemp->name,in,out,&lineno); |
4863 | for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ |
4864 | fprintf(out," %3d, /* (%d) " , -rp->nrhs, i); |
4865 | rule_print(out, rp); |
4866 | fprintf(out," */\n" ); lineno++; |
4867 | } |
4868 | tplt_xfer(lemp->name,in,out,&lineno); |
4869 | |
4870 | /* Generate code which execution during each REDUCE action */ |
4871 | i = 0; |
4872 | for(rp=lemp->rule; rp; rp=rp->next){ |
4873 | i += translate_code(lemp, rp); |
4874 | } |
4875 | if( i ){ |
4876 | fprintf(out," YYMINORTYPE yylhsminor;\n" ); lineno++; |
4877 | } |
4878 | /* First output rules other than the default: rule */ |
4879 | for(rp=lemp->rule; rp; rp=rp->next){ |
4880 | struct rule *rp2; /* Other rules with the same action */ |
4881 | if( rp->codeEmitted ) continue; |
4882 | if( rp->noCode ){ |
4883 | /* No C code actions, so this will be part of the "default:" rule */ |
4884 | continue; |
4885 | } |
4886 | fprintf(out," case %d: /* " , rp->iRule); |
4887 | writeRuleText(out, rp); |
4888 | fprintf(out, " */\n" ); lineno++; |
4889 | for(rp2=rp->next; rp2; rp2=rp2->next){ |
4890 | if( rp2->code==rp->code && rp2->codePrefix==rp->codePrefix |
4891 | && rp2->codeSuffix==rp->codeSuffix ){ |
4892 | fprintf(out," case %d: /* " , rp2->iRule); |
4893 | writeRuleText(out, rp2); |
4894 | fprintf(out," */ yytestcase(yyruleno==%d);\n" , rp2->iRule); lineno++; |
4895 | rp2->codeEmitted = 1; |
4896 | } |
4897 | } |
4898 | emit_code(out,rp,lemp,&lineno); |
4899 | fprintf(out," break;\n" ); lineno++; |
4900 | rp->codeEmitted = 1; |
4901 | } |
4902 | /* Finally, output the default: rule. We choose as the default: all |
4903 | ** empty actions. */ |
4904 | fprintf(out," default:\n" ); lineno++; |
4905 | for(rp=lemp->rule; rp; rp=rp->next){ |
4906 | if( rp->codeEmitted ) continue; |
4907 | assert( rp->noCode ); |
4908 | fprintf(out," /* (%d) " , rp->iRule); |
4909 | writeRuleText(out, rp); |
4910 | if( rp->neverReduce ){ |
4911 | fprintf(out, " (NEVER REDUCES) */ assert(yyruleno!=%d);\n" , |
4912 | rp->iRule); lineno++; |
4913 | }else if( rp->doesReduce ){ |
4914 | fprintf(out, " */ yytestcase(yyruleno==%d);\n" , rp->iRule); lineno++; |
4915 | }else{ |
4916 | fprintf(out, " (OPTIMIZED OUT) */ assert(yyruleno!=%d);\n" , |
4917 | rp->iRule); lineno++; |
4918 | } |
4919 | } |
4920 | fprintf(out," break;\n" ); lineno++; |
4921 | tplt_xfer(lemp->name,in,out,&lineno); |
4922 | |
4923 | /* Generate code which executes if a parse fails */ |
4924 | tplt_print(out,lemp,lemp->failure,&lineno); |
4925 | tplt_xfer(lemp->name,in,out,&lineno); |
4926 | |
4927 | /* Generate code which executes when a syntax error occurs */ |
4928 | tplt_print(out,lemp,lemp->error,&lineno); |
4929 | tplt_xfer(lemp->name,in,out,&lineno); |
4930 | |
4931 | /* Generate code which executes when the parser accepts its input */ |
4932 | tplt_print(out,lemp,lemp->accept,&lineno); |
4933 | tplt_xfer(lemp->name,in,out,&lineno); |
4934 | |
4935 | /* Append any addition code the user desires */ |
4936 | tplt_print(out,lemp,lemp->extracode,&lineno); |
4937 | |
4938 | acttab_free(pActtab); |
4939 | fclose(in); |
4940 | fclose(out); |
4941 | if( sql ) fclose(sql); |
4942 | return; |
4943 | } |
4944 | |
4945 | /* Generate a header file for the parser */ |
4946 | void (struct lemon *lemp) |
4947 | { |
4948 | FILE *out, *in; |
4949 | const char *prefix; |
4950 | char line[LINESIZE]; |
4951 | char pattern[LINESIZE]; |
4952 | int i; |
4953 | |
4954 | if( lemp->tokenprefix ) prefix = lemp->tokenprefix; |
4955 | else prefix = "" ; |
4956 | in = file_open(lemp,".h" ,"rb" ); |
4957 | if( in ){ |
4958 | int nextChar; |
4959 | for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ |
4960 | lemon_sprintf(pattern,"#define %s%-30s %3d\n" , |
4961 | prefix,lemp->symbols[i]->name,i); |
4962 | if( strcmp(line,pattern) ) break; |
4963 | } |
4964 | nextChar = fgetc(in); |
4965 | fclose(in); |
4966 | if( i==lemp->nterminal && nextChar==EOF ){ |
4967 | /* No change in the file. Don't rewrite it. */ |
4968 | return; |
4969 | } |
4970 | } |
4971 | out = file_open(lemp,".h" ,"wb" ); |
4972 | if( out ){ |
4973 | for(i=1; i<lemp->nterminal; i++){ |
4974 | fprintf(out,"#define %s%-30s %3d\n" ,prefix,lemp->symbols[i]->name,i); |
4975 | } |
4976 | fclose(out); |
4977 | } |
4978 | return; |
4979 | } |
4980 | |
4981 | /* Reduce the size of the action tables, if possible, by making use |
4982 | ** of defaults. |
4983 | ** |
4984 | ** In this version, we take the most frequent REDUCE action and make |
4985 | ** it the default. Except, there is no default if the wildcard token |
4986 | ** is a possible look-ahead. |
4987 | */ |
4988 | void CompressTables(struct lemon *lemp) |
4989 | { |
4990 | struct state *stp; |
4991 | struct action *ap, *ap2, *nextap; |
4992 | struct rule *rp, *rp2, *rbest; |
4993 | int nbest, n; |
4994 | int i; |
4995 | int usesWildcard; |
4996 | |
4997 | for(i=0; i<lemp->nstate; i++){ |
4998 | stp = lemp->sorted[i]; |
4999 | nbest = 0; |
5000 | rbest = 0; |
5001 | usesWildcard = 0; |
5002 | |
5003 | for(ap=stp->ap; ap; ap=ap->next){ |
5004 | if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ |
5005 | usesWildcard = 1; |
5006 | } |
5007 | if( ap->type!=REDUCE ) continue; |
5008 | rp = ap->x.rp; |
5009 | if( rp->lhsStart ) continue; |
5010 | if( rp==rbest ) continue; |
5011 | n = 1; |
5012 | for(ap2=ap->next; ap2; ap2=ap2->next){ |
5013 | if( ap2->type!=REDUCE ) continue; |
5014 | rp2 = ap2->x.rp; |
5015 | if( rp2==rbest ) continue; |
5016 | if( rp2==rp ) n++; |
5017 | } |
5018 | if( n>nbest ){ |
5019 | nbest = n; |
5020 | rbest = rp; |
5021 | } |
5022 | } |
5023 | |
5024 | /* Do not make a default if the number of rules to default |
5025 | ** is not at least 1 or if the wildcard token is a possible |
5026 | ** lookahead. |
5027 | */ |
5028 | if( nbest<1 || usesWildcard ) continue; |
5029 | |
5030 | |
5031 | /* Combine matching REDUCE actions into a single default */ |
5032 | for(ap=stp->ap; ap; ap=ap->next){ |
5033 | if( ap->type==REDUCE && ap->x.rp==rbest ) break; |
5034 | } |
5035 | assert( ap ); |
5036 | ap->sp = Symbol_new("{default}" ); |
5037 | for(ap=ap->next; ap; ap=ap->next){ |
5038 | if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; |
5039 | } |
5040 | stp->ap = Action_sort(stp->ap); |
5041 | |
5042 | for(ap=stp->ap; ap; ap=ap->next){ |
5043 | if( ap->type==SHIFT ) break; |
5044 | if( ap->type==REDUCE && ap->x.rp!=rbest ) break; |
5045 | } |
5046 | if( ap==0 ){ |
5047 | stp->autoReduce = 1; |
5048 | stp->pDfltReduce = rbest; |
5049 | } |
5050 | } |
5051 | |
5052 | /* Make a second pass over all states and actions. Convert |
5053 | ** every action that is a SHIFT to an autoReduce state into |
5054 | ** a SHIFTREDUCE action. |
5055 | */ |
5056 | for(i=0; i<lemp->nstate; i++){ |
5057 | stp = lemp->sorted[i]; |
5058 | for(ap=stp->ap; ap; ap=ap->next){ |
5059 | struct state *pNextState; |
5060 | if( ap->type!=SHIFT ) continue; |
5061 | pNextState = ap->x.stp; |
5062 | if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ |
5063 | ap->type = SHIFTREDUCE; |
5064 | ap->x.rp = pNextState->pDfltReduce; |
5065 | } |
5066 | } |
5067 | } |
5068 | |
5069 | /* If a SHIFTREDUCE action specifies a rule that has a single RHS term |
5070 | ** (meaning that the SHIFTREDUCE will land back in the state where it |
5071 | ** started) and if there is no C-code associated with the reduce action, |
5072 | ** then we can go ahead and convert the action to be the same as the |
5073 | ** action for the RHS of the rule. |
5074 | */ |
5075 | for(i=0; i<lemp->nstate; i++){ |
5076 | stp = lemp->sorted[i]; |
5077 | for(ap=stp->ap; ap; ap=nextap){ |
5078 | nextap = ap->next; |
5079 | if( ap->type!=SHIFTREDUCE ) continue; |
5080 | rp = ap->x.rp; |
5081 | if( rp->noCode==0 ) continue; |
5082 | if( rp->nrhs!=1 ) continue; |
5083 | #if 1 |
5084 | /* Only apply this optimization to non-terminals. It would be OK to |
5085 | ** apply it to terminal symbols too, but that makes the parser tables |
5086 | ** larger. */ |
5087 | if( ap->sp->index<lemp->nterminal ) continue; |
5088 | #endif |
5089 | /* If we reach this point, it means the optimization can be applied */ |
5090 | nextap = ap; |
5091 | for(ap2=stp->ap; ap2 && (ap2==ap || ap2->sp!=rp->lhs); ap2=ap2->next){} |
5092 | assert( ap2!=0 ); |
5093 | ap->spOpt = ap2->sp; |
5094 | ap->type = ap2->type; |
5095 | ap->x = ap2->x; |
5096 | } |
5097 | } |
5098 | } |
5099 | |
5100 | |
5101 | /* |
5102 | ** Compare two states for sorting purposes. The smaller state is the |
5103 | ** one with the most non-terminal actions. If they have the same number |
5104 | ** of non-terminal actions, then the smaller is the one with the most |
5105 | ** token actions. |
5106 | */ |
5107 | static int stateResortCompare(const void *a, const void *b){ |
5108 | const struct state *pA = *(const struct state**)a; |
5109 | const struct state *pB = *(const struct state**)b; |
5110 | int n; |
5111 | |
5112 | n = pB->nNtAct - pA->nNtAct; |
5113 | if( n==0 ){ |
5114 | n = pB->nTknAct - pA->nTknAct; |
5115 | if( n==0 ){ |
5116 | n = pB->statenum - pA->statenum; |
5117 | } |
5118 | } |
5119 | assert( n!=0 ); |
5120 | return n; |
5121 | } |
5122 | |
5123 | |
5124 | /* |
5125 | ** Renumber and resort states so that states with fewer choices |
5126 | ** occur at the end. Except, keep state 0 as the first state. |
5127 | */ |
5128 | void ResortStates(struct lemon *lemp) |
5129 | { |
5130 | int i; |
5131 | struct state *stp; |
5132 | struct action *ap; |
5133 | |
5134 | for(i=0; i<lemp->nstate; i++){ |
5135 | stp = lemp->sorted[i]; |
5136 | stp->nTknAct = stp->nNtAct = 0; |
5137 | stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */ |
5138 | stp->iTknOfst = NO_OFFSET; |
5139 | stp->iNtOfst = NO_OFFSET; |
5140 | for(ap=stp->ap; ap; ap=ap->next){ |
5141 | int iAction = compute_action(lemp,ap); |
5142 | if( iAction>=0 ){ |
5143 | if( ap->sp->index<lemp->nterminal ){ |
5144 | stp->nTknAct++; |
5145 | }else if( ap->sp->index<lemp->nsymbol ){ |
5146 | stp->nNtAct++; |
5147 | }else{ |
5148 | assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); |
5149 | stp->iDfltReduce = iAction; |
5150 | } |
5151 | } |
5152 | } |
5153 | } |
5154 | qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), |
5155 | stateResortCompare); |
5156 | for(i=0; i<lemp->nstate; i++){ |
5157 | lemp->sorted[i]->statenum = i; |
5158 | } |
5159 | lemp->nxstate = lemp->nstate; |
5160 | while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ |
5161 | lemp->nxstate--; |
5162 | } |
5163 | } |
5164 | |
5165 | |
5166 | /***************** From the file "set.c" ************************************/ |
5167 | /* |
5168 | ** Set manipulation routines for the LEMON parser generator. |
5169 | */ |
5170 | |
5171 | static int size = 0; |
5172 | |
5173 | /* Set the set size */ |
5174 | void SetSize(int n) |
5175 | { |
5176 | size = n+1; |
5177 | } |
5178 | |
5179 | /* Allocate a new set */ |
5180 | char *SetNew(void){ |
5181 | char *s; |
5182 | s = (char*)calloc( size, 1); |
5183 | if( s==0 ){ |
5184 | memory_error(); |
5185 | } |
5186 | return s; |
5187 | } |
5188 | |
5189 | /* Deallocate a set */ |
5190 | void SetFree(char *s) |
5191 | { |
5192 | free(s); |
5193 | } |
5194 | |
5195 | /* Add a new element to the set. Return TRUE if the element was added |
5196 | ** and FALSE if it was already there. */ |
5197 | int SetAdd(char *s, int e) |
5198 | { |
5199 | int rv; |
5200 | assert( e>=0 && e<size ); |
5201 | rv = s[e]; |
5202 | s[e] = 1; |
5203 | return !rv; |
5204 | } |
5205 | |
5206 | /* Add every element of s2 to s1. Return TRUE if s1 changes. */ |
5207 | int SetUnion(char *s1, char *s2) |
5208 | { |
5209 | int i, progress; |
5210 | progress = 0; |
5211 | for(i=0; i<size; i++){ |
5212 | if( s2[i]==0 ) continue; |
5213 | if( s1[i]==0 ){ |
5214 | progress = 1; |
5215 | s1[i] = 1; |
5216 | } |
5217 | } |
5218 | return progress; |
5219 | } |
5220 | /********************** From the file "table.c" ****************************/ |
5221 | /* |
5222 | ** All code in this file has been automatically generated |
5223 | ** from a specification in the file |
5224 | ** "table.q" |
5225 | ** by the associative array code building program "aagen". |
5226 | ** Do not edit this file! Instead, edit the specification |
5227 | ** file, then rerun aagen. |
5228 | */ |
5229 | /* |
5230 | ** Code for processing tables in the LEMON parser generator. |
5231 | */ |
5232 | |
5233 | PRIVATE unsigned strhash(const char *x) |
5234 | { |
5235 | unsigned h = 0; |
5236 | while( *x ) h = h*13 + *(x++); |
5237 | return h; |
5238 | } |
5239 | |
5240 | /* Works like strdup, sort of. Save a string in malloced memory, but |
5241 | ** keep strings in a table so that the same string is not in more |
5242 | ** than one place. |
5243 | */ |
5244 | const char *Strsafe(const char *y) |
5245 | { |
5246 | const char *z; |
5247 | char *cpy; |
5248 | |
5249 | if( y==0 ) return 0; |
5250 | z = Strsafe_find(y); |
5251 | if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){ |
5252 | lemon_strcpy(cpy,y); |
5253 | z = cpy; |
5254 | Strsafe_insert(z); |
5255 | } |
5256 | MemoryCheck(z); |
5257 | return z; |
5258 | } |
5259 | |
5260 | /* There is one instance of the following structure for each |
5261 | ** associative array of type "x1". |
5262 | */ |
5263 | struct s_x1 { |
5264 | int size; /* The number of available slots. */ |
5265 | /* Must be a power of 2 greater than or */ |
5266 | /* equal to 1 */ |
5267 | int count; /* Number of currently slots filled */ |
5268 | struct s_x1node *tbl; /* The data stored here */ |
5269 | struct s_x1node **ht; /* Hash table for lookups */ |
5270 | }; |
5271 | |
5272 | /* There is one instance of this structure for every data element |
5273 | ** in an associative array of type "x1". |
5274 | */ |
5275 | typedef struct s_x1node { |
5276 | const char *data; /* The data */ |
5277 | struct s_x1node *next; /* Next entry with the same hash */ |
5278 | struct s_x1node **from; /* Previous link */ |
5279 | } x1node; |
5280 | |
5281 | /* There is only one instance of the array, which is the following */ |
5282 | static struct s_x1 *x1a; |
5283 | |
5284 | /* Allocate a new associative array */ |
5285 | void Strsafe_init(void){ |
5286 | if( x1a ) return; |
5287 | x1a = (struct s_x1*)malloc( sizeof(struct s_x1) ); |
5288 | if( x1a ){ |
5289 | x1a->size = 1024; |
5290 | x1a->count = 0; |
5291 | x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); |
5292 | if( x1a->tbl==0 ){ |
5293 | free(x1a); |
5294 | x1a = 0; |
5295 | }else{ |
5296 | int i; |
5297 | x1a->ht = (x1node**)&(x1a->tbl[1024]); |
5298 | for(i=0; i<1024; i++) x1a->ht[i] = 0; |
5299 | } |
5300 | } |
5301 | } |
5302 | /* Insert a new record into the array. Return TRUE if successful. |
5303 | ** Prior data with the same key is NOT overwritten */ |
5304 | int Strsafe_insert(const char *data) |
5305 | { |
5306 | x1node *np; |
5307 | unsigned h; |
5308 | unsigned ph; |
5309 | |
5310 | if( x1a==0 ) return 0; |
5311 | ph = strhash(data); |
5312 | h = ph & (x1a->size-1); |
5313 | np = x1a->ht[h]; |
5314 | while( np ){ |
5315 | if( strcmp(np->data,data)==0 ){ |
5316 | /* An existing entry with the same key is found. */ |
5317 | /* Fail because overwrite is not allows. */ |
5318 | return 0; |
5319 | } |
5320 | np = np->next; |
5321 | } |
5322 | if( x1a->count>=x1a->size ){ |
5323 | /* Need to make the hash table bigger */ |
5324 | int i,arrSize; |
5325 | struct s_x1 array; |
5326 | array.size = arrSize = x1a->size*2; |
5327 | array.count = x1a->count; |
5328 | array.tbl = (x1node*)calloc(arrSize, sizeof(x1node) + sizeof(x1node*)); |
5329 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ |
5330 | array.ht = (x1node**)&(array.tbl[arrSize]); |
5331 | for(i=0; i<arrSize; i++) array.ht[i] = 0; |
5332 | for(i=0; i<x1a->count; i++){ |
5333 | x1node *oldnp, *newnp; |
5334 | oldnp = &(x1a->tbl[i]); |
5335 | h = strhash(oldnp->data) & (arrSize-1); |
5336 | newnp = &(array.tbl[i]); |
5337 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); |
5338 | newnp->next = array.ht[h]; |
5339 | newnp->data = oldnp->data; |
5340 | newnp->from = &(array.ht[h]); |
5341 | array.ht[h] = newnp; |
5342 | } |
5343 | /* free(x1a->tbl); // This program was originally for 16-bit machines. |
5344 | ** Don't worry about freeing memory on modern platforms. */ |
5345 | *x1a = array; |
5346 | } |
5347 | /* Insert the new data */ |
5348 | h = ph & (x1a->size-1); |
5349 | np = &(x1a->tbl[x1a->count++]); |
5350 | np->data = data; |
5351 | if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); |
5352 | np->next = x1a->ht[h]; |
5353 | x1a->ht[h] = np; |
5354 | np->from = &(x1a->ht[h]); |
5355 | return 1; |
5356 | } |
5357 | |
5358 | /* Return a pointer to data assigned to the given key. Return NULL |
5359 | ** if no such key. */ |
5360 | const char *Strsafe_find(const char *key) |
5361 | { |
5362 | unsigned h; |
5363 | x1node *np; |
5364 | |
5365 | if( x1a==0 ) return 0; |
5366 | h = strhash(key) & (x1a->size-1); |
5367 | np = x1a->ht[h]; |
5368 | while( np ){ |
5369 | if( strcmp(np->data,key)==0 ) break; |
5370 | np = np->next; |
5371 | } |
5372 | return np ? np->data : 0; |
5373 | } |
5374 | |
5375 | /* Return a pointer to the (terminal or nonterminal) symbol "x". |
5376 | ** Create a new symbol if this is the first time "x" has been seen. |
5377 | */ |
5378 | struct symbol *Symbol_new(const char *x) |
5379 | { |
5380 | struct symbol *sp; |
5381 | |
5382 | sp = Symbol_find(x); |
5383 | if( sp==0 ){ |
5384 | sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); |
5385 | MemoryCheck(sp); |
5386 | sp->name = Strsafe(x); |
5387 | sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; |
5388 | sp->rule = 0; |
5389 | sp->fallback = 0; |
5390 | sp->prec = -1; |
5391 | sp->assoc = UNK; |
5392 | sp->firstset = 0; |
5393 | sp->lambda = LEMON_FALSE; |
5394 | sp->destructor = 0; |
5395 | sp->destLineno = 0; |
5396 | sp->datatype = 0; |
5397 | sp->useCnt = 0; |
5398 | Symbol_insert(sp,sp->name); |
5399 | } |
5400 | sp->useCnt++; |
5401 | return sp; |
5402 | } |
5403 | |
5404 | /* Compare two symbols for sorting purposes. Return negative, |
5405 | ** zero, or positive if a is less then, equal to, or greater |
5406 | ** than b. |
5407 | ** |
5408 | ** Symbols that begin with upper case letters (terminals or tokens) |
5409 | ** must sort before symbols that begin with lower case letters |
5410 | ** (non-terminals). And MULTITERMINAL symbols (created using the |
5411 | ** %token_class directive) must sort at the very end. Other than |
5412 | ** that, the order does not matter. |
5413 | ** |
5414 | ** We find experimentally that leaving the symbols in their original |
5415 | ** order (the order they appeared in the grammar file) gives the |
5416 | ** smallest parser tables in SQLite. |
5417 | */ |
5418 | int Symbolcmpp(const void *_a, const void *_b) |
5419 | { |
5420 | const struct symbol *a = *(const struct symbol **) _a; |
5421 | const struct symbol *b = *(const struct symbol **) _b; |
5422 | int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; |
5423 | int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; |
5424 | return i1==i2 ? a->index - b->index : i1 - i2; |
5425 | } |
5426 | |
5427 | /* There is one instance of the following structure for each |
5428 | ** associative array of type "x2". |
5429 | */ |
5430 | struct s_x2 { |
5431 | int size; /* The number of available slots. */ |
5432 | /* Must be a power of 2 greater than or */ |
5433 | /* equal to 1 */ |
5434 | int count; /* Number of currently slots filled */ |
5435 | struct s_x2node *tbl; /* The data stored here */ |
5436 | struct s_x2node **ht; /* Hash table for lookups */ |
5437 | }; |
5438 | |
5439 | /* There is one instance of this structure for every data element |
5440 | ** in an associative array of type "x2". |
5441 | */ |
5442 | typedef struct s_x2node { |
5443 | struct symbol *data; /* The data */ |
5444 | const char *key; /* The key */ |
5445 | struct s_x2node *next; /* Next entry with the same hash */ |
5446 | struct s_x2node **from; /* Previous link */ |
5447 | } x2node; |
5448 | |
5449 | /* There is only one instance of the array, which is the following */ |
5450 | static struct s_x2 *x2a; |
5451 | |
5452 | /* Allocate a new associative array */ |
5453 | void Symbol_init(void){ |
5454 | if( x2a ) return; |
5455 | x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); |
5456 | if( x2a ){ |
5457 | x2a->size = 128; |
5458 | x2a->count = 0; |
5459 | x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*)); |
5460 | if( x2a->tbl==0 ){ |
5461 | free(x2a); |
5462 | x2a = 0; |
5463 | }else{ |
5464 | int i; |
5465 | x2a->ht = (x2node**)&(x2a->tbl[128]); |
5466 | for(i=0; i<128; i++) x2a->ht[i] = 0; |
5467 | } |
5468 | } |
5469 | } |
5470 | /* Insert a new record into the array. Return TRUE if successful. |
5471 | ** Prior data with the same key is NOT overwritten */ |
5472 | int Symbol_insert(struct symbol *data, const char *key) |
5473 | { |
5474 | x2node *np; |
5475 | unsigned h; |
5476 | unsigned ph; |
5477 | |
5478 | if( x2a==0 ) return 0; |
5479 | ph = strhash(key); |
5480 | h = ph & (x2a->size-1); |
5481 | np = x2a->ht[h]; |
5482 | while( np ){ |
5483 | if( strcmp(np->key,key)==0 ){ |
5484 | /* An existing entry with the same key is found. */ |
5485 | /* Fail because overwrite is not allows. */ |
5486 | return 0; |
5487 | } |
5488 | np = np->next; |
5489 | } |
5490 | if( x2a->count>=x2a->size ){ |
5491 | /* Need to make the hash table bigger */ |
5492 | int i,arrSize; |
5493 | struct s_x2 array; |
5494 | array.size = arrSize = x2a->size*2; |
5495 | array.count = x2a->count; |
5496 | array.tbl = (x2node*)calloc(arrSize, sizeof(x2node) + sizeof(x2node*)); |
5497 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ |
5498 | array.ht = (x2node**)&(array.tbl[arrSize]); |
5499 | for(i=0; i<arrSize; i++) array.ht[i] = 0; |
5500 | for(i=0; i<x2a->count; i++){ |
5501 | x2node *oldnp, *newnp; |
5502 | oldnp = &(x2a->tbl[i]); |
5503 | h = strhash(oldnp->key) & (arrSize-1); |
5504 | newnp = &(array.tbl[i]); |
5505 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); |
5506 | newnp->next = array.ht[h]; |
5507 | newnp->key = oldnp->key; |
5508 | newnp->data = oldnp->data; |
5509 | newnp->from = &(array.ht[h]); |
5510 | array.ht[h] = newnp; |
5511 | } |
5512 | /* free(x2a->tbl); // This program was originally written for 16-bit |
5513 | ** machines. Don't worry about freeing this trivial amount of memory |
5514 | ** on modern platforms. Just leak it. */ |
5515 | *x2a = array; |
5516 | } |
5517 | /* Insert the new data */ |
5518 | h = ph & (x2a->size-1); |
5519 | np = &(x2a->tbl[x2a->count++]); |
5520 | np->key = key; |
5521 | np->data = data; |
5522 | if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); |
5523 | np->next = x2a->ht[h]; |
5524 | x2a->ht[h] = np; |
5525 | np->from = &(x2a->ht[h]); |
5526 | return 1; |
5527 | } |
5528 | |
5529 | /* Return a pointer to data assigned to the given key. Return NULL |
5530 | ** if no such key. */ |
5531 | struct symbol *Symbol_find(const char *key) |
5532 | { |
5533 | unsigned h; |
5534 | x2node *np; |
5535 | |
5536 | if( x2a==0 ) return 0; |
5537 | h = strhash(key) & (x2a->size-1); |
5538 | np = x2a->ht[h]; |
5539 | while( np ){ |
5540 | if( strcmp(np->key,key)==0 ) break; |
5541 | np = np->next; |
5542 | } |
5543 | return np ? np->data : 0; |
5544 | } |
5545 | |
5546 | /* Return the n-th data. Return NULL if n is out of range. */ |
5547 | struct symbol *Symbol_Nth(int n) |
5548 | { |
5549 | struct symbol *data; |
5550 | if( x2a && n>0 && n<=x2a->count ){ |
5551 | data = x2a->tbl[n-1].data; |
5552 | }else{ |
5553 | data = 0; |
5554 | } |
5555 | return data; |
5556 | } |
5557 | |
5558 | /* Return the size of the array */ |
5559 | int Symbol_count() |
5560 | { |
5561 | return x2a ? x2a->count : 0; |
5562 | } |
5563 | |
5564 | /* Return an array of pointers to all data in the table. |
5565 | ** The array is obtained from malloc. Return NULL if memory allocation |
5566 | ** problems, or if the array is empty. */ |
5567 | struct symbol **Symbol_arrayof() |
5568 | { |
5569 | struct symbol **array; |
5570 | int i,arrSize; |
5571 | if( x2a==0 ) return 0; |
5572 | arrSize = x2a->count; |
5573 | array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); |
5574 | if( array ){ |
5575 | for(i=0; i<arrSize; i++) array[i] = x2a->tbl[i].data; |
5576 | } |
5577 | return array; |
5578 | } |
5579 | |
5580 | /* Compare two configurations */ |
5581 | int Configcmp(const char *_a,const char *_b) |
5582 | { |
5583 | const struct config *a = (struct config *) _a; |
5584 | const struct config *b = (struct config *) _b; |
5585 | int x; |
5586 | x = a->rp->index - b->rp->index; |
5587 | if( x==0 ) x = a->dot - b->dot; |
5588 | return x; |
5589 | } |
5590 | |
5591 | /* Compare two states */ |
5592 | PRIVATE int statecmp(struct config *a, struct config *b) |
5593 | { |
5594 | int rc; |
5595 | for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ |
5596 | rc = a->rp->index - b->rp->index; |
5597 | if( rc==0 ) rc = a->dot - b->dot; |
5598 | } |
5599 | if( rc==0 ){ |
5600 | if( a ) rc = 1; |
5601 | if( b ) rc = -1; |
5602 | } |
5603 | return rc; |
5604 | } |
5605 | |
5606 | /* Hash a state */ |
5607 | PRIVATE unsigned statehash(struct config *a) |
5608 | { |
5609 | unsigned h=0; |
5610 | while( a ){ |
5611 | h = h*571 + a->rp->index*37 + a->dot; |
5612 | a = a->bp; |
5613 | } |
5614 | return h; |
5615 | } |
5616 | |
5617 | /* Allocate a new state structure */ |
5618 | struct state *State_new() |
5619 | { |
5620 | struct state *newstate; |
5621 | newstate = (struct state *)calloc(1, sizeof(struct state) ); |
5622 | MemoryCheck(newstate); |
5623 | return newstate; |
5624 | } |
5625 | |
5626 | /* There is one instance of the following structure for each |
5627 | ** associative array of type "x3". |
5628 | */ |
5629 | struct s_x3 { |
5630 | int size; /* The number of available slots. */ |
5631 | /* Must be a power of 2 greater than or */ |
5632 | /* equal to 1 */ |
5633 | int count; /* Number of currently slots filled */ |
5634 | struct s_x3node *tbl; /* The data stored here */ |
5635 | struct s_x3node **ht; /* Hash table for lookups */ |
5636 | }; |
5637 | |
5638 | /* There is one instance of this structure for every data element |
5639 | ** in an associative array of type "x3". |
5640 | */ |
5641 | typedef struct s_x3node { |
5642 | struct state *data; /* The data */ |
5643 | struct config *key; /* The key */ |
5644 | struct s_x3node *next; /* Next entry with the same hash */ |
5645 | struct s_x3node **from; /* Previous link */ |
5646 | } x3node; |
5647 | |
5648 | /* There is only one instance of the array, which is the following */ |
5649 | static struct s_x3 *x3a; |
5650 | |
5651 | /* Allocate a new associative array */ |
5652 | void State_init(void){ |
5653 | if( x3a ) return; |
5654 | x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); |
5655 | if( x3a ){ |
5656 | x3a->size = 128; |
5657 | x3a->count = 0; |
5658 | x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*)); |
5659 | if( x3a->tbl==0 ){ |
5660 | free(x3a); |
5661 | x3a = 0; |
5662 | }else{ |
5663 | int i; |
5664 | x3a->ht = (x3node**)&(x3a->tbl[128]); |
5665 | for(i=0; i<128; i++) x3a->ht[i] = 0; |
5666 | } |
5667 | } |
5668 | } |
5669 | /* Insert a new record into the array. Return TRUE if successful. |
5670 | ** Prior data with the same key is NOT overwritten */ |
5671 | int State_insert(struct state *data, struct config *key) |
5672 | { |
5673 | x3node *np; |
5674 | unsigned h; |
5675 | unsigned ph; |
5676 | |
5677 | if( x3a==0 ) return 0; |
5678 | ph = statehash(key); |
5679 | h = ph & (x3a->size-1); |
5680 | np = x3a->ht[h]; |
5681 | while( np ){ |
5682 | if( statecmp(np->key,key)==0 ){ |
5683 | /* An existing entry with the same key is found. */ |
5684 | /* Fail because overwrite is not allows. */ |
5685 | return 0; |
5686 | } |
5687 | np = np->next; |
5688 | } |
5689 | if( x3a->count>=x3a->size ){ |
5690 | /* Need to make the hash table bigger */ |
5691 | int i,arrSize; |
5692 | struct s_x3 array; |
5693 | array.size = arrSize = x3a->size*2; |
5694 | array.count = x3a->count; |
5695 | array.tbl = (x3node*)calloc(arrSize, sizeof(x3node) + sizeof(x3node*)); |
5696 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ |
5697 | array.ht = (x3node**)&(array.tbl[arrSize]); |
5698 | for(i=0; i<arrSize; i++) array.ht[i] = 0; |
5699 | for(i=0; i<x3a->count; i++){ |
5700 | x3node *oldnp, *newnp; |
5701 | oldnp = &(x3a->tbl[i]); |
5702 | h = statehash(oldnp->key) & (arrSize-1); |
5703 | newnp = &(array.tbl[i]); |
5704 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); |
5705 | newnp->next = array.ht[h]; |
5706 | newnp->key = oldnp->key; |
5707 | newnp->data = oldnp->data; |
5708 | newnp->from = &(array.ht[h]); |
5709 | array.ht[h] = newnp; |
5710 | } |
5711 | free(x3a->tbl); |
5712 | *x3a = array; |
5713 | } |
5714 | /* Insert the new data */ |
5715 | h = ph & (x3a->size-1); |
5716 | np = &(x3a->tbl[x3a->count++]); |
5717 | np->key = key; |
5718 | np->data = data; |
5719 | if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); |
5720 | np->next = x3a->ht[h]; |
5721 | x3a->ht[h] = np; |
5722 | np->from = &(x3a->ht[h]); |
5723 | return 1; |
5724 | } |
5725 | |
5726 | /* Return a pointer to data assigned to the given key. Return NULL |
5727 | ** if no such key. */ |
5728 | struct state *State_find(struct config *key) |
5729 | { |
5730 | unsigned h; |
5731 | x3node *np; |
5732 | |
5733 | if( x3a==0 ) return 0; |
5734 | h = statehash(key) & (x3a->size-1); |
5735 | np = x3a->ht[h]; |
5736 | while( np ){ |
5737 | if( statecmp(np->key,key)==0 ) break; |
5738 | np = np->next; |
5739 | } |
5740 | return np ? np->data : 0; |
5741 | } |
5742 | |
5743 | /* Return an array of pointers to all data in the table. |
5744 | ** The array is obtained from malloc. Return NULL if memory allocation |
5745 | ** problems, or if the array is empty. */ |
5746 | struct state **State_arrayof(void) |
5747 | { |
5748 | struct state **array; |
5749 | int i,arrSize; |
5750 | if( x3a==0 ) return 0; |
5751 | arrSize = x3a->count; |
5752 | array = (struct state **)calloc(arrSize, sizeof(struct state *)); |
5753 | if( array ){ |
5754 | for(i=0; i<arrSize; i++) array[i] = x3a->tbl[i].data; |
5755 | } |
5756 | return array; |
5757 | } |
5758 | |
5759 | /* Hash a configuration */ |
5760 | PRIVATE unsigned confighash(struct config *a) |
5761 | { |
5762 | unsigned h=0; |
5763 | h = h*571 + a->rp->index*37 + a->dot; |
5764 | return h; |
5765 | } |
5766 | |
5767 | /* There is one instance of the following structure for each |
5768 | ** associative array of type "x4". |
5769 | */ |
5770 | struct s_x4 { |
5771 | int size; /* The number of available slots. */ |
5772 | /* Must be a power of 2 greater than or */ |
5773 | /* equal to 1 */ |
5774 | int count; /* Number of currently slots filled */ |
5775 | struct s_x4node *tbl; /* The data stored here */ |
5776 | struct s_x4node **ht; /* Hash table for lookups */ |
5777 | }; |
5778 | |
5779 | /* There is one instance of this structure for every data element |
5780 | ** in an associative array of type "x4". |
5781 | */ |
5782 | typedef struct s_x4node { |
5783 | struct config *data; /* The data */ |
5784 | struct s_x4node *next; /* Next entry with the same hash */ |
5785 | struct s_x4node **from; /* Previous link */ |
5786 | } x4node; |
5787 | |
5788 | /* There is only one instance of the array, which is the following */ |
5789 | static struct s_x4 *x4a; |
5790 | |
5791 | /* Allocate a new associative array */ |
5792 | void Configtable_init(void){ |
5793 | if( x4a ) return; |
5794 | x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); |
5795 | if( x4a ){ |
5796 | x4a->size = 64; |
5797 | x4a->count = 0; |
5798 | x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*)); |
5799 | if( x4a->tbl==0 ){ |
5800 | free(x4a); |
5801 | x4a = 0; |
5802 | }else{ |
5803 | int i; |
5804 | x4a->ht = (x4node**)&(x4a->tbl[64]); |
5805 | for(i=0; i<64; i++) x4a->ht[i] = 0; |
5806 | } |
5807 | } |
5808 | } |
5809 | /* Insert a new record into the array. Return TRUE if successful. |
5810 | ** Prior data with the same key is NOT overwritten */ |
5811 | int Configtable_insert(struct config *data) |
5812 | { |
5813 | x4node *np; |
5814 | unsigned h; |
5815 | unsigned ph; |
5816 | |
5817 | if( x4a==0 ) return 0; |
5818 | ph = confighash(data); |
5819 | h = ph & (x4a->size-1); |
5820 | np = x4a->ht[h]; |
5821 | while( np ){ |
5822 | if( Configcmp((const char *) np->data,(const char *) data)==0 ){ |
5823 | /* An existing entry with the same key is found. */ |
5824 | /* Fail because overwrite is not allows. */ |
5825 | return 0; |
5826 | } |
5827 | np = np->next; |
5828 | } |
5829 | if( x4a->count>=x4a->size ){ |
5830 | /* Need to make the hash table bigger */ |
5831 | int i,arrSize; |
5832 | struct s_x4 array; |
5833 | array.size = arrSize = x4a->size*2; |
5834 | array.count = x4a->count; |
5835 | array.tbl = (x4node*)calloc(arrSize, sizeof(x4node) + sizeof(x4node*)); |
5836 | if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ |
5837 | array.ht = (x4node**)&(array.tbl[arrSize]); |
5838 | for(i=0; i<arrSize; i++) array.ht[i] = 0; |
5839 | for(i=0; i<x4a->count; i++){ |
5840 | x4node *oldnp, *newnp; |
5841 | oldnp = &(x4a->tbl[i]); |
5842 | h = confighash(oldnp->data) & (arrSize-1); |
5843 | newnp = &(array.tbl[i]); |
5844 | if( array.ht[h] ) array.ht[h]->from = &(newnp->next); |
5845 | newnp->next = array.ht[h]; |
5846 | newnp->data = oldnp->data; |
5847 | newnp->from = &(array.ht[h]); |
5848 | array.ht[h] = newnp; |
5849 | } |
5850 | /* free(x4a->tbl); // This code was originall written for 16-bit machines. |
5851 | ** on modern machines, don't worry about freeing this trival amount of |
5852 | ** memory. */ |
5853 | *x4a = array; |
5854 | } |
5855 | /* Insert the new data */ |
5856 | h = ph & (x4a->size-1); |
5857 | np = &(x4a->tbl[x4a->count++]); |
5858 | np->data = data; |
5859 | if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); |
5860 | np->next = x4a->ht[h]; |
5861 | x4a->ht[h] = np; |
5862 | np->from = &(x4a->ht[h]); |
5863 | return 1; |
5864 | } |
5865 | |
5866 | /* Return a pointer to data assigned to the given key. Return NULL |
5867 | ** if no such key. */ |
5868 | struct config *Configtable_find(struct config *key) |
5869 | { |
5870 | int h; |
5871 | x4node *np; |
5872 | |
5873 | if( x4a==0 ) return 0; |
5874 | h = confighash(key) & (x4a->size-1); |
5875 | np = x4a->ht[h]; |
5876 | while( np ){ |
5877 | if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; |
5878 | np = np->next; |
5879 | } |
5880 | return np ? np->data : 0; |
5881 | } |
5882 | |
5883 | /* Remove all data from the table. Pass each data to the function "f" |
5884 | ** as it is removed. ("f" may be null to avoid this step.) */ |
5885 | void Configtable_clear(int(*f)(struct config *)) |
5886 | { |
5887 | int i; |
5888 | if( x4a==0 || x4a->count==0 ) return; |
5889 | if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data); |
5890 | for(i=0; i<x4a->size; i++) x4a->ht[i] = 0; |
5891 | x4a->count = 0; |
5892 | return; |
5893 | } |
5894 | |