1 | /* Input parser for Bison |
2 | |
3 | Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007, |
4 | 2009-2015, 2018-2019 Free Software Foundation, Inc. |
5 | |
6 | This file is part of Bison, the GNU Compiler Compiler. |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include <config.h> |
22 | #include "system.h" |
23 | |
24 | #include <quote.h> |
25 | |
26 | #include "complain.h" |
27 | #include "conflicts.h" |
28 | #include "files.h" |
29 | #include "fixits.h" |
30 | #include "getargs.h" |
31 | #include "gram.h" |
32 | #include "muscle-tab.h" |
33 | #include "reader.h" |
34 | #include "symlist.h" |
35 | #include "symtab.h" |
36 | #include "scan-gram.h" |
37 | #include "scan-code.h" |
38 | |
39 | static void prepare_percent_define_front_end_variables (void); |
40 | static void check_and_convert_grammar (void); |
41 | |
42 | static symbol_list *grammar = NULL; |
43 | static bool start_flag = false; |
44 | merger_list *merge_functions; |
45 | |
46 | /* Was %union seen? */ |
47 | bool union_seen = false; |
48 | |
49 | /* Should rules have a default precedence? */ |
50 | bool default_prec = true; |
51 | |
52 | /*-----------------------. |
53 | | Set the start symbol. | |
54 | `-----------------------*/ |
55 | |
56 | void |
57 | grammar_start_symbol_set (symbol *sym, location loc) |
58 | { |
59 | if (start_flag) |
60 | complain (&loc, complaint, _("multiple %s declarations" ), "%start" ); |
61 | else |
62 | { |
63 | start_flag = true; |
64 | startsymbol = sym; |
65 | startsymbol_loc = loc; |
66 | } |
67 | } |
68 | |
69 | |
70 | |
71 | /*------------------------------------------------------------------------. |
72 | | Return the merger index for a merging function named NAME. Records the | |
73 | | function, if new, in MERGER_LIST. | |
74 | `------------------------------------------------------------------------*/ |
75 | |
76 | static int |
77 | get_merge_function (uniqstr name) |
78 | { |
79 | if (! glr_parser) |
80 | return 0; |
81 | |
82 | merger_list *syms; |
83 | merger_list head; |
84 | int n; |
85 | |
86 | head.next = merge_functions; |
87 | for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1) |
88 | if (UNIQSTR_EQ (name, syms->next->name)) |
89 | break; |
90 | if (syms->next == NULL) |
91 | { |
92 | syms->next = xmalloc (sizeof syms->next[0]); |
93 | syms->next->name = uniqstr_new (name); |
94 | /* After all symbol type declarations have been parsed, packgram invokes |
95 | record_merge_function_type to set the type. */ |
96 | syms->next->type = NULL; |
97 | syms->next->next = NULL; |
98 | merge_functions = head.next; |
99 | } |
100 | return n; |
101 | } |
102 | |
103 | /*-------------------------------------------------------------------------. |
104 | | For the existing merging function with index MERGER, record the result | |
105 | | type as TYPE as required by the lhs of the rule whose %merge declaration | |
106 | | is at DECLARATION_LOC. | |
107 | `-------------------------------------------------------------------------*/ |
108 | |
109 | static void |
110 | record_merge_function_type (int merger, uniqstr type, location declaration_loc) |
111 | { |
112 | if (merger <= 0) |
113 | return; |
114 | |
115 | if (type == NULL) |
116 | type = uniqstr_new ("" ); |
117 | |
118 | merger_list *merge_function; |
119 | int merger_find = 1; |
120 | for (merge_function = merge_functions; |
121 | merge_function != NULL && merger_find != merger; |
122 | merge_function = merge_function->next) |
123 | merger_find += 1; |
124 | aver (merge_function != NULL && merger_find == merger); |
125 | if (merge_function->type != NULL && !UNIQSTR_EQ (merge_function->type, type)) |
126 | { |
127 | unsigned indent = 0; |
128 | complain_indent (&declaration_loc, complaint, &indent, |
129 | _("result type clash on merge function %s: " |
130 | "<%s> != <%s>" ), |
131 | quote (merge_function->name), type, |
132 | merge_function->type); |
133 | indent += SUB_INDENT; |
134 | complain_indent (&merge_function->type_declaration_loc, complaint, |
135 | &indent, |
136 | _("previous declaration" )); |
137 | } |
138 | merge_function->type = uniqstr_new (type); |
139 | merge_function->type_declaration_loc = declaration_loc; |
140 | } |
141 | |
142 | /*--------------------------------------. |
143 | | Free all merge-function definitions. | |
144 | `--------------------------------------*/ |
145 | |
146 | void |
147 | free_merger_functions (void) |
148 | { |
149 | merger_list *L0 = merge_functions; |
150 | while (L0) |
151 | { |
152 | merger_list *L1 = L0->next; |
153 | free (L0); |
154 | L0 = L1; |
155 | } |
156 | } |
157 | |
158 | |
159 | /*-------------------------------------------------------------------. |
160 | | Parse the input grammar into a one symbol_list structure. Each | |
161 | | rule is represented by a sequence of symbols: the left hand side | |
162 | | followed by the contents of the right hand side, followed by a | |
163 | | null pointer instead of a symbol to terminate the rule. The next | |
164 | | symbol is the lhs of the following rule. | |
165 | | | |
166 | | All actions are copied out, labelled by the rule number they apply | |
167 | | to. | |
168 | `-------------------------------------------------------------------*/ |
169 | |
170 | /* The (currently) last symbol of GRAMMAR. */ |
171 | static symbol_list *grammar_end = NULL; |
172 | |
173 | /* Append SYM to the grammar. */ |
174 | static symbol_list * |
175 | grammar_symbol_append (symbol *sym, location loc) |
176 | { |
177 | symbol_list *p = symbol_list_sym_new (sym, loc); |
178 | |
179 | if (grammar_end) |
180 | grammar_end->next = p; |
181 | else |
182 | grammar = p; |
183 | |
184 | grammar_end = p; |
185 | |
186 | /* A null SYM stands for an end of rule; it is not an actual |
187 | part of it. */ |
188 | if (sym) |
189 | ++nritems; |
190 | |
191 | return p; |
192 | } |
193 | |
194 | static void |
195 | assign_named_ref (symbol_list *p, named_ref *name) |
196 | { |
197 | symbol *sym = p->content.sym; |
198 | |
199 | if (name->id == sym->tag) |
200 | { |
201 | complain (&name->loc, Wother, |
202 | _("duplicated symbol name for %s ignored" ), |
203 | quote (sym->tag)); |
204 | named_ref_free (name); |
205 | } |
206 | else |
207 | p->named_ref = name; |
208 | } |
209 | |
210 | |
211 | /* The rule currently being defined, and the previous rule. |
212 | CURRENT_RULE points to the first LHS of the current rule, while |
213 | PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */ |
214 | static symbol_list *current_rule = NULL; |
215 | static symbol_list *previous_rule_end = NULL; |
216 | |
217 | |
218 | /*----------------------------------------------. |
219 | | Create a new rule for LHS in to the GRAMMAR. | |
220 | `----------------------------------------------*/ |
221 | |
222 | void |
223 | grammar_current_rule_begin (symbol *lhs, location loc, |
224 | named_ref *lhs_name) |
225 | { |
226 | /* Start a new rule and record its lhs. */ |
227 | ++nrules; |
228 | previous_rule_end = grammar_end; |
229 | |
230 | current_rule = grammar_symbol_append (lhs, loc); |
231 | if (lhs_name) |
232 | assign_named_ref (current_rule, named_ref_copy (lhs_name)); |
233 | |
234 | /* Mark the rule's lhs as a nonterminal if not already so. */ |
235 | if (lhs->content->class == unknown_sym) |
236 | { |
237 | lhs->content->class = nterm_sym; |
238 | lhs->content->number = nvars; |
239 | ++nvars; |
240 | } |
241 | else if (lhs->content->class == token_sym) |
242 | complain (&loc, complaint, _("rule given for %s, which is a token" ), |
243 | lhs->tag); |
244 | } |
245 | |
246 | |
247 | /*----------------------------------------------------------------------. |
248 | | A symbol should be used if either: | |
249 | | 1. It has a destructor. | |
250 | | 2. The symbol is a midrule symbol (i.e., the generated LHS | |
251 | | replacing a midrule action) that was assigned to or used, as in | |
252 | | "exp: { $$ = 1; } { $$ = $1; }". | |
253 | `----------------------------------------------------------------------*/ |
254 | |
255 | static bool |
256 | symbol_should_be_used (symbol_list const *s, bool *midrule_warning) |
257 | { |
258 | if (symbol_code_props_get (s->content.sym, destructor)->code) |
259 | return true; |
260 | if ((s->midrule && s->midrule->action_props.is_value_used) |
261 | || (s->midrule_parent_rule |
262 | && (symbol_list_n_get (s->midrule_parent_rule, |
263 | s->midrule_parent_rhs_index) |
264 | ->action_props.is_value_used))) |
265 | { |
266 | *midrule_warning = true; |
267 | return true; |
268 | } |
269 | return false; |
270 | } |
271 | |
272 | /*-----------------------------------------------------------------. |
273 | | Check that the rule R is properly defined. For instance, there | |
274 | | should be no type clash on the default action. Possibly install | |
275 | | the default action. | |
276 | `-----------------------------------------------------------------*/ |
277 | |
278 | static void |
279 | grammar_rule_check_and_complete (symbol_list *r) |
280 | { |
281 | /* Type check. |
282 | |
283 | If there is an action, then there is nothing we can do: the user |
284 | is allowed to shoot herself in the foot. |
285 | |
286 | Don't worry about the default action if $$ is untyped, since $$'s |
287 | value can't be used. */ |
288 | if (!r->action_props.code && r->content.sym->content->type_name) |
289 | { |
290 | symbol *first_rhs = r->next->content.sym; |
291 | /* If $$ is being set in default way, report if any type mismatch. */ |
292 | if (first_rhs) |
293 | { |
294 | char const *lhs_type = r->content.sym->content->type_name; |
295 | char const *rhs_type = |
296 | first_rhs->content->type_name ? first_rhs->content->type_name : "" ; |
297 | if (!UNIQSTR_EQ (lhs_type, rhs_type)) |
298 | complain (&r->rhs_loc, Wother, |
299 | _("type clash on default action: <%s> != <%s>" ), |
300 | lhs_type, rhs_type); |
301 | else |
302 | { |
303 | /* Install the default action only for C++. */ |
304 | const bool is_cxx = |
305 | STREQ (language->language, "c++" ) |
306 | || (skeleton && (STREQ (skeleton, "glr.cc" ) |
307 | || STREQ (skeleton, "lalr1.cc" ))); |
308 | if (is_cxx) |
309 | { |
310 | code_props_rule_action_init (&r->action_props, "{ $$ = $1; }" , |
311 | r->rhs_loc, r, |
312 | /* name */ NULL, |
313 | /* type */ NULL, |
314 | /* is_predicate */ false); |
315 | code_props_translate_code (&r->action_props); |
316 | } |
317 | } |
318 | } |
319 | /* Warn if there is no default for $$ but we need one. */ |
320 | else |
321 | complain (&r->rhs_loc, Wother, |
322 | _("empty rule for typed nonterminal, and no action" )); |
323 | } |
324 | |
325 | /* Check that symbol values that should be used are in fact used. */ |
326 | { |
327 | int n = 0; |
328 | for (symbol_list const *l = r; l && l->content.sym; l = l->next, ++n) |
329 | { |
330 | bool midrule_warning = false; |
331 | if (!l->action_props.is_value_used |
332 | && symbol_should_be_used (l, &midrule_warning) |
333 | /* The default action, $$ = $1, 'uses' both. */ |
334 | && (r->action_props.code || (n != 0 && n != 1))) |
335 | { |
336 | warnings warn_flag = midrule_warning ? Wmidrule_values : Wother; |
337 | if (n) |
338 | complain (&l->sym_loc, warn_flag, _("unused value: $%d" ), n); |
339 | else |
340 | complain (&l->rhs_loc, warn_flag, _("unset value: $$" )); |
341 | } |
342 | } |
343 | } |
344 | |
345 | /* Check that %empty => empty rule. */ |
346 | if (r->percent_empty_loc.start.file |
347 | && r->next && r->next->content.sym) |
348 | { |
349 | complain (&r->percent_empty_loc, complaint, |
350 | _("%%empty on non-empty rule" )); |
351 | fixits_register (&r->percent_empty_loc, "" ); |
352 | } |
353 | |
354 | /* Check that empty rule => %empty. */ |
355 | if (!(r->next && r->next->content.sym) |
356 | && !r->midrule_parent_rule |
357 | && !r->percent_empty_loc.start.file |
358 | && warning_is_enabled (Wempty_rule)) |
359 | { |
360 | complain (&r->rhs_loc, Wempty_rule, _("empty rule without %%empty" )); |
361 | location loc = r->rhs_loc; |
362 | loc.end = loc.start; |
363 | fixits_register (&loc, " %empty " ); |
364 | } |
365 | |
366 | /* See comments in grammar_current_rule_prec_set for how POSIX |
367 | mandates this complaint. It's only for identifiers, so skip |
368 | it for char literals and strings, which are always tokens. */ |
369 | if (r->ruleprec |
370 | && r->ruleprec->tag[0] != '\'' && r->ruleprec->tag[0] != '"' |
371 | && r->ruleprec->content->status != declared |
372 | && !r->ruleprec->content->prec) |
373 | complain (&r->rhs_loc, Wother, |
374 | _("token for %%prec is not defined: %s" ), r->ruleprec->tag); |
375 | |
376 | /* Check that the (main) action was not typed. */ |
377 | if (r->action_props.type) |
378 | complain (&r->rhs_loc, Wother, |
379 | _("only midrule actions can be typed: %s" ), r->action_props.type); |
380 | } |
381 | |
382 | |
383 | /*-------------------------------------. |
384 | | End the currently being grown rule. | |
385 | `-------------------------------------*/ |
386 | |
387 | void |
388 | grammar_current_rule_end (location loc) |
389 | { |
390 | /* Put an empty link in the list to mark the end of this rule */ |
391 | grammar_symbol_append (NULL, grammar_end->rhs_loc); |
392 | current_rule->rhs_loc = loc; |
393 | } |
394 | |
395 | |
396 | /*-------------------------------------------------------------------. |
397 | | The previous action turns out to be a midrule action. Attach it | |
398 | | to the current rule, i.e., create a dummy symbol, attach it this | |
399 | | midrule action, and append this dummy nonterminal to the current | |
400 | | rule. | |
401 | `-------------------------------------------------------------------*/ |
402 | |
403 | void |
404 | grammar_midrule_action (void) |
405 | { |
406 | /* Since the action was written out with this rule's number, we must |
407 | give the new rule this number by inserting the new rule before |
408 | it. */ |
409 | |
410 | /* Make a DUMMY nonterminal, whose location is that of the midrule |
411 | action. Create the MIDRULE. */ |
412 | location dummy_loc = current_rule->action_props.location; |
413 | symbol *dummy = dummy_symbol_get (dummy_loc); |
414 | symbol_type_set(dummy, |
415 | current_rule->action_props.type, current_rule->action_props.location); |
416 | symbol_list *midrule = symbol_list_sym_new (dummy, dummy_loc); |
417 | |
418 | /* Remember named_ref of previous action. */ |
419 | named_ref *action_name = current_rule->action_props.named_ref; |
420 | |
421 | /* Make a new rule, whose body is empty, before the current one, so |
422 | that the action just read can belong to it. */ |
423 | ++nrules; |
424 | ++nritems; |
425 | /* Attach its location and actions to that of the DUMMY. */ |
426 | midrule->rhs_loc = dummy_loc; |
427 | code_props_rule_action_init (&midrule->action_props, |
428 | current_rule->action_props.code, |
429 | current_rule->action_props.location, |
430 | midrule, |
431 | /* name_ref */ NULL, |
432 | /* type */ NULL, |
433 | current_rule->action_props.is_predicate); |
434 | code_props_none_init (¤t_rule->action_props); |
435 | |
436 | midrule->expected_sr_conflicts = current_rule->expected_sr_conflicts; |
437 | midrule->expected_rr_conflicts = current_rule->expected_rr_conflicts; |
438 | current_rule->expected_sr_conflicts = -1; |
439 | current_rule->expected_rr_conflicts = -1; |
440 | |
441 | if (previous_rule_end) |
442 | previous_rule_end->next = midrule; |
443 | else |
444 | grammar = midrule; |
445 | |
446 | /* End the dummy's rule. */ |
447 | midrule->next = symbol_list_sym_new (NULL, dummy_loc); |
448 | midrule->next->next = current_rule; |
449 | |
450 | previous_rule_end = midrule->next; |
451 | |
452 | /* Insert the dummy nonterminal replacing the midrule action into |
453 | the current rule. Bind it to its dedicated rule. */ |
454 | grammar_current_rule_symbol_append (dummy, dummy_loc, |
455 | action_name); |
456 | grammar_end->midrule = midrule; |
457 | midrule->midrule_parent_rule = current_rule; |
458 | midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next); |
459 | } |
460 | |
461 | /* Set the precedence symbol of the current rule to PRECSYM. */ |
462 | |
463 | void |
464 | grammar_current_rule_prec_set (symbol *precsym, location loc) |
465 | { |
466 | /* POSIX says that any identifier is a nonterminal if it does not |
467 | appear on the LHS of a grammar rule and is not defined by %token |
468 | or by one of the directives that assigns precedence to a token. |
469 | We ignore this here because the only kind of identifier that |
470 | POSIX allows to follow a %prec is a token and because assuming |
471 | it's a token now can produce more logical error messages. |
472 | Nevertheless, grammar_rule_check_and_complete does obey what we |
473 | believe is the real intent of POSIX here: that an error be |
474 | reported for any identifier that appears after %prec but that is |
475 | not defined separately as a token. */ |
476 | symbol_class_set (precsym, token_sym, loc, false); |
477 | if (current_rule->ruleprec) |
478 | duplicate_rule_directive ("%prec" , |
479 | current_rule->ruleprec->location, loc); |
480 | else |
481 | current_rule->ruleprec = precsym; |
482 | } |
483 | |
484 | /* Set %empty for the current rule. */ |
485 | |
486 | void |
487 | grammar_current_rule_empty_set (location loc) |
488 | { |
489 | /* If %empty is used and -Wno-empty-rule is not, then enable |
490 | -Wempty-rule. */ |
491 | if (warning_is_unset (Wempty_rule)) |
492 | warning_argmatch ("empty-rule" , 0, 0); |
493 | if (current_rule->percent_empty_loc.start.file) |
494 | duplicate_rule_directive ("%empty" , |
495 | current_rule->percent_empty_loc, loc); |
496 | else |
497 | current_rule->percent_empty_loc = loc; |
498 | } |
499 | |
500 | /* Attach dynamic precedence DPREC to the current rule. */ |
501 | |
502 | void |
503 | grammar_current_rule_dprec_set (int dprec, location loc) |
504 | { |
505 | if (! glr_parser) |
506 | complain (&loc, Wother, _("%s affects only GLR parsers" ), |
507 | "%dprec" ); |
508 | if (dprec <= 0) |
509 | complain (&loc, complaint, _("%s must be followed by positive number" ), |
510 | "%dprec" ); |
511 | else if (current_rule->dprec != 0) |
512 | duplicate_rule_directive ("%dprec" , |
513 | current_rule->dprec_loc, loc); |
514 | else |
515 | { |
516 | current_rule->dprec = dprec; |
517 | current_rule->dprec_loc = loc; |
518 | } |
519 | } |
520 | |
521 | /* Attach a merge function NAME with argument type TYPE to current |
522 | rule. */ |
523 | |
524 | void |
525 | grammar_current_rule_merge_set (uniqstr name, location loc) |
526 | { |
527 | if (! glr_parser) |
528 | complain (&loc, Wother, _("%s affects only GLR parsers" ), |
529 | "%merge" ); |
530 | if (current_rule->merger != 0) |
531 | duplicate_rule_directive ("%merge" , |
532 | current_rule->merger_declaration_loc, loc); |
533 | else |
534 | { |
535 | current_rule->merger = get_merge_function (name); |
536 | current_rule->merger_declaration_loc = loc; |
537 | } |
538 | } |
539 | |
540 | /* Attach SYM to the current rule. If needed, move the previous |
541 | action as a midrule action. */ |
542 | |
543 | void |
544 | grammar_current_rule_symbol_append (symbol *sym, location loc, |
545 | named_ref *name) |
546 | { |
547 | if (current_rule->action_props.code) |
548 | grammar_midrule_action (); |
549 | symbol_list *p = grammar_symbol_append (sym, loc); |
550 | if (name) |
551 | assign_named_ref (p, name); |
552 | if (sym->content->status == undeclared || sym->content->status == used) |
553 | sym->content->status = needed; |
554 | } |
555 | |
556 | void |
557 | grammar_current_rule_action_append (const char *action, location loc, |
558 | named_ref *name, uniqstr type) |
559 | { |
560 | if (current_rule->action_props.code) |
561 | grammar_midrule_action (); |
562 | if (type) |
563 | complain (&loc, Wyacc, |
564 | _("POSIX Yacc does not support typed midrule actions" )); |
565 | /* After all symbol declarations have been parsed, packgram invokes |
566 | code_props_translate_code. */ |
567 | code_props_rule_action_init (¤t_rule->action_props, action, loc, |
568 | current_rule, |
569 | name, type, |
570 | /* is_predicate */ false); |
571 | } |
572 | |
573 | void |
574 | grammar_current_rule_predicate_append (const char *pred, location loc) |
575 | { |
576 | if (current_rule->action_props.code) |
577 | grammar_midrule_action (); |
578 | code_props_rule_action_init (¤t_rule->action_props, pred, loc, |
579 | current_rule, |
580 | NULL, NULL, |
581 | /* is_predicate */ true); |
582 | } |
583 | |
584 | /* Set the expected number of shift-reduce (reduce-reduce) conflicts for |
585 | * the current rule. If a midrule is encountered later, the count |
586 | * is transferred to it and reset in the current rule to -1. */ |
587 | |
588 | void |
589 | grammar_current_rule_expect_sr (int count, location loc) |
590 | { |
591 | (void) loc; |
592 | current_rule->expected_sr_conflicts = count; |
593 | } |
594 | |
595 | void |
596 | grammar_current_rule_expect_rr (int count, location loc) |
597 | { |
598 | if (! glr_parser) |
599 | complain (&loc, Wother, _("%s affects only GLR parsers" ), |
600 | "%expect-rr" ); |
601 | else |
602 | current_rule->expected_rr_conflicts = count; |
603 | } |
604 | |
605 | |
606 | /*---------------------------------------------------------------. |
607 | | Convert the rules into the representation using RRHS, RLHS and | |
608 | | RITEM. | |
609 | `---------------------------------------------------------------*/ |
610 | |
611 | static void |
612 | packgram (void) |
613 | { |
614 | unsigned itemno = 0; |
615 | ritem = xnmalloc (nritems + 1, sizeof *ritem); |
616 | /* This sentinel is used by build_relations in gram.c. */ |
617 | *ritem++ = 0; |
618 | |
619 | rule_number ruleno = 0; |
620 | rules = xnmalloc (nrules, sizeof *rules); |
621 | |
622 | for (symbol_list *p = grammar; p; p = p->next) |
623 | { |
624 | symbol_list *lhs = p; |
625 | record_merge_function_type (lhs->merger, lhs->content.sym->content->type_name, |
626 | lhs->merger_declaration_loc); |
627 | /* If the midrule's $$ is set or its $n is used, remove the '$' from the |
628 | symbol name so that it's a user-defined symbol so that the default |
629 | %destructor and %printer apply. */ |
630 | if (lhs->midrule_parent_rule /* i.e., symbol_is_dummy (lhs->content.sym). */ |
631 | && (lhs->action_props.is_value_used |
632 | || (symbol_list_n_get (lhs->midrule_parent_rule, |
633 | lhs->midrule_parent_rhs_index) |
634 | ->action_props.is_value_used))) |
635 | lhs->content.sym->tag += 1; |
636 | |
637 | /* Don't check the generated rule 0. It has no action, so some rhs |
638 | symbols may appear unused, but the parsing algorithm ensures that |
639 | %destructor's are invoked appropriately. */ |
640 | if (lhs != grammar) |
641 | grammar_rule_check_and_complete (lhs); |
642 | |
643 | rules[ruleno].user_number = ruleno; |
644 | rules[ruleno].number = ruleno; |
645 | rules[ruleno].lhs = lhs->content.sym->content; |
646 | rules[ruleno].rhs = ritem + itemno; |
647 | rules[ruleno].prec = NULL; |
648 | rules[ruleno].dprec = lhs->dprec; |
649 | rules[ruleno].merger = lhs->merger; |
650 | rules[ruleno].precsym = NULL; |
651 | rules[ruleno].location = lhs->rhs_loc; |
652 | rules[ruleno].useful = true; |
653 | rules[ruleno].action = lhs->action_props.code; |
654 | rules[ruleno].action_loc = lhs->action_props.location; |
655 | rules[ruleno].is_predicate = lhs->action_props.is_predicate; |
656 | rules[ruleno].expected_sr_conflicts = lhs->expected_sr_conflicts; |
657 | rules[ruleno].expected_rr_conflicts = lhs->expected_rr_conflicts; |
658 | |
659 | /* Traverse the rhs. */ |
660 | { |
661 | size_t rule_length = 0; |
662 | for (p = lhs->next; p->content.sym; p = p->next) |
663 | { |
664 | ++rule_length; |
665 | |
666 | /* Don't allow rule_length == INT_MAX, since that might |
667 | cause confusion with strtol if INT_MAX == LONG_MAX. */ |
668 | if (rule_length == INT_MAX) |
669 | complain (&rules[ruleno].location, fatal, _("rule is too long" )); |
670 | |
671 | /* item_number = symbol_number. |
672 | But the former needs to contain more: negative rule numbers. */ |
673 | ritem[itemno++] = |
674 | symbol_number_as_item_number (p->content.sym->content->number); |
675 | /* A rule gets by default the precedence and associativity |
676 | of its last token. */ |
677 | if (p->content.sym->content->class == token_sym && default_prec) |
678 | rules[ruleno].prec = p->content.sym->content; |
679 | } |
680 | } |
681 | |
682 | /* If this rule has a %prec, |
683 | the specified symbol's precedence replaces the default. */ |
684 | if (lhs->ruleprec) |
685 | { |
686 | rules[ruleno].precsym = lhs->ruleprec->content; |
687 | rules[ruleno].prec = lhs->ruleprec->content; |
688 | } |
689 | |
690 | /* An item ends by the rule number (negated). */ |
691 | ritem[itemno++] = rule_number_as_item_number (ruleno); |
692 | aver (itemno < ITEM_NUMBER_MAX); |
693 | ++ruleno; |
694 | aver (ruleno < RULE_NUMBER_MAX); |
695 | } |
696 | |
697 | aver (itemno == nritems); |
698 | |
699 | if (trace_flag & trace_sets) |
700 | ritem_print (stderr); |
701 | } |
702 | |
703 | /*------------------------------------------------------------------. |
704 | | Read in the grammar specification and record it in the format | |
705 | | described in gram.h. All actions are copied into ACTION_OBSTACK, | |
706 | | in each case forming the body of a C function (YYACTION) which | |
707 | | contains a switch statement to decide which action to execute. | |
708 | `------------------------------------------------------------------*/ |
709 | |
710 | void |
711 | reader (void) |
712 | { |
713 | /* Initialize the symbol table. */ |
714 | symbols_new (); |
715 | |
716 | /* Construct the accept symbol. */ |
717 | accept = symbol_get ("$accept" , empty_loc); |
718 | accept->content->class = nterm_sym; |
719 | accept->content->number = nvars++; |
720 | |
721 | /* Construct the error token */ |
722 | errtoken = symbol_get ("error" , empty_loc); |
723 | errtoken->content->class = token_sym; |
724 | errtoken->content->number = ntokens++; |
725 | |
726 | /* Construct a token that represents all undefined literal tokens. |
727 | It is always token number 2. */ |
728 | undeftoken = symbol_get ("$undefined" , empty_loc); |
729 | undeftoken->content->class = token_sym; |
730 | undeftoken->content->number = ntokens++; |
731 | |
732 | gram_in = xfopen (grammar_file, "r" ); |
733 | |
734 | gram__flex_debug = trace_flag & trace_scan; |
735 | gram_debug = trace_flag & trace_parse; |
736 | gram_scanner_initialize (); |
737 | gram_parse (); |
738 | prepare_percent_define_front_end_variables (); |
739 | |
740 | if (complaint_status < status_complaint) |
741 | check_and_convert_grammar (); |
742 | |
743 | xfclose (gram_in); |
744 | } |
745 | |
/* Give their default values to the %define variables of the front
   end that the user did not set, then check that the values of the
   enumerated ones are valid.  */

static void
prepare_percent_define_front_end_variables (void)
{
  /* Set %define front-end variable defaults.  */
  muscle_percent_define_default ("lr.keep-unreachable-state", "false");

  /* IELR would be a better default, but LALR is historically the
     default.  */
  muscle_percent_define_default ("lr.type", "lalr");

  /* The default for lr.default-reduction depends on lr.type.  */
  {
    char *lr_type = muscle_percent_define_get ("lr.type");
    char const *default_reduction
      = STRNEQ (lr_type, "canonical-lr") ? "most" : "accepting";
    free (lr_type);
    muscle_percent_define_default ("lr.default-reduction", default_reduction);
  }

  /* Check %define front-end variables.  Each group is a variable name
     followed by its accepted values, and ends with NULL; a final NULL
     ends the table.  */
  {
    static char const * const values[] =
      {
        "lr.type", "lr" "(0)", "lalr", "ielr", "canonical-lr", NULL,
        "lr.default-reduction", "most", "consistent", "accepting", NULL,
        NULL
      };
    muscle_percent_define_check_values (values);
  }
}
774 | |
775 | /* Find the first LHS which is not a dummy. */ |
776 | |
777 | static symbol * |
778 | find_start_symbol (void) |
779 | { |
780 | symbol_list *res = grammar; |
781 | /* Skip all the possible dummy rules of the first rule. */ |
782 | for (; symbol_is_dummy (res->content.sym); res = res->next) |
783 | /* Skip the LHS, and then all the RHS of the dummy rule. */ |
784 | for (res = res->next; res->content.sym; res = res->next) |
785 | continue; |
786 | return res->content.sym; |
787 | } |
788 | |
789 | |
790 | /*-------------------------------------------------------------. |
791 | | Check the grammar that has just been read, and convert it to | |
792 | | internal form. | |
793 | `-------------------------------------------------------------*/ |
794 | |
795 | static void |
796 | check_and_convert_grammar (void) |
797 | { |
798 | /* Grammar has been read. Do some checking. */ |
799 | if (nrules == 0) |
800 | complain (NULL, fatal, _("no rules in the input grammar" )); |
801 | |
802 | /* If the user did not define her ENDTOKEN, do it now. */ |
803 | if (!endtoken) |
804 | { |
805 | endtoken = symbol_get ("$end" , empty_loc); |
806 | endtoken->content->class = token_sym; |
807 | endtoken->content->number = 0; |
808 | /* Value specified by POSIX. */ |
809 | endtoken->content->user_token_number = 0; |
810 | } |
811 | |
812 | /* Report any undefined symbols and consider them nonterminals. */ |
813 | symbols_check_defined (); |
814 | |
815 | /* Find the start symbol if no %start. */ |
816 | if (!start_flag) |
817 | { |
818 | symbol *start = find_start_symbol (); |
819 | grammar_start_symbol_set (start, start->location); |
820 | } |
821 | |
822 | /* Insert the initial rule, whose line is that of the first rule |
823 | (not that of the start symbol): |
824 | |
825 | $accept: %start $end. */ |
826 | { |
827 | symbol_list *p = symbol_list_sym_new (accept, empty_loc); |
828 | p->rhs_loc = grammar->rhs_loc; |
829 | p->next = symbol_list_sym_new (startsymbol, empty_loc); |
830 | p->next->next = symbol_list_sym_new (endtoken, empty_loc); |
831 | p->next->next->next = symbol_list_sym_new (NULL, empty_loc); |
832 | p->next->next->next->next = grammar; |
833 | nrules += 1; |
834 | nritems += 3; |
835 | grammar = p; |
836 | } |
837 | |
838 | aver (nsyms <= SYMBOL_NUMBER_MAXIMUM); |
839 | aver (nsyms == ntokens + nvars); |
840 | |
841 | /* Assign the symbols their symbol numbers. */ |
842 | symbols_pack (); |
843 | |
844 | /* Scan rule actions after invoking symbol_check_alias_consistency (in |
845 | symbols_pack above) so that token types are set correctly before the rule |
846 | action type checking. |
847 | |
848 | Before invoking grammar_rule_check_and_complete (in packgram |
849 | below) on any rule, make sure all actions have already been |
850 | scanned in order to set 'used' flags. Otherwise, checking that a |
851 | midrule's $$ should be set will not always work properly because |
852 | the check must forward-reference the midrule's parent rule. For |
853 | the same reason, all the 'used' flags must be set before checking |
854 | whether to remove '$' from any midrule symbol name (also in |
855 | packgram). */ |
856 | for (symbol_list *sym = grammar; sym; sym = sym->next) |
857 | code_props_translate_code (&sym->action_props); |
858 | |
859 | /* Convert the grammar into the format described in gram.h. */ |
860 | packgram (); |
861 | |
862 | /* The grammar as a symbol_list is no longer needed. */ |
863 | symbol_list_free (grammar); |
864 | } |
865 | |