reader.c source code [bison/src/reader.c]

/* Input parser for Bison

   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007,
   2009-2015, 2018-2019 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20
21	#include <config.h>
22	#include "system.h"
23
24	#include <quote.h>
25
26	#include "complain.h"
27	#include "conflicts.h"
28	#include "files.h"
29	#include "fixits.h"
30	#include "getargs.h"
31	#include "gram.h"
32	#include "muscle-tab.h"
33	#include "reader.h"
34	#include "symlist.h"
35	#include "symtab.h"
36	#include "scan-gram.h"
37	#include "scan-code.h"
38
39	static void prepare_percent_define_front_end_variables (void);
40	static void check_and_convert_grammar (void);
41
42	static symbol_list *grammar = NULL;
43	static bool start_flag = false;
44	merger_list *merge_functions;
45
46	/ Was %union seen? /
47	bool union_seen = false;
48
49	/ Should rules have a default precedence? /
50	bool default_prec = true;
51
/*-----------------------.
| Set the start symbol.  |
`-----------------------*/
55
56	void
57	grammar_start_symbol_set (symbol *sym, location loc)
58	{
59	if (start_flag)
60	complain (&loc, complaint, _("multiple %s declarations"), "%start");
61	else
62	{
63	start_flag = true;
64	startsymbol = sym;
65	startsymbol_loc = loc;
66	}
67	}
68
69
70
71	/------------------------------------------------------------------------.*
72	\| Return the merger index for a merging function named NAME. Records the \|
73	\| function, if new, in MERGER_LIST. \|
74	`------------------------------------------------------------------------/*
75
76	static int
77	get_merge_function (uniqstr name)
78	{
79	if (! glr_parser)
80	return `0`;
81
82	merger_list *syms;
83	merger_list head;
84	int n;
85
86	head.next = merge_functions;
87	for (syms = &head, n = `1`; syms->next; syms = syms->next, n += `1`)
88	if (UNIQSTR_EQ (name, syms->next->name))
89	break;
90	if (syms->next == NULL)
91	{
92	syms->next = xmalloc (sizeof syms->next[`0`]);
93	syms->next->name = uniqstr_new (name);
94	/ After all symbol type declarations have been parsed, packgram invokes*
95	record_merge_function_type to set the type. /*
96	syms->next->type = NULL;
97	syms->next->next = NULL;
98	merge_functions = head.next;
99	}
100	return n;
101	}
102
103	/-------------------------------------------------------------------------.*
104	\| For the existing merging function with index MERGER, record the result \|
105	\| type as TYPE as required by the lhs of the rule whose %merge declaration \|
106	\| is at DECLARATION_LOC. \|
107	`-------------------------------------------------------------------------/*
108
109	static void
110	record_merge_function_type (int merger, uniqstr type, location declaration_loc)
111	{
112	if (merger <= `0`)
113	return;
114
115	if (type == NULL)
116	type = uniqstr_new ("");
117
118	merger_list *merge_function;
119	int merger_find = `1`;
120	for (merge_function = merge_functions;
121	merge_function != NULL && merger_find != merger;
122	merge_function = merge_function->next)
123	merger_find += `1`;
124	aver (merge_function != NULL && merger_find == merger);
125	if (merge_function->type != NULL && !UNIQSTR_EQ (merge_function->type, type))
126	{
127	unsigned indent = `0`;
128	complain_indent (&declaration_loc, complaint, &indent,
129	_("result type clash on merge function %s: "
130	"<%s> != <%s>"),
131	quote (merge_function->name), type,
132	merge_function->type);
133	indent += SUB_INDENT;
134	complain_indent (&merge_function->type_declaration_loc, complaint,
135	&indent,
136	_("previous declaration"));
137	}
138	merge_function->type = uniqstr_new (type);
139	merge_function->type_declaration_loc = declaration_loc;
140	}
141
142	/--------------------------------------.*
143	\| Free all merge-function definitions. \|
144	`--------------------------------------/*
145
146	void
147	free_merger_functions (void)
148	{
149	merger_list *L0 = merge_functions;
150	while (L0)
151	{
152	merger_list *L1 = L0->next;
153	free (L0);
154	L0 = L1;
155	}
156	}
157
158
159	/-------------------------------------------------------------------.*
160	\| Parse the input grammar into a one symbol_list structure. Each \|
161	\| rule is represented by a sequence of symbols: the left hand side \|
162	\| followed by the contents of the right hand side, followed by a \|
163	\| null pointer instead of a symbol to terminate the rule. The next \|
164	\| symbol is the lhs of the following rule. \|
165	\| \|
166	\| All actions are copied out, labelled by the rule number they apply \|
167	\| to. \|
168	`-------------------------------------------------------------------/*
169
170	/ The (currently) last symbol of GRAMMAR. /
171	static symbol_list *grammar_end = NULL;
172
173	/ Append SYM to the grammar. /
174	static symbol_list *
175	grammar_symbol_append (symbol *sym, location loc)
176	{
177	symbol_list *p = symbol_list_sym_new (sym, loc);
178
179	if (grammar_end)
180	grammar_end->next = p;
181	else
182	grammar = p;
183
184	grammar_end = p;
185
186	/ A null SYM stands for an end of rule; it is not an actual*
187	part of it. /*
188	if (sym)
189	++nritems;
190
191	return p;
192	}
193
194	static void
195	assign_named_ref (symbol_list p, named_ref name)
196	{
197	symbol *sym = p->content.sym;
198
199	if (name->id == sym->tag)
200	{
201	complain (&name->loc, Wother,
202	_("duplicated symbol name for %s ignored"),
203	quote (sym->tag));
204	named_ref_free (name);
205	}
206	else
207	p->named_ref = name;
208	}
209
210
211	/ The rule currently being defined, and the previous rule.*
212	CURRENT_RULE points to the first LHS of the current rule, while
213	PREVIOUS_RULE_END points to the end* of the previous rule (NULL). /
214	static symbol_list *current_rule = NULL;
215	static symbol_list *previous_rule_end = NULL;
216
217
218	/----------------------------------------------.*
219	\| Create a new rule for LHS in to the GRAMMAR. \|
220	`----------------------------------------------/*
221
222	void
223	grammar_current_rule_begin (symbol *lhs, location loc,
224	named_ref *lhs_name)
225	{
226	/ Start a new rule and record its lhs. /
227	++nrules;
228	previous_rule_end = grammar_end;
229
230	current_rule = grammar_symbol_append (lhs, loc);
231	if (lhs_name)
232	assign_named_ref (current_rule, named_ref_copy (lhs_name));
233
234	/ Mark the rule's lhs as a nonterminal if not already so. /
235	if (lhs->content->class == unknown_sym)
236	{
237	lhs->content->class = nterm_sym;
238	lhs->content->number = nvars;
239	++nvars;
240	}
241	else if (lhs->content->class == token_sym)
242	complain (&loc, complaint, _("rule given for %s, which is a token"),
243	lhs->tag);
244	}
245
246
247	/----------------------------------------------------------------------.*
248	\| A symbol should be used if either: \|
249	\| 1. It has a destructor. \|
250	\| 2. The symbol is a midrule symbol (i.e., the generated LHS \|
251	\| replacing a midrule action) that was assigned to or used, as in \|
252	\| "exp: { $$ = 1; } { $$ = $1; }". \|
253	`----------------------------------------------------------------------/*
254
255	static bool
256	symbol_should_be_used (symbol_list const s, bool midrule_warning)
257	{
258	if (symbol_code_props_get (s->content.sym, destructor)->code)
259	return true;
260	if ((s->midrule && s->midrule->action_props.is_value_used)
261	\|\| (s->midrule_parent_rule
262	&& (symbol_list_n_get (s->midrule_parent_rule,
263	s->midrule_parent_rhs_index)
264	->action_props.is_value_used)))
265	{
266	*midrule_warning = true;
267	return true;
268	}
269	return false;
270	}
271
272	/-----------------------------------------------------------------.*
273	\| Check that the rule R is properly defined. For instance, there \|
274	\| should be no type clash on the default action. Possibly install \|
275	\| the default action. \|
276	`-----------------------------------------------------------------/*
277
278	static void
279	grammar_rule_check_and_complete (symbol_list *r)
280	{
281	/ Type check.*
282
283	If there is an action, then there is nothing we can do: the user
284	is allowed to shoot herself in the foot.
285
286	Don't worry about the default action if $$ is untyped, since $$'s
287	value can't be used. /*
288	if (!r->action_props.code && r->content.sym->content->type_name)
289	{
290	symbol *first_rhs = r->next->content.sym;
291	/ If $$ is being set in default way, report if any type mismatch. /
292	if (first_rhs)
293	{
294	char const *lhs_type = r->content.sym->content->type_name;
295	char const *rhs_type =
296	first_rhs->content->type_name ? first_rhs->content->type_name : "";
297	if (!UNIQSTR_EQ (lhs_type, rhs_type))
298	complain (&r->rhs_loc, Wother,
299	_("type clash on default action: <%s> != <%s>"),
300	lhs_type, rhs_type);
301	else
302	{
303	/ Install the default action only for C++. /
304	const bool is_cxx =
305	STREQ (language->language, "c++")
306	\|\| (skeleton && (STREQ (skeleton, "glr.cc")
307	\|\| STREQ (skeleton, "lalr1.cc")));
308	if (is_cxx)
309	{
310	code_props_rule_action_init (&r->action_props, "{ $$ = $1; }",
311	r->rhs_loc, r,
312	/ name / NULL,
313	/ type / NULL,
314	/ is_predicate / false);
315	code_props_translate_code (&r->action_props);
316	}
317	}
318	}
319	/ Warn if there is no default for $$ but we need one. /
320	else
321	complain (&r->rhs_loc, Wother,
322	_("empty rule for typed nonterminal, and no action"));
323	}
324
325	/ Check that symbol values that should be used are in fact used. /
326	{
327	int n = `0`;
328	for (symbol_list const *l = r; l && l->content.sym; l = l->next, ++n)
329	{
330	bool midrule_warning = false;
331	if (!l->action_props.is_value_used
332	&& symbol_should_be_used (l, &midrule_warning)
333	/ The default action, $$ = $1, 'uses' both. /
334	&& (r->action_props.code \|\| (n != `0` && n != `1`)))
335	{
336	warnings warn_flag = midrule_warning ? Wmidrule_values : Wother;
337	if (n)
338	complain (&l->sym_loc, warn_flag, _("unused value: $%d"), n);
339	else
340	complain (&l->rhs_loc, warn_flag, _("unset value: $$"));
341	}
342	}
343	}
344
345	/ Check that %empty => empty rule. /
346	if (r->percent_empty_loc.start.file
347	&& r->next && r->next->content.sym)
348	{
349	complain (&r->percent_empty_loc, complaint,
350	_("%%empty on non-empty rule"));
351	fixits_register (&r->percent_empty_loc, "");
352	}
353
354	/ Check that empty rule => %empty. /
355	if (!(r->next && r->next->content.sym)
356	&& !r->midrule_parent_rule
357	&& !r->percent_empty_loc.start.file
358	&& warning_is_enabled (Wempty_rule))
359	{
360	complain (&r->rhs_loc, Wempty_rule, _("empty rule without %%empty"));
361	location loc = r->rhs_loc;
362	loc.end = loc.start;
363	fixits_register (&loc, " %empty ");
364	}
365
366	/ See comments in grammar_current_rule_prec_set for how POSIX*
367	mandates this complaint. It's only for identifiers, so skip
368	it for char literals and strings, which are always tokens. /*
369	if (r->ruleprec
370	&& r->ruleprec->tag[`0`] != `'\''` && r->ruleprec->tag[`0`] != `'"'`
371	&& r->ruleprec->content->status != declared
372	&& !r->ruleprec->content->prec)
373	complain (&r->rhs_loc, Wother,
374	_("token for %%prec is not defined: %s"), r->ruleprec->tag);
375
376	/ Check that the (main) action was not typed. /
377	if (r->action_props.type)
378	complain (&r->rhs_loc, Wother,
379	_("only midrule actions can be typed: %s"), r->action_props.type);
380	}
381
382
383	/-------------------------------------.*
384	\| End the currently being grown rule. \|
385	`-------------------------------------/*
386
387	void
388	grammar_current_rule_end (location loc)
389	{
390	/ Put an empty link in the list to mark the end of this rule /
391	grammar_symbol_append (NULL, grammar_end->rhs_loc);
392	current_rule->rhs_loc = loc;
393	}
394
395
396	/-------------------------------------------------------------------.*
397	\| The previous action turns out to be a midrule action. Attach it \|
398	\| to the current rule, i.e., create a dummy symbol, attach it this \|
399	\| midrule action, and append this dummy nonterminal to the current \|
400	\| rule. \|
401	`-------------------------------------------------------------------/*
402
403	void
404	grammar_midrule_action (void)
405	{
406	/ Since the action was written out with this rule's number, we must*
407	give the new rule this number by inserting the new rule before
408	it. /*
409
410	/ Make a DUMMY nonterminal, whose location is that of the midrule*
411	action. Create the MIDRULE. /*
412	location dummy_loc = current_rule->action_props.location;
413	symbol *dummy = dummy_symbol_get (dummy_loc);
414	symbol_type_set(dummy,
415	current_rule->action_props.type, current_rule->action_props.location);
416	symbol_list *midrule = symbol_list_sym_new (dummy, dummy_loc);
417
418	/ Remember named_ref of previous action. /
419	named_ref *action_name = current_rule->action_props.named_ref;
420
421	/ Make a new rule, whose body is empty, before the current one, so*
422	that the action just read can belong to it. /*
423	++nrules;
424	++nritems;
425	/ Attach its location and actions to that of the DUMMY. /
426	midrule->rhs_loc = dummy_loc;
427	code_props_rule_action_init (&midrule->action_props,
428	current_rule->action_props.code,
429	current_rule->action_props.location,
430	midrule,
431	/ name_ref / NULL,
432	/ type / NULL,
433	current_rule->action_props.is_predicate);
434	code_props_none_init (&current_rule->action_props);
435
436	midrule->expected_sr_conflicts = current_rule->expected_sr_conflicts;
437	midrule->expected_rr_conflicts = current_rule->expected_rr_conflicts;
438	current_rule->expected_sr_conflicts = -`1`;
439	current_rule->expected_rr_conflicts = -`1`;
440
441	if (previous_rule_end)
442	previous_rule_end->next = midrule;
443	else
444	grammar = midrule;
445
446	/ End the dummy's rule. /
447	midrule->next = symbol_list_sym_new (NULL, dummy_loc);
448	midrule->next->next = current_rule;
449
450	previous_rule_end = midrule->next;
451
452	/ Insert the dummy nonterminal replacing the midrule action into*
453	the current rule. Bind it to its dedicated rule. /*
454	grammar_current_rule_symbol_append (dummy, dummy_loc,
455	action_name);
456	grammar_end->midrule = midrule;
457	midrule->midrule_parent_rule = current_rule;
458	midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
459	}
460
461	/ Set the precedence symbol of the current rule to PRECSYM. /
462
463	void
464	grammar_current_rule_prec_set (symbol *precsym, location loc)
465	{
466	/ POSIX says that any identifier is a nonterminal if it does not*
467	appear on the LHS of a grammar rule and is not defined by %token
468	or by one of the directives that assigns precedence to a token.
469	We ignore this here because the only kind of identifier that
470	POSIX allows to follow a %prec is a token and because assuming
471	it's a token now can produce more logical error messages.
472	Nevertheless, grammar_rule_check_and_complete does obey what we
473	believe is the real intent of POSIX here: that an error be
474	reported for any identifier that appears after %prec but that is
475	not defined separately as a token. /*
476	symbol_class_set (precsym, token_sym, loc, false);
477	if (current_rule->ruleprec)
478	duplicate_rule_directive ("%prec",
479	current_rule->ruleprec->location, loc);
480	else
481	current_rule->ruleprec = precsym;
482	}
483
484	/ Set %empty for the current rule. /
485
486	void
487	grammar_current_rule_empty_set (location loc)
488	{
489	/ If %empty is used and -Wno-empty-rule is not, then enable*
490	-Wempty-rule. /*
491	if (warning_is_unset (Wempty_rule))
492	warning_argmatch ("empty-rule", `0`, `0`);
493	if (current_rule->percent_empty_loc.start.file)
494	duplicate_rule_directive ("%empty",
495	current_rule->percent_empty_loc, loc);
496	else
497	current_rule->percent_empty_loc = loc;
498	}
499
500	/ Attach dynamic precedence DPREC to the current rule. /
501
502	void
503	grammar_current_rule_dprec_set (int dprec, location loc)
504	{
505	if (! glr_parser)
506	complain (&loc, Wother, _("%s affects only GLR parsers"),
507	"%dprec");
508	if (dprec <= `0`)
509	complain (&loc, complaint, _("%s must be followed by positive number"),
510	"%dprec");
511	else if (current_rule->dprec != `0`)
512	duplicate_rule_directive ("%dprec",
513	current_rule->dprec_loc, loc);
514	else
515	{
516	current_rule->dprec = dprec;
517	current_rule->dprec_loc = loc;
518	}
519	}
520
521	/ Attach a merge function NAME with argument type TYPE to current*
522	rule. /*
523
524	void
525	grammar_current_rule_merge_set (uniqstr name, location loc)
526	{
527	if (! glr_parser)
528	complain (&loc, Wother, _("%s affects only GLR parsers"),
529	"%merge");
530	if (current_rule->merger != `0`)
531	duplicate_rule_directive ("%merge",
532	current_rule->merger_declaration_loc, loc);
533	else
534	{
535	current_rule->merger = get_merge_function (name);
536	current_rule->merger_declaration_loc = loc;
537	}
538	}
539
540	/ Attach SYM to the current rule. If needed, move the previous*
541	action as a midrule action. /*
542
543	void
544	grammar_current_rule_symbol_append (symbol *sym, location loc,
545	named_ref *name)
546	{
547	if (current_rule->action_props.code)
548	grammar_midrule_action ();
549	symbol_list *p = grammar_symbol_append (sym, loc);
550	if (name)
551	assign_named_ref (p, name);
552	if (sym->content->status == undeclared \|\| sym->content->status == used)
553	sym->content->status = needed;
554	}
555
556	void
557	grammar_current_rule_action_append (const char *action, location loc,
558	named_ref *name, uniqstr type)
559	{
560	if (current_rule->action_props.code)
561	grammar_midrule_action ();
562	if (type)
563	complain (&loc, Wyacc,
564	_("POSIX Yacc does not support typed midrule actions"));
565	/ After all symbol declarations have been parsed, packgram invokes*
566	code_props_translate_code. /*
567	code_props_rule_action_init (&current_rule->action_props, action, loc,
568	current_rule,
569	name, type,
570	/ is_predicate / false);
571	}
572
573	void
574	grammar_current_rule_predicate_append (const char *pred, location loc)
575	{
576	if (current_rule->action_props.code)
577	grammar_midrule_action ();
578	code_props_rule_action_init (&current_rule->action_props, pred, loc,
579	current_rule,
580	NULL, NULL,
581	/ is_predicate / true);
582	}
583
/* Set the expected number of shift-reduce (reduce-reduce) conflicts for
 * the current rule.  If a midrule is encountered later, the count
 * is transferred to it and reset in the current rule to -1.  */
587
588	void
589	grammar_current_rule_expect_sr (int count, location loc)
590	{
591	(void) loc;
592	current_rule->expected_sr_conflicts = count;
593	}
594
595	void
596	grammar_current_rule_expect_rr (int count, location loc)
597	{
598	if (! glr_parser)
599	complain (&loc, Wother, _("%s affects only GLR parsers"),
600	"%expect-rr");
601	else
602	current_rule->expected_rr_conflicts = count;
603	}
604
605
606	/---------------------------------------------------------------.*
607	\| Convert the rules into the representation using RRHS, RLHS and \|
608	\| RITEM. \|
609	`---------------------------------------------------------------/*
610
611	static void
612	packgram (void)
613	{
614	unsigned itemno = `0`;
615	ritem = xnmalloc (nritems + `1`, sizeof *ritem);
616	/ This sentinel is used by build_relations in gram.c. /
617	*ritem++ = `0`;
618
619	rule_number ruleno = `0`;
620	rules = xnmalloc (nrules, sizeof *rules);
621
622	for (symbol_list *p = grammar; p; p = p->next)
623	{
624	symbol_list *lhs = p;
625	record_merge_function_type (lhs->merger, lhs->content.sym->content->type_name,
626	lhs->merger_declaration_loc);
627	/ If the midrule's $$ is set or its $n is used, remove the '$' from the*
628	symbol name so that it's a user-defined symbol so that the default
629	%destructor and %printer apply. /*
630	if (lhs->midrule_parent_rule / i.e., symbol_is_dummy (lhs->content.sym). /
631	&& (lhs->action_props.is_value_used
632	\|\| (symbol_list_n_get (lhs->midrule_parent_rule,
633	lhs->midrule_parent_rhs_index)
634	->action_props.is_value_used)))
635	lhs->content.sym->tag += `1`;
636
637	/ Don't check the generated rule 0. It has no action, so some rhs*
638	symbols may appear unused, but the parsing algorithm ensures that
639	%destructor's are invoked appropriately. /*
640	if (lhs != grammar)
641	grammar_rule_check_and_complete (lhs);
642
643	rules[ruleno].user_number = ruleno;
644	rules[ruleno].number = ruleno;
645	rules[ruleno].lhs = lhs->content.sym->content;
646	rules[ruleno].rhs = ritem + itemno;
647	rules[ruleno].prec = NULL;
648	rules[ruleno].dprec = lhs->dprec;
649	rules[ruleno].merger = lhs->merger;
650	rules[ruleno].precsym = NULL;
651	rules[ruleno].location = lhs->rhs_loc;
652	rules[ruleno].useful = true;
653	rules[ruleno].action = lhs->action_props.code;
654	rules[ruleno].action_loc = lhs->action_props.location;
655	rules[ruleno].is_predicate = lhs->action_props.is_predicate;
656	rules[ruleno].expected_sr_conflicts = lhs->expected_sr_conflicts;
657	rules[ruleno].expected_rr_conflicts = lhs->expected_rr_conflicts;
658
659	/ Traverse the rhs. /
660	{
661	size_t rule_length = `0`;
662	for (p = lhs->next; p->content.sym; p = p->next)
663	{
664	++rule_length;
665
666	/ Don't allow rule_length == INT_MAX, since that might*
667	cause confusion with strtol if INT_MAX == LONG_MAX. /*
668	if (rule_length == INT_MAX)
669	complain (&rules[ruleno].location, fatal, _("rule is too long"));
670
671	/ item_number = symbol_number.*
672	But the former needs to contain more: negative rule numbers. /*
673	ritem[itemno++] =
674	symbol_number_as_item_number (p->content.sym->content->number);
675	/ A rule gets by default the precedence and associativity*
676	of its last token. /*
677	if (p->content.sym->content->class == token_sym && default_prec)
678	rules[ruleno].prec = p->content.sym->content;
679	}
680	}
681
682	/ If this rule has a %prec,*
683	the specified symbol's precedence replaces the default. /*
684	if (lhs->ruleprec)
685	{
686	rules[ruleno].precsym = lhs->ruleprec->content;
687	rules[ruleno].prec = lhs->ruleprec->content;
688	}
689
690	/ An item ends by the rule number (negated). /
691	ritem[itemno++] = rule_number_as_item_number (ruleno);
692	aver (itemno < ITEM_NUMBER_MAX);
693	++ruleno;
694	aver (ruleno < RULE_NUMBER_MAX);
695	}
696
697	aver (itemno == nritems);
698
699	if (trace_flag & trace_sets)
700	ritem_print (stderr);
701	}
702
703	/------------------------------------------------------------------.*
704	\| Read in the grammar specification and record it in the format \|
705	\| described in gram.h. All actions are copied into ACTION_OBSTACK, \|
706	\| in each case forming the body of a C function (YYACTION) which \|
707	\| contains a switch statement to decide which action to execute. \|
708	`------------------------------------------------------------------/*
709
710	void
711	reader (void)
712	{
713	/ Initialize the symbol table. /
714	symbols_new ();
715
716	/ Construct the accept symbol. /
717	accept = symbol_get ("$accept", empty_loc);
718	accept->content->class = nterm_sym;
719	accept->content->number = nvars++;
720
721	/ Construct the error token /
722	errtoken = symbol_get ("error", empty_loc);
723	errtoken->content->class = token_sym;
724	errtoken->content->number = ntokens++;
725
726	/ Construct a token that represents all undefined literal tokens.*
727	It is always token number 2. /*
728	undeftoken = symbol_get ("$undefined", empty_loc);
729	undeftoken->content->class = token_sym;
730	undeftoken->content->number = ntokens++;
731
732	gram_in = xfopen (grammar_file, "r");
733
734	gram__flex_debug = trace_flag & trace_scan;
735	gram_debug = trace_flag & trace_parse;
736	gram_scanner_initialize ();
737	gram_parse ();
738	prepare_percent_define_front_end_variables ();
739
740	if (complaint_status < status_complaint)
741	check_and_convert_grammar ();
742
743	xfclose (gram_in);
744	}
745
/* Give the %define front-end variables their default values, then
   check that the values in effect are among the accepted ones.  */
static void
prepare_percent_define_front_end_variables (void)
{
  /* Set %define front-end variable defaults.  */
  muscle_percent_define_default ("lr.keep-unreachable-state", "false");
  {
    /* IELR would be a better default, but LALR is historically the
       default.  */
    muscle_percent_define_default ("lr.type", "lalr");
    /* The default for lr.default-reduction depends on lr.type.  */
    char *lr_type = muscle_percent_define_get ("lr.type");
    if (STRNEQ (lr_type, "canonical-lr"))
      muscle_percent_define_default ("lr.default-reduction", "most");
    else
      muscle_percent_define_default ("lr.default-reduction", "accepting");
    free (lr_type);
  }

  /* Check %define front-end variables.  */
  {
    /* Each group is a variable name followed by its accepted values
       and a NULL; a final NULL ends the table.  */
    static char const * const values[] =
      {
        "lr.type", "lr""(0)", "lalr", "ielr", "canonical-lr", NULL,
        "lr.default-reduction", "most", "consistent", "accepting", NULL,
        NULL
      };
    muscle_percent_define_check_values (values);
  }
}
774
775	/ Find the first LHS which is not a dummy. /
776
777	static symbol *
778	find_start_symbol (void)
779	{
780	symbol_list *res = grammar;
781	/ Skip all the possible dummy rules of the first rule. /
782	for (; symbol_is_dummy (res->content.sym); res = res->next)
783	/ Skip the LHS, and then all the RHS of the dummy rule. /
784	for (res = res->next; res->content.sym; res = res->next)
785	continue;
786	return res->content.sym;
787	}
788
789
790	/-------------------------------------------------------------.*
791	\| Check the grammar that has just been read, and convert it to \|
792	\| internal form. \|
793	`-------------------------------------------------------------/*
794
795	static void
796	check_and_convert_grammar (void)
797	{
798	/ Grammar has been read. Do some checking. /
799	if (nrules == `0`)
800	complain (NULL, fatal, _("no rules in the input grammar"));
801
802	/ If the user did not define her ENDTOKEN, do it now. /
803	if (!endtoken)
804	{
805	endtoken = symbol_get ("$end", empty_loc);
806	endtoken->content->class = token_sym;
807	endtoken->content->number = `0`;
808	/ Value specified by POSIX. /
809	endtoken->content->user_token_number = `0`;
810	}
811
812	/ Report any undefined symbols and consider them nonterminals. /
813	symbols_check_defined ();
814
815	/ Find the start symbol if no %start. /
816	if (!start_flag)
817	{
818	symbol *start = find_start_symbol ();
819	grammar_start_symbol_set (start, start->location);
820	}
821
822	/ Insert the initial rule, whose line is that of the first rule*
823	(not that of the start symbol):
824
825	$accept: %start $end. /*
826	{
827	symbol_list *p = symbol_list_sym_new (accept, empty_loc);
828	p->rhs_loc = grammar->rhs_loc;
829	p->next = symbol_list_sym_new (startsymbol, empty_loc);
830	p->next->next = symbol_list_sym_new (endtoken, empty_loc);
831	p->next->next->next = symbol_list_sym_new (NULL, empty_loc);
832	p->next->next->next->next = grammar;
833	nrules += `1`;
834	nritems += `3`;
835	grammar = p;
836	}
837
838	aver (nsyms <= SYMBOL_NUMBER_MAXIMUM);
839	aver (nsyms == ntokens + nvars);
840
841	/ Assign the symbols their symbol numbers. /
842	symbols_pack ();
843
844	/ Scan rule actions after invoking symbol_check_alias_consistency (in*
845	symbols_pack above) so that token types are set correctly before the rule
846	action type checking.
847
848	Before invoking grammar_rule_check_and_complete (in packgram
849	below) on any rule, make sure all actions have already been
850	scanned in order to set 'used' flags. Otherwise, checking that a
851	midrule's $$ should be set will not always work properly because
852	the check must forward-reference the midrule's parent rule. For
853	the same reason, all the 'used' flags must be set before checking
854	whether to remove '$' from any midrule symbol name (also in
855	packgram). /*
856	for (symbol_list *sym = grammar; sym; sym = sym->next)
857	code_props_translate_code (&sym->action_props);
858
859	/ Convert the grammar into the format described in gram.h. /
860	packgram ();
861
862	/ The grammar as a symbol_list is no longer needed. /
863	symbol_list_free (grammar);
864	}
865

Browse the source code of bison/src/reader.c