/* IELR main implementation.

   Copyright (C) 2009-2015, 2018-2019 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "system.h"

#include "ielr.h"

#include <bitset.h>
#include <timevar.h>

#include "AnnotationList.h"
#include "derives.h"
#include "getargs.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "relation.h"
#include "state.h"
#include "symtab.h"

/** Records the value of the \%define variable lr.type.  */
typedef enum
  {
    LR_TYPE__LR0,
    LR_TYPE__LALR,
    LR_TYPE__IELR,
    LR_TYPE__CANONICAL_LR
  } LrType;

/* The user's requested LR type.  */
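/* For example, "%define lr.type ielr" in the grammar file stores the
   string "ielr" in the lr.type muscle, which lr_type_get below maps to
   LR_TYPE__IELR.  */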
static LrType
lr_type_get (void)
{
  char *type = muscle_percent_define_get ("lr.type");
  LrType res;
  if (STREQ (type, "lr(0)"))
    res = LR_TYPE__LR0;
  else if (STREQ (type, "lalr"))
    res = LR_TYPE__LALR;
  else if (STREQ (type, "ielr"))
    res = LR_TYPE__IELR;
  else if (STREQ (type, "canonical-lr"))
    res = LR_TYPE__CANONICAL_LR;
  else
    {
      aver (false);
      abort ();
    }
  free (type);
  return res;
}

/**
 * \post:
 *   - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
 *     is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
 *     the same RHS are nullable nonterminals.  In other words, the follows of
 *     a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
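 *
 * For example, given a hypothetical grammar with the rules "S: A B C;",
 * "B: %empty;", and "C: %empty;", the bits for the positions of A, B, and
 * C in S's RHS are all set: each is a nonterminal followed within its RHS
 * only by nullable nonterminals.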
 */
static bitset
ielr_compute_ritem_sees_lookahead_set (void)
{
  bitset result = bitset_create (nritems, BITSET_FIXED);
  unsigned i = nritems-1;
  while (0 < i)
    {
      --i;
      while (!item_number_is_rule_number (ritem[i])
             && ISVAR (ritem[i])
             && nullable [item_number_as_symbol_number (ritem[i]) - ntokens])
        bitset_set (result, i--);
      if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
        bitset_set (result, i--);
      while (!item_number_is_rule_number (ritem[i]) && 0 < i)
        --i;
    }
  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "ritem_sees_lookahead_set:\n");
      debug_bitset (result);
      fprintf (stderr, "\n");
    }
  return result;
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 * \post:
 *   - Each of \c *edgesp and \c *edge_countsp is a new array of size
 *     \c ::ngotos.
 *   - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
 *     <tt>(*edge_countsp)[i]+1</tt>.
 *   - In such a \c goto_number array, the last element is \c ::END_NODE.
 *   - All remaining elements are the indices of the gotos to which there is an
 *     internal follow edge from goto \c i.
 *   - There is an internal follow edge from goto \c i to goto \c j iff both:
 *     - The from states of gotos \c i and \c j are the same.
 *     - The transition nonterminal for goto \c i appears as the first RHS
 *       symbol of at least one production for which both:
 *       - The LHS is the transition symbol of goto \c j.
 *       - All other RHS symbols are nullable nonterminals.
 *     - In other words, the follows of goto \c i include the follows of
 *       goto \c j and it's an internal edge because the from states are the
 *       same.
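 *
 * For example (a hypothetical grammar): given the production "A: B C;" with
 * \c C nullable, a state with gotos on both \c B and \c A has an internal
 * follow edge from the goto on \c B to the goto on \c A, so the follows of
 * the goto on \c B include the follows of the goto on \c A.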
 */
static void
ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
                                    goto_number ***edgesp, int **edge_countsp)
{
  *edgesp = xnmalloc (ngotos, sizeof **edgesp);
  *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
  {
    bitset sources = bitset_create (ngotos, BITSET_FIXED);
    for (goto_number i = 0; i < ngotos; ++i)
      (*edge_countsp)[i] = 0;
    for (goto_number i = 0; i < ngotos; ++i)
      {
        int nsources = 0;
        {
          for (rule **rulep = derives[states[to_state[i]]->accessing_symbol
                                      - ntokens];
               *rulep;
               ++rulep)
            {
              /* If there is at least one RHS symbol, if the first RHS symbol
                 is a nonterminal, and if all remaining RHS symbols (if any)
                 are nullable nonterminals, create an edge from the LHS
                 symbol's goto to the first RHS symbol's goto such that the RHS
                 symbol's goto will be the source of the edge after the
                 eventual relation_transpose below.

                 Unlike in ielr_compute_always_follows, I use a bitset for
                 edges rather than an array because it is possible that
                 multiple RHS's with the same first symbol could fit and thus
                 that we could end up with redundant edges.  With the
                 possibility of redundant edges, it's hard to know ahead of
                 time how large to make such an array.  Another possible
                 redundancy is that source and destination might be the same
                 goto.  Eliminating all these possible redundancies now might
                 possibly help performance a little.  I have not proven any of
                 this, but I'm guessing the bitset shouldn't entail much of a
                 performance penalty, if any.  */
              if (bitset_test (ritem_sees_lookahead_set,
                               (*rulep)->rhs - ritem))
                {
                  goto_number source =
                    map_goto (from_state[i],
                              item_number_as_symbol_number (*(*rulep)->rhs));
                  if (i != source && !bitset_test (sources, source))
                    {
                      bitset_set (sources, source);
                      ++nsources;
                      ++(*edge_countsp)[source];
                    }
                }
            }
        }
        if (nsources == 0)
          (*edgesp)[i] = NULL;
        else
          {
            (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
            {
              bitset_iterator biter_source;
              bitset_bindex source;
              int j = 0;
              BITSET_FOR_EACH (biter_source, sources, source, 0)
                (*edgesp)[i][j++] = source;
            }
            (*edgesp)[i][nsources] = END_NODE;
          }
        bitset_zero (sources);
      }
    bitset_free (sources);
  }

  relation_transpose (edgesp, ngotos);

  if (trace_flag & trace_ielr)
    relation_print ("internal_follow_edges", *edgesp, ngotos, NULL, stderr);
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 *   - \c internal_follow_edges was computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post:
 *   - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
 *     is \c ::ngotos and the number of columns is the maximum number of
 *     kernel items in any state.
 *   - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
 *     \c i include the lookahead set of item \c j in the from state of goto
 *     \c i.
 *   - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there
 *     is no item \c j in the from state of goto \c i.
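 *
 * For example (a hypothetical grammar): if the from state of a goto on \c B
 * contains the kernel item "S: a . B C" with \c C nullable, the follows of
 * that goto include that item's lookahead set, so the corresponding bit is
 * set.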
 */
static void
ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
                                  goto_number **internal_follow_edges,
                                  bitsetv *follow_kernel_itemsp)
{
  {
    size_t max_nitems = 0;
    for (state_number i = 0; i < nstates; ++i)
      if (states[i]->nitems > max_nitems)
        max_nitems = states[i]->nitems;
    *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
  }
  for (goto_number i = 0; i < ngotos; ++i)
    {
      size_t nitems = states[from_state[i]]->nitems;
      item_number *items = states[from_state[i]]->items;
      for (size_t j = 0; j < nitems; ++j)
        /* If this item has this goto and if all subsequent symbols in this
           RHS (if any) are nullable nonterminals, then record this item as
           one whose lookahead set is included in this goto's follows.  */
        if (item_number_is_symbol_number (ritem[items[j]])
            && item_number_as_symbol_number (ritem[items[j]])
               == states[to_state[i]]->accessing_symbol
            && bitset_test (ritem_sees_lookahead_set, items[j]))
          bitset_set ((*follow_kernel_itemsp)[i], j);
    }
  relation_digraph (internal_follow_edges, ngotos, *follow_kernel_itemsp);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "follow_kernel_items:\n");
      debug_bitsetv (*follow_kernel_itemsp);
    }
}

/**
 * \pre
 *   - \c *edgesp and \c edge_counts were computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post
 *   - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
 *     \c ntokens columns.
 *   - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
 *     follow (that is, it's computed by internal and successor edges) of goto
 *     \c i.
 *   - Rows of \c *edgesp have been realloc'ed and extended to include
 *     successor follow edges.  \c edge_counts has not been updated.
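 *
 * For example, every token shifted out of the to state of goto \c i is an
 * always follow of goto \c i, and a goto on a nullable nonterminal in that
 * to state contributes a successor follow edge through which that goto's
 * follows propagate back to goto \c i.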
 */
static void
ielr_compute_always_follows (goto_number ***edgesp,
                             int const edge_counts[],
                             bitsetv *always_followsp)
{
  *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
  {
    goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
    for (goto_number i = 0; i < ngotos; ++i)
      {
        goto_number nedges = edge_counts[i];
        {
          int j;
          transitions *trans = states[to_state[i]]->transitions;
          FOR_EACH_SHIFT (trans, j)
            bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
          for (; j < trans->num; ++j)
            {
              symbol_number sym = TRANSITION_SYMBOL (trans, j);
              if (nullable[sym - ntokens])
                edge_array[nedges++] = map_goto (to_state[i], sym);
            }
        }
        if (nedges - edge_counts[i])
          {
            (*edgesp)[i] =
              xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
            memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
                    (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
            (*edgesp)[i][nedges] = END_NODE;
          }
      }
    free (edge_array);
  }
  relation_digraph (*edgesp, ngotos, *always_followsp);

  if (trace_flag & trace_ielr)
    {
      relation_print ("always follow edges", *edgesp, ngotos, NULL, stderr);
      fprintf (stderr, "always_follows:\n");
      debug_bitsetv (*always_followsp);
    }
}

/**
 * \post
 *   - \c result is a new array of size \c ::nstates.
 *   - <tt>result[i]</tt> is an array of pointers to the predecessor
 *     <tt>state</tt>'s of state \c i.
 *   - The last element of such an array is \c NULL.
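 *
 * Because each such array is \c NULL terminated, the predecessors of
 * <tt>states[i]</tt> can be visited as, for example:
 *
 *     for (state **p = result[i]; *p; ++p)
 *       ... (*p)->number ...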
 */
static state ***
ielr_compute_predecessors (void)
{
  int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
  state ***result = xnmalloc (nstates, sizeof *result);
  for (state_number i = 0; i < nstates; ++i)
    predecessor_counts[i] = 0;
  for (state_number i = 0; i < nstates; ++i)
    for (int j = 0; j < states[i]->transitions->num; ++j)
      ++predecessor_counts[states[i]->transitions->states[j]->number];
  for (state_number i = 0; i < nstates; ++i)
    {
      result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]);
      result[i][predecessor_counts[i]] = NULL;
      predecessor_counts[i] = 0;
    }
  for (state_number i = 0; i < nstates; ++i)
    for (int j = 0; j < states[i]->transitions->num; ++j)
      {
        state_number k = states[i]->transitions->states[j]->number;
        result[k][predecessor_counts[k]++] = states[i];
      }
  free (predecessor_counts);
  return result;
}

/**
 * \post
 *   - \c *follow_kernel_itemsp and \c *always_followsp were computed by
 *     \c ielr_compute_follow_kernel_items and
 *     \c ielr_compute_always_follows.
 *   - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
 *     by \c ielr_compute_predecessors.
 */
static void
ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
                               bitsetv *always_followsp,
                               state ****predecessorsp)
{
  goto_number **edges;
  int *edge_counts;
  {
    bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
    ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
                                        &edges, &edge_counts);
    ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
                                      follow_kernel_itemsp);
    bitset_free (ritem_sees_lookahead_set);
  }
  ielr_compute_always_follows (&edges, edge_counts, always_followsp);
  for (int i = 0; i < ngotos; ++i)
    free (edges[i]);
  free (edges);
  free (edge_counts);
  if (predecessorsp)
    *predecessorsp = ielr_compute_predecessors ();
}

/**
 * \note
 *   - FIXME: It might be an interesting experiment to compare the space and
 *     time efficiency of computing \c item_lookahead_sets either:
 *     - Fully up front.
 *     - Just-in-time, as implemented below.
 *     - Not at all.  That is, just let annotations continue even when
 *       unnecessary.
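 *
 * For example (a hypothetical grammar): the lookahead set of a kernel item
 * "A: b . c" is the union over all predecessor states of the follows of
 * their gotos on \c A, whereas the lookahead set of "A: b c . d" is the
 * union of the lookahead sets of the kernel item "A: b . c d" in each
 * predecessor state, computed recursively on demand.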
 */
bool
ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
                         symbol_number lookahead, state ***predecessors,
                         bitset **item_lookahead_sets)
{
  if (!item_lookahead_sets[s->number])
    {
      item_lookahead_sets[s->number] =
        xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
      for (size_t i = 0; i < s->nitems; ++i)
        item_lookahead_sets[s->number][i] = NULL;
    }
  if (!item_lookahead_sets[s->number][item])
    {
      item_lookahead_sets[s->number][item] =
        bitset_create (ntokens, BITSET_FIXED);
      /* If this kernel item is the beginning of a RHS, it must be the kernel
         item in the start state, and so its LHS has no follows and no goto to
         check.  If, instead, this kernel item is the successor of the start
         state's kernel item, there are still no follows and no goto.  This
         situation is fortunate because we want to avoid the - 2 below in both
         cases.

         Actually, IELR(1) should never invoke this function for either of
         those cases because (1) follow_kernel_items will never reference a
         kernel item for this RHS because the end token blocks sight of the
         lookahead set from the RHS's only nonterminal, and (2) no reduction
         has a lookback dependency on this lookahead set.  Nevertheless, I
         didn't change this test to an aver just in case the usage of this
         function evolves to need those two cases.  In both cases, the current
         implementation returns the right result.  */
      if (s->items[item] > 1)
        {
          /* If the LHS symbol of this item isn't known (because this is a
             top-level invocation), go get it.  */
          if (!lhs)
            {
              unsigned i;
              for (i = s->items[item];
                   !item_number_is_rule_number (ritem[i]);
                   ++i)
                continue;
              lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number;
            }
          /* If this kernel item is next to the beginning of the RHS, then
             check all predecessors' goto follows for the LHS.  */
          if (item_number_is_rule_number (ritem[s->items[item] - 2]))
            {
              aver (lhs != accept->content->number);
              for (state **predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                bitset_or (item_lookahead_sets[s->number][item],
                           item_lookahead_sets[s->number][item],
                           goto_follows[map_goto ((*predecessor)->number,
                                                  lhs)]);
            }
          /* If this kernel item is later in the RHS, then check all
             predecessor items' lookahead sets.  */
          else
            {
              for (state **predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                {
                  size_t predecessor_item;
                  for (predecessor_item = 0;
                       predecessor_item < (*predecessor)->nitems;
                       ++predecessor_item)
                    if ((*predecessor)->items[predecessor_item]
                        == s->items[item] - 1)
                      break;
                  aver (predecessor_item != (*predecessor)->nitems);
                  ielr_item_has_lookahead (*predecessor, lhs,
                                           predecessor_item, 0 /*irrelevant*/,
                                           predecessors, item_lookahead_sets);
                  bitset_or (item_lookahead_sets[s->number][item],
                             item_lookahead_sets[s->number][item],
                             item_lookahead_sets[(*predecessor)->number]
                               [predecessor_item]);
                }
            }
        }
    }
  return bitset_test (item_lookahead_sets[s->number][item], lookahead);
}

/**
 * \pre
 *   - \c follow_kernel_items, \c always_follows, and \c predecessors
 *     were computed by \c ielr_compute_auxiliary_tables.
 * \post
 *   - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
 *     points to a new array of size \c ::nstates.
 *   - For <tt>0 <= i < ::nstates</tt>:
 *     - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
 *       node for <tt>states[i]</tt>.
 *     - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
 *       node for <tt>states[i]</tt>.
 *   - <tt>*max_annotationsp</tt> is the maximum number of annotations per
 *     state.
 */
static void
ielr_compute_annotation_lists (bitsetv follow_kernel_items,
                               bitsetv always_follows, state ***predecessors,
                               AnnotationIndex *max_annotationsp,
                               InadequacyList ***inadequacy_listsp,
                               AnnotationList ***annotation_listsp,
                               struct obstack *annotations_obstackp)
{
  bitset **item_lookahead_sets =
    xnmalloc (nstates, sizeof *item_lookahead_sets);
  AnnotationIndex *annotation_counts =
    xnmalloc (nstates, sizeof *annotation_counts);
  ContributionIndex max_contributions = 0;
  unsigned total_annotations = 0;

  *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
  *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
  for (state_number i = 0; i < nstates; ++i)
    {
      item_lookahead_sets[i] = NULL;
      (*inadequacy_listsp)[i] = NULL;
      (*annotation_listsp)[i] = NULL;
      annotation_counts[i] = 0;
    }
  {
    InadequacyListNodeCount inadequacy_list_node_count = 0;
    for (state_number i = 0; i < nstates; ++i)
      AnnotationList__compute_from_inadequacies (
        states[i], follow_kernel_items, always_follows, predecessors,
        item_lookahead_sets, *inadequacy_listsp, *annotation_listsp,
        annotation_counts, &max_contributions, annotations_obstackp,
        &inadequacy_list_node_count);
  }
  *max_annotationsp = 0;
  for (state_number i = 0; i < nstates; ++i)
    {
      if (annotation_counts[i] > *max_annotationsp)
        *max_annotationsp = annotation_counts[i];
      total_annotations += annotation_counts[i];
    }
  if (trace_flag & trace_ielr)
    {
      for (state_number i = 0; i < nstates; ++i)
        {
          fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
          AnnotationList__debug ((*annotation_listsp)[i],
                                 states[i]->nitems, 2);
        }
      fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
      fprintf (stderr, "Average number of annotations per state: %f\n",
               (float)total_annotations/nstates);
      fprintf (stderr, "Max number of annotations per state: %d\n",
               *max_annotationsp);
      fprintf (stderr, "Max number of contributions per annotation: %d\n",
               max_contributions);
    }
  for (state_number i = 0; i < nstates; ++i)
    if (item_lookahead_sets[i])
      {
        for (size_t j = 0; j < states[i]->nitems; ++j)
          if (item_lookahead_sets[i][j])
            bitset_free (item_lookahead_sets[i][j]);
        free (item_lookahead_sets[i]);
      }
  free (item_lookahead_sets);
  free (annotation_counts);
}

typedef struct state_list
{
  struct state_list *next;
  state *state;
  /** Has this state been recomputed as a successor of another state?  */
  bool recomputedAsSuccessor;
  /**
   * \c NULL iff all lookahead sets are empty.  <tt>lookaheads[i] = NULL</tt>
   * iff the lookahead set on item \c i is empty.
   */
  bitset *lookaheads;
  /**
   * nextIsocore is the next state in a circularly linked-list of all states
   * with the same core.  The one originally computed by generate_states in
   * lr0.c is lr0Isocore.
   */
  struct state_list *lr0Isocore;
  struct state_list *nextIsocore;
} state_list;
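
/* A sketch of how the isocore ring above is typically walked (cf. the loop
   in ielr_compute_state below): given some state *t, following nextIsocore
   from t->state_list visits every isocore of the same core exactly once
   before arriving back at the starting member:

     state_list *sl = t->state_list;
     do
       {
         ...  // visit sl
       }
     while ((sl = sl->nextIsocore) != t->state_list);
*/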

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - <tt>n->class = nterm_sym</tt>.
 * \post
 *   - \c follow_set contains the follow set for the goto on nonterminal \c n
 *     in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
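 *
 * That is, \c follow_set is the union of the goto's always follows and, for
 * each kernel item whose bit is set in the goto's row of
 * \c follow_kernel_items, the lookaheads currently stored for that item in
 * \c s.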
 */
static void
ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
                              bitsetv always_follows, state_list *s,
                              sym_content *n, bitset follow_set)
{
  goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
  bitset_copy (follow_set, always_follows[n_goto]);
  if (s->lookaheads)
    {
      bitset_iterator biter_item;
      bitset_bindex item;
      BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
        if (s->lookaheads[item])
          bitset_or (follow_set, follow_set, s->lookaheads[item]);
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - \c lookahead_filter was computed by
 *     \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
 *     of \c t.
 *   - The number of rows in \c lookaheads is at least the number of items in
 *     \c t, and the number of columns is \c ::ntokens.
 * \post
 *   - <tt>lookaheads[i][j]</tt> is set iff both:
 *     - <tt>lookahead_filter[i][j]</tt> is set.
 *     - The isocore of \c t that will be the transition successor of \c s will
 *       inherit from \c s token \c j into the lookahead set of item \c i.
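 *
 * For example (a hypothetical grammar): if kernel item \c i of \c t is
 * "A: b . c" (the dot just after the first RHS symbol), the inherited
 * lookaheads are the follow set of the goto on \c A in \c s; if it is
 * "A: b c . d", they are copied from the lookahead set of s's kernel item
 * "A: b . c d".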
 */
static void
ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
                         state_list *s, state *t, bitsetv lookahead_filter,
                         bitsetv lookaheads)
{
  size_t s_item = 0;
  bitsetv_zero (lookaheads);
  for (size_t t_item = 0; t_item < t->nitems; ++t_item)
    {
      /* If this kernel item is the beginning of a RHS, it must be the
         kernel item in the start state, but t is supposed to be a successor
         state.  If, instead, this kernel item is the successor of the start
         state's kernel item, the lookahead set is empty.  This second case is
         a special case to avoid the - 2 below, but the next successor can be
         handled fine without special casing it.  */
      aver (t->items[t_item] != 0);
      if (t->items[t_item] > 1
          && !bitset_empty_p (lookahead_filter[t_item]))
        {
          if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
            {
              unsigned rule_item;
              for (rule_item = t->items[t_item];
                   !item_number_is_rule_number (ritem[rule_item]);
                   ++rule_item)
                ;
              ielr_compute_goto_follow_set (
                follow_kernel_items, always_follows, s,
                rules[item_number_as_rule_number (ritem[rule_item])].lhs,
                lookaheads[t_item]);
            }
          else if (s->lookaheads)
            {
              /* We don't have to start the s item search at the beginning
                 every time because items from both states are sorted by their
                 indices in ritem.  */
              for (; s_item < s->state->nitems; ++s_item)
                if (s->state->items[s_item] == t->items[t_item] - 1)
                  break;
              aver (s_item != s->state->nitems);
              if (s->lookaheads[s_item])
                bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
            }
          bitset_and (lookaheads[t_item],
                      lookaheads[t_item], lookahead_filter[t_item]);
        }
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
 *     - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
 *   - The number of rows in each of \c lookaheads and \c work2 is the maximum
 *     number of items in any state.  The number of columns in each is
 *     \c ::ntokens.
 *   - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
 *   - \c ::nstates is the total number of states, some not yet fully computed,
 *     in the list ending at \c *last_statep.  It is at least the number of
 *     original LR(0) states.
 *   - The size of \c work1 is at least the number of annotations for the LR(0)
 *     isocore of \c t.
 * \post
 *   - Either:
 *     - In the case that <tt>annotation_lists != NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
 *       permitted by the annotations for the original LR(0) isocore of \c t.
 *       If this changed the lookaheads in that isocore, those changes were
 *       propagated to all already computed transition successors recursively
 *       possibly resulting in the splitting of some of those successors.
 *     - In the case that <tt>annotation_lists = NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
 *       isocore's lookahead sets were identical to those specified by
 *       <tt>lookaheads \@pre</tt>.
 *     - If no such merge was permitted, a new isocore of \c t containing
 *       <tt>lookaheads \@pre</tt> was appended to the state list whose
 *       previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
 *       incremented.  It was also appended to \c t's isocore list.
 *   - <tt>*tp</tt> = the isocore of \c t into which
 *     <tt>lookaheads \@pre</tt> was placed/merged.
 *   - \c lookaheads, \c work1, and \c work2 may have been altered.
 */
static void
ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
                    AnnotationList **annotation_lists, state *t,
                    bitsetv lookaheads, state_list **last_statep,
                    ContributionIndex work1[], bitsetv work2, state **tp)
{
  state_list *lr0_isocore = t->state_list->lr0Isocore;
  state_list **this_isocorep;

  /* Determine whether there's an isocore of t with which these lookaheads can
     be merged.  */
  {
    AnnotationIndex ai;
    AnnotationList *a;
    if (annotation_lists)
      for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
           a;
           ++ai, a = a->next)
        work1[ai] =
          AnnotationList__computeDominantContribution (
            a, lr0_isocore->state->nitems, lookaheads, false);
    for (this_isocorep = &t->state_list;
         this_isocorep == &t->state_list || *this_isocorep != t->state_list;
         this_isocorep = &(*this_isocorep)->nextIsocore)
      {
        if (!(*this_isocorep)->recomputedAsSuccessor)
          break;
        if (annotation_lists)
          {
            for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
                 a;
                 ++ai, a = a->next)
              {
                if (work1[ai] != ContributionIndex__none)
                  {
                    /* This isocore compatibility test depends on the fact
                       that, if the dominant contributions are the same for
                       the two isocores, then merging their lookahead sets
                       will not produce a state with a different dominant
                       contribution.  */
                    ContributionIndex ci =
                      AnnotationList__computeDominantContribution (
                        a, lr0_isocore->state->nitems,
                        (*this_isocorep)->lookaheads, false);
                    if (ci != ContributionIndex__none && work1[ai] != ci)
                      break;
                  }
              }
            if (!a)
              break;
          }
        else
          {
            size_t i;
            for (i = 0; i < t->nitems; ++i)
              {
                if (!(*this_isocorep)->lookaheads
                    || !(*this_isocorep)->lookaheads[i])
                  {
                    if (!bitset_empty_p (lookaheads[i]))
                      break;
                  }
                /* bitset_equal_p uses the size of the first argument,
                   so lookaheads[i] must be the second argument.  */
                else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
                                          lookaheads[i]))
                  break;
              }
            if (i == t->nitems)
              break;
          }
      }
  }

  bool has_lookaheads = false;
  for (size_t i = 0; i < lr0_isocore->state->nitems; ++i)
    if (!bitset_empty_p (lookaheads[i]))
      {
        has_lookaheads = true;
        break;
      }

  /* Merge with an existing isocore.  */
  if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
    {
      bool new_lookaheads = false;
      *tp = (*this_isocorep)->state;

      /* Merge lookaheads into the state and record whether any of them are
         actually new.  */
      if (has_lookaheads)
        {
          size_t i;
          if (!(*this_isocorep)->lookaheads)
            {
              (*this_isocorep)->lookaheads =
                xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
              for (i = 0; i < t->nitems; ++i)
                (*this_isocorep)->lookaheads[i] = NULL;
            }
          for (i = 0; i < t->nitems; ++i)
            if (!bitset_empty_p (lookaheads[i]))
              {
                if (!(*this_isocorep)->lookaheads[i])
                  (*this_isocorep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                bitset_andn (lookaheads[i],
                             lookaheads[i], (*this_isocorep)->lookaheads[i]);
                bitset_or ((*this_isocorep)->lookaheads[i],
                           lookaheads[i], (*this_isocorep)->lookaheads[i]);
                if (!bitset_empty_p (lookaheads[i]))
                  new_lookaheads = true;
              }
        }

      /* If new lookaheads were merged, propagate those lookaheads to the
         successors, possibly splitting them.  If *tp is being recomputed for
         the first time, this isn't necessary because the main
         ielr_split_states loop will handle the successors later.  */
      if (!(*this_isocorep)->recomputedAsSuccessor)
        (*this_isocorep)->recomputedAsSuccessor = true;
      else if (new_lookaheads)
        {
          /* When merging demands identical lookahead sets, it is impossible
             to merge new lookaheads.  */
          aver (annotation_lists);
          for (int i = 0; i < (*tp)->transitions->num; ++i)
            {
              state *t2 = (*tp)->transitions->states[i];
              /* At any time, there's at most one state for which we have so
                 far initially recomputed only some of its successors in the
                 main ielr_split_states loop.  Because we recompute successors
                 in order, we can just stop at the first such successor.  Of
                 course, *tp might be a state some of whose successors have
                 been recomputed as successors of other states rather than as
                 successors of *tp.  It's fine if we go ahead and propagate to
                 some of those.  We'll propagate to them again (but stop when
                 we see nothing changes) and to the others when we reach *tp in
                 the main ielr_split_states loop later.  */
              if (!t2->state_list->recomputedAsSuccessor)
                break;
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t2->state_list->lr0Isocore->state->number],
                t2->nitems, work2);
              ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                       (*this_isocorep), t2, work2,
                                       lookaheads);
              /* FIXME: If splitting t2 here, it's possible that lookaheads
                 that had already propagated from *tp to t2 will be left in t2
                 after *tp has been removed as t2's predecessor:
                 - We're going to recompute all lookaheads in phase 4, so these
                   extra lookaheads won't appear in the final parser table.
                 - If t2 has just one annotation, then these extra lookaheads
                   cannot alter the dominating contribution to the associated
                   inadequacy and thus cannot needlessly prevent a future merge
                   of some new state with t2.  We can be sure of this because:
                   - The fact that we're splitting t2 now means that some
                     predecessors (at least one) other than *tp must be
                     propagating contributions to t2.
                   - The fact that t2 was merged in the first place means that,
                     if *tp propagated any contributions, the dominating
                     contribution must be the same as that from those other
                     predecessors.
                   - Thus, if some new state to be merged with t2 in the future
                     proves to be compatible with the contributions propagated
                     by the other predecessors, it will also be compatible with
                     the contributions made by the extra lookaheads left behind
                     by *tp.
                 - However, if t2 has more than one annotation and these extra
                   lookaheads contribute to one of their inadequacies, it's
                   possible these extra lookaheads may needlessly prevent a
                   future merge with t2.  For example:
                   - Let's say there's an inadequacy A that makes the split
                     necessary as follows:
                     - There's currently just one other predecessor and it
                       propagates to t2 some contributions to inadequacy A.
                     - The new lookaheads that we were attempting to propagate
                       from *tp to t2 made contributions to inadequacy A with a
                       different dominating contribution than those from that
                       other predecessor.
                     - The extra lookaheads either make no contribution to
                       inadequacy A or have the same dominating contribution as
                       the contributions from the other predecessor.  Either
                       way, as explained above, they can't prevent a future
                       merge.
                   - Let's say there's an inadequacy B that causes the trouble
                     with future merges as follows:
                     - The extra lookaheads make contributions to inadequacy B.
                     - Those extra contributions did not prevent the original
                       merge to create t2 because the other predecessor
                       propagates to t2 no contributions to inadequacy B.
                     - Thus, those extra contributions may prevent a future
                       merge with t2 even though the merge would be fine if *tp
                       had not left them behind.
                 - Is the latter case common enough to worry about?
                 - Perhaps we should track all predecessors and iterate them
                   now to recreate t2 without those extra lookaheads.  */
              ielr_compute_state (follow_kernel_items, always_follows,
                                  annotation_lists, t2, lookaheads,
                                  last_statep, work1, work2,
                                  &(*tp)->transitions->states[i]);
            }
        }
    }

  /* Create a new isocore.  */
  else
    {
      state_list *old_isocore = *this_isocorep;
      (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
      *last_statep = *this_isocorep;
      (*last_statep)->state = *tp = state_new_isocore (t);
      (*tp)->state_list = *last_statep;
      (*last_statep)->recomputedAsSuccessor = true;
      (*last_statep)->next = NULL;
      (*last_statep)->lookaheads = NULL;
      if (has_lookaheads)
        {
          (*last_statep)->lookaheads =
            xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
          for (size_t i = 0; i < t->nitems; ++i)
            {
              if (bitset_empty_p (lookaheads[i]))
                (*last_statep)->lookaheads[i] = NULL;
              else
                {
                  (*last_statep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                  bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
                }
            }
        }
      (*last_statep)->lr0Isocore = lr0_isocore;
      (*last_statep)->nextIsocore = old_isocore;
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and <tt>max_annotations = 0</tt>.
 *     - \c annotation_lists and \c max_annotations were computed by
 *       \c ielr_compute_annotation_lists.
 * \post
 *   - \c ::states is of size \c ::nstates (which might be greater than
 *     <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
 *     inadequacy.  \c annotation_lists was used to determine state
 *     compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
 *     LR(1) state compatibility test was used.
 *   - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
 *     computed in all states.  tv_ielr_phase4 was pushed while they were
 *     computed from item lookahead sets.
 */
static void
ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
                   AnnotationList **annotation_lists,
                   AnnotationIndex max_annotations)
{
  state_list *first_state;
  state_list *last_state;
  bitsetv lookahead_filter = NULL;
  bitsetv lookaheads;

  /* Set up state list and some reusable bitsets.  */
  {
    size_t max_nitems = 0;
    state_list **nodep = &first_state;
    for (state_number i = 0; i < nstates; ++i)
      {
        *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
        (*nodep)->state = states[i];
        (*nodep)->recomputedAsSuccessor = false;
        (*nodep)->lookaheads = NULL;
        (*nodep)->lr0Isocore = *nodep;
        (*nodep)->nextIsocore = *nodep;
        nodep = &(*nodep)->next;
        if (states[i]->nitems > max_nitems)
          max_nitems = states[i]->nitems;
      }
    *nodep = NULL;
    lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
    if (!annotation_lists)
      bitsetv_ones (lookahead_filter);
    lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
  }

  /* Recompute states.  */
  {
    ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
    for (state_list *this_state = first_state;
         this_state;
         this_state = this_state->next)
      {
        state *s = this_state->state;
        for (int i = 0; i < s->transitions->num; ++i)
          {
            state *t = s->transitions->states[i];
            if (annotation_lists)
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t->state_list->lr0Isocore->state->number],
                t->nitems, lookahead_filter);
            ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                     this_state, t, lookahead_filter,
                                     lookaheads);
            ielr_compute_state (follow_kernel_items, always_follows,
                                annotation_lists, t, lookaheads, &last_state,
                                work, lookahead_filter,
                                &s->transitions->states[i]);
          }
      }
    free (work);
  }

  bitsetv_free (lookahead_filter);
  bitsetv_free (lookaheads);

  /* Store states back in the states array.  */
  states = xnrealloc (states, nstates, sizeof *states);
  for (state_list *node = first_state; node; node = node->next)
    states[node->state->number] = node->state;

  /* In the case of canonical LR(1), copy item lookahead sets to reduction
     lookahead sets.  */
  if (!annotation_lists)
    {
      timevar_push (tv_ielr_phase4);
      initialize_LA ();
      for (state_list *node = first_state; node; node = node->next)
        if (!node->state->consistent)
          {
            size_t i = 0;
            item_number *itemset = node->state->items;
            for (size_t r = 0; r < node->state->reductions->num; ++r)
              {
                rule *this_rule = node->state->reductions->rules[r];
                bitset lookahead_set =
                  node->state->reductions->lookahead_tokens[r];
                if (item_number_is_rule_number (*this_rule->rhs))
                  ielr_compute_goto_follow_set (follow_kernel_items,
                                                always_follows, node,
                                                this_rule->lhs, lookahead_set);
                else if (node->lookaheads)
                  {
                    /* We don't need to start the kernel item search back at
                       i=0 because both items and reductions are sorted on
                       rule number.  */
                    while (!item_number_is_rule_number (ritem[itemset[i]])
                           || item_number_as_rule_number (ritem[itemset[i]])
                              != this_rule->number)
                      {
                        ++i;
                        aver (i < node->state->nitems);
                      }
                    if (node->lookaheads[i])
                      bitset_copy (lookahead_set, node->lookaheads[i]);
                  }
              }
          }
      timevar_pop (tv_ielr_phase4);
    }

  /* Free state list.  */
  while (first_state)
    {
      state_list *node = first_state;
      if (node->lookaheads)
        {
          for (size_t i = 0; i < node->state->nitems; ++i)
            if (node->lookaheads[i])
              bitset_free (node->lookaheads[i]);
          free (node->lookaheads);
        }
      first_state = node->next;
      free (node);
    }
}


void
ielr (void)
{
  LrType lr_type = lr_type_get ();

  /* Phase 0: LALR(1).  */
  switch (lr_type)
    {
    case LR_TYPE__LR0:
      timevar_push (tv_lalr);
      set_goto_map ();
      timevar_pop (tv_lalr);
      return;

    case LR_TYPE__CANONICAL_LR:
      timevar_push (tv_lalr);
      set_goto_map ();
      timevar_pop (tv_lalr);
      break;

    case LR_TYPE__LALR:
      timevar_push (tv_lalr);
      lalr ();
      bitsetv_free (goto_follows);
      timevar_pop (tv_lalr);
      return;

    case LR_TYPE__IELR:
      timevar_push (tv_lalr);
      lalr ();
      timevar_pop (tv_lalr);
      break;
    }

  {
    bitsetv follow_kernel_items;
    bitsetv always_follows;
    InadequacyList **inadequacy_lists = NULL;
    AnnotationList **annotation_lists = NULL;
    struct obstack annotations_obstack;
    AnnotationIndex max_annotations = 0;

    {
      /* Phase 1: Compute Auxiliary Tables.  */
      state ***predecessors;
      timevar_push (tv_ielr_phase1);
      ielr_compute_auxiliary_tables (
        &follow_kernel_items, &always_follows,
        lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
      timevar_pop (tv_ielr_phase1);

      /* Phase 2: Compute Annotations.  */
      timevar_push (tv_ielr_phase2);
      if (lr_type != LR_TYPE__CANONICAL_LR)
        {
          obstack_init (&annotations_obstack);
          ielr_compute_annotation_lists (follow_kernel_items, always_follows,
                                         predecessors, &max_annotations,
                                         &inadequacy_lists, &annotation_lists,
                                         &annotations_obstack);
          for (state_number i = 0; i < nstates; ++i)
            free (predecessors[i]);
          free (predecessors);
          bitsetv_free (goto_follows);
          lalr_free ();
        }
      timevar_pop (tv_ielr_phase2);
    }

    /* Phase 3: Split States.  */
    timevar_push (tv_ielr_phase3);
    {
      state_number nstates_lr0 = nstates;
      ielr_split_states (follow_kernel_items, always_follows,
                         annotation_lists, max_annotations);
      if (inadequacy_lists)
        for (state_number i = 0; i < nstates_lr0; ++i)
          InadequacyList__delete (inadequacy_lists[i]);
    }
    free (inadequacy_lists);
    if (annotation_lists)
      obstack_free (&annotations_obstack, NULL);
    free (annotation_lists);
    bitsetv_free (follow_kernel_items);
    bitsetv_free (always_follows);
    timevar_pop (tv_ielr_phase3);
  }

  /* Phase 4: Compute Reduction Lookaheads.  */
  timevar_push (tv_ielr_phase4);
  free (goto_map);
  free (from_state);
  free (to_state);
  if (lr_type == LR_TYPE__CANONICAL_LR)
    {
      /* Reduction lookaheads are computed in ielr_split_states above
         but are timed as part of phase 4.  */
      set_goto_map ();
    }
  else
    {
      lalr ();
      bitsetv_free (goto_follows);
    }
  timevar_pop (tv_ielr_phase4);
}