/* IELR main implementation.

   Copyright (C) 2009-2015, 2018-2019 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "system.h"

#include "ielr.h"

#include <bitset.h>
#include <timevar.h>

#include "AnnotationList.h"
#include "derives.h"
#include "getargs.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "relation.h"
#include "state.h"
#include "symtab.h"

/** Records the value of the \%define variable lr.type.  */
typedef enum
  {
    LR_TYPE__LR0,
    LR_TYPE__LALR,
    LR_TYPE__IELR,
    LR_TYPE__CANONICAL_LR
  } LrType;

/* The user's requested LR type.  */
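/* For example, "%define lr.type ielr" in the grammar file stores the
   string "ielr" in the lr.type muscle, which lr_type_get below maps to
   LR_TYPE__IELR.  */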
static LrType
lr_type_get (void)
{
  char *type = muscle_percent_define_get ("lr.type");
  LrType res;
  if (STREQ (type, "lr(0)"))
    res = LR_TYPE__LR0;
  else if (STREQ (type, "lalr"))
    res = LR_TYPE__LALR;
  else if (STREQ (type, "ielr"))
    res = LR_TYPE__IELR;
  else if (STREQ (type, "canonical-lr"))
    res = LR_TYPE__CANONICAL_LR;
  else
    {
      aver (false);
      abort ();
    }
  free (type);
  return res;
}

/**
 * \post:
 *   - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
 *     is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
 *     the same RHS are nullable nonterminals.  In other words, the follows of
 *     a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
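 *
 * For example, given a hypothetical grammar with the rules "S: A B C;",
 * "B: %empty;", and "C: %empty;", the bits for the positions of A, B, and
 * C in S's RHS are all set: each is a nonterminal followed within its RHS
 * only by nullable nonterminals.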
 */
static bitset
ielr_compute_ritem_sees_lookahead_set (void)
{
  bitset result = bitset_create (nritems, BITSET_FIXED);
  unsigned i = nritems-1;
  while (0 < i)
    {
      --i;
      while (!item_number_is_rule_number (ritem[i])
             && ISVAR (ritem[i])
             && nullable [item_number_as_symbol_number (ritem[i]) - ntokens])
        bitset_set (result, i--);
      if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
        bitset_set (result, i--);
      while (!item_number_is_rule_number (ritem[i]) && 0 < i)
        --i;
    }
  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "ritem_sees_lookahead_set:\n");
      debug_bitset (result);
      fprintf (stderr, "\n");
    }
  return result;
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 * \post:
 *   - Each of \c *edgesp and \c *edge_countsp is a new array of size
 *     \c ::ngotos.
 *   - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
 *     <tt>(*edge_countsp)[i]+1</tt>.
 *   - In such a \c goto_number array, the last element is \c ::END_NODE.
 *   - All remaining elements are the indices of the gotos to which there is an
 *     internal follow edge from goto \c i.
 *   - There is an internal follow edge from goto \c i to goto \c j iff both:
 *     - The from states of gotos \c i and \c j are the same.
 *     - The transition nonterminal for goto \c i appears as the first RHS
 *       symbol of at least one production for which both:
 *       - The LHS is the transition symbol of goto \c j.
 *       - All other RHS symbols are nullable nonterminals.
 *     - In other words, the follows of goto \c i include the follows of
 *       goto \c j and it's an internal edge because the from states are the
 *       same.
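 *
 * For example (a hypothetical grammar): given the production "A: B C;" with
 * \c C nullable, a state with gotos on both \c B and \c A has an internal
 * follow edge from the goto on \c B to the goto on \c A, so the follows of
 * the goto on \c B include the follows of the goto on \c A.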
 */
static void
ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
                                    goto_number ***edgesp, int **edge_countsp)
{
  *edgesp = xnmalloc (ngotos, sizeof **edgesp);
  *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
  {
    bitset sources = bitset_create (ngotos, BITSET_FIXED);
    for (goto_number i = 0; i < ngotos; ++i)
      (*edge_countsp)[i] = 0;
    for (goto_number i = 0; i < ngotos; ++i)
      {
        int nsources = 0;
        {
          for (rule **rulep = derives[states[to_state[i]]->accessing_symbol
                                      - ntokens];
               *rulep;
               ++rulep)
            {
              /* If there is at least one RHS symbol, if the first RHS symbol
                 is a nonterminal, and if all remaining RHS symbols (if any)
                 are nullable nonterminals, create an edge from the LHS
                 symbol's goto to the first RHS symbol's goto such that the RHS
                 symbol's goto will be the source of the edge after the
                 eventual relation_transpose below.

                 Unlike in ielr_compute_always_follows, I use a bitset for
                 edges rather than an array because it is possible that
                 multiple RHS's with the same first symbol could fit and thus
                 that we could end up with redundant edges.  With the
                 possibility of redundant edges, it's hard to know ahead of
                 time how large to make such an array.  Another possible
                 redundancy is that source and destination might be the same
                 goto.  Eliminating all these possible redundancies now might
                 possibly help performance a little.  I have not proven any of
                 this, but I'm guessing the bitset shouldn't entail much of a
                 performance penalty, if any.  */
              if (bitset_test (ritem_sees_lookahead_set,
                               (*rulep)->rhs - ritem))
                {
                  goto_number source =
                    map_goto (from_state[i],
                              item_number_as_symbol_number (*(*rulep)->rhs));
                  if (i != source && !bitset_test (sources, source))
                    {
                      bitset_set (sources, source);
                      ++nsources;
                      ++(*edge_countsp)[source];
                    }
                }
            }
        }
        if (nsources == 0)
          (*edgesp)[i] = NULL;
        else
          {
            (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
            {
              bitset_iterator biter_source;
              bitset_bindex source;
              int j = 0;
              BITSET_FOR_EACH (biter_source, sources, source, 0)
                (*edgesp)[i][j++] = source;
            }
            (*edgesp)[i][nsources] = END_NODE;
          }
        bitset_zero (sources);
      }
    bitset_free (sources);
  }

  relation_transpose (edgesp, ngotos);

  if (trace_flag & trace_ielr)
    relation_print ("internal_follow_edges", *edgesp, ngotos, NULL, stderr);
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 *   - \c internal_follow_edges was computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post:
 *   - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
 *     is \c ::ngotos and the number of columns is the maximum number of
 *     kernel items in any state.
 *   - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
 *     \c i include the lookahead set of item \c j in the from state of goto
 *     \c i.
 *   - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there
 *     is no item \c j in the from state of goto \c i.
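 *
 * For example (a hypothetical grammar): if the from state of a goto on \c B
 * contains the kernel item "S: a . B C" with \c C nullable, the follows of
 * that goto include that item's lookahead set, so the corresponding bit is
 * set.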
 */
static void
ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
                                  goto_number **internal_follow_edges,
                                  bitsetv *follow_kernel_itemsp)
{
  {
    size_t max_nitems = 0;
    for (state_number i = 0; i < nstates; ++i)
      if (states[i]->nitems > max_nitems)
        max_nitems = states[i]->nitems;
    *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
  }
  for (goto_number i = 0; i < ngotos; ++i)
    {
      size_t nitems = states[from_state[i]]->nitems;
      item_number *items = states[from_state[i]]->items;
      for (size_t j = 0; j < nitems; ++j)
        /* If this item has this goto and if all subsequent symbols in this
           RHS (if any) are nullable nonterminals, then record this item as
           one whose lookahead set is included in this goto's follows.  */
        if (item_number_is_symbol_number (ritem[items[j]])
            && item_number_as_symbol_number (ritem[items[j]])
               == states[to_state[i]]->accessing_symbol
            && bitset_test (ritem_sees_lookahead_set, items[j]))
          bitset_set ((*follow_kernel_itemsp)[i], j);
    }
  relation_digraph (internal_follow_edges, ngotos, *follow_kernel_itemsp);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "follow_kernel_items:\n");
      debug_bitsetv (*follow_kernel_itemsp);
    }
}

/**
 * \pre
 *   - \c *edgesp and \c edge_counts were computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post
 *   - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
 *     \c ntokens columns.
 *   - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
 *     follow (that is, it's computed by internal and successor edges) of goto
 *     \c i.
 *   - Rows of \c *edgesp have been realloc'ed and extended to include
 *     successor follow edges.  \c edge_counts has not been updated.
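 *
 * For example, every token shifted out of the to state of goto \c i is an
 * always follow of goto \c i, and a goto on a nullable nonterminal in that
 * to state contributes a successor follow edge through which that goto's
 * follows propagate back to goto \c i.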
 */
static void
ielr_compute_always_follows (goto_number ***edgesp,
                             int const edge_counts[],
                             bitsetv *always_followsp)
{
  *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
  {
    goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
    for (goto_number i = 0; i < ngotos; ++i)
      {
        goto_number nedges = edge_counts[i];
        {
          int j;
          transitions *trans = states[to_state[i]]->transitions;
          FOR_EACH_SHIFT (trans, j)
            bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
          for (; j < trans->num; ++j)
            {
              symbol_number sym = TRANSITION_SYMBOL (trans, j);
              if (nullable[sym - ntokens])
                edge_array[nedges++] = map_goto (to_state[i], sym);
            }
        }
        if (nedges - edge_counts[i])
          {
            (*edgesp)[i] =
              xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
            memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
                    (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
            (*edgesp)[i][nedges] = END_NODE;
          }
      }
    free (edge_array);
  }
  relation_digraph (*edgesp, ngotos, *always_followsp);

  if (trace_flag & trace_ielr)
    {
      relation_print ("always follow edges", *edgesp, ngotos, NULL, stderr);
      fprintf (stderr, "always_follows:\n");
      debug_bitsetv (*always_followsp);
    }
}

/**
 * \post
 *   - \c result is a new array of size \c ::nstates.
 *   - <tt>result[i]</tt> is an array of pointers to the predecessor
 *     <tt>state</tt>'s of state \c i.
 *   - The last element of such an array is \c NULL.
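 *
 * Because each such array is \c NULL terminated, the predecessors of
 * <tt>states[i]</tt> can be visited as, for example:
 *
 *     for (state **p = result[i]; *p; ++p)
 *       ... (*p)->number ...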
 */
static state ***
ielr_compute_predecessors (void)
{
  int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
  state ***result = xnmalloc (nstates, sizeof *result);
  for (state_number i = 0; i < nstates; ++i)
    predecessor_counts[i] = 0;
  for (state_number i = 0; i < nstates; ++i)
    for (int j = 0; j < states[i]->transitions->num; ++j)
      ++predecessor_counts[states[i]->transitions->states[j]->number];
  for (state_number i = 0; i < nstates; ++i)
    {
      result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]);
      result[i][predecessor_counts[i]] = NULL;
      predecessor_counts[i] = 0;
    }
  for (state_number i = 0; i < nstates; ++i)
    for (int j = 0; j < states[i]->transitions->num; ++j)
      {
        state_number k = states[i]->transitions->states[j]->number;
        result[k][predecessor_counts[k]++] = states[i];
      }
  free (predecessor_counts);
  return result;
}

/**
 * \post
 *   - \c *follow_kernel_itemsp and \c *always_followsp were computed by
 *     \c ielr_compute_follow_kernel_items and
 *     \c ielr_compute_always_follows.
 *   - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
 *     by \c ielr_compute_predecessors.
 */
static void
ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
                               bitsetv *always_followsp,
                               state ****predecessorsp)
{
  goto_number **edges;
  int *edge_counts;
  {
    bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
    ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
                                        &edges, &edge_counts);
    ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
                                      follow_kernel_itemsp);
    bitset_free (ritem_sees_lookahead_set);
  }
  ielr_compute_always_follows (&edges, edge_counts, always_followsp);
  for (int i = 0; i < ngotos; ++i)
    free (edges[i]);
  free (edges);
  free (edge_counts);
  if (predecessorsp)
    *predecessorsp = ielr_compute_predecessors ();
}

/**
 * \note
 *   - FIXME: It might be an interesting experiment to compare the space and
 *     time efficiency of computing \c item_lookahead_sets either:
 *     - Fully up front.
 *     - Just-in-time, as implemented below.
 *     - Not at all.  That is, just let annotations continue even when
 *       unnecessary.
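 *
 * For example (a hypothetical grammar): the lookahead set of a kernel item
 * "A: b . c" is the union over all predecessor states of the follows of
 * their gotos on \c A, whereas the lookahead set of "A: b c . d" is the
 * union of the lookahead sets of the kernel item "A: b . c d" in each
 * predecessor state, computed recursively on demand.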
 */
bool
ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
                         symbol_number lookahead, state ***predecessors,
                         bitset **item_lookahead_sets)
{
  if (!item_lookahead_sets[s->number])
    {
      item_lookahead_sets[s->number] =
        xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
      for (size_t i = 0; i < s->nitems; ++i)
        item_lookahead_sets[s->number][i] = NULL;
    }
  if (!item_lookahead_sets[s->number][item])
    {
      item_lookahead_sets[s->number][item] =
        bitset_create (ntokens, BITSET_FIXED);
      /* If this kernel item is the beginning of a RHS, it must be the kernel
         item in the start state, and so its LHS has no follows and no goto to
         check.  If, instead, this kernel item is the successor of the start
         state's kernel item, there are still no follows and no goto.  This
         situation is fortunate because we want to avoid the - 2 below in both
         cases.

         Actually, IELR(1) should never invoke this function for either of
         those cases because (1) follow_kernel_items will never reference a
         kernel item for this RHS because the end token blocks sight of the
         lookahead set from the RHS's only nonterminal, and (2) no reduction
         has a lookback dependency on this lookahead set.  Nevertheless, I
         didn't change this test to an aver just in case the usage of this
         function evolves to need those two cases.  In both cases, the current
         implementation returns the right result.  */
      if (s->items[item] > 1)
        {
          /* If the LHS symbol of this item isn't known (because this is a
             top-level invocation), go get it.  */
          if (!lhs)
            {
              unsigned i;
              for (i = s->items[item];
                   !item_number_is_rule_number (ritem[i]);
                   ++i)
                continue;
              lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number;
            }
          /* If this kernel item is next to the beginning of the RHS, then
             check all predecessors' goto follows for the LHS.  */
          if (item_number_is_rule_number (ritem[s->items[item] - 2]))
            {
              aver (lhs != accept->content->number);
              for (state **predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                bitset_or (item_lookahead_sets[s->number][item],
                           item_lookahead_sets[s->number][item],
                           goto_follows[map_goto ((*predecessor)->number,
                                                  lhs)]);
            }
          /* If this kernel item is later in the RHS, then check all
             predecessor items' lookahead sets.  */
          else
            {
              for (state **predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                {
                  size_t predecessor_item;
                  for (predecessor_item = 0;
                       predecessor_item < (*predecessor)->nitems;
                       ++predecessor_item)
                    if ((*predecessor)->items[predecessor_item]
                        == s->items[item] - 1)
                      break;
                  aver (predecessor_item != (*predecessor)->nitems);
                  ielr_item_has_lookahead (*predecessor, lhs,
                                           predecessor_item, 0 /*irrelevant*/,
                                           predecessors, item_lookahead_sets);
                  bitset_or (item_lookahead_sets[s->number][item],
                             item_lookahead_sets[s->number][item],
                             item_lookahead_sets[(*predecessor)->number]
                               [predecessor_item]);
                }
            }
        }
    }
  return bitset_test (item_lookahead_sets[s->number][item], lookahead);
}

/**
 * \pre
 *   - \c follow_kernel_items, \c always_follows, and \c predecessors
 *     were computed by \c ielr_compute_auxiliary_tables.
 * \post
 *   - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
 *     points to a new array of size \c ::nstates.
 *   - For <tt>0 <= i < ::nstates</tt>:
 *     - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
 *       node for <tt>states[i]</tt>.
 *     - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
 *       node for <tt>states[i]</tt>.
 *   - <tt>*max_annotationsp</tt> is the maximum number of annotations per
 *     state.
 */
static void
ielr_compute_annotation_lists (bitsetv follow_kernel_items,
                               bitsetv always_follows, state ***predecessors,
                               AnnotationIndex *max_annotationsp,
                               InadequacyList ***inadequacy_listsp,
                               AnnotationList ***annotation_listsp,
                               struct obstack *annotations_obstackp)
{
  bitset **item_lookahead_sets =
    xnmalloc (nstates, sizeof *item_lookahead_sets);
  AnnotationIndex *annotation_counts =
    xnmalloc (nstates, sizeof *annotation_counts);
  ContributionIndex max_contributions = 0;
  unsigned total_annotations = 0;

  *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
  *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
  for (state_number i = 0; i < nstates; ++i)
    {
      item_lookahead_sets[i] = NULL;
      (*inadequacy_listsp)[i] = NULL;
      (*annotation_listsp)[i] = NULL;
      annotation_counts[i] = 0;
    }
  {
    InadequacyListNodeCount inadequacy_list_node_count = 0;
    for (state_number i = 0; i < nstates; ++i)
      AnnotationList__compute_from_inadequacies (
        states[i], follow_kernel_items, always_follows, predecessors,
        item_lookahead_sets, *inadequacy_listsp, *annotation_listsp,
        annotation_counts, &max_contributions, annotations_obstackp,
        &inadequacy_list_node_count);
  }
  *max_annotationsp = 0;
  for (state_number i = 0; i < nstates; ++i)
    {
      if (annotation_counts[i] > *max_annotationsp)
        *max_annotationsp = annotation_counts[i];
      total_annotations += annotation_counts[i];
    }
  if (trace_flag & trace_ielr)
    {
      for (state_number i = 0; i < nstates; ++i)
        {
          fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
          AnnotationList__debug ((*annotation_listsp)[i],
                                 states[i]->nitems, 2);
        }
      fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
      fprintf (stderr, "Average number of annotations per state: %f\n",
               (float)total_annotations/nstates);
      fprintf (stderr, "Max number of annotations per state: %d\n",
               *max_annotationsp);
      fprintf (stderr, "Max number of contributions per annotation: %d\n",
               max_contributions);
    }
  for (state_number i = 0; i < nstates; ++i)
    if (item_lookahead_sets[i])
      {
        for (size_t j = 0; j < states[i]->nitems; ++j)
          if (item_lookahead_sets[i][j])
            bitset_free (item_lookahead_sets[i][j]);
        free (item_lookahead_sets[i]);
      }
  free (item_lookahead_sets);
  free (annotation_counts);
}

typedef struct state_list
{
  struct state_list *next;
  state *state;
  /** Has this state been recomputed as a successor of another state?  */
  bool recomputedAsSuccessor;
  /**
   * \c NULL iff all lookahead sets are empty.  <tt>lookaheads[i] = NULL</tt>
   * iff the lookahead set on item \c i is empty.
   */
  bitset *lookaheads;
  /**
   * nextIsocore is the next state in a circularly linked-list of all states
   * with the same core.  The one originally computed by generate_states in
   * lr0.c is lr0Isocore.
   */
  struct state_list *lr0Isocore;
  struct state_list *nextIsocore;
} state_list;
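
/* A sketch of how the isocore ring above is typically walked (cf. the loop
   in ielr_compute_state below): given some state *t, following nextIsocore
   from t->state_list visits every isocore of the same core exactly once
   before arriving back at the starting member:

     state_list *sl = t->state_list;
     do
       {
         ...  // visit sl
       }
     while ((sl = sl->nextIsocore) != t->state_list);
*/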

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - <tt>n->class = nterm_sym</tt>.
 * \post
 *   - \c follow_set contains the follow set for the goto on nonterminal \c n
 *     in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
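 *
 * That is, \c follow_set is the union of the goto's always follows and, for
 * each kernel item whose bit is set in the goto's row of
 * \c follow_kernel_items, the lookaheads currently stored for that item in
 * \c s.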
 */
static void
ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
                              bitsetv always_follows, state_list *s,
                              sym_content *n, bitset follow_set)
{
  goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
  bitset_copy (follow_set, always_follows[n_goto]);
  if (s->lookaheads)
    {
      bitset_iterator biter_item;
      bitset_bindex item;
      BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
        if (s->lookaheads[item])
          bitset_or (follow_set, follow_set, s->lookaheads[item]);
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - \c lookahead_filter was computed by
 *     \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
 *     of \c t.
 *   - The number of rows in \c lookaheads is at least the number of items in
 *     \c t, and the number of columns is \c ::ntokens.
 * \post
 *   - <tt>lookaheads[i][j]</tt> is set iff both:
 *     - <tt>lookahead_filter[i][j]</tt> is set.
 *     - The isocore of \c t that will be the transition successor of \c s will
 *       inherit from \c s token \c j into the lookahead set of item \c i.
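 *
 * For example (a hypothetical grammar): if kernel item \c i of \c t is
 * "A: b . c" (the dot just after the first RHS symbol), the inherited
 * lookaheads are the follow set of the goto on \c A in \c s; if it is
 * "A: b c . d", they are copied from the lookahead set of s's kernel item
 * "A: b . c d".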
 */
static void
ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
                         state_list *s, state *t, bitsetv lookahead_filter,
                         bitsetv lookaheads)
{
  size_t s_item = 0;
  bitsetv_zero (lookaheads);
  for (size_t t_item = 0; t_item < t->nitems; ++t_item)
    {
      /* If this kernel item is the beginning of a RHS, it must be the
         kernel item in the start state, but t is supposed to be a successor
         state.  If, instead, this kernel item is the successor of the start
         state's kernel item, the lookahead set is empty.  This second case is
         a special case to avoid the - 2 below, but the next successor can be
         handled fine without special casing it.  */
      aver (t->items[t_item] != 0);
      if (t->items[t_item] > 1
          && !bitset_empty_p (lookahead_filter[t_item]))
        {
          if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
            {
              unsigned rule_item;
              for (rule_item = t->items[t_item];
                   !item_number_is_rule_number (ritem[rule_item]);
                   ++rule_item)
                ;
              ielr_compute_goto_follow_set (
                follow_kernel_items, always_follows, s,
                rules[item_number_as_rule_number (ritem[rule_item])].lhs,
                lookaheads[t_item]);
            }
          else if (s->lookaheads)
            {
              /* We don't have to start the s item search at the beginning
                 every time because items from both states are sorted by their
                 indices in ritem.  */
              for (; s_item < s->state->nitems; ++s_item)
                if (s->state->items[s_item] == t->items[t_item] - 1)
                  break;
              aver (s_item != s->state->nitems);
              if (s->lookaheads[s_item])
                bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
            }
          bitset_and (lookaheads[t_item],
                      lookaheads[t_item], lookahead_filter[t_item]);
        }
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
 *     - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
 *   - The number of rows in each of \c lookaheads and \c work2 is the maximum
 *     number of items in any state.  The number of columns in each is
 *     \c ::ntokens.
 *   - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
 *   - \c ::nstates is the total number of states, some not yet fully computed,
 *     in the list ending at \c *last_statep.  It is at least the number of
 *     original LR(0) states.
 *   - The size of \c work1 is at least the number of annotations for the LR(0)
 *     isocore of \c t.
 * \post
 *   - Either:
 *     - In the case that <tt>annotation_lists != NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
 *       permitted by the annotations for the original LR(0) isocore of \c t.
 *       If this changed the lookaheads in that isocore, those changes were
 *       propagated to all already computed transition successors recursively
 *       possibly resulting in the splitting of some of those successors.
 *     - In the case that <tt>annotation_lists = NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
 *       isocore's lookahead sets were identical to those specified by
 *       <tt>lookaheads \@pre</tt>.
 *     - If no such merge was permitted, a new isocore of \c t containing
 *       <tt>lookaheads \@pre</tt> was appended to the state list whose
 *       previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
 *       incremented.  It was also appended to \c t's isocore list.
 *   - <tt>*tp</tt> = the isocore of \c t into which
 *     <tt>lookaheads \@pre</tt> was placed/merged.
 *   - \c lookaheads, \c work1, and \c work2 may have been altered.
 */
static void
ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
                    AnnotationList **annotation_lists, state *t,
                    bitsetv lookaheads, state_list **last_statep,
                    ContributionIndex work1[], bitsetv work2, state **tp)
{
  state_list *lr0_isocore = t->state_list->lr0Isocore;
  state_list **this_isocorep;

  /* Determine whether there's an isocore of t with which these lookaheads can
     be merged.  */
  {
    AnnotationIndex ai;
    AnnotationList *a;
    if (annotation_lists)
      for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
           a;
           ++ai, a = a->next)
        work1[ai] =
          AnnotationList__computeDominantContribution (
            a, lr0_isocore->state->nitems, lookaheads, false);
    for (this_isocorep = &t->state_list;
         this_isocorep == &t->state_list || *this_isocorep != t->state_list;
         this_isocorep = &(*this_isocorep)->nextIsocore)
      {
        if (!(*this_isocorep)->recomputedAsSuccessor)
          break;
        if (annotation_lists)
          {
            for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
                 a;
                 ++ai, a = a->next)
              {
                if (work1[ai] != ContributionIndex__none)
                  {
                    /* This isocore compatibility test depends on the fact
                       that, if the dominant contributions are the same for
                       the two isocores, then merging their lookahead sets
                       will not produce a state with a different dominant
                       contribution.  */
                    ContributionIndex ci =
                      AnnotationList__computeDominantContribution (
                        a, lr0_isocore->state->nitems,
                        (*this_isocorep)->lookaheads, false);
                    if (ci != ContributionIndex__none && work1[ai] != ci)
                      break;
                  }
              }
            if (!a)
              break;
          }
        else
          {
            size_t i;
            for (i = 0; i < t->nitems; ++i)
              {
                if (!(*this_isocorep)->lookaheads
                    || !(*this_isocorep)->lookaheads[i])
                  {
                    if (!bitset_empty_p (lookaheads[i]))
                      break;
                  }
                /* bitset_equal_p uses the size of the first argument,
                   so lookaheads[i] must be the second argument.  */
                else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
                                          lookaheads[i]))
                  break;
              }
            if (i == t->nitems)
              break;
          }
      }
  }

  bool has_lookaheads = false;
  for (size_t i = 0; i < lr0_isocore->state->nitems; ++i)
    if (!bitset_empty_p (lookaheads[i]))
      {
        has_lookaheads = true;
        break;
      }

  /* Merge with an existing isocore.  */
  if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
    {
      bool new_lookaheads = false;
      *tp = (*this_isocorep)->state;

      /* Merge lookaheads into the state and record whether any of them are
         actually new.  */
      if (has_lookaheads)
        {
          size_t i;
          if (!(*this_isocorep)->lookaheads)
            {
              (*this_isocorep)->lookaheads =
                xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
              for (i = 0; i < t->nitems; ++i)
                (*this_isocorep)->lookaheads[i] = NULL;
            }
          for (i = 0; i < t->nitems; ++i)
            if (!bitset_empty_p (lookaheads[i]))
              {
                if (!(*this_isocorep)->lookaheads[i])
                  (*this_isocorep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                bitset_andn (lookaheads[i],
                             lookaheads[i], (*this_isocorep)->lookaheads[i]);
                bitset_or ((*this_isocorep)->lookaheads[i],
                           lookaheads[i], (*this_isocorep)->lookaheads[i]);
                if (!bitset_empty_p (lookaheads[i]))
                  new_lookaheads = true;
              }
        }

      /* If new lookaheads were merged, propagate those lookaheads to the
         successors, possibly splitting them.  If *tp is being recomputed for
         the first time, this isn't necessary because the main
         ielr_split_states loop will handle the successors later.  */
      if (!(*this_isocorep)->recomputedAsSuccessor)
        (*this_isocorep)->recomputedAsSuccessor = true;
      else if (new_lookaheads)
        {
          /* When merging demands identical lookahead sets, it is impossible
             to merge new lookaheads.  */
          aver (annotation_lists);
          for (int i = 0; i < (*tp)->transitions->num; ++i)
            {
              state *t2 = (*tp)->transitions->states[i];
              /* At any time, there's at most one state for which we have so
                 far initially recomputed only some of its successors in the
                 main ielr_split_states loop.  Because we recompute successors
                 in order, we can just stop at the first such successor.  Of
                 course, *tp might be a state some of whose successors have
                 been recomputed as successors of other states rather than as
                 successors of *tp.  It's fine if we go ahead and propagate to
                 some of those.  We'll propagate to them again (but stop when
                 we see nothing changes) and to the others when we reach *tp in
                 the main ielr_split_states loop later.  */
              if (!t2->state_list->recomputedAsSuccessor)
                break;
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t2->state_list->lr0Isocore->state->number],
                t2->nitems, work2);
              ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                       (*this_isocorep), t2, work2,
                                       lookaheads);
              /* FIXME: If splitting t2 here, it's possible that lookaheads
                 that had already propagated from *tp to t2 will be left in t2
                 after *tp has been removed as t2's predecessor:
                 - We're going to recompute all lookaheads in phase 4, so these
                   extra lookaheads won't appear in the final parser table.
                 - If t2 has just one annotation, then these extra lookaheads
                   cannot alter the dominating contribution to the associated
                   inadequacy and thus cannot needlessly prevent a future merge
                   of some new state with t2.  We can be sure of this because:
                   - The fact that we're splitting t2 now means that some
                     predecessors (at least one) other than *tp must be
                     propagating contributions to t2.
                   - The fact that t2 was merged in the first place means that,
                     if *tp propagated any contributions, the dominating
                     contribution must be the same as that from those other
                     predecessors.
                   - Thus, if some new state to be merged with t2 in the future
                     proves to be compatible with the contributions propagated
                     by the other predecessors, it will also be compatible with
                     the contributions made by the extra lookaheads left behind
                     by *tp.
                 - However, if t2 has more than one annotation and these extra
                   lookaheads contribute to one of their inadequacies, it's
                   possible these extra lookaheads may needlessly prevent a
                   future merge with t2.  For example:
                   - Let's say there's an inadequacy A that makes the split
                     necessary as follows:
                     - There's currently just one other predecessor and it
                       propagates to t2 some contributions to inadequacy A.
                     - The new lookaheads that we were attempting to propagate
                       from *tp to t2 made contributions to inadequacy A with a
                       different dominating contribution than those from that
                       other predecessor.
                     - The extra lookaheads either make no contribution to
                       inadequacy A or have the same dominating contribution as
                       the contributions from the other predecessor.  Either
                       way, as explained above, they can't prevent a future
                       merge.
                   - Let's say there's an inadequacy B that causes the trouble
                     with future merges as follows:
                     - The extra lookaheads make contributions to inadequacy B.
                     - Those extra contributions did not prevent the original
                       merge to create t2 because the other predecessor
                       propagates to t2 no contributions to inadequacy B.
                     - Thus, those extra contributions may prevent a future
                       merge with t2 even though the merge would be fine if *tp
                       had not left them behind.
                 - Is the latter case common enough to worry about?
                 - Perhaps we should track all predecessors and iterate them
                   now to recreate t2 without those extra lookaheads.  */
              ielr_compute_state (follow_kernel_items, always_follows,
                                  annotation_lists, t2, lookaheads,
                                  last_statep, work1, work2,
                                  &(*tp)->transitions->states[i]);
            }
        }
    }

  /* Create a new isocore.  */
  else
    {
      state_list *old_isocore = *this_isocorep;
      (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
      *last_statep = *this_isocorep;
      (*last_statep)->state = *tp = state_new_isocore (t);
      (*tp)->state_list = *last_statep;
      (*last_statep)->recomputedAsSuccessor = true;
      (*last_statep)->next = NULL;
      (*last_statep)->lookaheads = NULL;
      if (has_lookaheads)
        {
          (*last_statep)->lookaheads =
            xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
          for (size_t i = 0; i < t->nitems; ++i)
            {
              if (bitset_empty_p (lookaheads[i]))
                (*last_statep)->lookaheads[i] = NULL;
              else
                {
                  (*last_statep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                  bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
                }
            }
        }
      (*last_statep)->lr0Isocore = lr0_isocore;
      (*last_statep)->nextIsocore = old_isocore;
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and <tt>max_annotations = 0</tt>.
 *     - \c annotation_lists and \c max_annotations were computed by
 *       \c ielr_compute_annotation_lists.
 * \post
 *   - \c ::states is of size \c ::nstates (which might be greater than
 *     <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
 *     inadequacy.  \c annotation_lists was used to determine state
 *     compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
 *     LR(1) state compatibility test was used.
 *   - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
 *     computed in all states.  tv_ielr_phase4 was pushed while they were
 *     computed from item lookahead sets.
 */
static void
ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
                   AnnotationList **annotation_lists,
                   AnnotationIndex max_annotations)
{
  state_list *first_state;
  state_list *last_state;
  bitsetv lookahead_filter = NULL;
  bitsetv lookaheads;

  /* Set up state list and some reusable bitsets.  */
  {
    size_t max_nitems = 0;
    state_list **nodep = &first_state;
    for (state_number i = 0; i < nstates; ++i)
      {
        *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
        (*nodep)->state = states[i];
        (*nodep)->recomputedAsSuccessor = false;
        (*nodep)->lookaheads = NULL;
        (*nodep)->lr0Isocore = *nodep;
        (*nodep)->nextIsocore = *nodep;
        nodep = &(*nodep)->next;
        if (states[i]->nitems > max_nitems)
          max_nitems = states[i]->nitems;
      }
    *nodep = NULL;
    lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
    if (!annotation_lists)
      bitsetv_ones (lookahead_filter);
    lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
  }

  /* Recompute states.  */
  {
    ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
    for (state_list *this_state = first_state;
         this_state;
         this_state = this_state->next)
      {
        state *s = this_state->state;
        for (int i = 0; i < s->transitions->num; ++i)
          {
            state *t = s->transitions->states[i];
            if (annotation_lists)
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t->state_list->lr0Isocore->state->number],
                t->nitems, lookahead_filter);
            ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                     this_state, t, lookahead_filter,
                                     lookaheads);
            ielr_compute_state (follow_kernel_items, always_follows,
                                annotation_lists, t, lookaheads, &last_state,
                                work, lookahead_filter,
                                &s->transitions->states[i]);
          }
      }
    free (work);
  }

  bitsetv_free (lookahead_filter);
  bitsetv_free (lookaheads);

  /* Store states back in the states array.  */
  states = xnrealloc (states, nstates, sizeof *states);
  for (state_list *node = first_state; node; node = node->next)
    states[node->state->number] = node->state;

  /* In the case of canonical LR(1), copy item lookahead sets to reduction
     lookahead sets.  */
  if (!annotation_lists)
    {
      timevar_push (tv_ielr_phase4);
      initialize_LA ();
      for (state_list *node = first_state; node; node = node->next)
        if (!node->state->consistent)
          {
            size_t i = 0;
            item_number *itemset = node->state->items;
            for (size_t r = 0; r < node->state->reductions->num; ++r)
              {
                rule *this_rule = node->state->reductions->rules[r];
                bitset lookahead_set =
                  node->state->reductions->lookahead_tokens[r];
                if (item_number_is_rule_number (*this_rule->rhs))
                  ielr_compute_goto_follow_set (follow_kernel_items,
                                                always_follows, node,
                                                this_rule->lhs, lookahead_set);
                else if (node->lookaheads)
                  {
                    /* We don't need to start the kernel item search back at
                       i=0 because both items and reductions are sorted on
                       rule number.  */
                    while (!item_number_is_rule_number (ritem[itemset[i]])
                           || item_number_as_rule_number (ritem[itemset[i]])
                              != this_rule->number)
                      {
                        ++i;
                        aver (i < node->state->nitems);
                      }
                    if (node->lookaheads[i])
                      bitset_copy (lookahead_set, node->lookaheads[i]);
                  }
              }
          }
      timevar_pop (tv_ielr_phase4);
    }

  /* Free state list.  */
  while (first_state)
    {
      state_list *node = first_state;
      if (node->lookaheads)
        {
          for (size_t i = 0; i < node->state->nitems; ++i)
            if (node->lookaheads[i])
              bitset_free (node->lookaheads[i]);
          free (node->lookaheads);
        }
      first_state = node->next;
      free (node);
    }
}


void
ielr (void)
{
  LrType lr_type = lr_type_get ();

  /* Phase 0: LALR(1).  */
  switch (lr_type)
    {
    case LR_TYPE__LR0:
      timevar_push (tv_lalr);
      set_goto_map ();
      timevar_pop (tv_lalr);
      return;

    case LR_TYPE__CANONICAL_LR:
      timevar_push (tv_lalr);
      set_goto_map ();
      timevar_pop (tv_lalr);
      break;

    case LR_TYPE__LALR:
      timevar_push (tv_lalr);
      lalr ();
      bitsetv_free (goto_follows);
      timevar_pop (tv_lalr);
      return;

    case LR_TYPE__IELR:
      timevar_push (tv_lalr);
      lalr ();
      timevar_pop (tv_lalr);
      break;
    }

  {
    bitsetv follow_kernel_items;
    bitsetv always_follows;
    InadequacyList **inadequacy_lists = NULL;
    AnnotationList **annotation_lists = NULL;
    struct obstack annotations_obstack;
    AnnotationIndex max_annotations = 0;

    {
      /* Phase 1: Compute Auxiliary Tables.  */
      state ***predecessors;
      timevar_push (tv_ielr_phase1);
      ielr_compute_auxiliary_tables (
        &follow_kernel_items, &always_follows,
        lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
      timevar_pop (tv_ielr_phase1);

      /* Phase 2: Compute Annotations.  */
      timevar_push (tv_ielr_phase2);
      if (lr_type != LR_TYPE__CANONICAL_LR)
        {
          obstack_init (&annotations_obstack);
          ielr_compute_annotation_lists (follow_kernel_items, always_follows,
                                         predecessors, &max_annotations,
                                         &inadequacy_lists, &annotation_lists,
                                         &annotations_obstack);
          for (state_number i = 0; i < nstates; ++i)
            free (predecessors[i]);
          free (predecessors);
          bitsetv_free (goto_follows);
          lalr_free ();
        }
      timevar_pop (tv_ielr_phase2);
    }

    /* Phase 3: Split States.  */
    timevar_push (tv_ielr_phase3);
    {
      state_number nstates_lr0 = nstates;
      ielr_split_states (follow_kernel_items, always_follows,
                         annotation_lists, max_annotations);
      if (inadequacy_lists)
        for (state_number i = 0; i < nstates_lr0; ++i)
          InadequacyList__delete (inadequacy_lists[i]);
    }
    free (inadequacy_lists);
    if (annotation_lists)
      obstack_free (&annotations_obstack, NULL);
    free (annotation_lists);
    bitsetv_free (follow_kernel_items);
    bitsetv_free (always_follows);
    timevar_pop (tv_ielr_phase3);
  }

  /* Phase 4: Compute Reduction Lookaheads.  */
  timevar_push (tv_ielr_phase4);
  free (goto_map);
  free (from_state);
  free (to_state);
  if (lr_type == LR_TYPE__CANONICAL_LR)
    {
      /* Reduction lookaheads are computed in ielr_split_states above
         but are timed as part of phase 4.  */
      set_goto_map ();
    }
  else
    {
      lalr ();
      bitsetv_free (goto_follows);
    }
  timevar_pop (tv_ielr_phase4);
}