1/* Compute lookahead criteria for Bison.
2
3 Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2019 Free Software
4 Foundation, Inc.
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21
22/* Find which rules need lookahead in each state, and which lookahead
23 tokens they accept. */
24
25#include <config.h>
26#include "system.h"
27
28#include <bitset.h>
29#include <bitsetv.h>
30
31#include "complain.h"
32#include "derives.h"
33#include "getargs.h"
34#include "gram.h"
35#include "lalr.h"
36#include "lr0.h"
37#include "muscle-tab.h"
38#include "nullable.h"
39#include "reader.h"
40#include "relation.h"
41#include "symtab.h"
42
43/* goto_map[nterm - NTOKENS] -> number of gotos. */
44goto_number *goto_map = NULL;
45goto_number ngotos = 0;
46state_number *from_state = NULL;
47state_number *to_state = NULL;
48bitsetv goto_follows = NULL;
49
50/* Linked list of goto numbers. */
51typedef struct goto_list
52{
53 struct goto_list *next;
54 goto_number value;
55} goto_list;
56
57static goto_list *
58goto_list_new (goto_number value, struct goto_list *next)
59{
60 goto_list *res = xmalloc (sizeof *res);
61 res->next = next;
62 res->value = value;
63 return res;
64}
65
66/* LA is an nLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
67 LArule[l] is applicable in the appropriate state when the next
68 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
69 it is a conflict. */
70
71static bitsetv LA = NULL;
72size_t nLA;
73
74
75/* "(p, A) includes (p', B)" iff
76 B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
77
78 Definition p.621 [DeRemer 1982].
79
80 INCLUDES[(p, A)] = [(p', B),...] */
81static goto_number **includes;
82
83/* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
84
85 Definition p.621 [DeRemer 1982]. */
86static goto_list **lookback;
87
88static void
89goto_print (goto_number i, FILE *out)
90{
91 const state_number src = from_state[i];
92 const state_number dst = to_state[i];
93 symbol_number var = states[dst]->accessing_symbol;
94 fprintf (out,
95 "goto[%ld] = (%d, %s, %d)", i, src, symbols[var]->tag, dst);
96}
97
98void
99set_goto_map (void)
100{
101 /* Count the number of gotos (ngotos) per nterm (goto_map). */
102 goto_map = xcalloc (nvars + 1, sizeof *goto_map);
103 ngotos = 0;
104 for (state_number s = 0; s < nstates; ++s)
105 {
106 transitions *trans = states[s]->transitions;
107 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
108 {
109 ngotos++;
110 /* Abort if (ngotos + 1) would overflow. */
111 aver (ngotos != GOTO_NUMBER_MAXIMUM);
112 goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
113 }
114 }
115
116 goto_number *temp_map = xnmalloc (nvars + 1, sizeof *temp_map);
117 {
118 goto_number k = 0;
119 for (symbol_number i = ntokens; i < nsyms; ++i)
120 {
121 temp_map[i - ntokens] = k;
122 k += goto_map[i - ntokens];
123 }
124
125 for (symbol_number i = ntokens; i < nsyms; ++i)
126 goto_map[i - ntokens] = temp_map[i - ntokens];
127
128 goto_map[nsyms - ntokens] = ngotos;
129 temp_map[nsyms - ntokens] = ngotos;
130 }
131
132 from_state = xcalloc (ngotos, sizeof *from_state);
133 to_state = xcalloc (ngotos, sizeof *to_state);
134
135 for (state_number s = 0; s < nstates; ++s)
136 {
137 const transitions *trans = states[s]->transitions;
138 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
139 {
140 goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
141 from_state[k] = s;
142 to_state[k] = trans->states[i]->number;
143 }
144 }
145
146 free (temp_map);
147
148 if (trace_flag & trace_automaton)
149 for (int i = 0; i < ngotos; ++i)
150 {
151 goto_print (i, stderr);
152 fputc ('\n', stderr);
153 }
154}
155
156
157goto_number
158map_goto (state_number src, symbol_number sym)
159{
160 goto_number low = goto_map[sym - ntokens];
161 goto_number high = goto_map[sym - ntokens + 1] - 1;
162
163 for (;;)
164 {
165 aver (low <= high);
166 goto_number middle = (low + high) / 2;
167 state_number s = from_state[middle];
168 if (s == src)
169 return middle;
170 else if (s < src)
171 low = middle + 1;
172 else
173 high = middle - 1;
174 }
175}
176
177/* Print FOLLOWS for debugging. */
178static void
179follows_print (const char* title, FILE *out)
180{
181 fprintf (out, "%s:\n", title);
182 for (goto_number i = 0; i < ngotos; ++i)
183 {
184 fputs (" FOLLOWS[", out);
185 goto_print (i, out);
186 fputs ("] =", out);
187 bitset_iterator iter;
188 symbol_number sym;
189 BITSET_FOR_EACH (iter, goto_follows[i], sym, 0)
190 fprintf (out, " %s", symbols[sym]->tag);
191 fputc ('\n', out);
192 }
193 fputc ('\n', out);
194}
195
196/* Build goto_follows. */
197static void
198initialize_goto_follows (void)
199{
200 goto_number **reads = xnmalloc (ngotos, sizeof *reads);
201 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
202
203 goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
204
205 for (goto_number i = 0; i < ngotos; ++i)
206 {
207 state_number dst = to_state[i];
208 const transitions *trans = states[dst]->transitions;
209
210 int j;
211 FOR_EACH_SHIFT (trans, j)
212 bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j));
213
214 /* Gotos outgoing from DST. */
215 goto_number nedges = 0;
216 for (; j < trans->num; ++j)
217 {
218 symbol_number sym = TRANSITION_SYMBOL (trans, j);
219 if (nullable[sym - ntokens])
220 {
221 assert (nedges < ngotos);
222 edge[nedges++] = map_goto (dst, sym);
223 }
224 }
225
226 if (nedges == 0)
227 reads[i] = NULL;
228 else
229 {
230 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
231 memcpy (reads[i], edge, nedges * sizeof edge[0]);
232 reads[i][nedges] = END_NODE;
233 }
234 }
235 if (trace_flag & trace_automaton)
236 {
237 follows_print ("follows after shifts", stderr);
238 relation_print ("reads", reads, ngotos, goto_print, stderr);
239 }
240
241 relation_digraph (reads, ngotos, goto_follows);
242 if (trace_flag & trace_automaton)
243 follows_print ("follows after read", stderr);
244
245 for (goto_number i = 0; i < ngotos; ++i)
246 free (reads[i]);
247 free (reads);
248 free (edge);
249}
250
251
252/* Find the state which LOOKBACK[LOOKBACK_INDEX] is about. */
253static const state *
254lookback_find_state (int lookback_index)
255{
256 state *res = NULL;
257 for (int j = 0; j < nstates; ++j)
258 if (states[j]->reductions
259 && states[j]->reductions->lookahead_tokens)
260 {
261 if (states[j]->reductions->lookahead_tokens - LA > lookback_index)
262 /* Went too far. */
263 break;
264 else
265 res = states[j];
266 }
267 /* Pacify "potential null pointer dereference" warning. */
268 if (!res)
269 abort ();
270 return res;
271}
272
273/* Print LOOKBACK for debugging. */
274static void
275lookback_print (FILE *out)
276{
277 fputs ("lookback:\n", out);
278 for (int i = 0; i < nLA; ++i)
279 if (lookback[i])
280 {
281 fprintf (out, " %3d = ", i);
282 const state *s = lookback_find_state (i);
283 int rnum = i - (s->reductions->lookahead_tokens - LA);
284 const rule *r = s->reductions->rules[rnum];
285 fprintf (out, "(%3d, ", s->number);
286 rule_print (r, NULL, out);
287 fputs (") ->", out);
288 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
289 {
290 fputc (' ', out);
291 goto_print (sp->value, out);
292 }
293 fputc ('\n', out);
294 }
295 fputc ('\n', out);
296}
297
298/* Add (S, R) -> GOTONO to LOOKBACK.
299
300 "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
301
302 The goto number GOTONO, whose source is S (which is
303 inconsistent), */
304static void
305add_lookback_edge (state *s, rule const *r, goto_number gotono)
306{
307 int ri = state_reduction_find (s, r);
308 int idx = (s->reductions->lookahead_tokens - LA) + ri;
309 lookback[idx] = goto_list_new (gotono, lookback[idx]);
310}
311
312
313/* Compute INCLUDES and LOOKBACK. Corresponds to step E in Sec. 6 of
314 [DeRemer 1982]. */
315static void
316build_relations (void)
317{
318 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
319 state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path);
320
321 includes = xnmalloc (ngotos, sizeof *includes);
322
323 /* For each goto (from SRC to DST labeled by nterm VAR), iterate
324 over each rule with VAR as LHS, and find the path PATH from SRC
325 labeled with the RHS of the rule. */
326 for (goto_number i = 0; i < ngotos; ++i)
327 {
328 const state_number src = from_state[i];
329 const state_number dst = to_state[i];
330 symbol_number var = states[dst]->accessing_symbol;
331
332 /* Size of EDGE. */
333 int nedges = 0;
334 for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep)
335 {
336 rule const *r = *rulep;
337 state *s = states[src];
338 path[0] = s->number;
339
340 /* Length of PATH. */
341 int length = 1;
342 for (item_number const *rp = r->rhs; 0 <= *rp; rp++)
343 {
344 symbol_number sym = item_number_as_symbol_number (*rp);
345 s = transitions_to (s, sym);
346 path[length++] = s->number;
347 }
348
349 /* S is the end of PATH. */
350 if (!s->consistent)
351 add_lookback_edge (s, r, i);
352
353 /* Walk back PATH from penultimate to beginning.
354
355 The "0 <= p" part is actually useless: each rhs ends in a
356 rule number (for which ISVAR(...) is false), and there is
357 a sentinel (ritem[-1]=0) before the first rhs. */
358 for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p)
359 {
360 symbol_number sym = item_number_as_symbol_number (r->rhs[p]);
361 goto_number g = map_goto (path[p], sym);
362 /* Insert G if not already in EDGE.
363 FIXME: linear search. A bitset instead? */
364 {
365 bool found = false;
366 for (int j = 0; !found && j < nedges; ++j)
367 found = edge[j] == g;
368 if (!found)
369 {
370 assert (nedges < ngotos);
371 edge[nedges++] = g;
372 }
373 }
374 if (!nullable[sym - ntokens])
375 break;
376 }
377 }
378
379 if (trace_flag & trace_automaton)
380 {
381 goto_print (i, stderr);
382 fputs (" edges = ", stderr);
383 for (int j = 0; j < nedges; ++j)
384 {
385 fputc (' ', stderr);
386 goto_print (edge[j], stderr);
387 }
388 fputc ('\n', stderr);
389 }
390
391 if (nedges == 0)
392 includes[i] = NULL;
393 else
394 {
395 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
396 for (int j = 0; j < nedges; ++j)
397 includes[i][j] = edge[j];
398 includes[i][nedges] = END_NODE;
399 }
400 }
401
402 free (edge);
403 free (path);
404
405 relation_transpose (&includes, ngotos);
406 if (trace_flag & trace_automaton)
407 relation_print ("includes", includes, ngotos, goto_print, stderr);
408}
409
410/* Compute FOLLOWS from INCLUDES, and free INCLUDES. */
411static void
412compute_follows (void)
413{
414 relation_digraph (includes, ngotos, goto_follows);
415 if (trace_flag & trace_sets)
416 follows_print ("follows after includes", stderr);
417 for (goto_number i = 0; i < ngotos; ++i)
418 free (includes[i]);
419 free (includes);
420}
421
422
423static void
424compute_lookahead_tokens (void)
425{
426 if (trace_flag & trace_automaton)
427 lookback_print (stderr);
428
429 for (size_t i = 0; i < nLA; ++i)
430 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
431 bitset_or (LA[i], LA[i], goto_follows[sp->value]);
432
433 /* Free LOOKBACK. */
434 for (size_t i = 0; i < nLA; ++i)
435 LIST_FREE (goto_list, lookback[i]);
436 free (lookback);
437}
438
439
440/*----------------------------------------------------.
441| Count the number of lookahead tokens required for S |
442| (N_LOOKAHEAD_TOKENS member). |
443`----------------------------------------------------*/
444
445static int
446state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept)
447{
448 const reductions *reds = s->reductions;
449 const transitions *trans = s->transitions;
450
451 /* Transitions are only disabled during conflict resolution, and that
452 hasn't happened yet, so there should be no need to check that
453 transition 0 hasn't been disabled before checking if it is a shift.
454 However, this check was performed at one time, so we leave it as an
455 aver. */
456 aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0));
457
458 /* We need a lookahead either to distinguish different reductions
459 (i.e., there are two or more), or to distinguish a reduction from a
460 shift. Otherwise, it is straightforward, and the state is
461 'consistent'. However, do not treat a state with any reductions as
462 consistent unless it is the accepting state (because there is never
463 a lookahead token that makes sense there, and so no lookahead token
464 should be read) if the user has otherwise disabled default
465 reductions. */
466 s->consistent =
467 !(reds->num > 1
468 || (reds->num == 1 && trans->num && TRANSITION_IS_SHIFT (trans, 0))
469 || (reds->num == 1 && reds->rules[0]->number != 0
470 && default_reduction_only_for_accept));
471
472 return s->consistent ? 0 : reds->num;
473}
474
475
476/*----------------------------------------------------.
477| Compute LA, NLA, and the lookahead_tokens members. |
478`----------------------------------------------------*/
479
480void
481initialize_LA (void)
482{
483 bool default_reduction_only_for_accept;
484 {
485 char *default_reductions =
486 muscle_percent_define_get ("lr.default-reduction");
487 default_reduction_only_for_accept = STREQ (default_reductions, "accepting");
488 free (default_reductions);
489 }
490
491 /* Compute the total number of reductions requiring a lookahead. */
492 nLA = 0;
493 for (state_number i = 0; i < nstates; ++i)
494 nLA +=
495 state_lookahead_tokens_count (states[i],
496 default_reduction_only_for_accept);
497 /* Avoid having to special case 0. */
498 if (!nLA)
499 nLA = 1;
500
501 bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
502
503 /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions
504 require lookahead tokens. */
505 for (state_number i = 0; i < nstates; ++i)
506 {
507 int count =
508 state_lookahead_tokens_count (states[i],
509 default_reduction_only_for_accept);
510 if (count)
511 {
512 states[i]->reductions->lookahead_tokens = pLA;
513 pLA += count;
514 }
515 }
516}
517
518
519/*---------------------------------------------.
520| Output the lookahead tokens for each state. |
521`---------------------------------------------*/
522
523static void
524lookahead_tokens_print (FILE *out)
525{
526 fputs ("Lookaheads:\n", out);
527 for (state_number i = 0; i < nstates; ++i)
528 {
529 const reductions *reds = states[i]->reductions;
530 if (reds->num)
531 {
532 fprintf (out, " State %d:\n", i);
533 for (int j = 0; j < reds->num; ++j)
534 {
535 fprintf (out, " rule %d:", reds->rules[j]->number);
536 if (reds->lookahead_tokens)
537 {
538 bitset_iterator iter;
539 int k;
540 BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0)
541 fprintf (out, " %s", symbols[k]->tag);
542 }
543 fputc ('\n', out);
544 }
545 }
546 }
547 fputc ('\n', out);
548}
549
550void
551lalr (void)
552{
553 initialize_LA ();
554 set_goto_map ();
555 initialize_goto_follows ();
556 lookback = xcalloc (nLA, sizeof *lookback);
557 build_relations ();
558 compute_follows ();
559 compute_lookahead_tokens ();
560
561 if (trace_flag & trace_sets)
562 lookahead_tokens_print (stderr);
563}
564
565
566void
567lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
568{
569 goto_number ngotos_reachable = 0;
570 symbol_number nonterminal = 0;
571 aver (nsyms == nvars + ntokens);
572
573 for (goto_number i = 0; i < ngotos; ++i)
574 {
575 while (i == goto_map[nonterminal])
576 goto_map[nonterminal++] = ngotos_reachable;
577 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
578 entry. */
579 if (old_to_new[from_state[i]] != nstates_old)
580 {
581 /* from_state[i] is not removed, so it and thus to_state[i] are
582 reachable, so to_state[i] != nstates_old. */
583 aver (old_to_new[to_state[i]] != nstates_old);
584 from_state[ngotos_reachable] = old_to_new[from_state[i]];
585 to_state[ngotos_reachable] = old_to_new[to_state[i]];
586 ++ngotos_reachable;
587 }
588 }
589 while (nonterminal <= nvars)
590 {
591 aver (ngotos == goto_map[nonterminal]);
592 goto_map[nonterminal++] = ngotos_reachable;
593 }
594 ngotos = ngotos_reachable;
595}
596
597
598void
599lalr_free (void)
600{
601 for (state_number s = 0; s < nstates; ++s)
602 states[s]->reductions->lookahead_tokens = NULL;
603 bitsetv_free (LA);
604}
605