1 | /* Compute lookahead criteria for Bison. |
2 | |
3 | Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2019 Free Software |
4 | Foundation, Inc. |
5 | |
6 | This file is part of Bison, the GNU Compiler Compiler. |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
20 | |
21 | |
22 | /* Find which rules need lookahead in each state, and which lookahead |
23 | tokens they accept. */ |
24 | |
25 | #include <config.h> |
26 | #include "system.h" |
27 | |
28 | #include <bitset.h> |
29 | #include <bitsetv.h> |
30 | |
31 | #include "complain.h" |
32 | #include "derives.h" |
33 | #include "getargs.h" |
34 | #include "gram.h" |
35 | #include "lalr.h" |
36 | #include "lr0.h" |
37 | #include "muscle-tab.h" |
38 | #include "nullable.h" |
39 | #include "reader.h" |
40 | #include "relation.h" |
41 | #include "symtab.h" |
42 | |
43 | /* goto_map[nterm - NTOKENS] -> number of gotos. */ |
44 | goto_number *goto_map = NULL; |
45 | goto_number ngotos = 0; |
46 | state_number *from_state = NULL; |
47 | state_number *to_state = NULL; |
48 | bitsetv goto_follows = NULL; |
49 | |
50 | /* Linked list of goto numbers. */ |
51 | typedef struct goto_list |
52 | { |
53 | struct goto_list *next; |
54 | goto_number value; |
55 | } goto_list; |
56 | |
57 | static goto_list * |
58 | goto_list_new (goto_number value, struct goto_list *next) |
59 | { |
60 | goto_list *res = xmalloc (sizeof *res); |
61 | res->next = next; |
62 | res->value = value; |
63 | return res; |
64 | } |
65 | |
66 | /* LA is an nLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule |
67 | LArule[l] is applicable in the appropriate state when the next |
68 | token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, |
69 | it is a conflict. */ |
70 | |
71 | static bitsetv LA = NULL; |
72 | size_t nLA; |
73 | |
74 | |
75 | /* "(p, A) includes (p', B)" iff |
76 | B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β). |
77 | |
78 | Definition p.621 [DeRemer 1982]. |
79 | |
80 | INCLUDES[(p, A)] = [(p', B),...] */ |
81 | static goto_number **includes; |
82 | |
83 | /* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω. |
84 | |
85 | Definition p.621 [DeRemer 1982]. */ |
86 | static goto_list **lookback; |
87 | |
88 | static void |
89 | goto_print (goto_number i, FILE *out) |
90 | { |
91 | const state_number src = from_state[i]; |
92 | const state_number dst = to_state[i]; |
93 | symbol_number var = states[dst]->accessing_symbol; |
94 | fprintf (out, |
95 | "goto[%ld] = (%d, %s, %d)" , i, src, symbols[var]->tag, dst); |
96 | } |
97 | |
98 | void |
99 | set_goto_map (void) |
100 | { |
101 | /* Count the number of gotos (ngotos) per nterm (goto_map). */ |
102 | goto_map = xcalloc (nvars + 1, sizeof *goto_map); |
103 | ngotos = 0; |
104 | for (state_number s = 0; s < nstates; ++s) |
105 | { |
106 | transitions *trans = states[s]->transitions; |
107 | for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i) |
108 | { |
109 | ngotos++; |
110 | /* Abort if (ngotos + 1) would overflow. */ |
111 | aver (ngotos != GOTO_NUMBER_MAXIMUM); |
112 | goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++; |
113 | } |
114 | } |
115 | |
116 | goto_number *temp_map = xnmalloc (nvars + 1, sizeof *temp_map); |
117 | { |
118 | goto_number k = 0; |
119 | for (symbol_number i = ntokens; i < nsyms; ++i) |
120 | { |
121 | temp_map[i - ntokens] = k; |
122 | k += goto_map[i - ntokens]; |
123 | } |
124 | |
125 | for (symbol_number i = ntokens; i < nsyms; ++i) |
126 | goto_map[i - ntokens] = temp_map[i - ntokens]; |
127 | |
128 | goto_map[nsyms - ntokens] = ngotos; |
129 | temp_map[nsyms - ntokens] = ngotos; |
130 | } |
131 | |
132 | from_state = xcalloc (ngotos, sizeof *from_state); |
133 | to_state = xcalloc (ngotos, sizeof *to_state); |
134 | |
135 | for (state_number s = 0; s < nstates; ++s) |
136 | { |
137 | const transitions *trans = states[s]->transitions; |
138 | for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i) |
139 | { |
140 | goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++; |
141 | from_state[k] = s; |
142 | to_state[k] = trans->states[i]->number; |
143 | } |
144 | } |
145 | |
146 | free (temp_map); |
147 | |
148 | if (trace_flag & trace_automaton) |
149 | for (int i = 0; i < ngotos; ++i) |
150 | { |
151 | goto_print (i, stderr); |
152 | fputc ('\n', stderr); |
153 | } |
154 | } |
155 | |
156 | |
157 | goto_number |
158 | map_goto (state_number src, symbol_number sym) |
159 | { |
160 | goto_number low = goto_map[sym - ntokens]; |
161 | goto_number high = goto_map[sym - ntokens + 1] - 1; |
162 | |
163 | for (;;) |
164 | { |
165 | aver (low <= high); |
166 | goto_number middle = (low + high) / 2; |
167 | state_number s = from_state[middle]; |
168 | if (s == src) |
169 | return middle; |
170 | else if (s < src) |
171 | low = middle + 1; |
172 | else |
173 | high = middle - 1; |
174 | } |
175 | } |
176 | |
177 | /* Print FOLLOWS for debugging. */ |
178 | static void |
179 | follows_print (const char* title, FILE *out) |
180 | { |
181 | fprintf (out, "%s:\n" , title); |
182 | for (goto_number i = 0; i < ngotos; ++i) |
183 | { |
184 | fputs (" FOLLOWS[" , out); |
185 | goto_print (i, out); |
186 | fputs ("] =" , out); |
187 | bitset_iterator iter; |
188 | symbol_number sym; |
189 | BITSET_FOR_EACH (iter, goto_follows[i], sym, 0) |
190 | fprintf (out, " %s" , symbols[sym]->tag); |
191 | fputc ('\n', out); |
192 | } |
193 | fputc ('\n', out); |
194 | } |
195 | |
196 | /* Build goto_follows. */ |
197 | static void |
198 | initialize_goto_follows (void) |
199 | { |
200 | goto_number **reads = xnmalloc (ngotos, sizeof *reads); |
201 | goto_number *edge = xnmalloc (ngotos, sizeof *edge); |
202 | |
203 | goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED); |
204 | |
205 | for (goto_number i = 0; i < ngotos; ++i) |
206 | { |
207 | state_number dst = to_state[i]; |
208 | const transitions *trans = states[dst]->transitions; |
209 | |
210 | int j; |
211 | FOR_EACH_SHIFT (trans, j) |
212 | bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j)); |
213 | |
214 | /* Gotos outgoing from DST. */ |
215 | goto_number nedges = 0; |
216 | for (; j < trans->num; ++j) |
217 | { |
218 | symbol_number sym = TRANSITION_SYMBOL (trans, j); |
219 | if (nullable[sym - ntokens]) |
220 | { |
221 | assert (nedges < ngotos); |
222 | edge[nedges++] = map_goto (dst, sym); |
223 | } |
224 | } |
225 | |
226 | if (nedges == 0) |
227 | reads[i] = NULL; |
228 | else |
229 | { |
230 | reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]); |
231 | memcpy (reads[i], edge, nedges * sizeof edge[0]); |
232 | reads[i][nedges] = END_NODE; |
233 | } |
234 | } |
235 | if (trace_flag & trace_automaton) |
236 | { |
237 | follows_print ("follows after shifts" , stderr); |
238 | relation_print ("reads" , reads, ngotos, goto_print, stderr); |
239 | } |
240 | |
241 | relation_digraph (reads, ngotos, goto_follows); |
242 | if (trace_flag & trace_automaton) |
243 | follows_print ("follows after read" , stderr); |
244 | |
245 | for (goto_number i = 0; i < ngotos; ++i) |
246 | free (reads[i]); |
247 | free (reads); |
248 | free (edge); |
249 | } |
250 | |
251 | |
252 | /* Find the state which LOOKBACK[LOOKBACK_INDEX] is about. */ |
253 | static const state * |
254 | lookback_find_state (int lookback_index) |
255 | { |
256 | state *res = NULL; |
257 | for (int j = 0; j < nstates; ++j) |
258 | if (states[j]->reductions |
259 | && states[j]->reductions->lookahead_tokens) |
260 | { |
261 | if (states[j]->reductions->lookahead_tokens - LA > lookback_index) |
262 | /* Went too far. */ |
263 | break; |
264 | else |
265 | res = states[j]; |
266 | } |
267 | /* Pacify "potential null pointer dereference" warning. */ |
268 | if (!res) |
269 | abort (); |
270 | return res; |
271 | } |
272 | |
273 | /* Print LOOKBACK for debugging. */ |
274 | static void |
275 | lookback_print (FILE *out) |
276 | { |
277 | fputs ("lookback:\n" , out); |
278 | for (int i = 0; i < nLA; ++i) |
279 | if (lookback[i]) |
280 | { |
281 | fprintf (out, " %3d = " , i); |
282 | const state *s = lookback_find_state (i); |
283 | int rnum = i - (s->reductions->lookahead_tokens - LA); |
284 | const rule *r = s->reductions->rules[rnum]; |
285 | fprintf (out, "(%3d, " , s->number); |
286 | rule_print (r, NULL, out); |
287 | fputs (") ->" , out); |
288 | for (goto_list *sp = lookback[i]; sp; sp = sp->next) |
289 | { |
290 | fputc (' ', out); |
291 | goto_print (sp->value, out); |
292 | } |
293 | fputc ('\n', out); |
294 | } |
295 | fputc ('\n', out); |
296 | } |
297 | |
298 | /* Add (S, R) -> GOTONO to LOOKBACK. |
299 | |
300 | "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω. |
301 | |
302 | The goto number GOTONO, whose source is S (which is |
303 | inconsistent), */ |
304 | static void |
305 | add_lookback_edge (state *s, rule const *r, goto_number gotono) |
306 | { |
307 | int ri = state_reduction_find (s, r); |
308 | int idx = (s->reductions->lookahead_tokens - LA) + ri; |
309 | lookback[idx] = goto_list_new (gotono, lookback[idx]); |
310 | } |
311 | |
312 | |
313 | /* Compute INCLUDES and LOOKBACK. Corresponds to step E in Sec. 6 of |
314 | [DeRemer 1982]. */ |
315 | static void |
316 | build_relations (void) |
317 | { |
318 | goto_number *edge = xnmalloc (ngotos, sizeof *edge); |
319 | state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path); |
320 | |
321 | includes = xnmalloc (ngotos, sizeof *includes); |
322 | |
323 | /* For each goto (from SRC to DST labeled by nterm VAR), iterate |
324 | over each rule with VAR as LHS, and find the path PATH from SRC |
325 | labeled with the RHS of the rule. */ |
326 | for (goto_number i = 0; i < ngotos; ++i) |
327 | { |
328 | const state_number src = from_state[i]; |
329 | const state_number dst = to_state[i]; |
330 | symbol_number var = states[dst]->accessing_symbol; |
331 | |
332 | /* Size of EDGE. */ |
333 | int nedges = 0; |
334 | for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep) |
335 | { |
336 | rule const *r = *rulep; |
337 | state *s = states[src]; |
338 | path[0] = s->number; |
339 | |
340 | /* Length of PATH. */ |
341 | int length = 1; |
342 | for (item_number const *rp = r->rhs; 0 <= *rp; rp++) |
343 | { |
344 | symbol_number sym = item_number_as_symbol_number (*rp); |
345 | s = transitions_to (s, sym); |
346 | path[length++] = s->number; |
347 | } |
348 | |
349 | /* S is the end of PATH. */ |
350 | if (!s->consistent) |
351 | add_lookback_edge (s, r, i); |
352 | |
353 | /* Walk back PATH from penultimate to beginning. |
354 | |
355 | The "0 <= p" part is actually useless: each rhs ends in a |
356 | rule number (for which ISVAR(...) is false), and there is |
357 | a sentinel (ritem[-1]=0) before the first rhs. */ |
358 | for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p) |
359 | { |
360 | symbol_number sym = item_number_as_symbol_number (r->rhs[p]); |
361 | goto_number g = map_goto (path[p], sym); |
362 | /* Insert G if not already in EDGE. |
363 | FIXME: linear search. A bitset instead? */ |
364 | { |
365 | bool found = false; |
366 | for (int j = 0; !found && j < nedges; ++j) |
367 | found = edge[j] == g; |
368 | if (!found) |
369 | { |
370 | assert (nedges < ngotos); |
371 | edge[nedges++] = g; |
372 | } |
373 | } |
374 | if (!nullable[sym - ntokens]) |
375 | break; |
376 | } |
377 | } |
378 | |
379 | if (trace_flag & trace_automaton) |
380 | { |
381 | goto_print (i, stderr); |
382 | fputs (" edges = " , stderr); |
383 | for (int j = 0; j < nedges; ++j) |
384 | { |
385 | fputc (' ', stderr); |
386 | goto_print (edge[j], stderr); |
387 | } |
388 | fputc ('\n', stderr); |
389 | } |
390 | |
391 | if (nedges == 0) |
392 | includes[i] = NULL; |
393 | else |
394 | { |
395 | includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]); |
396 | for (int j = 0; j < nedges; ++j) |
397 | includes[i][j] = edge[j]; |
398 | includes[i][nedges] = END_NODE; |
399 | } |
400 | } |
401 | |
402 | free (edge); |
403 | free (path); |
404 | |
405 | relation_transpose (&includes, ngotos); |
406 | if (trace_flag & trace_automaton) |
407 | relation_print ("includes" , includes, ngotos, goto_print, stderr); |
408 | } |
409 | |
410 | /* Compute FOLLOWS from INCLUDES, and free INCLUDES. */ |
411 | static void |
412 | compute_follows (void) |
413 | { |
414 | relation_digraph (includes, ngotos, goto_follows); |
415 | if (trace_flag & trace_sets) |
416 | follows_print ("follows after includes" , stderr); |
417 | for (goto_number i = 0; i < ngotos; ++i) |
418 | free (includes[i]); |
419 | free (includes); |
420 | } |
421 | |
422 | |
423 | static void |
424 | compute_lookahead_tokens (void) |
425 | { |
426 | if (trace_flag & trace_automaton) |
427 | lookback_print (stderr); |
428 | |
429 | for (size_t i = 0; i < nLA; ++i) |
430 | for (goto_list *sp = lookback[i]; sp; sp = sp->next) |
431 | bitset_or (LA[i], LA[i], goto_follows[sp->value]); |
432 | |
433 | /* Free LOOKBACK. */ |
434 | for (size_t i = 0; i < nLA; ++i) |
435 | LIST_FREE (goto_list, lookback[i]); |
436 | free (lookback); |
437 | } |
438 | |
439 | |
440 | /*----------------------------------------------------. |
441 | | Count the number of lookahead tokens required for S | |
442 | | (N_LOOKAHEAD_TOKENS member). | |
443 | `----------------------------------------------------*/ |
444 | |
445 | static int |
446 | state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept) |
447 | { |
448 | const reductions *reds = s->reductions; |
449 | const transitions *trans = s->transitions; |
450 | |
451 | /* Transitions are only disabled during conflict resolution, and that |
452 | hasn't happened yet, so there should be no need to check that |
453 | transition 0 hasn't been disabled before checking if it is a shift. |
454 | However, this check was performed at one time, so we leave it as an |
455 | aver. */ |
456 | aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0)); |
457 | |
458 | /* We need a lookahead either to distinguish different reductions |
459 | (i.e., there are two or more), or to distinguish a reduction from a |
460 | shift. Otherwise, it is straightforward, and the state is |
461 | 'consistent'. However, do not treat a state with any reductions as |
462 | consistent unless it is the accepting state (because there is never |
463 | a lookahead token that makes sense there, and so no lookahead token |
464 | should be read) if the user has otherwise disabled default |
465 | reductions. */ |
466 | s->consistent = |
467 | !(reds->num > 1 |
468 | || (reds->num == 1 && trans->num && TRANSITION_IS_SHIFT (trans, 0)) |
469 | || (reds->num == 1 && reds->rules[0]->number != 0 |
470 | && default_reduction_only_for_accept)); |
471 | |
472 | return s->consistent ? 0 : reds->num; |
473 | } |
474 | |
475 | |
476 | /*----------------------------------------------------. |
477 | | Compute LA, NLA, and the lookahead_tokens members. | |
478 | `----------------------------------------------------*/ |
479 | |
480 | void |
481 | initialize_LA (void) |
482 | { |
483 | bool default_reduction_only_for_accept; |
484 | { |
485 | char *default_reductions = |
486 | muscle_percent_define_get ("lr.default-reduction" ); |
487 | default_reduction_only_for_accept = STREQ (default_reductions, "accepting" ); |
488 | free (default_reductions); |
489 | } |
490 | |
491 | /* Compute the total number of reductions requiring a lookahead. */ |
492 | nLA = 0; |
493 | for (state_number i = 0; i < nstates; ++i) |
494 | nLA += |
495 | state_lookahead_tokens_count (states[i], |
496 | default_reduction_only_for_accept); |
497 | /* Avoid having to special case 0. */ |
498 | if (!nLA) |
499 | nLA = 1; |
500 | |
501 | bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); |
502 | |
503 | /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions |
504 | require lookahead tokens. */ |
505 | for (state_number i = 0; i < nstates; ++i) |
506 | { |
507 | int count = |
508 | state_lookahead_tokens_count (states[i], |
509 | default_reduction_only_for_accept); |
510 | if (count) |
511 | { |
512 | states[i]->reductions->lookahead_tokens = pLA; |
513 | pLA += count; |
514 | } |
515 | } |
516 | } |
517 | |
518 | |
519 | /*---------------------------------------------. |
520 | | Output the lookahead tokens for each state. | |
521 | `---------------------------------------------*/ |
522 | |
523 | static void |
524 | lookahead_tokens_print (FILE *out) |
525 | { |
526 | fputs ("Lookaheads:\n" , out); |
527 | for (state_number i = 0; i < nstates; ++i) |
528 | { |
529 | const reductions *reds = states[i]->reductions; |
530 | if (reds->num) |
531 | { |
532 | fprintf (out, " State %d:\n" , i); |
533 | for (int j = 0; j < reds->num; ++j) |
534 | { |
535 | fprintf (out, " rule %d:" , reds->rules[j]->number); |
536 | if (reds->lookahead_tokens) |
537 | { |
538 | bitset_iterator iter; |
539 | int k; |
540 | BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0) |
541 | fprintf (out, " %s" , symbols[k]->tag); |
542 | } |
543 | fputc ('\n', out); |
544 | } |
545 | } |
546 | } |
547 | fputc ('\n', out); |
548 | } |
549 | |
550 | void |
551 | lalr (void) |
552 | { |
553 | initialize_LA (); |
554 | set_goto_map (); |
555 | initialize_goto_follows (); |
556 | lookback = xcalloc (nLA, sizeof *lookback); |
557 | build_relations (); |
558 | compute_follows (); |
559 | compute_lookahead_tokens (); |
560 | |
561 | if (trace_flag & trace_sets) |
562 | lookahead_tokens_print (stderr); |
563 | } |
564 | |
565 | |
566 | void |
567 | lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old) |
568 | { |
569 | goto_number ngotos_reachable = 0; |
570 | symbol_number nonterminal = 0; |
571 | aver (nsyms == nvars + ntokens); |
572 | |
573 | for (goto_number i = 0; i < ngotos; ++i) |
574 | { |
575 | while (i == goto_map[nonterminal]) |
576 | goto_map[nonterminal++] = ngotos_reachable; |
577 | /* If old_to_new[from_state[i]] = nstates_old, remove this goto |
578 | entry. */ |
579 | if (old_to_new[from_state[i]] != nstates_old) |
580 | { |
581 | /* from_state[i] is not removed, so it and thus to_state[i] are |
582 | reachable, so to_state[i] != nstates_old. */ |
583 | aver (old_to_new[to_state[i]] != nstates_old); |
584 | from_state[ngotos_reachable] = old_to_new[from_state[i]]; |
585 | to_state[ngotos_reachable] = old_to_new[to_state[i]]; |
586 | ++ngotos_reachable; |
587 | } |
588 | } |
589 | while (nonterminal <= nvars) |
590 | { |
591 | aver (ngotos == goto_map[nonterminal]); |
592 | goto_map[nonterminal++] = ngotos_reachable; |
593 | } |
594 | ngotos = ngotos_reachable; |
595 | } |
596 | |
597 | |
598 | void |
599 | lalr_free (void) |
600 | { |
601 | for (state_number s = 0; s < nstates; ++s) |
602 | states[s]->reductions->lookahead_tokens = NULL; |
603 | bitsetv_free (LA); |
604 | } |
605 | |