1/* Generate the LR(0) parser states for Bison.
2
3 Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2015, 2018-2019 Free
4 Software Foundation, Inc.
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21
22/* See comments in state.h for the data structures that represent it.
23 The entry point is generate_states. */
24
25#include <config.h>
26#include "system.h"
27
28#include <bitset.h>
29
30#include "closure.h"
31#include "complain.h"
32#include "getargs.h"
33#include "gram.h"
34#include "lalr.h"
35#include "lr0.h"
36#include "reader.h"
37#include "reduce.h"
38#include "state.h"
39#include "symtab.h"
40
41typedef struct state_list
42{
43 struct state_list *next;
44 state *state;
45} state_list;
46
47static state_list *first_state = NULL;
48static state_list *last_state = NULL;
49
50/* Print CORE for debugging. */
51static void
52core_print (size_t core_size, item_number *core, FILE *out)
53{
54 for (int i = 0; i < core_size; ++i)
55 {
56 item_print (ritem + core[i], NULL, out);
57 fputc ('\n', out);
58 }
59}
60
61/*------------------------------------------------------------------.
62| A state was just discovered from another state. Queue it for |
63| later examination, in order to find its transitions. Return it. |
64`------------------------------------------------------------------*/
65
66static state *
67state_list_append (symbol_number sym, size_t core_size, item_number *core)
68{
69 state_list *node = xmalloc (sizeof *node);
70 state *res = state_new (sym, core_size, core);
71
72 if (trace_flag & trace_automaton)
73 fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
74 nstates, sym, symbols[sym]->tag);
75
76 node->next = NULL;
77 node->state = res;
78
79 if (!first_state)
80 first_state = node;
81 if (last_state)
82 last_state->next = node;
83 last_state = node;
84
85 return res;
86}
87
88/* Symbols that can be "shifted" (including non terminals) from the
89 current state. */
90bitset shift_symbol;
91
92static rule **redset;
93/* For the current state, the list of pointers to states that can be
94 reached via a shift/goto. Could be indexed by the reaching symbol,
95 but labels of incoming transitions can be recovered by the state
96 itself. */
97static state **shiftset;
98
99
100/* KERNEL_BASE[symbol-number] -> list of item numbers (offsets inside
101 RITEM) of lenngth KERNEL_SIZE[symbol-number]. */
102static item_number **kernel_base;
103static int *kernel_size;
104
105/* A single dimension array that serves as storage for
106 KERNEL_BASE. */
107static item_number *kernel_items;
108
109
110static void
111allocate_itemsets (void)
112{
113 /* Count the number of occurrences of all the symbols in RITEMS.
114 Note that useless productions (hence useless nonterminals) are
115 browsed too, hence we need to allocate room for _all_ the
116 symbols. */
117 size_t count = 0;
118 size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
119 sizeof *symbol_count);
120
121 for (rule_number r = 0; r < nrules; ++r)
122 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
123 {
124 symbol_number sym = item_number_as_symbol_number (*rhsp);
125 count += 1;
126 symbol_count[sym] += 1;
127 }
128
129 /* See comments before new_itemsets. All the vectors of items
130 live inside KERNEL_ITEMS. The number of active items after
131 some symbol S cannot be more than the number of times that S
132 appears as an item, which is SYMBOL_COUNT[S].
133 We allocate that much space for each symbol. */
134
135 kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
136 kernel_items = xnmalloc (count, sizeof *kernel_items);
137
138 count = 0;
139 for (symbol_number i = 0; i < nsyms; i++)
140 {
141 kernel_base[i] = kernel_items + count;
142 count += symbol_count[i];
143 }
144
145 free (symbol_count);
146 kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
147}
148
149/* Print the current kernel (in KERNEL_BASE). */
150static void
151kernel_print (FILE *out)
152{
153 for (symbol_number i = 0; i < nsyms; ++i)
154 if (kernel_size[i])
155 {
156 fprintf (out, "kernel[%s] =\n", symbols[i]->tag);
157 core_print (kernel_size[i], kernel_base[i], out);
158 }
159}
160
161static void
162allocate_storage (void)
163{
164 allocate_itemsets ();
165
166 shiftset = xnmalloc (nsyms, sizeof *shiftset);
167 redset = xnmalloc (nrules, sizeof *redset);
168 state_hash_new ();
169 shift_symbol = bitset_create (nsyms, BITSET_FIXED);
170}
171
172
173static void
174free_storage (void)
175{
176 bitset_free (shift_symbol);
177 free (redset);
178 free (shiftset);
179 free (kernel_base);
180 free (kernel_size);
181 free (kernel_items);
182 state_hash_free ();
183}
184
185
186
187
188/*------------------------------------------------------------------.
189| Find which term/nterm symbols can be "shifted" in S, and for each |
190| one record which items would be active after that transition. |
191| Uses the contents of itemset. |
192| |
193| shift_symbol is a bitset of the term/nterm symbols that can be |
194| shifted. For each symbol in the grammar, kernel_base[symbol] |
195| points to a vector of item numbers activated if that symbol is |
196| shifted, and kernel_size[symbol] is their numbers. |
197| |
198| itemset is sorted on item index in ritem, which is sorted on rule |
199| number. Compute each kernel_base[symbol] with the same sort. |
200`------------------------------------------------------------------*/
201
202static void
203new_itemsets (state *s)
204{
205 if (trace_flag & trace_automaton)
206 fprintf (stderr, "new_itemsets: begin: state = %d\n", s->number);
207
208 memset (kernel_size, 0, nsyms * sizeof *kernel_size);
209
210 bitset_zero (shift_symbol);
211
212 for (size_t i = 0; i < nitemset; ++i)
213 if (item_number_is_symbol_number (ritem[itemset[i]]))
214 {
215 symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
216 bitset_set (shift_symbol, sym);
217 kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
218 kernel_size[sym]++;
219 }
220
221 if (trace_flag & trace_automaton)
222 {
223 kernel_print (stderr);
224 fprintf (stderr, "new_itemsets: end: state = %d\n\n", s->number);
225 }
226}
227
228
229
230/*--------------------------------------------------------------.
231| Find the state we would get to (from the current state) by |
232| shifting SYM. Create a new state if no equivalent one exists |
233| already. Used by append_states. |
234`--------------------------------------------------------------*/
235
236static state *
237get_state (symbol_number sym, size_t core_size, item_number *core)
238{
239 if (trace_flag & trace_automaton)
240 {
241 fprintf (stderr, "Entering get_state, symbol = %d (%s), core:\n",
242 sym, symbols[sym]->tag);
243 core_print (core_size, core, stderr);
244 fputc ('\n', stderr);
245 }
246
247 state *s = state_hash_lookup (core_size, core);
248 if (!s)
249 s = state_list_append (sym, core_size, core);
250
251 if (trace_flag & trace_automaton)
252 fprintf (stderr, "Exiting get_state => %d\n", s->number);
253
254 return s;
255}
256
257/*---------------------------------------------------------------.
258| Use the information computed by new_itemsets to find the state |
259| numbers reached by each shift transition from S. |
260| |
261| SHIFTSET is set up as a vector of those states. |
262`---------------------------------------------------------------*/
263
264static void
265append_states (state *s)
266{
267 if (trace_flag & trace_automaton)
268 fprintf (stderr, "append_states: begin: state = %d\n", s->number);
269
270 bitset_iterator iter;
271 symbol_number sym;
272 int i = 0;
273 BITSET_FOR_EACH (iter, shift_symbol, sym, 0)
274 {
275 shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
276 ++i;
277 }
278
279 if (trace_flag & trace_automaton)
280 fprintf (stderr, "append_states: end: state = %d\n", s->number);
281}
282
283
284/*----------------------------------------------------------------.
285| Find which rules can be used for reduction transitions from the |
286| current state and make a reductions structure for the state to |
287| record their rule numbers. |
288`----------------------------------------------------------------*/
289
290static void
291save_reductions (state *s)
292{
293 int count = 0;
294
295 /* Find and count the active items that represent ends of rules. */
296 for (size_t i = 0; i < nitemset; ++i)
297 {
298 item_number item = ritem[itemset[i]];
299 if (item_number_is_rule_number (item))
300 {
301 rule_number r = item_number_as_rule_number (item);
302 redset[count++] = &rules[r];
303 if (r == 0)
304 {
305 /* This is "reduce 0", i.e., accept. */
306 aver (!final_state);
307 final_state = s;
308 }
309 }
310 }
311
312 /* Make a reductions structure and copy the data into it. */
313 state_reductions_set (s, count, redset);
314}
315
316
317/*---------------.
318| Build STATES. |
319`---------------*/
320
321static void
322set_states (void)
323{
324 states = xcalloc (nstates, sizeof *states);
325
326 while (first_state)
327 {
328 state_list *this = first_state;
329
330 /* Pessimization, but simplification of the code: make sure all
331 the states have valid transitions and reductions members,
332 even if reduced to 0. It is too soon for errs, which are
333 computed later, but set_conflicts. */
334 state *s = this->state;
335 if (!s->transitions)
336 state_transitions_set (s, 0, 0);
337 if (!s->reductions)
338 state_reductions_set (s, 0, 0);
339
340 states[s->number] = s;
341
342 first_state = this->next;
343 free (this);
344 }
345 first_state = NULL;
346 last_state = NULL;
347}
348
349
350/*-------------------------------------------------------------------.
351| Compute the LR(0) parser states (see state.h for details) from the |
352| grammar. |
353`-------------------------------------------------------------------*/
354
355void
356generate_states (void)
357{
358 allocate_storage ();
359 closure_new (nritems);
360
361 /* Create the initial state. The 0 at the lhs is the index of the
362 item of this initial rule. */
363 item_number initial_core = 0;
364 state_list_append (0, 1, &initial_core);
365
366 /* States are queued when they are created; process them all. */
367 for (state_list *list = first_state; list; list = list->next)
368 {
369 state *s = list->state;
370 if (trace_flag & trace_automaton)
371 fprintf (stderr, "Processing state %d (reached by %s)\n",
372 s->number,
373 symbols[s->accessing_symbol]->tag);
374 /* Set up itemset for the transitions out of this state. itemset gets a
375 vector of all the items that could be accepted next. */
376 closure (s->items, s->nitems);
377 /* Record the reductions allowed out of this state. */
378 save_reductions (s);
379 /* Find the itemsets of the states that shifts/gotos can reach. */
380 new_itemsets (s);
381 /* Find or create the core structures for those states. */
382 append_states (s);
383
384 /* Create the shifts structures for the shifts to those states,
385 now that the state numbers transitioning to are known. */
386 state_transitions_set (s, bitset_count (shift_symbol), shiftset);
387 }
388
389 /* discard various storage */
390 free_storage ();
391
392 /* Set up STATES. */
393 set_states ();
394}
395