1 | /* Generate the LR(0) parser states for Bison. |
2 | |
3 | Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2015, 2018-2019 Free |
4 | Software Foundation, Inc. |
5 | |
6 | This file is part of Bison, the GNU Compiler Compiler. |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
20 | |
21 | |
22 | /* See comments in state.h for the data structures that represent it. |
23 | The entry point is generate_states. */ |
24 | |
25 | #include <config.h> |
26 | #include "system.h" |
27 | |
28 | #include <bitset.h> |
29 | |
30 | #include "closure.h" |
31 | #include "complain.h" |
32 | #include "getargs.h" |
33 | #include "gram.h" |
34 | #include "lalr.h" |
35 | #include "lr0.h" |
36 | #include "reader.h" |
37 | #include "reduce.h" |
38 | #include "state.h" |
39 | #include "symtab.h" |
40 | |
41 | typedef struct state_list |
42 | { |
43 | struct state_list *next; |
44 | state *state; |
45 | } state_list; |
46 | |
47 | static state_list *first_state = NULL; |
48 | static state_list *last_state = NULL; |
49 | |
50 | /* Print CORE for debugging. */ |
51 | static void |
52 | core_print (size_t core_size, item_number *core, FILE *out) |
53 | { |
54 | for (int i = 0; i < core_size; ++i) |
55 | { |
56 | item_print (ritem + core[i], NULL, out); |
57 | fputc ('\n', out); |
58 | } |
59 | } |
60 | |
61 | /*------------------------------------------------------------------. |
62 | | A state was just discovered from another state. Queue it for | |
63 | | later examination, in order to find its transitions. Return it. | |
64 | `------------------------------------------------------------------*/ |
65 | |
66 | static state * |
67 | state_list_append (symbol_number sym, size_t core_size, item_number *core) |
68 | { |
69 | state_list *node = xmalloc (sizeof *node); |
70 | state *res = state_new (sym, core_size, core); |
71 | |
72 | if (trace_flag & trace_automaton) |
73 | fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n" , |
74 | nstates, sym, symbols[sym]->tag); |
75 | |
76 | node->next = NULL; |
77 | node->state = res; |
78 | |
79 | if (!first_state) |
80 | first_state = node; |
81 | if (last_state) |
82 | last_state->next = node; |
83 | last_state = node; |
84 | |
85 | return res; |
86 | } |
87 | |
88 | /* Symbols that can be "shifted" (including non terminals) from the |
89 | current state. */ |
90 | bitset shift_symbol; |
91 | |
92 | static rule **redset; |
93 | /* For the current state, the list of pointers to states that can be |
94 | reached via a shift/goto. Could be indexed by the reaching symbol, |
95 | but labels of incoming transitions can be recovered by the state |
96 | itself. */ |
97 | static state **shiftset; |
98 | |
99 | |
100 | /* KERNEL_BASE[symbol-number] -> list of item numbers (offsets inside |
101 | RITEM) of lenngth KERNEL_SIZE[symbol-number]. */ |
102 | static item_number **kernel_base; |
103 | static int *kernel_size; |
104 | |
105 | /* A single dimension array that serves as storage for |
106 | KERNEL_BASE. */ |
107 | static item_number *kernel_items; |
108 | |
109 | |
110 | static void |
111 | allocate_itemsets (void) |
112 | { |
113 | /* Count the number of occurrences of all the symbols in RITEMS. |
114 | Note that useless productions (hence useless nonterminals) are |
115 | browsed too, hence we need to allocate room for _all_ the |
116 | symbols. */ |
117 | size_t count = 0; |
118 | size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals, |
119 | sizeof *symbol_count); |
120 | |
121 | for (rule_number r = 0; r < nrules; ++r) |
122 | for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp) |
123 | { |
124 | symbol_number sym = item_number_as_symbol_number (*rhsp); |
125 | count += 1; |
126 | symbol_count[sym] += 1; |
127 | } |
128 | |
129 | /* See comments before new_itemsets. All the vectors of items |
130 | live inside KERNEL_ITEMS. The number of active items after |
131 | some symbol S cannot be more than the number of times that S |
132 | appears as an item, which is SYMBOL_COUNT[S]. |
133 | We allocate that much space for each symbol. */ |
134 | |
135 | kernel_base = xnmalloc (nsyms, sizeof *kernel_base); |
136 | kernel_items = xnmalloc (count, sizeof *kernel_items); |
137 | |
138 | count = 0; |
139 | for (symbol_number i = 0; i < nsyms; i++) |
140 | { |
141 | kernel_base[i] = kernel_items + count; |
142 | count += symbol_count[i]; |
143 | } |
144 | |
145 | free (symbol_count); |
146 | kernel_size = xnmalloc (nsyms, sizeof *kernel_size); |
147 | } |
148 | |
149 | /* Print the current kernel (in KERNEL_BASE). */ |
150 | static void |
151 | kernel_print (FILE *out) |
152 | { |
153 | for (symbol_number i = 0; i < nsyms; ++i) |
154 | if (kernel_size[i]) |
155 | { |
156 | fprintf (out, "kernel[%s] =\n" , symbols[i]->tag); |
157 | core_print (kernel_size[i], kernel_base[i], out); |
158 | } |
159 | } |
160 | |
161 | static void |
162 | allocate_storage (void) |
163 | { |
164 | allocate_itemsets (); |
165 | |
166 | shiftset = xnmalloc (nsyms, sizeof *shiftset); |
167 | redset = xnmalloc (nrules, sizeof *redset); |
168 | state_hash_new (); |
169 | shift_symbol = bitset_create (nsyms, BITSET_FIXED); |
170 | } |
171 | |
172 | |
173 | static void |
174 | free_storage (void) |
175 | { |
176 | bitset_free (shift_symbol); |
177 | free (redset); |
178 | free (shiftset); |
179 | free (kernel_base); |
180 | free (kernel_size); |
181 | free (kernel_items); |
182 | state_hash_free (); |
183 | } |
184 | |
185 | |
186 | |
187 | |
188 | /*------------------------------------------------------------------. |
189 | | Find which term/nterm symbols can be "shifted" in S, and for each | |
190 | | one record which items would be active after that transition. | |
191 | | Uses the contents of itemset. | |
192 | | | |
193 | | shift_symbol is a bitset of the term/nterm symbols that can be | |
194 | | shifted. For each symbol in the grammar, kernel_base[symbol] | |
195 | | points to a vector of item numbers activated if that symbol is | |
196 | | shifted, and kernel_size[symbol] is their numbers. | |
197 | | | |
198 | | itemset is sorted on item index in ritem, which is sorted on rule | |
199 | | number. Compute each kernel_base[symbol] with the same sort. | |
200 | `------------------------------------------------------------------*/ |
201 | |
202 | static void |
203 | new_itemsets (state *s) |
204 | { |
205 | if (trace_flag & trace_automaton) |
206 | fprintf (stderr, "new_itemsets: begin: state = %d\n" , s->number); |
207 | |
208 | memset (kernel_size, 0, nsyms * sizeof *kernel_size); |
209 | |
210 | bitset_zero (shift_symbol); |
211 | |
212 | for (size_t i = 0; i < nitemset; ++i) |
213 | if (item_number_is_symbol_number (ritem[itemset[i]])) |
214 | { |
215 | symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]); |
216 | bitset_set (shift_symbol, sym); |
217 | kernel_base[sym][kernel_size[sym]] = itemset[i] + 1; |
218 | kernel_size[sym]++; |
219 | } |
220 | |
221 | if (trace_flag & trace_automaton) |
222 | { |
223 | kernel_print (stderr); |
224 | fprintf (stderr, "new_itemsets: end: state = %d\n\n" , s->number); |
225 | } |
226 | } |
227 | |
228 | |
229 | |
230 | /*--------------------------------------------------------------. |
231 | | Find the state we would get to (from the current state) by | |
232 | | shifting SYM. Create a new state if no equivalent one exists | |
233 | | already. Used by append_states. | |
234 | `--------------------------------------------------------------*/ |
235 | |
236 | static state * |
237 | get_state (symbol_number sym, size_t core_size, item_number *core) |
238 | { |
239 | if (trace_flag & trace_automaton) |
240 | { |
241 | fprintf (stderr, "Entering get_state, symbol = %d (%s), core:\n" , |
242 | sym, symbols[sym]->tag); |
243 | core_print (core_size, core, stderr); |
244 | fputc ('\n', stderr); |
245 | } |
246 | |
247 | state *s = state_hash_lookup (core_size, core); |
248 | if (!s) |
249 | s = state_list_append (sym, core_size, core); |
250 | |
251 | if (trace_flag & trace_automaton) |
252 | fprintf (stderr, "Exiting get_state => %d\n" , s->number); |
253 | |
254 | return s; |
255 | } |
256 | |
257 | /*---------------------------------------------------------------. |
258 | | Use the information computed by new_itemsets to find the state | |
259 | | numbers reached by each shift transition from S. | |
260 | | | |
261 | | SHIFTSET is set up as a vector of those states. | |
262 | `---------------------------------------------------------------*/ |
263 | |
264 | static void |
265 | append_states (state *s) |
266 | { |
267 | if (trace_flag & trace_automaton) |
268 | fprintf (stderr, "append_states: begin: state = %d\n" , s->number); |
269 | |
270 | bitset_iterator iter; |
271 | symbol_number sym; |
272 | int i = 0; |
273 | BITSET_FOR_EACH (iter, shift_symbol, sym, 0) |
274 | { |
275 | shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]); |
276 | ++i; |
277 | } |
278 | |
279 | if (trace_flag & trace_automaton) |
280 | fprintf (stderr, "append_states: end: state = %d\n" , s->number); |
281 | } |
282 | |
283 | |
284 | /*----------------------------------------------------------------. |
285 | | Find which rules can be used for reduction transitions from the | |
286 | | current state and make a reductions structure for the state to | |
287 | | record their rule numbers. | |
288 | `----------------------------------------------------------------*/ |
289 | |
290 | static void |
291 | save_reductions (state *s) |
292 | { |
293 | int count = 0; |
294 | |
295 | /* Find and count the active items that represent ends of rules. */ |
296 | for (size_t i = 0; i < nitemset; ++i) |
297 | { |
298 | item_number item = ritem[itemset[i]]; |
299 | if (item_number_is_rule_number (item)) |
300 | { |
301 | rule_number r = item_number_as_rule_number (item); |
302 | redset[count++] = &rules[r]; |
303 | if (r == 0) |
304 | { |
305 | /* This is "reduce 0", i.e., accept. */ |
306 | aver (!final_state); |
307 | final_state = s; |
308 | } |
309 | } |
310 | } |
311 | |
312 | /* Make a reductions structure and copy the data into it. */ |
313 | state_reductions_set (s, count, redset); |
314 | } |
315 | |
316 | |
317 | /*---------------. |
318 | | Build STATES. | |
319 | `---------------*/ |
320 | |
321 | static void |
322 | set_states (void) |
323 | { |
324 | states = xcalloc (nstates, sizeof *states); |
325 | |
326 | while (first_state) |
327 | { |
328 | state_list *this = first_state; |
329 | |
330 | /* Pessimization, but simplification of the code: make sure all |
331 | the states have valid transitions and reductions members, |
332 | even if reduced to 0. It is too soon for errs, which are |
333 | computed later, but set_conflicts. */ |
334 | state *s = this->state; |
335 | if (!s->transitions) |
336 | state_transitions_set (s, 0, 0); |
337 | if (!s->reductions) |
338 | state_reductions_set (s, 0, 0); |
339 | |
340 | states[s->number] = s; |
341 | |
342 | first_state = this->next; |
343 | free (this); |
344 | } |
345 | first_state = NULL; |
346 | last_state = NULL; |
347 | } |
348 | |
349 | |
350 | /*-------------------------------------------------------------------. |
351 | | Compute the LR(0) parser states (see state.h for details) from the | |
352 | | grammar. | |
353 | `-------------------------------------------------------------------*/ |
354 | |
355 | void |
356 | generate_states (void) |
357 | { |
358 | allocate_storage (); |
359 | closure_new (nritems); |
360 | |
361 | /* Create the initial state. The 0 at the lhs is the index of the |
362 | item of this initial rule. */ |
363 | item_number initial_core = 0; |
364 | state_list_append (0, 1, &initial_core); |
365 | |
366 | /* States are queued when they are created; process them all. */ |
367 | for (state_list *list = first_state; list; list = list->next) |
368 | { |
369 | state *s = list->state; |
370 | if (trace_flag & trace_automaton) |
371 | fprintf (stderr, "Processing state %d (reached by %s)\n" , |
372 | s->number, |
373 | symbols[s->accessing_symbol]->tag); |
374 | /* Set up itemset for the transitions out of this state. itemset gets a |
375 | vector of all the items that could be accepted next. */ |
376 | closure (s->items, s->nitems); |
377 | /* Record the reductions allowed out of this state. */ |
378 | save_reductions (s); |
379 | /* Find the itemsets of the states that shifts/gotos can reach. */ |
380 | new_itemsets (s); |
381 | /* Find or create the core structures for those states. */ |
382 | append_states (s); |
383 | |
384 | /* Create the shifts structures for the shifts to those states, |
385 | now that the state numbers transitioning to are known. */ |
386 | state_transitions_set (s, bitset_count (shift_symbol), shiftset); |
387 | } |
388 | |
389 | /* discard various storage */ |
390 | free_storage (); |
391 | |
392 | /* Set up STATES. */ |
393 | set_states (); |
394 | } |
395 | |