1 | /* Grammar reduction for Bison. |
2 | |
3 | Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2019 Free |
4 | Software Foundation, Inc. |
5 | |
6 | This file is part of Bison, the GNU Compiler Compiler. |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
20 | |
21 | |
22 | /* Reduce the grammar: Find and eliminate unreachable terminals, |
23 | nonterminals, and productions. David S. Bakin. */ |
24 | |
25 | /* Don't eliminate unreachable terminals: They may be used by the |
26 | user's parser. */ |
27 | |
28 | #include <config.h> |
29 | #include "system.h" |
30 | |
31 | #include <bitset.h> |
32 | |
33 | #include "complain.h" |
34 | #include "files.h" |
35 | #include "getargs.h" |
36 | #include "gram.h" |
37 | #include "print-xml.h" |
38 | #include "reader.h" |
39 | #include "reduce.h" |
40 | #include "symtab.h" |
41 | |
42 | /* Set of nonterminals whose language is not empty. */ |
43 | static bitset N; |
44 | |
45 | /* Set of rules that have no useless nonterminals in their RHS. */ |
46 | static bitset P; |
47 | |
48 | /* Set of accessible symbols. */ |
49 | static bitset V; |
50 | |
51 | /* Set of symbols used to define rule precedence (so they are |
52 | 'useless', but no warning should be issued). */ |
53 | static bitset V1; |
54 | |
55 | unsigned nuseless_productions; |
56 | unsigned nuseless_nonterminals; |
57 | |
58 | #define bitset_swap(Lhs, Rhs) \ |
59 | do { \ |
60 | bitset lhs__ = Lhs; \ |
61 | Lhs = Rhs; \ |
62 | Rhs = lhs__; \ |
63 | } while (0) |
64 | |
65 | /*-------------------------------------------------------------------. |
66 | | Another way to do this would be with a set for each production and | |
67 | | then do subset tests against N0, but even for the C grammar the | |
68 | | whole reducing process takes only 2 seconds on my 8Mhz AT. | |
69 | `-------------------------------------------------------------------*/ |
70 | |
71 | static bool |
72 | useful_production (rule_number r, bitset N0) |
73 | { |
74 | /* A production is useful if all of the nonterminals in its appear |
75 | in the set of useful nonterminals. */ |
76 | |
77 | for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp) |
78 | if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens)) |
79 | return false; |
80 | return true; |
81 | } |
82 | |
83 | |
84 | /*-----------------------------------------------------------------. |
85 | | Compute N, the set of nonterminals whose language is not empty. | |
86 | | | |
87 | | Remember that rules are 1-origin, symbols are 0-origin. | |
88 | `-----------------------------------------------------------------*/ |
89 | |
90 | static void |
91 | useless_nonterminals (void) |
92 | { |
93 | /* N is set as built. Np is set being built this iteration. P is |
94 | set of all productions which have a RHS all in N. */ |
95 | |
96 | bitset Np = bitset_create (nvars, BITSET_FIXED); |
97 | |
98 | /* The set being computed is a set of nonterminals which can derive |
99 | the empty string or strings consisting of all terminals. At each |
100 | iteration a nonterminal is added to the set if there is a |
101 | production with that nonterminal as its LHS for which all the |
102 | nonterminals in its RHS are already in the set. Iterate until |
103 | the set being computed remains unchanged. Any nonterminals not |
104 | in the set at that point are useless in that they will never be |
105 | used in deriving a sentence of the language. |
106 | |
107 | This iteration doesn't use any special traversal over the |
108 | productions. A set is kept of all productions for which all the |
109 | nonterminals in the RHS are in useful. Only productions not in |
110 | this set are scanned on each iteration. At the end, this set is |
111 | saved to be used when finding useful productions: only |
112 | productions in this set will appear in the final grammar. */ |
113 | |
114 | while (1) |
115 | { |
116 | bitset_copy (Np, N); |
117 | for (rule_number r = 0; r < nrules; ++r) |
118 | if (!bitset_test (P, r) |
119 | && useful_production (r, N)) |
120 | { |
121 | bitset_set (Np, rules[r].lhs->number - ntokens); |
122 | bitset_set (P, r); |
123 | } |
124 | if (bitset_equal_p (N, Np)) |
125 | break; |
126 | bitset_swap (N, Np); |
127 | } |
128 | bitset_free (N); |
129 | N = Np; |
130 | } |
131 | |
132 | |
133 | static void |
134 | inaccessable_symbols (void) |
135 | { |
136 | /* Find out which productions are reachable and which symbols are |
137 | used. Starting with an empty set of productions and a set of |
138 | symbols which only has the start symbol in it, iterate over all |
139 | productions until the set of productions remains unchanged for an |
140 | iteration. For each production which has a LHS in the set of |
141 | reachable symbols, add the production to the set of reachable |
142 | productions, and add all of the nonterminals in the RHS of the |
143 | production to the set of reachable symbols. |
144 | |
145 | Consider only the (partially) reduced grammar which has only |
146 | nonterminals in N and productions in P. |
147 | |
148 | The result is the set P of productions in the reduced grammar, |
149 | and the set V of symbols in the reduced grammar. |
150 | |
151 | Although this algorithm also computes the set of terminals which |
152 | are reachable, no terminal will be deleted from the grammar. Some |
153 | terminals might not be in the grammar but might be generated by |
154 | semantic routines, and so the user might want them available with |
155 | specified numbers. (Is this true?) However, the nonreachable |
156 | terminals are printed (if running in verbose mode) so that the |
157 | user can know. */ |
158 | |
159 | bitset Vp = bitset_create (nsyms, BITSET_FIXED); |
160 | bitset Pp = bitset_create (nrules, BITSET_FIXED); |
161 | |
162 | /* If the start symbol isn't useful, then nothing will be useful. */ |
163 | if (bitset_test (N, accept->content->number - ntokens)) |
164 | { |
165 | bitset_set (V, accept->content->number); |
166 | |
167 | while (1) |
168 | { |
169 | bitset_copy (Vp, V); |
170 | for (rule_number r = 0; r < nrules; ++r) |
171 | if (!bitset_test (Pp, r) |
172 | && bitset_test (P, r) |
173 | && bitset_test (V, rules[r].lhs->number)) |
174 | { |
175 | for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp) |
176 | if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens)) |
177 | bitset_set (Vp, *rhsp); |
178 | bitset_set (Pp, r); |
179 | } |
180 | if (bitset_equal_p (V, Vp)) |
181 | break; |
182 | bitset_swap (V, Vp); |
183 | } |
184 | } |
185 | |
186 | bitset_free (V); |
187 | V = Vp; |
188 | |
189 | /* These tokens (numbered 0, 1, and 2) are internal to Bison. |
190 | Consider them useful. */ |
191 | bitset_set (V, endtoken->content->number); /* end-of-input token */ |
192 | bitset_set (V, errtoken->content->number); /* error token */ |
193 | bitset_set (V, undeftoken->content->number); /* some undefined token */ |
194 | |
195 | bitset_free (P); |
196 | P = Pp; |
197 | |
198 | unsigned nuseful_productions = bitset_count (P); |
199 | nuseless_productions = nrules - nuseful_productions; |
200 | |
201 | unsigned nuseful_nonterminals = 0; |
202 | for (symbol_number i = ntokens; i < nsyms; ++i) |
203 | nuseful_nonterminals += bitset_test (V, i); |
204 | nuseless_nonterminals = nvars - nuseful_nonterminals; |
205 | |
206 | /* A token that was used in %prec should not be warned about. */ |
207 | for (rule_number r = 0; r < nrules; ++r) |
208 | if (rules[r].precsym != 0) |
209 | bitset_set (V1, rules[r].precsym->number); |
210 | } |
211 | |
212 | |
213 | /*-------------------------------------------------------------------. |
214 | | Put the useless productions at the end of RULES, and adjust NRULES | |
215 | | accordingly. | |
216 | `-------------------------------------------------------------------*/ |
217 | |
218 | static void |
219 | reduce_grammar_tables (void) |
220 | { |
221 | /* Report and flag useless productions. */ |
222 | { |
223 | for (rule_number r = 0; r < nrules; ++r) |
224 | rules[r].useful = bitset_test (P, r); |
225 | grammar_rules_useless_report (_("rule useless in grammar" )); |
226 | } |
227 | |
228 | /* Map the nonterminals to their new index: useful first, useless |
229 | afterwards. Kept for later report. */ |
230 | { |
231 | int useful = 0; |
232 | int useless = nrules - nuseless_productions; |
233 | rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted); |
234 | for (rule_number r = 0; r < nrules; ++r) |
235 | rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r]; |
236 | free (rules); |
237 | rules = rules_sorted; |
238 | |
239 | /* Renumber the rules markers in RITEMS. */ |
240 | for (rule_number r = 0; r < nrules; ++r) |
241 | { |
242 | item_number *rhsp = rules[r].rhs; |
243 | for (/* Nothing. */; 0 <= *rhsp; ++rhsp) |
244 | continue; |
245 | *rhsp = rule_number_as_item_number (r); |
246 | rules[r].number = r; |
247 | } |
248 | nrules -= nuseless_productions; |
249 | } |
250 | |
251 | /* Adjust NRITEMS. */ |
252 | for (rule_number r = nrules; r < nrules + nuseless_productions; ++r) |
253 | nritems -= rule_rhs_length (&rules[r]) + 1; |
254 | } |
255 | |
256 | |
257 | /*------------------------------. |
258 | | Remove useless nonterminals. | |
259 | `------------------------------*/ |
260 | |
261 | symbol_number *nterm_map = NULL; |
262 | |
263 | static void |
264 | nonterminals_reduce (void) |
265 | { |
266 | nterm_map = xnmalloc (nvars, sizeof *nterm_map); |
267 | /* Map the nonterminals to their new index: useful first, useless |
268 | afterwards. Kept for later report. */ |
269 | { |
270 | symbol_number n = ntokens; |
271 | for (symbol_number i = ntokens; i < nsyms; ++i) |
272 | if (bitset_test (V, i)) |
273 | nterm_map[i - ntokens] = n++; |
274 | for (symbol_number i = ntokens; i < nsyms; ++i) |
275 | if (!bitset_test (V, i)) |
276 | { |
277 | nterm_map[i - ntokens] = n++; |
278 | if (symbols[i]->content->status != used) |
279 | complain (&symbols[i]->location, Wother, |
280 | _("nonterminal useless in grammar: %s" ), |
281 | symbols[i]->tag); |
282 | } |
283 | } |
284 | |
285 | /* Shuffle elements of tables indexed by symbol number. */ |
286 | { |
287 | symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted); |
288 | for (symbol_number i = ntokens; i < nsyms; ++i) |
289 | symbols[i]->content->number = nterm_map[i - ntokens]; |
290 | for (symbol_number i = ntokens; i < nsyms; ++i) |
291 | symbols_sorted[nterm_map[i - ntokens] - ntokens] = symbols[i]; |
292 | for (symbol_number i = ntokens; i < nsyms; ++i) |
293 | symbols[i] = symbols_sorted[i - ntokens]; |
294 | free (symbols_sorted); |
295 | } |
296 | |
297 | /* Update nonterminal numbers in the RHS of the rules. LHS are |
298 | pointers to the symbol structure, they don't need renumbering. */ |
299 | { |
300 | for (rule_number r = 0; r < nrules; ++r) |
301 | for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp) |
302 | if (ISVAR (*rhsp)) |
303 | *rhsp = symbol_number_as_item_number (nterm_map[*rhsp - ntokens]); |
304 | accept->content->number = nterm_map[accept->content->number - ntokens]; |
305 | } |
306 | |
307 | nsyms -= nuseless_nonterminals; |
308 | nvars -= nuseless_nonterminals; |
309 | } |
310 | |
311 | |
312 | /*------------------------------------------------------------------. |
313 | | Output the detailed results of the reductions. For FILE.output. | |
314 | `------------------------------------------------------------------*/ |
315 | |
316 | void |
317 | reduce_output (FILE *out) |
318 | { |
319 | if (nuseless_nonterminals) |
320 | { |
321 | fprintf (out, "%s\n\n" , _("Nonterminals useless in grammar" )); |
322 | for (int i = 0; i < nuseless_nonterminals; ++i) |
323 | fprintf (out, " %s\n" , symbols[nsyms + i]->tag); |
324 | fputs ("\n\n" , out); |
325 | } |
326 | |
327 | { |
328 | bool b = false; |
329 | for (int i = 0; i < ntokens; ++i) |
330 | if (reduce_token_unused_in_grammar (i)) |
331 | { |
332 | if (!b) |
333 | fprintf (out, "%s\n\n" , _("Terminals unused in grammar" )); |
334 | b = true; |
335 | fprintf (out, " %s\n" , symbols[i]->tag); |
336 | } |
337 | if (b) |
338 | fputs ("\n\n" , out); |
339 | } |
340 | |
341 | if (nuseless_productions) |
342 | grammar_rules_partial_print (out, _("Rules useless in grammar" ), |
343 | rule_useless_in_grammar_p); |
344 | } |
345 | |
346 | |
347 | /*-------------------------------. |
348 | | Report the results to STDERR. | |
349 | `-------------------------------*/ |
350 | |
351 | static void |
352 | reduce_print (void) |
353 | { |
354 | if (nuseless_nonterminals) |
355 | complain (NULL, Wother, ngettext ("%d nonterminal useless in grammar" , |
356 | "%d nonterminals useless in grammar" , |
357 | nuseless_nonterminals), |
358 | nuseless_nonterminals); |
359 | if (nuseless_productions) |
360 | complain (NULL, Wother, ngettext ("%d rule useless in grammar" , |
361 | "%d rules useless in grammar" , |
362 | nuseless_productions), |
363 | nuseless_productions); |
364 | } |
365 | |
366 | void |
367 | reduce_grammar (void) |
368 | { |
369 | /* Allocate the global sets used to compute the reduced grammar */ |
370 | |
371 | N = bitset_create (nvars, BITSET_FIXED); |
372 | P = bitset_create (nrules, BITSET_FIXED); |
373 | V = bitset_create (nsyms, BITSET_FIXED); |
374 | V1 = bitset_create (nsyms, BITSET_FIXED); |
375 | |
376 | useless_nonterminals (); |
377 | inaccessable_symbols (); |
378 | |
379 | /* Did we reduce something? */ |
380 | if (nuseless_nonterminals || nuseless_productions) |
381 | { |
382 | reduce_print (); |
383 | |
384 | if (!bitset_test (N, accept->content->number - ntokens)) |
385 | complain (&startsymbol_loc, fatal, |
386 | _("start symbol %s does not derive any sentence" ), |
387 | startsymbol->tag); |
388 | |
389 | /* First reduce the nonterminals, as they renumber themselves in the |
390 | whole grammar. If you change the order, nonterms would be |
391 | renumbered only in the reduced grammar. */ |
392 | if (nuseless_nonterminals) |
393 | nonterminals_reduce (); |
394 | if (nuseless_productions) |
395 | reduce_grammar_tables (); |
396 | } |
397 | |
398 | if (trace_flag & trace_grammar) |
399 | { |
400 | grammar_dump (stderr, "Reduced Grammar" ); |
401 | |
402 | fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals" |
403 | ", and %d productions.\n" , |
404 | grammar_file, ntokens, nvars, nrules); |
405 | } |
406 | } |
407 | |
408 | bool |
409 | reduce_token_unused_in_grammar (symbol_number i) |
410 | { |
411 | aver (i < ntokens); |
412 | return !bitset_test (V, i) && !bitset_test (V1, i); |
413 | } |
414 | |
415 | bool |
416 | reduce_nonterminal_useless_in_grammar (const sym_content *sym) |
417 | { |
418 | symbol_number n = sym->number; |
419 | aver (ntokens <= n && n < nsyms + nuseless_nonterminals); |
420 | return nsyms <= n; |
421 | } |
422 | |
423 | /*-----------------------------------------------------------. |
424 | | Free the global sets used to compute the reduced grammar. | |
425 | `-----------------------------------------------------------*/ |
426 | |
427 | void |
428 | reduce_free (void) |
429 | { |
430 | bitset_free (N); |
431 | bitset_free (V); |
432 | bitset_free (V1); |
433 | bitset_free (P); |
434 | free (nterm_map); |
435 | nterm_map = NULL; |
436 | } |
437 | |