1 | /* Type definitions for the finite state machine for Bison. |
2 | |
3 | Copyright (C) 1984, 1989, 2000-2004, 2007, 2009-2015, 2018-2019 Free |
4 | Software Foundation, Inc. |
5 | |
6 | This file is part of Bison, the GNU Compiler Compiler. |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
20 | |
21 | |
22 | /* These type definitions are used to represent a nondeterministic |
23 | finite state machine that parses the specified grammar. This |
24 | information is generated by the function generate_states in the |
25 | file LR0. |
26 | |
27 | Each state of the machine is described by a set of items -- |
28 | particular positions in particular rules -- that are the possible |
29 | places where parsing could continue when the machine is in this |
30 | state. These symbols at these items are the allowable inputs that |
31 | can follow now. |
32 | |
33 | A core represents one state. States are numbered in the NUMBER |
34 | field. When generate_states is finished, the starting state is |
35 | state 0 and NSTATES is the number of states. (FIXME: This sentence |
36 | is no longer true: A transition to a state whose state number is |
37 | NSTATES indicates termination.) All the cores are chained together |
38 | and FIRST_STATE points to the first one (state 0). |
39 | |
40 | For each state there is a particular symbol which must have been |
41 | the last thing accepted to reach that state. It is the |
42 | ACCESSING_SYMBOL of the core. |
43 | |
44 | Each core contains a vector of NITEMS items which are the indices |
45 | in the RITEM vector of the items that are selected in this state. |
46 | |
47 | The two types of actions are shifts/gotos (push the lookahead token |
48 | and read another/goto to the state designated by a nterm) and |
49 | reductions (combine the last n things on the stack via a rule, |
50 | replace them with the symbol that the rule derives, and leave the |
51 | lookahead token alone). When the states are generated, these |
52 | actions are represented in two other lists. |
53 | |
54 | Each transition structure describes the possible transitions out of |
55 | one state (there are NUM of them). Each contains a vector of |
56 | numbers of the states that transitions can go to. The |
57 | accessing_symbol fields of those states' cores say what kind of |
58 | input leads to them. |
59 | |
60 | A transition to state zero should be ignored: conflict resolution |
61 | deletes transitions by having them point to zero. |
62 | |
63 | Each reductions structure describes the possible reductions at the |
64 | state whose number is in the number field. rules is an array of |
65 | num rules. lookahead_tokens is an array of bitsets, one per rule. |
66 | |
67 | Conflict resolution can decide that certain tokens in certain |
68 | states should explicitly be errors (for implementing %nonassoc). |
69 | For each state, the tokens that are errors for this reason are |
70 | recorded in an errs structure, which holds the token numbers. |
71 | |
72 | There is at least one goto transition present in state zero. It |
73 | leads to a next-to-final state whose accessing_symbol is the |
74 | grammar's start symbol. The next-to-final state has one shift to |
75 | the final state, whose accessing_symbol is zero (end of input). |
76 | The final state has one shift, which goes to the termination state. |
77 | The reason for the extra state at the end is to placate the |
78 | parser's strategy of making all decisions one token ahead of its |
79 | actions. */ |
80 | |
81 | #ifndef STATE_H_ |
82 | # define STATE_H_ |
83 | |
84 | # include <stdbool.h> |
85 | |
86 | # include <bitset.h> |
87 | |
88 | # include "gram.h" |
89 | # include "symtab.h" |
90 | |
91 | |
92 | /*-------------------. |
93 | | Numbering states. | |
94 | `-------------------*/ |
95 | |
96 | typedef int state_number; |
97 | # define STATE_NUMBER_MAXIMUM INT_MAX |
98 | |
99 | /* Be ready to map a state_number to an int. */ |
100 | static inline int |
101 | state_number_as_int (state_number s) |
102 | { |
103 | return s; |
104 | } |
105 | |
106 | |
107 | typedef struct state state; |
108 | |
109 | /*--------------. |
110 | | Transitions. | |
111 | `--------------*/ |
112 | |
113 | typedef struct |
114 | { |
115 | int num; /** Size of destination STATES. */ |
116 | state *states[1]; |
117 | } transitions; |
118 | |
119 | |
120 | /* What is the symbol labelling the transition to |
121 | TRANSITIONS->states[Num]? Can be a token (amongst which the error |
122 | token), or nonterminals in case of gotos. */ |
123 | |
124 | # define TRANSITION_SYMBOL(Transitions, Num) \ |
125 | (Transitions->states[Num]->accessing_symbol) |
126 | |
127 | /* Is the TRANSITIONS->states[Num] a shift? (as opposed to gotos). */ |
128 | |
129 | # define TRANSITION_IS_SHIFT(Transitions, Num) \ |
130 | (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num))) |
131 | |
132 | /* Is the TRANSITIONS->states[Num] a goto?. */ |
133 | |
134 | # define TRANSITION_IS_GOTO(Transitions, Num) \ |
135 | (!TRANSITION_IS_SHIFT (Transitions, Num)) |
136 | |
137 | /* Is the TRANSITIONS->states[Num] labelled by the error token? */ |
138 | |
139 | # define TRANSITION_IS_ERROR(Transitions, Num) \ |
140 | (TRANSITION_SYMBOL (Transitions, Num) == errtoken->content->number) |
141 | |
142 | /* When resolving a SR conflicts, if the reduction wins, the shift is |
143 | disabled. */ |
144 | |
145 | # define TRANSITION_DISABLE(Transitions, Num) \ |
146 | (Transitions->states[Num] = NULL) |
147 | |
148 | # define TRANSITION_IS_DISABLED(Transitions, Num) \ |
149 | (Transitions->states[Num] == NULL) |
150 | |
151 | |
152 | /* Iterate over each transition over a token (shifts). */ |
153 | # define FOR_EACH_SHIFT(Transitions, Iter) \ |
154 | for (Iter = 0; \ |
155 | Iter < Transitions->num \ |
156 | && (TRANSITION_IS_DISABLED (Transitions, Iter) \ |
157 | || TRANSITION_IS_SHIFT (Transitions, Iter)); \ |
158 | ++Iter) \ |
159 | if (!TRANSITION_IS_DISABLED (Transitions, Iter)) |
160 | |
161 | |
162 | /* The destination of the transition (shift/goto) from state S on |
163 | label SYM (term or nterm). Abort if none found. */ |
164 | struct state *transitions_to (state *s, symbol_number sym); |
165 | |
166 | |
167 | /*-------. |
168 | | Errs. | |
169 | `-------*/ |
170 | |
171 | typedef struct |
172 | { |
173 | int num; |
174 | symbol *symbols[1]; |
175 | } errs; |
176 | |
177 | errs *errs_new (int num, symbol **tokens); |
178 | |
179 | |
180 | /*-------------. |
181 | | Reductions. | |
182 | `-------------*/ |
183 | |
184 | typedef struct |
185 | { |
186 | int num; |
187 | bitset *lookahead_tokens; |
188 | /* Sorted ascendingly on rule number. */ |
189 | rule *rules[1]; |
190 | } reductions; |
191 | |
192 | |
193 | |
194 | /*---------. |
195 | | states. | |
196 | `---------*/ |
197 | |
198 | struct state_list; |
199 | |
200 | struct state |
201 | { |
202 | state_number number; |
203 | symbol_number accessing_symbol; |
204 | transitions *transitions; |
205 | reductions *reductions; |
206 | errs *errs; |
207 | |
208 | /* When an includer (such as ielr.c) needs to store states in a list, the |
209 | includer can define struct state_list as the list node structure and can |
210 | store in this member a reference to the node containing each state. */ |
211 | struct state_list *state_list; |
212 | |
213 | /* Whether no lookahead sets on reduce actions are needed to decide |
214 | what to do in state S. */ |
215 | bool consistent; |
216 | |
217 | /* If some conflicts were solved thanks to precedence/associativity, |
218 | a human readable description of the resolution. */ |
219 | const char *solved_conflicts; |
220 | const char *solved_conflicts_xml; |
221 | |
222 | /* Its items. Must be last, since ITEMS can be arbitrarily large. Sorted |
223 | ascendingly on item index in RITEM, which is sorted on rule number. */ |
224 | size_t nitems; |
225 | item_number items[1]; |
226 | }; |
227 | |
228 | extern state_number nstates; |
229 | extern state *final_state; |
230 | |
231 | /* Create a new state with ACCESSING_SYMBOL for those items. */ |
232 | state *state_new (symbol_number accessing_symbol, |
233 | size_t core_size, item_number *core); |
234 | state *state_new_isocore (state const *s); |
235 | |
236 | /* Record that from S we can reach all the DST states (NUM of them). */ |
237 | void state_transitions_set (state *s, int num, state **dst); |
238 | |
239 | /* Print the transitions of state s for debug. */ |
240 | void state_transitions_print (const state *s, FILE *out); |
241 | |
242 | /* Set the reductions of STATE. */ |
243 | void state_reductions_set (state *s, int num, rule **reds); |
244 | |
245 | /* The index of the reduction of state S that corresponds to rule R. |
246 | Aborts if there is no reduction of R in S. */ |
247 | int state_reduction_find (state *s, rule const *r); |
248 | |
249 | /* Set the errs of STATE. */ |
250 | void state_errs_set (state *s, int num, symbol **errors); |
251 | |
252 | /* Print on OUT all the lookahead tokens such that this STATE wants to |
253 | reduce R. */ |
254 | void state_rule_lookahead_tokens_print (state *s, rule const *r, FILE *out); |
255 | void state_rule_lookahead_tokens_print_xml (state *s, rule const *r, |
256 | FILE *out, int level); |
257 | |
258 | /* Create/destroy the states hash table. */ |
259 | void state_hash_new (void); |
260 | void state_hash_free (void); |
261 | |
262 | /* Find the state associated to the CORE, and return it. If it does |
263 | not exist yet, return NULL. */ |
264 | state *state_hash_lookup (size_t core_size, item_number *core); |
265 | |
266 | /* Insert STATE in the state hash table. */ |
267 | void state_hash_insert (state *s); |
268 | |
269 | /* Remove unreachable states, renumber remaining states, update NSTATES, and |
270 | write to OLD_TO_NEW a mapping of old state numbers to new state numbers such |
271 | that the old value of NSTATES is written as the new state number for removed |
272 | states. The size of OLD_TO_NEW must be the old value of NSTATES. */ |
273 | void state_remove_unreachable_states (state_number old_to_new[]); |
274 | |
275 | /* All the states, indexed by the state number. */ |
276 | extern state **states; |
277 | |
278 | /* Free all the states. */ |
279 | void states_free (void); |
280 | |
281 | #endif /* !STATE_H_ */ |
282 | |