1 | /* Data definitions for internal representation of Bison's input. |
2 | |
3 | Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2019 |
4 | Free Software Foundation, Inc. |
5 | |
6 | This file is part of Bison, the GNU Compiler Compiler. |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #ifndef GRAM_H_ |
22 | # define GRAM_H_ |
23 | |
24 | /* Representation of the grammar rules: |
25 | |
26 | NTOKENS is the number of tokens, and NVARS is the number of |
27 | variables (nonterminals). NSYMS is the total number, ntokens + |
28 | nvars. |
29 | |
30 | Each symbol (either token or variable) receives a symbol number. |
31 | Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 |
32 | are for variables. Symbol number zero is the end-of-input token. |
33 | This token is counted in ntokens. The true number of token values |
34 | assigned is NTOKENS reduced by one for each alias declaration. |
35 | |
36 | The rules receive rule numbers 1 to NRULES in the order they are |
37 | written. More precisely Bison augments the grammar with the |
38 | initial rule, '$accept: START-SYMBOL $end', which is numbered 1, |
39 | all the user rules are 2, 3 etc. Each time a rule number is |
40 | presented to the user, we subtract 1, so *displayed* rule numbers |
41 | are 0, 1, 2... |
42 | |
43 | Internally, we cannot use the number 0 for a rule because for |
44 | instance RITEM stores both symbol (the RHS) and rule numbers: the |
45 | symbols are shorts >= 0, and rule number are stored negative. |
46 | Therefore 0 cannot be used, since it would be both the rule number |
47 | 0, and the token $end). |
48 | |
49 | Actions are accessed via the rule number. |
50 | |
51 | The rules themselves are described by several arrays: amongst which |
52 | RITEM, and RULES. |
53 | |
54 | RULES is an array of rules, whose members are: |
55 | |
56 | RULES[R].lhs -- the symbol of the left hand side of rule R. |
57 | |
58 | RULES[R].rhs -- the index in RITEM of the beginning of the portion |
59 | for rule R. |
60 | |
61 | RULES[R].prec -- the symbol providing the precedence level of R. |
62 | |
63 | RULES[R].precsym -- the symbol attached (via %prec) to give its |
64 | precedence to R. Of course, if set, it is equal to 'prec', but we |
65 | need to distinguish one from the other when reducing: a symbol used |
66 | in a %prec is not useless. |
67 | |
68 | RULES[R].assoc -- the associativity of R. |
69 | |
70 | RULES[R].dprec -- the dynamic precedence level of R (for GLR |
71 | parsing). |
72 | |
73 | RULES[R].merger -- index of merging function for R (for GLR |
74 | parsing). |
75 | |
76 | RULES[R].line -- the line where R was defined. |
77 | |
78 | RULES[R].useful -- whether the rule is used (i.e., false if thrown |
79 | away by reduce). |
80 | |
81 | The right hand side is stored as symbol numbers in a portion of |
82 | RITEM. |
83 | |
84 | The length of the portion is one greater than the number of symbols |
85 | in the rule's right hand side. The last element in the portion |
86 | contains minus R, which identifies it as the end of a portion and |
87 | says which rule it is for. |
88 | |
89 | The portions of RITEM come in order of increasing rule number. |
90 | NRITEMS is the total length of RITEM. Each element of RITEM is |
91 | called an "item" and its index in RITEM is an item number. |
92 | |
93 | Item numbers are used in the finite state machine to represent |
94 | places that parsing can get to. |
95 | |
96 | SYMBOLS[I]->prec records the precedence level of each symbol. |
97 | |
98 | Precedence levels are assigned in increasing order starting with 1 |
99 | so that numerically higher precedence values mean tighter binding |
100 | as they ought to. Zero as a symbol or rule's precedence means none |
101 | is assigned. |
102 | |
103 | Associativities are recorded similarly in SYMBOLS[I]->assoc. */ |
104 | |
105 | # include "location.h" |
106 | # include "symtab.h" |
107 | |
108 | # define ISTOKEN(i) ((i) < ntokens) |
109 | # define ISVAR(i) ((i) >= ntokens) |
110 | |
111 | extern int nsyms; |
112 | extern int ntokens; |
113 | extern int nvars; |
114 | |
115 | typedef int item_number; |
116 | # define ITEM_NUMBER_MAX INT_MAX |
117 | extern item_number *ritem; |
118 | extern unsigned nritems; |
119 | |
120 | /* There is weird relationship between OT1H item_number and OTOH |
121 | symbol_number and rule_number: we store the latter in |
122 | item_number. symbol_number values are stored as-is, while |
123 | the negation of (rule_number + 1) is stored. |
124 | |
125 | Therefore, a symbol_number must be a valid item_number, and we |
126 | sometimes have to perform the converse transformation. */ |
127 | |
128 | static inline item_number |
129 | symbol_number_as_item_number (symbol_number sym) |
130 | { |
131 | return sym; |
132 | } |
133 | |
134 | static inline symbol_number |
135 | item_number_as_symbol_number (item_number i) |
136 | { |
137 | return i; |
138 | } |
139 | |
140 | static inline bool |
141 | item_number_is_symbol_number (item_number i) |
142 | { |
143 | return i >= 0; |
144 | } |
145 | |
146 | /* Rule numbers. */ |
147 | typedef int rule_number; |
148 | # define RULE_NUMBER_MAX INT_MAX |
149 | |
150 | static inline item_number |
151 | rule_number_as_item_number (rule_number r) |
152 | { |
153 | return -1 - r; |
154 | } |
155 | |
156 | static inline rule_number |
157 | item_number_as_rule_number (item_number i) |
158 | { |
159 | return -1 - i; |
160 | } |
161 | |
162 | static inline bool |
163 | item_number_is_rule_number (item_number i) |
164 | { |
165 | return i < 0; |
166 | } |
167 | |
168 | |
169 | /*--------. |
170 | | Rules. | |
171 | `--------*/ |
172 | |
173 | typedef struct |
174 | { |
175 | /* The number of the rule in the source. It is usually the index in |
176 | RULES too, except if there are useless rules. */ |
177 | rule_number user_number; |
178 | |
179 | /* The index in RULES. Usually the rule number in the source, |
180 | except if some rules are useless. */ |
181 | rule_number number; |
182 | |
183 | sym_content *lhs; |
184 | item_number *rhs; |
185 | |
186 | /* This symbol provides both the associativity, and the precedence. */ |
187 | sym_content *prec; |
188 | |
189 | int dprec; |
190 | int merger; |
191 | |
192 | /* This symbol was attached to the rule via %prec. */ |
193 | sym_content *precsym; |
194 | |
195 | /* Location of the rhs. */ |
196 | location location; |
197 | bool useful; |
198 | bool is_predicate; |
199 | |
200 | /* Counts of the numbers of expected conflicts for this rule, or -1 if none |
201 | given. */ |
202 | int expected_sr_conflicts; |
203 | int expected_rr_conflicts; |
204 | |
205 | const char *action; |
206 | location action_loc; |
207 | } rule; |
208 | |
209 | /* The used rules (size NRULES). */ |
210 | extern rule *rules; |
211 | extern rule_number nrules; |
212 | |
213 | /* Get the rule associated to this item. ITEM points inside RITEM. */ |
214 | rule const *item_rule (item_number const *item); |
215 | |
216 | /* Pretty-print this ITEM (as in the report). ITEM points inside |
217 | RITEM. PREVIOUS_RULE is used to see if the lhs is common, in which |
218 | case LHS is factored. Passing NULL is fine. */ |
219 | void item_print (item_number *item, rule const *previous_rule, |
220 | FILE *out); |
221 | |
222 | /* A function that selects a rule. */ |
223 | typedef bool (*rule_filter) (rule const *); |
224 | |
225 | /* Whether the rule has a 'number' smaller than NRULES. That is, it |
226 | is useful in the grammar. */ |
227 | bool rule_useful_in_grammar_p (rule const *r); |
228 | |
229 | /* Whether the rule has a 'number' higher than NRULES. That is, it is |
230 | useless in the grammar. */ |
231 | bool rule_useless_in_grammar_p (rule const *r); |
232 | |
233 | /* Whether the rule is not flagged as useful but is useful in the |
234 | grammar. In other words, it was discarded because of conflicts. */ |
235 | bool rule_useless_in_parser_p (rule const *r); |
236 | |
237 | /* Whether the rule has a single RHS, and no user action. */ |
238 | bool rule_useless_chain_p (rule const *r); |
239 | |
240 | /* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was |
241 | already displayed (by a previous call for another rule), avoid |
242 | useless repetitions. */ |
243 | void rule_lhs_print (rule const *r, sym_content const *previous_lhs, |
244 | FILE *out); |
245 | void rule_lhs_print_xml (rule const *r, FILE *out, int level); |
246 | |
247 | /* The length of the RHS. */ |
248 | size_t rule_rhs_length (rule const *r); |
249 | |
250 | /* Print this rule's RHS on OUT. */ |
251 | void rule_rhs_print (rule const *r, FILE *out); |
252 | |
253 | /* Print this rule on OUT. If a PREVIOUS_RULE was already displayed, |
254 | avoid useless repetitions of their LHS. */ |
255 | void rule_print (rule const *r, rule const *prev_rule, FILE *out); |
256 | |
257 | |
258 | |
259 | /* Table of the symbols, indexed by the symbol number. */ |
260 | extern symbol **symbols; |
261 | |
262 | /* TOKEN_TRANSLATION -- a table indexed by a token number as returned |
263 | by the user's yylex routine, it yields the internal token number |
264 | used by the parser and throughout bison. */ |
265 | extern symbol_number *token_translations; |
266 | extern int max_user_token_number; |
267 | |
268 | |
269 | |
270 | /* Dump RITEM for traces. */ |
271 | void ritem_print (FILE *out); |
272 | |
273 | /* The size of the longest rule RHS. */ |
274 | size_t ritem_longest_rhs (void); |
275 | |
276 | /* Print the grammar's rules that match FILTER on OUT under TITLE. */ |
277 | void grammar_rules_partial_print (FILE *out, const char *title, |
278 | rule_filter filter); |
279 | |
280 | /* Print the grammar's useful rules on OUT. */ |
281 | void grammar_rules_print (FILE *out); |
282 | /* Print all of the grammar's rules with a "usefulness" attribute. */ |
283 | void grammar_rules_print_xml (FILE *out, int level); |
284 | |
285 | /* Dump the grammar. */ |
286 | void grammar_dump (FILE *out, const char *title); |
287 | |
288 | /* Report on STDERR the rules that are not flagged USEFUL, using the |
289 | MESSAGE (which can be 'rule useless in grammar' when invoked after grammar |
290 | reduction, or 'rule useless in parser due to conflicts' after conflicts |
291 | were taken into account). */ |
292 | void grammar_rules_useless_report (const char *message); |
293 | |
294 | /* Free the packed grammar. */ |
295 | void grammar_free (void); |
296 | |
297 | /* The version %required by the grammar file, as an int (100 * major + |
298 | minor). 0 if unspecified. */ |
299 | extern int required_version; |
300 | |
301 | #endif /* !GRAM_H_ */ |
302 | |