1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QLALR project on Qt Labs. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
21 | ** included in the packaging of this file. Please review the following |
22 | ** information to ensure the GNU General Public License requirements will |
23 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
24 | ** |
25 | ** $QT_END_LICENSE$ |
26 | ** |
27 | ****************************************************************************/ |
28 | |
29 | #include "recognizer.h" |
30 | |
31 | #include <QtCore/qdir.h> |
32 | |
33 | #include <cstdlib> |
34 | #include <cstring> |
35 | #include <cctype> |
36 | |
37 | Recognizer::Recognizer (Grammar *grammar, bool no_lines): |
38 | tos(0), |
39 | stack_size(0), |
40 | state_stack(nullptr), |
41 | _M_line(1), |
42 | _M_action_line(0), |
43 | _M_grammar(grammar), |
44 | _M_no_lines(no_lines) |
45 | { |
46 | } |
47 | |
48 | Recognizer::~Recognizer() |
49 | { |
50 | if (stack_size) |
51 | ::free(state_stack); |
52 | } |
53 | |
54 | inline void Recognizer::reallocateStack() |
55 | { |
56 | if (! stack_size) |
57 | stack_size = 128; |
58 | else |
59 | stack_size <<= 1; |
60 | |
61 | sym_stack.resize (stack_size); |
62 | |
63 | if (! state_stack) |
64 | state_stack = reinterpret_cast<int*> (::malloc(stack_size * sizeof(int))); |
65 | else |
66 | state_stack = reinterpret_cast<int*> (::realloc(state_stack, stack_size * sizeof(int))); |
67 | } |
68 | |
69 | int Recognizer::nextToken() |
70 | { |
71 | QString text; |
72 | |
73 | Lagain: |
74 | while (ch.isSpace ()) |
75 | inp (); |
76 | |
77 | if (ch.isNull ()) |
78 | return EOF_SYMBOL; |
79 | |
80 | int token = ch.unicode (); |
81 | |
82 | if (token == '"') |
83 | { |
84 | inp(); // skip " |
85 | text.clear (); |
86 | while (! ch.isNull () && ch != QLatin1Char ('"')) |
87 | { |
88 | if (ch == QLatin1Char ('\\')) |
89 | { |
90 | text += ch; |
91 | inp(); |
92 | } |
93 | text += ch; |
94 | inp (); |
95 | } |
96 | |
97 | if (ch == QLatin1Char ('"')) |
98 | inp (); |
99 | else |
100 | qerr() << _M_input_file << ":" << _M_line << ": Warning. Expected `\"'" << Qt::endl; |
101 | |
102 | _M_current_value = text; |
103 | return (token = STRING_LITERAL); |
104 | } |
105 | |
106 | else if (ch.isLetterOrNumber () || ch == QLatin1Char ('_')) |
107 | { |
108 | text.clear (); |
109 | do { text += ch; inp (); } |
110 | while (ch.isLetterOrNumber () || ch == QLatin1Char ('_') || ch == QLatin1Char ('.')); |
111 | _M_current_value = text; |
112 | return (token = ID); |
113 | } |
114 | |
115 | else if (token == '%') |
116 | { |
117 | text.clear (); |
118 | |
119 | do { inp (); } |
120 | while (ch.isSpace ()); |
121 | |
122 | do { text += ch; inp (); } |
123 | while (ch.isLetterOrNumber () || ch == QLatin1Char ('_') || ch == QLatin1Char ('-')); |
124 | |
125 | if (text == QLatin1String("token_prefix" )) |
126 | return (token = TOKEN_PREFIX); |
127 | else if (text == QLatin1String("merged_output" )) |
128 | return (token = MERGED_OUTPUT); |
129 | else if (text == QLatin1String("token" )) |
130 | return (token = TOKEN); |
131 | else if (text == QLatin1String("start" )) |
132 | return (token = START); |
133 | else if (text == QLatin1String("parser" )) |
134 | return (token = PARSER); |
135 | else if (text == QLatin1String("decl" )) |
136 | return (token = DECL_FILE); |
137 | else if (text == QLatin1String("impl" )) |
138 | return (token = IMPL_FILE); |
139 | else if (text == QLatin1String("expect" )) |
140 | return (token = EXPECT); |
141 | else if (text == QLatin1String("expect-rr" )) |
142 | return (token = EXPECT_RR); |
143 | else if (text == QLatin1String("left" )) |
144 | return (token = LEFT); |
145 | else if (text == QLatin1String("right" )) |
146 | return (token = RIGHT); |
147 | else if (text == QLatin1String("nonassoc" )) |
148 | return (token = NONASSOC); |
149 | else if (text == QLatin1String("prec" )) |
150 | return (token = PREC); |
151 | else |
152 | { |
153 | qerr() << _M_input_file << ":" << _M_line << ": Unknown keyword `" << text << "'" << Qt::endl; |
154 | exit (EXIT_FAILURE); |
155 | return (token = ERROR); |
156 | } |
157 | } |
158 | |
159 | inp (); |
160 | |
161 | if (token == '-' && ch == QLatin1Char ('-')) |
162 | { |
163 | do { inp (); } |
164 | while (! ch.isNull () && ch != QLatin1Char ('\n')); |
165 | goto Lagain; |
166 | } |
167 | |
168 | else if (token == ':' && ch == QLatin1Char (':')) |
169 | { |
170 | inp (); |
171 | if (ch != QLatin1Char ('=')) |
172 | return (token = ERROR); |
173 | inp (); |
174 | return (token = COLON); |
175 | } |
176 | |
177 | else if (token == '/' && ch == QLatin1Char (':')) |
178 | { |
179 | _M_action_line = _M_line; |
180 | |
181 | text.clear (); |
182 | if (! _M_no_lines) |
183 | text += QLatin1String("\n#line " ) + QString::number(_M_action_line) + |
184 | QLatin1String(" \"" ) + QDir::fromNativeSeparators(_M_input_file) + QLatin1String("\"\n" ); |
185 | inp (); // skip ':' |
186 | |
187 | forever |
188 | { |
189 | while (! ch.isNull ()) |
190 | { |
191 | token = ch.unicode (); |
192 | inp (); |
193 | |
194 | if (token == ':' && ch == QLatin1Char ('/')) |
195 | break; |
196 | |
197 | text += QLatin1Char (token); |
198 | } |
199 | |
200 | if (ch != QLatin1Char ('/')) |
201 | return (token = ERROR); |
202 | |
203 | inp (); |
204 | |
205 | if (ch.isNull () || ch.isSpace ()) |
206 | { |
207 | _M_current_value = text; |
208 | return (token = DECL); |
209 | } |
210 | else |
211 | text += QLatin1String (":/" ); |
212 | } |
213 | } |
214 | |
215 | else if (token == '/' && ch == QLatin1Char ('.')) |
216 | { |
217 | _M_action_line = _M_line; |
218 | |
219 | text.clear (); |
220 | if (! _M_no_lines) |
221 | text += QLatin1String("\n#line " ) + QString::number(_M_action_line) + |
222 | QLatin1String(" \"" ) + QDir::fromNativeSeparators(_M_input_file) + QLatin1String("\"\n" ); |
223 | |
224 | inp (); // skip ':' |
225 | |
226 | forever |
227 | { |
228 | while (! ch.isNull ()) |
229 | { |
230 | token = ch.unicode (); |
231 | inp (); |
232 | |
233 | if (token == '.' && ch == QLatin1Char ('/')) |
234 | break; |
235 | |
236 | text += QLatin1Char (token); |
237 | } |
238 | |
239 | if (ch != QLatin1Char ('/')) |
240 | return (token = ERROR); |
241 | |
242 | inp (); |
243 | |
244 | if (ch.isNull () || ch.isSpace ()) |
245 | { |
246 | _M_current_value = text; |
247 | return (token = IMPL); |
248 | } |
249 | else |
250 | text += QLatin1String ("" ); |
251 | } |
252 | } |
253 | |
254 | switch (token) { |
255 | case ':': |
256 | return (token = COLON); |
257 | |
258 | case ';': |
259 | return (token = SEMICOLON); |
260 | |
261 | case '|': |
262 | return (token = OR); |
263 | |
264 | default: |
265 | break; |
266 | } |
267 | |
268 | return token; |
269 | } |
270 | |
271 | bool Recognizer::parse (const QString &input_file) |
272 | { |
273 | _M_input_file = input_file; |
274 | |
275 | QFile file(_M_input_file); |
276 | if (! file.open(QFile::ReadOnly)) |
277 | { |
278 | qerr() << "qlalr: no input file\n" ; |
279 | return false; |
280 | } |
281 | |
282 | QString _M_contents = QTextStream(&file).readAll(); |
283 | _M_firstChar = _M_contents.constBegin(); |
284 | _M_lastChar = _M_contents.constEnd(); |
285 | _M_currentChar = _M_firstChar; |
286 | _M_line = 1; |
287 | |
288 | int yytoken = -1; |
289 | inp (); |
290 | |
291 | reallocateStack(); |
292 | |
293 | _M_current_rule = _M_grammar->rules.end (); |
294 | _M_decls.clear (); |
295 | _M_impls.clear (); |
296 | |
297 | tos = 0; |
298 | state_stack[++tos] = 0; |
299 | |
300 | while (true) |
301 | { |
302 | if (yytoken == -1 && - TERMINAL_COUNT != action_index [state_stack [tos]]) |
303 | yytoken = nextToken(); |
304 | |
305 | int act = t_action (state_stack [tos], yytoken); |
306 | |
307 | if (act == ACCEPT_STATE) |
308 | return true; |
309 | |
310 | else if (act > 0) |
311 | { |
312 | if (++tos == stack_size) |
313 | reallocateStack(); |
314 | |
315 | sym_stack [tos] = _M_current_value; |
316 | state_stack [tos] = act; |
317 | yytoken = -1; |
318 | } |
319 | |
320 | else if (act < 0) |
321 | { |
322 | int r = - act - 1; |
323 | |
324 | tos -= rhs [r]; |
325 | act = state_stack [tos++]; |
326 | |
327 | switch (r) { |
328 | |
329 | case 3: { |
330 | Name name = _M_grammar->intern (sym(2)); |
331 | _M_grammar->start = name; |
332 | _M_grammar->non_terminals.insert (name); |
333 | } break; |
334 | |
335 | case 5: { |
336 | _M_grammar->table_name = sym(2); |
337 | } break; |
338 | |
339 | case 6: { |
340 | _M_grammar->merged_output = sym(2); |
341 | } break; |
342 | |
343 | case 7: { |
344 | _M_grammar->decl_file_name = sym(2); |
345 | } break; |
346 | |
347 | case 8: { |
348 | _M_grammar->impl_file_name = sym(2); |
349 | } break; |
350 | |
351 | case 9: { |
352 | _M_grammar->expected_shift_reduce = sym(2).toInt(); |
353 | } break; |
354 | |
355 | case 10: { |
356 | _M_grammar->expected_reduce_reduce = sym(2).toInt(); |
357 | } break; |
358 | |
359 | case 11: { |
360 | _M_grammar->token_prefix = sym(2); |
361 | } break; |
362 | case 17:case 18: { |
363 | Name name = _M_grammar->intern (sym(1)); |
364 | _M_grammar->terminals.insert (name); |
365 | _M_grammar->spells.insert (name, sym(2)); |
366 | } break; |
367 | |
368 | case 19: { |
369 | _M_grammar->current_assoc = Grammar::Left; |
370 | ++_M_grammar->current_prec; |
371 | } break; |
372 | |
373 | case 20: { |
374 | _M_grammar->current_assoc = Grammar::Right; |
375 | ++_M_grammar->current_prec; |
376 | } break; |
377 | |
378 | case 21: { |
379 | _M_grammar->current_assoc = Grammar::NonAssoc; |
380 | ++_M_grammar->current_prec; |
381 | } break; |
382 | |
383 | case 25: { |
384 | Name name = _M_grammar->intern (sym(1)); |
385 | _M_grammar->terminals.insert (name); |
386 | |
387 | Grammar::TokenInfo info; |
388 | info.prec = _M_grammar->current_prec; |
389 | info.assoc = _M_grammar->current_assoc; |
390 | _M_grammar->token_info.insert (name, info); |
391 | } break; |
392 | |
393 | case 26: { |
394 | _M_decls += expand (sym(1)); |
395 | } break; |
396 | |
397 | case 27: { |
398 | _M_impls += expand (sym(1)); |
399 | } break; |
400 | |
401 | case 34: { |
402 | _M_current_rule = _M_grammar->rules.insert (_M_grammar->rules.end (), Rule ()); |
403 | _M_current_rule->lhs = _M_grammar->intern (sym(1)); |
404 | _M_grammar->declared_lhs.insert (_M_current_rule->lhs); |
405 | |
406 | if (_M_grammar->terminals.find (_M_current_rule->lhs) != _M_grammar->terminals.end ()) |
407 | { |
408 | qerr() << _M_input_file << ":" << _M_line << ": Invalid non terminal `" << *_M_current_rule->lhs << "'" << Qt::endl; |
409 | return false; |
410 | } |
411 | |
412 | _M_grammar->non_terminals.insert (_M_current_rule->lhs); |
413 | } break; |
414 | |
415 | case 38: { |
416 | Name lhs = _M_current_rule->lhs; |
417 | _M_current_rule = _M_grammar->rules.insert (_M_grammar->rules.end (), Rule ()); |
418 | _M_current_rule->lhs = lhs; |
419 | _M_grammar->declared_lhs.insert (_M_current_rule->lhs); |
420 | |
421 | if (_M_grammar->terminals.find (_M_current_rule->lhs) != _M_grammar->terminals.end ()) |
422 | { |
423 | qerr() << _M_input_file << ":" << _M_line << ": Invalid non terminal `" << *_M_current_rule->lhs << "'" << Qt::endl; |
424 | return false; |
425 | } |
426 | |
427 | _M_grammar->non_terminals.insert (_M_current_rule->lhs); |
428 | } break; |
429 | |
430 | case 39: { |
431 | _M_current_rule->prec = _M_grammar->names.end (); |
432 | |
433 | for (NameList::iterator it = _M_current_rule->rhs.begin (); it != _M_current_rule->rhs.end (); ++it) |
434 | { |
435 | if (! _M_grammar->isTerminal (*it)) |
436 | continue; |
437 | |
438 | _M_current_rule->prec = *it; |
439 | } |
440 | } break; |
441 | |
442 | case 40: { |
443 | Name tok = _M_grammar->intern (sym(2)); |
444 | if (! _M_grammar->isTerminal (tok)) |
445 | { |
446 | qerr() << _M_input_file << ":" << _M_line << ": `" << *tok << " is not a terminal symbol" << Qt::endl; |
447 | _M_current_rule->prec = _M_grammar->names.end (); |
448 | } |
449 | else |
450 | _M_current_rule->prec = tok; |
451 | } break; |
452 | |
453 | case 42: { |
454 | Name name = _M_grammar->intern (sym(2)); |
455 | |
456 | if (_M_grammar->terminals.find (name) == _M_grammar->terminals.end ()) |
457 | _M_grammar->non_terminals.insert (name); |
458 | |
459 | _M_current_rule->rhs.push_back (name); |
460 | } break; |
461 | |
462 | case 43: { |
463 | sym(1) = QString(); |
464 | } break; |
465 | |
466 | } // switch |
467 | |
468 | state_stack [tos] = nt_action (act, lhs [r] - TERMINAL_COUNT); |
469 | } |
470 | |
471 | else |
472 | { |
473 | break; |
474 | } |
475 | } |
476 | |
477 | qerr() << _M_input_file << ":" << _M_line << ": Syntax error" << Qt::endl; |
478 | return false; |
479 | } |
480 | |
481 | |