1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org> |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the tools applications of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
22 | ** included in the packaging of this file. Please review the following |
23 | ** information to ensure the GNU General Public License requirements will |
24 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
25 | ** |
26 | ** $QT_END_LICENSE$ |
27 | ** |
28 | ****************************************************************************/ |
29 | |
30 | #include "preprocessor.h" |
31 | #include "utils.h" |
32 | #include <qstringlist.h> |
33 | #include <qfile.h> |
34 | #include <qdir.h> |
35 | #include <qfileinfo.h> |
36 | |
37 | QT_BEGIN_NAMESPACE |
38 | |
39 | #include "ppkeywords.cpp" |
40 | #include "keywords.cpp" |
41 | |
42 | // transform \r\n into \n |
43 | // \r into \n (os9 style) |
44 | // backslash-newlines into newlines |
45 | static QByteArray cleaned(const QByteArray &input) |
46 | { |
47 | QByteArray result; |
48 | result.resize(input.size()); |
49 | const char *data = input.constData(); |
50 | const char *end = input.constData() + input.size(); |
51 | char *output = result.data(); |
52 | |
53 | int newlines = 0; |
54 | while (data != end) { |
55 | while (data != end && is_space(*data)) |
56 | ++data; |
57 | bool takeLine = (*data == '#'); |
58 | if (*data == '%' && *(data+1) == ':') { |
59 | takeLine = true; |
60 | ++data; |
61 | } |
62 | if (takeLine) { |
63 | *output = '#'; |
64 | ++output; |
65 | do ++data; while (data != end && is_space(*data)); |
66 | } |
67 | while (data != end) { |
68 | // handle \\\n, \\\r\n and \\\r |
69 | if (*data == '\\') { |
70 | if (*(data + 1) == '\r') { |
71 | ++data; |
72 | } |
73 | if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) { |
74 | ++newlines; |
75 | data += 1; |
76 | if (data != end && *data != '\r') |
77 | data += 1; |
78 | continue; |
79 | } |
80 | } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n |
81 | ++data; |
82 | } |
83 | if (data == end) |
84 | break; |
85 | |
86 | char ch = *data; |
87 | if (ch == '\r') // os9: replace \r with \n |
88 | ch = '\n'; |
89 | *output = ch; |
90 | ++output; |
91 | |
92 | if (*data == '\n') { |
93 | // output additional newlines to keep the correct line-numbering |
94 | // for the lines following the backslash-newline sequence(s) |
95 | while (newlines) { |
96 | *output = '\n'; |
97 | ++output; |
98 | --newlines; |
99 | } |
100 | ++data; |
101 | break; |
102 | } |
103 | ++data; |
104 | } |
105 | } |
106 | result.resize(output - result.constData()); |
107 | return result; |
108 | } |
109 | |
// Mode flag, off by default. tokenize() consults it: when it is set, whitespace
// and unrecognized input are kept as tokens and adjacent string literals are
// not concatenated.
bool Preprocessor::preprocessOnly = false;
111 | void Preprocessor::skipUntilEndif() |
112 | { |
113 | while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){ |
114 | switch (symbols.at(index).token) { |
115 | case PP_IF: |
116 | case PP_IFDEF: |
117 | case PP_IFNDEF: |
118 | ++index; |
119 | skipUntilEndif(); |
120 | break; |
121 | default: |
122 | ; |
123 | } |
124 | ++index; |
125 | } |
126 | } |
127 | |
128 | bool Preprocessor::skipBranch() |
129 | { |
130 | while (index < symbols.size() - 1 |
131 | && (symbols.at(index).token != PP_ENDIF |
132 | && symbols.at(index).token != PP_ELIF |
133 | && symbols.at(index).token != PP_ELSE) |
134 | ){ |
135 | switch (symbols.at(index).token) { |
136 | case PP_IF: |
137 | case PP_IFDEF: |
138 | case PP_IFNDEF: |
139 | ++index; |
140 | skipUntilEndif(); |
141 | break; |
142 | default: |
143 | ; |
144 | } |
145 | ++index; |
146 | } |
147 | return (index < symbols.size() - 1); |
148 | } |
149 | |
150 | |
// Splits 'input' into a list of Symbols.
//
// input   - the text to tokenize; scanning stops at the first '\0' byte
// lineNum - line number given to the symbols of the first line; incremented for
//           every newline token and for every newline inside a C comment
// mode    - initial tokenizer state. TokenizeCpp and TokenizeDefine are driven
//           by the keywords/keyword_trans tables, every other mode by the
//           pp_keywords/pp_keyword_trans tables (presumably defined in the
//           included keywords.cpp / ppkeywords.cpp). The mode changes while
//           scanning: a '#' seen in TokenizeCpp mode switches to the
//           preprocessor tables, a newline switches back to TokenizeCpp.
//
// Returns the symbols followed by one default-constructed Symbol that marks the
// end of the input.
Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
{
    Symbols symbols;
    // Preallocate some space to speed up the code below.
    // The magic divisor value was found by calculating the average ratio between
    // input size and the final size of symbols.
    // This yielded a value of 16.x when compiling Qt Base.
    symbols.reserve(input.size() / 16);
    const char *begin = input.constData();
    const char *data = begin;
    while (*data) {
        if (mode == TokenizeCpp || mode == TokenizeDefine) {
            // NOTE(review): column is re-initialized for every token, so it is
            // always 1 by the time the HASH case below tests it - confirm whether
            // it was meant to persist across the tokens of a line.
            int column = 0;

            const char *lexem = data;
            int state = 0;
            Token token = NOTOKEN;
            // walk the keyword state machine as far as the input matches
            for (;;) {
                // bytes with the high bit set are skipped without a state change
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }
                int nextindex = keywords[state].next;
                int next = 0;
                if (*data == keywords[state].defchar)
                    next = keywords[state].defnext;
                else if (!state || nextindex)
                    next = keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = keywords[state].token;
                ++data;
            }

            // suboptimal, is_ident_char should use a table
            // a keyword directly followed by an identifier character is only
            // the prefix of a longer identifier
            if (keywords[state].ident && is_ident_char(*data))
                token = keywords[state].ident;

            if (token == NOTOKEN) {
                if (*data)
                    ++data;
                // an error really, but let's ignore this input
                // to not confuse moc later. However in pre-processor
                // only mode let's continue.
                if (!Preprocessor::preprocessOnly)
                    continue;
            }

            ++column;

            // tokens above SPECIAL_TREATMENT_MARK need more input consumed or
            // are rewritten/dropped; 'continue' drops the token, 'break' emits it
            if (token > SPECIAL_TREATMENT_MARK) {
                switch (token) {
                case QUOTE:
                    data = skipQuote(data);
                    token = STRING_LITERAL;
                    // concatenate multi-line strings for easier
                    // STRING_LITERAL handling in moc
                    if (!Preprocessor::preprocessOnly
                        && !symbols.isEmpty()
                        && symbols.constLast().token == STRING_LITERAL) {

                        const QByteArray newString
                                = '\"'
                                + symbols.constLast().unquotedLexem()
                                + input.mid(lexem - begin + 1, data - lexem - 2)
                                + '\"';
                        symbols.last() = Symbol(symbols.constLast().lineNum,
                                                STRING_LITERAL,
                                                newString);
                        continue;
                    }
                    break;
                case SINGLEQUOTE:
                    // scan to the closing quote; a quote preceded by a single
                    // backslash does not close the literal
                    while (*data && (*data != '\''
                                     || (*(data-1)=='\\'
                                         && *(data-2)!='\\')))
                        ++data;
                    if (*data)
                        ++data;
                    token = CHARACTER_LITERAL;
                    break;
                case LANGLE_SCOPE:
                    // split <:: into two tokens, < and ::
                    token = LANGLE;
                    data -= 2;
                    break;
                case DIGIT:
                    // '\'' is accepted between digits (digit separator)
                    while (is_digit_char(*data) || *data == '\'')
                        ++data;
                    if (!*data || *data != '.') {
                        token = INTEGER_LITERAL;
                        // 0x / 0X / 0b / 0B prefix: consume the remaining digits
                        if (data - lexem == 1 &&
                            (*data == 'x' || *data == 'X'
                             || *data == 'b' || *data == 'B')
                            && *lexem == '0') {
                            ++data;
                            while (is_hex_char(*data) || *data == '\'')
                                ++data;
                        }
                        break;
                    }
                    token = FLOATING_LITERAL;
                    ++data;
                    Q_FALLTHROUGH();
                case FLOATING_LITERAL:
                    while (is_digit_char(*data) || *data == '\'')
                        ++data;
                    // NOTE(review): the optional sign is tested before the
                    // exponent marker, so a signed exponent (as in 1.0e+5) is not
                    // consumed as part of the literal - confirm whether intended.
                    if (*data == '+' || *data == '-')
                        ++data;
                    if (*data == 'e' || *data == 'E') {
                        ++data;
                        while (is_digit_char(*data) || *data == '\'')
                            ++data;
                    }
                    if (*data == 'f' || *data == 'F'
                        || *data == 'l' || *data == 'L')
                        ++data;
                    break;
                case HASH:
                    // start of a preprocessor statement: switch to the
                    // preprocessor tables and drop the '#' itself
                    if (column == 1 && mode == TokenizeCpp) {
                        mode = PreparePreprocessorStatement;
                        while (*data && (*data == ' ' || *data == '\t'))
                            ++data;
                        if (is_ident_char(*data))
                            mode = TokenizePreprocessorStatement;
                        continue;
                    }
                    break;
                case PP_HASHHASH:
                    // '##' is only kept inside a macro definition
                    if (mode == TokenizeCpp)
                        continue;
                    break;
                case NEWLINE:
                    ++lineNum;
                    if (mode == TokenizeDefine) {
                        mode = TokenizeCpp;
                        // emit the newline token
                        break;
                    }
                    continue;
                case BACKSLASH:
                {
                    // a backslash followed only by blanks and a newline is
                    // dropped together with that newline; otherwise the
                    // backslash is emitted and scanning resumes right after it
                    const char *rewind = data;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (*data && *data == '\n') {
                        ++data;
                        continue;
                    }
                    data = rewind;
                } break;
                case CHARACTER:
                    while (is_ident_char(*data))
                        ++data;
                    token = IDENTIFIER;
                    break;
                case C_COMMENT:
                    // step over the first two characters of the comment body
                    // unconditionally, so that the look-behind below cannot
                    // match the opening "/*"
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                        if (*data) {
                            if (*data == '\n')
                                ++lineNum;
                            ++data;
                        }
                    }
                    // stop once the two characters before data are "*/"
                    while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                    token = WHITESPACE; // one comment, one whitespace
                    Q_FALLTHROUGH();
                case WHITESPACE:
                    if (column == 1)
                        column = 0;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (Preprocessor::preprocessOnly) // tokenize whitespace
                        break;
                    continue;
                case CPP_COMMENT:
                    while (*data && *data != '\n')
                        ++data;
                    continue; // ignore safely, the newline is a separator
                default:
                    continue; //ignore
                }
            }
#ifdef USE_LEXEM_STORE
            if (!Preprocessor::preprocessOnly
                && token != IDENTIFIER
                && token != STRING_LITERAL
                && token != FLOATING_LITERAL
                && token != INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);

        } else { //   Preprocessor

            const char *lexem = data;
            int state = 0;
            Token token = NOTOKEN;
            if (mode == TokenizePreprocessorStatement) {
                // the '#' was already consumed in C++ mode: start the state
                // machine as if it had just been read
                state = pp_keyword_trans[0][(int)'#'];
                mode = TokenizePreprocessor;
            }
            // walk the preprocessor keyword state machine
            for (;;) {
                // bytes with the high bit set are skipped without a state change
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }
                int nextindex = pp_keywords[state].next;
                int next = 0;
                if (*data == pp_keywords[state].defchar)
                    next = pp_keywords[state].defnext;
                else if (!state || nextindex)
                    next = pp_keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = pp_keywords[state].token;
                ++data;
            }
            // suboptimal, is_ident_char should use a table
            if (pp_keywords[state].ident && is_ident_char(*data))
                token = pp_keywords[state].ident;

            // 'continue' drops the token, 'break' emits it below
            switch (token) {
            case NOTOKEN:
                if (*data)
                    ++data;
                break;
            case PP_DEFINE:
                mode = PrepareDefine;
                break;
            case PP_IFDEF:
                // #ifdef X is emitted as #if defined X
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_IFNDEF:
                // #ifndef X is emitted as #if ! defined X
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_NOT);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_INCLUDE:
                mode = TokenizeInclude;
                break;
            case PP_QUOTE:
                data = skipQuote(data);
                token = PP_STRING_LITERAL;
                break;
            case PP_SINGLEQUOTE:
                // scan to the closing quote; a quote preceded by a single
                // backslash does not close the literal
                while (*data && (*data != '\''
                                 || (*(data-1)=='\\'
                                     && *(data-2)!='\\')))
                    ++data;
                if (*data)
                    ++data;
                token = PP_CHARACTER_LITERAL;
                break;
            case PP_DIGIT:
                while (is_digit_char(*data) || *data == '\'')
                    ++data;
                if (!*data || *data != '.') {
                    token = PP_INTEGER_LITERAL;
                    // 0x / 0X prefix: consume the hex digits.
                    // NOTE(review): unlike the C++ branch above, a 0b / 0B
                    // prefix is not handled here - confirm whether intended.
                    if (data - lexem == 1 &&
                        (*data == 'x' || *data == 'X')
                        && *lexem == '0') {
                        ++data;
                        while (is_hex_char(*data) || *data == '\'')
                            ++data;
                    }
                    break;
                }
                token = PP_FLOATING_LITERAL;
                ++data;
                Q_FALLTHROUGH();
            case PP_FLOATING_LITERAL:
                while (is_digit_char(*data) || *data == '\'')
                    ++data;
                if (*data == '+' || *data == '-')
                    ++data;
                if (*data == 'e' || *data == 'E') {
                    ++data;
                    while (is_digit_char(*data) || *data == '\'')
                        ++data;
                }
                if (*data == 'f' || *data == 'F'
                    || *data == 'l' || *data == 'L')
                    ++data;
                break;
            case PP_CHARACTER:
                if (mode == PreparePreprocessorStatement) {
                    // rewind entire token to begin
                    data = lexem;
                    mode = TokenizePreprocessorStatement;
                    continue;
                }
                while (is_ident_char(*data))
                    ++data;
                token = PP_IDENTIFIER;

                if (mode == PrepareDefine) {
                    // this identifier is the name of the macro being defined
                    symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
                    // make sure we explicitly add the whitespace here if the next char
                    // is not an opening brace, so we can distinguish correctly between
                    // regular and function macros
                    if (*data != '(')
                        symbols += Symbol(lineNum, WHITESPACE);
                    mode = TokenizeDefine;
                    continue;
                }
                break;
            case PP_C_COMMENT:
                // step over the first two characters of the comment body
                // unconditionally, so that the look-behind below cannot match
                // the opening "/*"
                if (*data) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                }
                // stop once the two characters before data are "*/"
                while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                }
                token = PP_WHITESPACE; // one comment, one whitespace
                Q_FALLTHROUGH();
            case PP_WHITESPACE:
                while (*data && (*data == ' ' || *data == '\t'))
                    ++data;
                continue; // the preprocessor needs no whitespace
            case PP_CPP_COMMENT:
                while (*data && *data != '\n')
                    ++data;
                continue; // ignore safely, the newline is a separator
            case PP_NEWLINE:
                // a newline ends the preprocessor statement
                ++lineNum;
                mode = TokenizeCpp;
                break;
            case PP_BACKSLASH:
            {
                // a backslash followed only by blanks and a newline is dropped
                // together with that newline
                const char *rewind = data;
                while (*data && (*data == ' ' || *data == '\t'))
                    ++data;
                if (*data && *data == '\n') {
                    ++data;
                    continue;
                }
                data = rewind;
            } break;
            case PP_LANGLE:
                if (mode != TokenizeInclude)
                    break;
                // #include <...>: take everything up to and including '>'
                // (or up to the end of the line) as one string literal
                token = PP_STRING_LITERAL;
                while (*data && *data != '\n' && *(data-1) != '>')
                    ++data;
                break;
            default:
                break;
            }
            // still looking for the directive name: nothing is emitted
            if (mode == PreparePreprocessorStatement)
                continue;
#ifdef USE_LEXEM_STORE
            if (token != PP_IDENTIFIER
                && token != PP_STRING_LITERAL
                && token != PP_FLOATING_LITERAL
                && token != PP_INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
        }
    }
    symbols += Symbol(); // eof symbol
    return symbols;
}
536 | |
537 | void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, int &index, |
538 | int lineNum, bool one, const QSet<QByteArray> &excludeSymbols) |
539 | { |
540 | SymbolStack symbols; |
541 | SafeSymbols sf; |
542 | sf.symbols = toExpand; |
543 | sf.index = index; |
544 | sf.excludedSymbols = excludeSymbols; |
545 | symbols.push(sf); |
546 | |
547 | if (toExpand.isEmpty()) |
548 | return; |
549 | |
550 | for (;;) { |
551 | QByteArray macro; |
552 | Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, ¯o); |
553 | |
554 | if (macro.isEmpty()) { |
555 | // not a macro |
556 | Symbol s = symbols.symbol(); |
557 | s.lineNum = lineNum; |
558 | *into += s; |
559 | } else { |
560 | SafeSymbols sf; |
561 | sf.symbols = newSyms; |
562 | sf.index = 0; |
563 | sf.expandedMacro = macro; |
564 | symbols.push(sf); |
565 | } |
566 | if (!symbols.hasNext() || (one && symbols.size() == 1)) |
567 | break; |
568 | symbols.next(); |
569 | } |
570 | |
571 | if (symbols.size()) |
572 | index = symbols.top().index; |
573 | else |
574 | index = toExpand.size(); |
575 | } |
576 | |
577 | |
578 | Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName) |
579 | { |
580 | Symbol s = symbols.symbol(); |
581 | |
582 | // not a macro |
583 | if (s.token != PP_IDENTIFIER || !that->macros.contains(s) || symbols.dontReplaceSymbol(s.lexem())) { |
584 | return Symbols(); |
585 | } |
586 | |
587 | const Macro ¯o = that->macros.value(s); |
588 | *macroName = s.lexem(); |
589 | |
590 | Symbols expansion; |
591 | if (!macro.isFunction) { |
592 | expansion = macro.symbols; |
593 | } else { |
594 | bool haveSpace = false; |
595 | while (symbols.test(PP_WHITESPACE)) { haveSpace = true; } |
596 | if (!symbols.test(PP_LPAREN)) { |
597 | *macroName = QByteArray(); |
598 | Symbols syms; |
599 | if (haveSpace) |
600 | syms += Symbol(lineNum, PP_WHITESPACE); |
601 | syms += s; |
602 | syms.last().lineNum = lineNum; |
603 | return syms; |
604 | } |
605 | QVarLengthArray<Symbols, 5> arguments; |
606 | while (symbols.hasNext()) { |
607 | Symbols argument; |
608 | // strip leading space |
609 | while (symbols.test(PP_WHITESPACE)) {} |
610 | int nesting = 0; |
611 | bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1); |
612 | while (symbols.hasNext()) { |
613 | Token t = symbols.next(); |
614 | if (t == PP_LPAREN) { |
615 | ++nesting; |
616 | } else if (t == PP_RPAREN) { |
617 | --nesting; |
618 | if (nesting < 0) |
619 | break; |
620 | } else if (t == PP_COMMA && nesting == 0) { |
621 | if (!vararg) |
622 | break; |
623 | } |
624 | argument += symbols.symbol(); |
625 | } |
626 | arguments += argument; |
627 | |
628 | if (nesting < 0) |
629 | break; |
630 | else if (!symbols.hasNext()) |
631 | that->error("missing ')' in macro usage" ); |
632 | } |
633 | |
634 | // empty VA_ARGS |
635 | if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1) |
636 | arguments += Symbols(); |
637 | |
638 | // now replace the macro arguments with the expanded arguments |
639 | enum Mode { |
640 | Normal, |
641 | Hash, |
642 | HashHash |
643 | } mode = Normal; |
644 | |
645 | for (int i = 0; i < macro.symbols.size(); ++i) { |
646 | const Symbol &s = macro.symbols.at(i); |
647 | if (s.token == HASH || s.token == PP_HASHHASH) { |
648 | mode = (s.token == HASH ? Hash : HashHash); |
649 | continue; |
650 | } |
651 | int index = macro.arguments.indexOf(s); |
652 | if (mode == Normal) { |
653 | if (index >= 0 && index < arguments.size()) { |
654 | // each argument undoergoes macro expansion if it's not used as part of a # or ## |
655 | if (i == macro.symbols.size() - 1 || macro.symbols.at(i + 1).token != PP_HASHHASH) { |
656 | Symbols arg = arguments.at(index); |
657 | int idx = 1; |
658 | macroExpand(&expansion, that, arg, idx, lineNum, false, symbols.excludeSymbols()); |
659 | } else { |
660 | expansion += arguments.at(index); |
661 | } |
662 | } else { |
663 | expansion += s; |
664 | } |
665 | } else if (mode == Hash) { |
666 | if (index < 0) { |
667 | that->error("'#' is not followed by a macro parameter" ); |
668 | continue; |
669 | } else if (index >= arguments.size()) { |
670 | that->error("Macro invoked with too few parameters for a use of '#'" ); |
671 | continue; |
672 | } |
673 | |
674 | const Symbols &arg = arguments.at(index); |
675 | QByteArray stringified; |
676 | for (int i = 0; i < arg.size(); ++i) { |
677 | stringified += arg.at(i).lexem(); |
678 | } |
679 | stringified.replace('"', "\\\"" ); |
680 | stringified.prepend('"'); |
681 | stringified.append('"'); |
682 | expansion += Symbol(lineNum, STRING_LITERAL, stringified); |
683 | } else if (mode == HashHash){ |
684 | if (s.token == WHITESPACE) |
685 | continue; |
686 | |
687 | while (expansion.size() && expansion.constLast().token == PP_WHITESPACE) |
688 | expansion.pop_back(); |
689 | |
690 | Symbol next = s; |
691 | if (index >= 0 && index < arguments.size()) { |
692 | const Symbols &arg = arguments.at(index); |
693 | if (arg.size() == 0) { |
694 | mode = Normal; |
695 | continue; |
696 | } |
697 | next = arg.at(0); |
698 | } |
699 | |
700 | if (!expansion.isEmpty() && expansion.constLast().token == s.token |
701 | && expansion.constLast().token != STRING_LITERAL) { |
702 | Symbol last = expansion.takeLast(); |
703 | |
704 | QByteArray lexem = last.lexem() + next.lexem(); |
705 | expansion += Symbol(lineNum, last.token, lexem); |
706 | } else { |
707 | expansion += next; |
708 | } |
709 | |
710 | if (index >= 0 && index < arguments.size()) { |
711 | const Symbols &arg = arguments.at(index); |
712 | for (int i = 1; i < arg.size(); ++i) |
713 | expansion += arg.at(i); |
714 | } |
715 | } |
716 | mode = Normal; |
717 | } |
718 | if (mode != Normal) |
719 | that->error("'#' or '##' found at the end of a macro argument" ); |
720 | |
721 | } |
722 | |
723 | return expansion; |
724 | } |
725 | |
726 | void Preprocessor::substituteUntilNewline(Symbols &substituted) |
727 | { |
728 | while (hasNext()) { |
729 | Token token = next(); |
730 | if (token == PP_IDENTIFIER) { |
731 | macroExpand(&substituted, this, symbols, index, symbol().lineNum, true); |
732 | } else if (token == PP_DEFINED) { |
733 | bool braces = test(PP_LPAREN); |
734 | next(PP_IDENTIFIER); |
735 | Symbol definedOrNotDefined = symbol(); |
736 | definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE; |
737 | substituted += definedOrNotDefined; |
738 | if (braces) |
739 | test(PP_RPAREN); |
740 | continue; |
741 | } else if (token == PP_NEWLINE) { |
742 | substituted += symbol(); |
743 | break; |
744 | } else { |
745 | substituted += symbol(); |
746 | } |
747 | } |
748 | } |
749 | |
750 | |
751 | class PP_Expression : public Parser |
752 | { |
753 | public: |
754 | int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; } |
755 | |
756 | int conditional_expression(); |
757 | int logical_OR_expression(); |
758 | int logical_AND_expression(); |
759 | int inclusive_OR_expression(); |
760 | int exclusive_OR_expression(); |
761 | int AND_expression(); |
762 | int equality_expression(); |
763 | int relational_expression(); |
764 | int shift_expression(); |
765 | int additive_expression(); |
766 | int multiplicative_expression(); |
767 | int unary_expression(); |
768 | bool unary_expression_lookup(); |
769 | int primary_expression(); |
770 | bool primary_expression_lookup(); |
771 | }; |
772 | |
773 | int PP_Expression::conditional_expression() |
774 | { |
775 | int value = logical_OR_expression(); |
776 | if (test(PP_QUESTION)) { |
777 | int alt1 = conditional_expression(); |
778 | int alt2 = test(PP_COLON) ? conditional_expression() : 0; |
779 | return value ? alt1 : alt2; |
780 | } |
781 | return value; |
782 | } |
783 | |
784 | int PP_Expression::logical_OR_expression() |
785 | { |
786 | int value = logical_AND_expression(); |
787 | if (test(PP_OROR)) |
788 | return logical_OR_expression() || value; |
789 | return value; |
790 | } |
791 | |
792 | int PP_Expression::logical_AND_expression() |
793 | { |
794 | int value = inclusive_OR_expression(); |
795 | if (test(PP_ANDAND)) |
796 | return logical_AND_expression() && value; |
797 | return value; |
798 | } |
799 | |
800 | int PP_Expression::inclusive_OR_expression() |
801 | { |
802 | int value = exclusive_OR_expression(); |
803 | if (test(PP_OR)) |
804 | return value | inclusive_OR_expression(); |
805 | return value; |
806 | } |
807 | |
808 | int PP_Expression::exclusive_OR_expression() |
809 | { |
810 | int value = AND_expression(); |
811 | if (test(PP_HAT)) |
812 | return value ^ exclusive_OR_expression(); |
813 | return value; |
814 | } |
815 | |
816 | int PP_Expression::AND_expression() |
817 | { |
818 | int value = equality_expression(); |
819 | if (test(PP_AND)) |
820 | return value & AND_expression(); |
821 | return value; |
822 | } |
823 | |
824 | int PP_Expression::equality_expression() |
825 | { |
826 | int value = relational_expression(); |
827 | switch (next()) { |
828 | case PP_EQEQ: |
829 | return value == equality_expression(); |
830 | case PP_NE: |
831 | return value != equality_expression(); |
832 | default: |
833 | prev(); |
834 | return value; |
835 | } |
836 | } |
837 | |
838 | int PP_Expression::relational_expression() |
839 | { |
840 | int value = shift_expression(); |
841 | switch (next()) { |
842 | case PP_LANGLE: |
843 | return value < relational_expression(); |
844 | case PP_RANGLE: |
845 | return value > relational_expression(); |
846 | case PP_LE: |
847 | return value <= relational_expression(); |
848 | case PP_GE: |
849 | return value >= relational_expression(); |
850 | default: |
851 | prev(); |
852 | return value; |
853 | } |
854 | } |
855 | |
856 | int PP_Expression::shift_expression() |
857 | { |
858 | int value = additive_expression(); |
859 | switch (next()) { |
860 | case PP_LTLT: |
861 | return value << shift_expression(); |
862 | case PP_GTGT: |
863 | return value >> shift_expression(); |
864 | default: |
865 | prev(); |
866 | return value; |
867 | } |
868 | } |
869 | |
870 | int PP_Expression::additive_expression() |
871 | { |
872 | int value = multiplicative_expression(); |
873 | switch (next()) { |
874 | case PP_PLUS: |
875 | return value + additive_expression(); |
876 | case PP_MINUS: |
877 | return value - additive_expression(); |
878 | default: |
879 | prev(); |
880 | return value; |
881 | } |
882 | } |
883 | |
884 | int PP_Expression::multiplicative_expression() |
885 | { |
886 | int value = unary_expression(); |
887 | switch (next()) { |
888 | case PP_STAR: |
889 | return value * multiplicative_expression(); |
890 | case PP_PERCENT: |
891 | { |
892 | int remainder = multiplicative_expression(); |
893 | return remainder ? value % remainder : 0; |
894 | } |
895 | case PP_SLASH: |
896 | { |
897 | int div = multiplicative_expression(); |
898 | return div ? value / div : 0; |
899 | } |
900 | default: |
901 | prev(); |
902 | return value; |
903 | }; |
904 | } |
905 | |
906 | int PP_Expression::unary_expression() |
907 | { |
908 | switch (next()) { |
909 | case PP_PLUS: |
910 | return unary_expression(); |
911 | case PP_MINUS: |
912 | return -unary_expression(); |
913 | case PP_NOT: |
914 | return !unary_expression(); |
915 | case PP_TILDE: |
916 | return ~unary_expression(); |
917 | case PP_MOC_TRUE: |
918 | return 1; |
919 | case PP_MOC_FALSE: |
920 | return 0; |
921 | default: |
922 | prev(); |
923 | return primary_expression(); |
924 | } |
925 | } |
926 | |
927 | bool PP_Expression::unary_expression_lookup() |
928 | { |
929 | Token t = lookup(); |
930 | return (primary_expression_lookup() |
931 | || t == PP_PLUS |
932 | || t == PP_MINUS |
933 | || t == PP_NOT |
934 | || t == PP_TILDE |
935 | || t == PP_DEFINED); |
936 | } |
937 | |
938 | int PP_Expression::primary_expression() |
939 | { |
940 | int value; |
941 | if (test(PP_LPAREN)) { |
942 | value = conditional_expression(); |
943 | test(PP_RPAREN); |
944 | } else { |
945 | next(); |
946 | value = lexem().toInt(nullptr, 0); |
947 | } |
948 | return value; |
949 | } |
950 | |
951 | bool PP_Expression::primary_expression_lookup() |
952 | { |
953 | Token t = lookup(); |
954 | return (t == PP_IDENTIFIER |
955 | || t == PP_INTEGER_LITERAL |
956 | || t == PP_FLOATING_LITERAL |
957 | || t == PP_MOC_TRUE |
958 | || t == PP_MOC_FALSE |
959 | || t == PP_LPAREN); |
960 | } |
961 | |
962 | int Preprocessor::evaluateCondition() |
963 | { |
964 | PP_Expression expression; |
965 | expression.currentFilenames = currentFilenames; |
966 | |
967 | substituteUntilNewline(expression.symbols); |
968 | |
969 | return expression.value(); |
970 | } |
971 | |
972 | static QByteArray readOrMapFile(QFile *file) |
973 | { |
974 | const qint64 size = file->size(); |
975 | char *rawInput = reinterpret_cast<char*>(file->map(0, size)); |
976 | return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll(); |
977 | } |
978 | |
979 | static void mergeStringLiterals(Symbols *_symbols) |
980 | { |
981 | Symbols &symbols = *_symbols; |
982 | for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) { |
983 | if (i->token == STRING_LITERAL) { |
984 | Symbols::Iterator mergeSymbol = i; |
985 | int literalsLength = mergeSymbol->len; |
986 | while (++i != symbols.end() && i->token == STRING_LITERAL) |
987 | literalsLength += i->len - 2; // no quotes |
988 | |
989 | if (literalsLength != mergeSymbol->len) { |
990 | QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem(); |
991 | QByteArray &mergeSymbolLexem = mergeSymbol->lex; |
992 | mergeSymbolLexem.resize(0); |
993 | mergeSymbolLexem.reserve(literalsLength); |
994 | mergeSymbolLexem.append('"'); |
995 | mergeSymbolLexem.append(mergeSymbolOriginalLexem); |
996 | for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j) |
997 | mergeSymbolLexem.append(j->lex.constData() + j->from + 1, j->len - 2); // append j->unquotedLexem() |
998 | mergeSymbolLexem.append('"'); |
999 | mergeSymbol->len = mergeSymbol->lex.length(); |
1000 | mergeSymbol->from = 0; |
1001 | i = symbols.erase(mergeSymbol + 1, i); |
1002 | } |
1003 | if (i == symbols.end()) |
1004 | break; |
1005 | } |
1006 | } |
1007 | } |
1008 | |
1009 | static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths, |
1010 | const QByteArray &include) |
1011 | { |
1012 | QFileInfo fi; |
1013 | for (int j = 0; j < includepaths.size() && !fi.exists(); ++j) { |
1014 | const Parser::IncludePath &p = includepaths.at(j); |
1015 | if (p.isFrameworkPath) { |
1016 | const int slashPos = include.indexOf('/'); |
1017 | if (slashPos == -1) |
1018 | continue; |
1019 | fi.setFile(QString::fromLocal8Bit(p.path + '/' + include.left(slashPos) + ".framework/Headers/" ), |
1020 | QString::fromLocal8Bit(include.mid(slashPos + 1))); |
1021 | } else { |
1022 | fi.setFile(QString::fromLocal8Bit(p.path), QString::fromLocal8Bit(include)); |
1023 | } |
1024 | // try again, maybe there's a file later in the include paths with the same name |
1025 | // (186067) |
1026 | if (fi.isDir()) { |
1027 | fi = QFileInfo(); |
1028 | continue; |
1029 | } |
1030 | } |
1031 | |
1032 | if (!fi.exists() || fi.isDir()) |
1033 | return QByteArray(); |
1034 | return fi.canonicalFilePath().toLocal8Bit(); |
1035 | } |
1036 | |
1037 | QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo) |
1038 | { |
1039 | if (!relativeTo.isEmpty()) { |
1040 | QFileInfo fi; |
1041 | fi.setFile(QFileInfo(QString::fromLocal8Bit(relativeTo)).dir(), QString::fromLocal8Bit(include)); |
1042 | if (fi.exists() && !fi.isDir()) |
1043 | return fi.canonicalFilePath().toLocal8Bit(); |
1044 | } |
1045 | |
1046 | auto it = nonlocalIncludePathResolutionCache.find(include); |
1047 | if (it == nonlocalIncludePathResolutionCache.end()) |
1048 | it = nonlocalIncludePathResolutionCache.insert(include, searchIncludePaths(includes, include)); |
1049 | return it.value(); |
1050 | } |
1051 | |
1052 | void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed) |
1053 | { |
1054 | currentFilenames.push(filename); |
1055 | preprocessed.reserve(preprocessed.size() + symbols.size()); |
1056 | while (hasNext()) { |
1057 | Token token = next(); |
1058 | |
1059 | switch (token) { |
1060 | case PP_INCLUDE: |
1061 | { |
1062 | int lineNum = symbol().lineNum; |
1063 | QByteArray include; |
1064 | bool local = false; |
1065 | if (test(PP_STRING_LITERAL)) { |
1066 | local = lexem().startsWith('\"'); |
1067 | include = unquotedLexem(); |
1068 | } else |
1069 | continue; |
1070 | until(PP_NEWLINE); |
1071 | |
1072 | include = resolveInclude(include, local ? filename : QByteArray()); |
1073 | if (include.isNull()) |
1074 | continue; |
1075 | |
1076 | if (Preprocessor::preprocessedIncludes.contains(include)) |
1077 | continue; |
1078 | Preprocessor::preprocessedIncludes.insert(include); |
1079 | |
1080 | QFile file(QString::fromLocal8Bit(include.constData())); |
1081 | if (!file.open(QFile::ReadOnly)) |
1082 | continue; |
1083 | |
1084 | QByteArray input = readOrMapFile(&file); |
1085 | |
1086 | file.close(); |
1087 | if (input.isEmpty()) |
1088 | continue; |
1089 | |
1090 | Symbols saveSymbols = symbols; |
1091 | int saveIndex = index; |
1092 | |
1093 | // phase 1: get rid of backslash-newlines |
1094 | input = cleaned(input); |
1095 | |
1096 | // phase 2: tokenize for the preprocessor |
1097 | symbols = tokenize(input); |
1098 | input.clear(); |
1099 | |
1100 | index = 0; |
1101 | |
1102 | // phase 3: preprocess conditions and substitute macros |
1103 | preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include); |
1104 | preprocess(include, preprocessed); |
1105 | preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include); |
1106 | |
1107 | symbols = saveSymbols; |
1108 | index = saveIndex; |
1109 | continue; |
1110 | } |
1111 | case PP_DEFINE: |
1112 | { |
1113 | next(); |
1114 | QByteArray name = lexem(); |
1115 | if (name.isEmpty() || !is_ident_start(name[0])) |
1116 | error(); |
1117 | Macro macro; |
1118 | macro.isVariadic = false; |
1119 | if (test(LPAREN)) { |
1120 | // we have a function macro |
1121 | macro.isFunction = true; |
1122 | parseDefineArguments(¯o); |
1123 | } else { |
1124 | macro.isFunction = false; |
1125 | } |
1126 | int start = index; |
1127 | until(PP_NEWLINE); |
1128 | macro.symbols.reserve(index - start - 1); |
1129 | |
1130 | // remove whitespace where there shouldn't be any: |
1131 | // Before and after the macro, after a # and around ## |
1132 | Token lastToken = HASH; // skip shitespace at the beginning |
1133 | for (int i = start; i < index - 1; ++i) { |
1134 | Token token = symbols.at(i).token; |
1135 | if (token == WHITESPACE) { |
1136 | if (lastToken == PP_HASH || lastToken == HASH || |
1137 | lastToken == PP_HASHHASH || |
1138 | lastToken == WHITESPACE) |
1139 | continue; |
1140 | } else if (token == PP_HASHHASH) { |
1141 | if (!macro.symbols.isEmpty() && |
1142 | lastToken == WHITESPACE) |
1143 | macro.symbols.pop_back(); |
1144 | } |
1145 | macro.symbols.append(symbols.at(i)); |
1146 | lastToken = token; |
1147 | } |
1148 | // remove trailing whitespace |
1149 | while (!macro.symbols.isEmpty() && |
1150 | (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE)) |
1151 | macro.symbols.pop_back(); |
1152 | |
1153 | if (!macro.symbols.isEmpty()) { |
1154 | if (macro.symbols.constFirst().token == PP_HASHHASH || |
1155 | macro.symbols.constLast().token == PP_HASHHASH) { |
1156 | error("'##' cannot appear at either end of a macro expansion" ); |
1157 | } |
1158 | } |
1159 | macros.insert(name, macro); |
1160 | continue; |
1161 | } |
1162 | case PP_UNDEF: { |
1163 | next(); |
1164 | QByteArray name = lexem(); |
1165 | until(PP_NEWLINE); |
1166 | macros.remove(name); |
1167 | continue; |
1168 | } |
1169 | case PP_IDENTIFIER: { |
1170 | // substitute macros |
1171 | macroExpand(&preprocessed, this, symbols, index, symbol().lineNum, true); |
1172 | continue; |
1173 | } |
1174 | case PP_HASH: |
1175 | until(PP_NEWLINE); |
1176 | continue; // skip unknown preprocessor statement |
1177 | case PP_IFDEF: |
1178 | case PP_IFNDEF: |
1179 | case PP_IF: |
1180 | while (!evaluateCondition()) { |
1181 | if (!skipBranch()) |
1182 | break; |
1183 | if (test(PP_ELIF)) { |
1184 | } else { |
1185 | until(PP_NEWLINE); |
1186 | break; |
1187 | } |
1188 | } |
1189 | continue; |
1190 | case PP_ELIF: |
1191 | case PP_ELSE: |
1192 | skipUntilEndif(); |
1193 | Q_FALLTHROUGH(); |
1194 | case PP_ENDIF: |
1195 | until(PP_NEWLINE); |
1196 | continue; |
1197 | case PP_NEWLINE: |
1198 | continue; |
1199 | case SIGNALS: |
1200 | case SLOTS: { |
1201 | Symbol sym = symbol(); |
1202 | if (macros.contains("QT_NO_KEYWORDS" )) |
1203 | sym.token = IDENTIFIER; |
1204 | else |
1205 | sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN); |
1206 | preprocessed += sym; |
1207 | } continue; |
1208 | default: |
1209 | break; |
1210 | } |
1211 | preprocessed += symbol(); |
1212 | } |
1213 | |
1214 | currentFilenames.pop(); |
1215 | } |
1216 | |
1217 | Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file) |
1218 | { |
1219 | QByteArray input = readOrMapFile(file); |
1220 | |
1221 | if (input.isEmpty()) |
1222 | return symbols; |
1223 | |
1224 | // phase 1: get rid of backslash-newlines |
1225 | input = cleaned(input); |
1226 | |
1227 | // phase 2: tokenize for the preprocessor |
1228 | index = 0; |
1229 | symbols = tokenize(input); |
1230 | |
1231 | #if 0 |
1232 | for (int j = 0; j < symbols.size(); ++j) |
1233 | fprintf(stderr, "line %d: %s(%s)\n" , |
1234 | symbols[j].lineNum, |
1235 | symbols[j].lexem().constData(), |
1236 | tokenTypeName(symbols[j].token)); |
1237 | #endif |
1238 | |
1239 | // phase 3: preprocess conditions and substitute macros |
1240 | Symbols result; |
1241 | // Preallocate some space to speed up the code below. |
1242 | // The magic value was found by logging the final size |
1243 | // and calculating an average when running moc over FOSS projects. |
1244 | result.reserve(file->size() / 300000); |
1245 | preprocess(filename, result); |
1246 | mergeStringLiterals(&result); |
1247 | |
1248 | #if 0 |
1249 | for (int j = 0; j < result.size(); ++j) |
1250 | fprintf(stderr, "line %d: %s(%s)\n" , |
1251 | result[j].lineNum, |
1252 | result[j].lexem().constData(), |
1253 | tokenTypeName(result[j].token)); |
1254 | #endif |
1255 | |
1256 | return result; |
1257 | } |
1258 | |
1259 | void Preprocessor::parseDefineArguments(Macro *m) |
1260 | { |
1261 | Symbols arguments; |
1262 | while (hasNext()) { |
1263 | while (test(PP_WHITESPACE)) {} |
1264 | Token t = next(); |
1265 | if (t == PP_RPAREN) |
1266 | break; |
1267 | if (t != PP_IDENTIFIER) { |
1268 | QByteArray l = lexem(); |
1269 | if (l == "..." ) { |
1270 | m->isVariadic = true; |
1271 | arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__" ); |
1272 | while (test(PP_WHITESPACE)) {} |
1273 | if (!test(PP_RPAREN)) |
1274 | error("missing ')' in macro argument list" ); |
1275 | break; |
1276 | } else if (!is_identifier(l.constData(), l.length())) { |
1277 | error("Unexpected character in macro argument list." ); |
1278 | } |
1279 | } |
1280 | |
1281 | Symbol arg = symbol(); |
1282 | if (arguments.contains(arg)) |
1283 | error("Duplicate macro parameter." ); |
1284 | arguments += symbol(); |
1285 | |
1286 | while (test(PP_WHITESPACE)) {} |
1287 | t = next(); |
1288 | if (t == PP_RPAREN) |
1289 | break; |
1290 | if (t == PP_COMMA) |
1291 | continue; |
1292 | if (lexem() == "..." ) { |
1293 | //GCC extension: #define FOO(x, y...) x(y) |
1294 | // The last argument was already parsed. Just mark the macro as variadic. |
1295 | m->isVariadic = true; |
1296 | while (test(PP_WHITESPACE)) {} |
1297 | if (!test(PP_RPAREN)) |
1298 | error("missing ')' in macro argument list" ); |
1299 | break; |
1300 | } |
1301 | error("Unexpected character in macro argument list." ); |
1302 | } |
1303 | m->arguments = arguments; |
1304 | while (test(PP_WHITESPACE)) {} |
1305 | } |
1306 | |
1307 | void Preprocessor::until(Token t) |
1308 | { |
1309 | while(hasNext() && next() != t) |
1310 | ; |
1311 | } |
1312 | |
1313 | QT_END_NAMESPACE |
1314 | |