1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the tools applications of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:GPL-EXCEPT$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU
20** General Public License version 3 as published by the Free Software
21** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22** included in the packaging of this file. Please review the following
23** information to ensure the GNU General Public License requirements will
24** be met: https://www.gnu.org/licenses/gpl-3.0.html.
25**
26** $QT_END_LICENSE$
27**
28****************************************************************************/
29
30#include "preprocessor.h"
31#include "utils.h"
32#include <qstringlist.h>
33#include <qfile.h>
34#include <qdir.h>
35#include <qfileinfo.h>
36
37QT_BEGIN_NAMESPACE
38
39#include "ppkeywords.cpp"
40#include "keywords.cpp"
41
42// transform \r\n into \n
43// \r into \n (os9 style)
44// backslash-newlines into newlines
45static QByteArray cleaned(const QByteArray &input)
46{
47 QByteArray result;
48 result.resize(input.size());
49 const char *data = input.constData();
50 const char *end = input.constData() + input.size();
51 char *output = result.data();
52
53 int newlines = 0;
54 while (data != end) {
55 while (data != end && is_space(*data))
56 ++data;
57 bool takeLine = (*data == '#');
58 if (*data == '%' && *(data+1) == ':') {
59 takeLine = true;
60 ++data;
61 }
62 if (takeLine) {
63 *output = '#';
64 ++output;
65 do ++data; while (data != end && is_space(*data));
66 }
67 while (data != end) {
68 // handle \\\n, \\\r\n and \\\r
69 if (*data == '\\') {
70 if (*(data + 1) == '\r') {
71 ++data;
72 }
73 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
74 ++newlines;
75 data += 1;
76 if (data != end && *data != '\r')
77 data += 1;
78 continue;
79 }
80 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
81 ++data;
82 }
83 if (data == end)
84 break;
85
86 char ch = *data;
87 if (ch == '\r') // os9: replace \r with \n
88 ch = '\n';
89 *output = ch;
90 ++output;
91
92 if (*data == '\n') {
93 // output additional newlines to keep the correct line-numbering
94 // for the lines following the backslash-newline sequence(s)
95 while (newlines) {
96 *output = '\n';
97 ++output;
98 --newlines;
99 }
100 ++data;
101 break;
102 }
103 ++data;
104 }
105 }
106 result.resize(output - result.constData());
107 return result;
108}
109
// Global switch consulted by tokenize(): when true, whitespace is emitted as
// tokens, unrecognized input is kept instead of being dropped, and adjacent
// string literals are not concatenated while tokenizing.
bool Preprocessor::preprocessOnly = false;
111void Preprocessor::skipUntilEndif()
112{
113 while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
114 switch (symbols.at(index).token) {
115 case PP_IF:
116 case PP_IFDEF:
117 case PP_IFNDEF:
118 ++index;
119 skipUntilEndif();
120 break;
121 default:
122 ;
123 }
124 ++index;
125 }
126}
127
128bool Preprocessor::skipBranch()
129{
130 while (index < symbols.size() - 1
131 && (symbols.at(index).token != PP_ENDIF
132 && symbols.at(index).token != PP_ELIF
133 && symbols.at(index).token != PP_ELSE)
134 ){
135 switch (symbols.at(index).token) {
136 case PP_IF:
137 case PP_IFDEF:
138 case PP_IFNDEF:
139 ++index;
140 skipUntilEndif();
141 break;
142 default:
143 ;
144 }
145 ++index;
146 }
147 return (index < symbols.size() - 1);
148}
149
150
// Splits `input` into a list of Symbols.
//
// `lineNum` is the line number assigned to the first symbol; it is increased
// for every newline that is consumed. `mode` selects the initial scanner
// state. Two table-driven keyword scanners are used:
//  - keywords / keyword_trans while in TokenizeCpp or TokenizeDefine mode
//  - pp_keywords / pp_keyword_trans in all other modes (preprocessor
//    directive lines)
// The mode switches back and forth while scanning ('#' starts a directive,
// a newline ends it, "#define NAME" switches to TokenizeDefine, ...).
//
// The scan stops at the first NUL byte of `input`. A default-constructed
// Symbol is appended to the result as end-of-file marker.
Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
{
    Symbols symbols;
    // Preallocate some space to speed up the code below.
    // The magic divisor value was found by calculating the average ratio between
    // input size and the final size of symbols.
    // This yielded a value of 16.x when compiling Qt Base.
    symbols.reserve(input.size() / 16);
    const char *begin = input.constData();
    const char *data = begin;
    while (*data) {
        if (mode == TokenizeCpp || mode == TokenizeDefine) {
            // NOTE(review): column is re-initialized for every token, so it is
            // always 1 when tested below -- confirm whether that is intended.
            int column = 0;

            const char *lexem = data; // start of the token being scanned
            int state = 0;
            Token token = NOTOKEN;
            // run the keyword state machine as far as the input matches
            for (;;) {
                // bytes with the high bit set are stepped over without
                // changing the scanner state
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }
                int nextindex = keywords[state].next;
                int next = 0;
                if (*data == keywords[state].defchar)
                    next = keywords[state].defnext;
                else if (!state || nextindex)
                    next = keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = keywords[state].token;
                ++data;
            }

            // a keyword directly followed by an identifier character is not
            // that keyword; use the state's fallback token instead
            // suboptimal, is_ident_char should use a table
            if (keywords[state].ident && is_ident_char(*data))
                token = keywords[state].ident;

            if (token == NOTOKEN) {
                if (*data)
                    ++data;
                // an error really, but let's ignore this input
                // to not confuse moc later. However in pre-processor
                // only mode let's continue.
                if (!Preprocessor::preprocessOnly)
                    continue;
            }

            ++column;

            // tokens above SPECIAL_TREATMENT_MARK need more scanning than the
            // keyword tables provide
            if (token > SPECIAL_TREATMENT_MARK) {
                switch (token) {
                case QUOTE:
                    data = skipQuote(data);
                    token = STRING_LITERAL;
                    // concatenate multi-line strings for easier
                    // STRING_LITERAL handling in moc
                    if (!Preprocessor::preprocessOnly
                        && !symbols.isEmpty()
                        && symbols.constLast().token == STRING_LITERAL) {

                        // previous literal without quotes + this literal
                        // without quotes, wrapped in one pair of quotes
                        const QByteArray newString
                                = '\"'
                                + symbols.constLast().unquotedLexem()
                                + input.mid(lexem - begin + 1, data - lexem - 2)
                                + '\"';
                        symbols.last() = Symbol(symbols.constLast().lineNum,
                                                STRING_LITERAL,
                                                newString);
                        continue;
                    }
                    break;
                case SINGLEQUOTE:
                    // scan to the closing quote; a quote preceded by a single
                    // backslash is escaped and does not end the literal
                    while (*data && (*data != '\''
                                     || (*(data-1)=='\\'
                                         && *(data-2)!='\\')))
                        ++data;
                    if (*data)
                        ++data;
                    token = CHARACTER_LITERAL;
                    break;
                case LANGLE_SCOPE:
                    // split <:: into two tokens, < and ::
                    token = LANGLE;
                    data -= 2; // rescan the "::" as the next token
                    break;
                case DIGIT:
                    // digits, with ' accepted as digit separator
                    while (is_digit_char(*data) || *data == '\'')
                        ++data;
                    if (!*data || *data != '.') {
                        token = INTEGER_LITERAL;
                        // "0x" / "0X" / "0b" / "0B" prefix: consume the
                        // remaining hex digits and separators
                        if (data - lexem == 1 &&
                            (*data == 'x' || *data == 'X'
                             || *data == 'b' || *data == 'B')
                            && *lexem == '0') {
                            ++data;
                            while (is_hex_char(*data) || *data == '\'')
                                ++data;
                        }
                        break;
                    }
                    // a '.' follows: this is a floating point literal
                    token = FLOATING_LITERAL;
                    ++data;
                    Q_FALLTHROUGH();
                case FLOATING_LITERAL:
                    // fraction digits, optional sign, optional exponent and
                    // optional f/F/l/L suffix
                    while (is_digit_char(*data) || *data == '\'')
                        ++data;
                    if (*data == '+' || *data == '-')
                        ++data;
                    if (*data == 'e' || *data == 'E') {
                        ++data;
                        while (is_digit_char(*data) || *data == '\'')
                            ++data;
                    }
                    if (*data == 'f' || *data == 'F'
                        || *data == 'l' || *data == 'L')
                        ++data;
                    break;
                case HASH:
                    // a '#' in C++ mode starts a preprocessor directive; the
                    // '#' itself is not emitted, the directive scanner below
                    // takes over
                    if (column == 1 && mode == TokenizeCpp) {
                        mode = PreparePreprocessorStatement;
                        while (*data && (*data == ' ' || *data == '\t'))
                            ++data;
                        if (is_ident_char(*data))
                            mode = TokenizePreprocessorStatement;
                        continue;
                    }
                    break;
                case PP_HASHHASH:
                    // "##" is only meaningful inside a macro definition
                    if (mode == TokenizeCpp)
                        continue;
                    break;
                case NEWLINE:
                    ++lineNum;
                    if (mode == TokenizeDefine) {
                        // the newline ends the macro definition
                        mode = TokenizeCpp;
                        // emit the newline token
                        break;
                    }
                    continue;
                case BACKSLASH:
                {
                    // a backslash followed only by blanks and a newline is
                    // dropped together with them; otherwise it is a token
                    const char *rewind = data;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (*data && *data == '\n') {
                        ++data;
                        continue;
                    }
                    data = rewind;
                } break;
                case CHARACTER:
                    while (is_ident_char(*data))
                        ++data;
                    token = IDENTIFIER;
                    break;
                case C_COMMENT:
                    // consume (up to) two characters unconditionally so that
                    // the look-behind below cannot match the opening "/*"
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                        if (*data) {
                            if (*data == '\n')
                                ++lineNum;
                            ++data;
                        }
                    }
                    // advance until the two previous characters are "*/"
                    while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                    token = WHITESPACE; // one comment, one whitespace
                    Q_FALLTHROUGH();
                case WHITESPACE:
                    if (column == 1)
                        column = 0;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (Preprocessor::preprocessOnly) // tokenize whitespace
                        break;
                    continue;
                case CPP_COMMENT:
                    while (*data && *data != '\n')
                        ++data;
                    continue; // ignore safely, the newline is a separator
                default:
                    continue; //ignore
                }
            }
#ifdef USE_LEXEM_STORE
            if (!Preprocessor::preprocessOnly
                && token != IDENTIFIER
                && token != STRING_LITERAL
                && token != FLOATING_LITERAL
                && token != INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);

        } else { //   Preprocessor

            const char *lexem = data; // start of the token being scanned
            int state = 0;
            Token token = NOTOKEN;
            if (mode == TokenizePreprocessorStatement) {
                // the '#' was already consumed in C++ mode: start the scanner
                // in the state it would be in after reading '#'
                state = pp_keyword_trans[0][(int)'#'];
                mode = TokenizePreprocessor;
            }
            // run the preprocessor keyword state machine
            for (;;) {
                // bytes with the high bit set are stepped over without
                // changing the scanner state
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }
                int nextindex = pp_keywords[state].next;
                int next = 0;
                if (*data == pp_keywords[state].defchar)
                    next = pp_keywords[state].defnext;
                else if (!state || nextindex)
                    next = pp_keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = pp_keywords[state].token;
                ++data;
            }
            // suboptimal, is_ident_char should use a table
            if (pp_keywords[state].ident && is_ident_char(*data))
                token = pp_keywords[state].ident;

            switch (token) {
            case NOTOKEN:
                if (*data)
                    ++data;
                break;
            case PP_DEFINE:
                mode = PrepareDefine;
                break;
            case PP_IFDEF:
                // "#ifdef X" is emitted as "#if defined X"
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_IFNDEF:
                // "#ifndef X" is emitted as "#if ! defined X"
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_NOT);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_INCLUDE:
                mode = TokenizeInclude;
                break;
            case PP_QUOTE:
                data = skipQuote(data);
                token = PP_STRING_LITERAL;
                break;
            case PP_SINGLEQUOTE:
                // scan to the closing quote; a quote preceded by a single
                // backslash is escaped and does not end the literal
                while (*data && (*data != '\''
                                 || (*(data-1)=='\\'
                                     && *(data-2)!='\\')))
                    ++data;
                if (*data)
                    ++data;
                token = PP_CHARACTER_LITERAL;
                break;
            case PP_DIGIT:
                // digits, with ' accepted as digit separator
                while (is_digit_char(*data) || *data == '\'')
                    ++data;
                if (!*data || *data != '.') {
                    token = PP_INTEGER_LITERAL;
                    // "0x" / "0X" prefix: consume the hex digits
                    if (data - lexem == 1 &&
                        (*data == 'x' || *data == 'X')
                        && *lexem == '0') {
                        ++data;
                        while (is_hex_char(*data) || *data == '\'')
                            ++data;
                    }
                    break;
                }
                // a '.' follows: this is a floating point literal
                token = PP_FLOATING_LITERAL;
                ++data;
                Q_FALLTHROUGH();
            case PP_FLOATING_LITERAL:
                // fraction digits, optional sign, optional exponent and
                // optional f/F/l/L suffix
                while (is_digit_char(*data) || *data == '\'')
                    ++data;
                if (*data == '+' || *data == '-')
                    ++data;
                if (*data == 'e' || *data == 'E') {
                    ++data;
                    while (is_digit_char(*data) || *data == '\'')
                        ++data;
                }
                if (*data == 'f' || *data == 'F'
                    || *data == 'l' || *data == 'L')
                    ++data;
                break;
            case PP_CHARACTER:
                if (mode == PreparePreprocessorStatement) {
                    // rewind entire token to begin
                    data = lexem;
                    mode = TokenizePreprocessorStatement;
                    continue;
                }
                while (is_ident_char(*data))
                    ++data;
                token = PP_IDENTIFIER;

                if (mode == PrepareDefine) {
                    // this identifier is the name of the macro being defined
                    symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
                    // make sure we explicitly add the whitespace here if the next char
                    // is not an opening brace, so we can distinguish correctly between
                    // regular and function macros
                    if (*data != '(')
                        symbols += Symbol(lineNum, WHITESPACE);
                    mode = TokenizeDefine;
                    continue;
                }
                break;
            case PP_C_COMMENT:
                // consume (up to) two characters unconditionally so that the
                // look-behind below cannot match the opening "/*"
                if (*data) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                }
                // advance until the two previous characters are "*/"
                while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                }
                token = PP_WHITESPACE; // one comment, one whitespace
                Q_FALLTHROUGH();
            case PP_WHITESPACE:
                while (*data && (*data == ' ' || *data == '\t'))
                    ++data;
                continue; // the preprocessor needs no whitespace
            case PP_CPP_COMMENT:
                while (*data && *data != '\n')
                    ++data;
                continue; // ignore safely, the newline is a separator
            case PP_NEWLINE:
                // the directive ends with the line
                ++lineNum;
                mode = TokenizeCpp;
                break;
            case PP_BACKSLASH:
            {
                // a backslash followed only by blanks and a newline is
                // dropped together with them; otherwise it is a token
                const char *rewind = data;
                while (*data && (*data == ' ' || *data == '\t'))
                    ++data;
                if (*data && *data == '\n') {
                    ++data;
                    continue;
                }
                data = rewind;
            } break;
            case PP_LANGLE:
                if (mode != TokenizeInclude)
                    break;
                // #include <file>: take everything up to and including the
                // closing '>' (or up to the end of the line) as one literal
                token = PP_STRING_LITERAL;
                while (*data && *data != '\n' && *(data-1) != '>')
                    ++data;
                break;
            default:
                break;
            }
            // still between '#' and the directive name: emit nothing
            if (mode == PreparePreprocessorStatement)
                continue;
#ifdef USE_LEXEM_STORE
            if (token != PP_IDENTIFIER
                && token != PP_STRING_LITERAL
                && token != PP_FLOATING_LITERAL
                && token != PP_INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
        }
    }
    symbols += Symbol(); // eof symbol
    return symbols;
}
536
537void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, int &index,
538 int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
539{
540 SymbolStack symbols;
541 SafeSymbols sf;
542 sf.symbols = toExpand;
543 sf.index = index;
544 sf.excludedSymbols = excludeSymbols;
545 symbols.push(sf);
546
547 if (toExpand.isEmpty())
548 return;
549
550 for (;;) {
551 QByteArray macro;
552 Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, &macro);
553
554 if (macro.isEmpty()) {
555 // not a macro
556 Symbol s = symbols.symbol();
557 s.lineNum = lineNum;
558 *into += s;
559 } else {
560 SafeSymbols sf;
561 sf.symbols = newSyms;
562 sf.index = 0;
563 sf.expandedMacro = macro;
564 symbols.push(sf);
565 }
566 if (!symbols.hasNext() || (one && symbols.size() == 1))
567 break;
568 symbols.next();
569 }
570
571 if (symbols.size())
572 index = symbols.top().index;
573 else
574 index = toExpand.size();
575}
576
577
// Tries to expand the symbol the stack `symbols` currently points at.
//
// Returns an empty list and leaves *macroName untouched when the symbol is
// not a PP_IDENTIFIER, is not a key of that->macros, or must not be replaced
// according to symbols.dontReplaceSymbol().
//
// Otherwise *macroName is set to the macro's name and the replacement list is
// returned. For a function-like macro the arguments are read from `symbols`
// and substituted, handling the '#' (stringify) and '##' (paste) operators.
// If a function-like macro name is not followed by '(', *macroName is cleared
// and the identifier itself is returned (preceded by a whitespace symbol when
// whitespace was skipped while looking for the '(').
//
// `lineNum` is the line number given to newly created symbols. Errors are
// reported through that->error().
Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
{
    Symbol s = symbols.symbol();

    // not a macro
    if (s.token != PP_IDENTIFIER || !that->macros.contains(s) || symbols.dontReplaceSymbol(s.lexem())) {
        return Symbols();
    }

    const Macro &macro = that->macros.value(s);
    *macroName = s.lexem();

    Symbols expansion;
    if (!macro.isFunction) {
        // object-like macro: the stored replacement list is the expansion
        expansion = macro.symbols;
    } else {
        // function-like macro: it is only invoked when followed by '('
        bool haveSpace = false;
        while (symbols.test(PP_WHITESPACE)) { haveSpace = true; }
        if (!symbols.test(PP_LPAREN)) {
            // no '(': not an invocation, hand the identifier back unchanged
            *macroName = QByteArray();
            Symbols syms;
            if (haveSpace)
                syms += Symbol(lineNum, PP_WHITESPACE);
            syms += s;
            syms.last().lineNum = lineNum;
            return syms;
        }
        // collect the arguments, one Symbols list per argument
        QVarLengthArray<Symbols, 5> arguments;
        while (symbols.hasNext()) {
            Symbols argument;
            // strip leading space
            while (symbols.test(PP_WHITESPACE)) {}
            int nesting = 0; // depth of parentheses inside the argument
            // the last parameter of a variadic macro swallows all remaining
            // arguments, commas included
            bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
            while (symbols.hasNext()) {
                Token t = symbols.next();
                if (t == PP_LPAREN) {
                    ++nesting;
                } else if (t == PP_RPAREN) {
                    --nesting;
                    if (nesting < 0) // the ')' closing the invocation
                        break;
                } else if (t == PP_COMMA && nesting == 0) {
                    if (!vararg) // top-level comma: end of this argument
                        break;
                }
                argument += symbols.symbol();
            }
            arguments += argument;

            if (nesting < 0)
                break;
            else if (!symbols.hasNext())
                that->error("missing ')' in macro usage");
        }

        // empty VA_ARGS
        if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
            arguments += Symbols();

        // now replace the macro arguments with the expanded arguments
        // `mode` records whether the previous replacement-list symbol was a
        // '#' or '##' operator that applies to the current symbol
        enum Mode {
            Normal,
            Hash,
            HashHash
        } mode = Normal;

        for (int i = 0; i < macro.symbols.size(); ++i) {
            const Symbol &s = macro.symbols.at(i);
            if (s.token == HASH || s.token == PP_HASHHASH) {
                mode = (s.token == HASH ? Hash : HashHash);
                continue;
            }
            // position of s in the parameter list, -1 if it is no parameter
            int index = macro.arguments.indexOf(s);
            if (mode == Normal) {
                if (index >= 0 && index < arguments.size()) {
                    // each argument undergoes macro expansion if it's not used as part of a # or ##
                    if (i == macro.symbols.size() - 1 || macro.symbols.at(i + 1).token != PP_HASHHASH) {
                        Symbols arg = arguments.at(index);
                        int idx = 1;
                        macroExpand(&expansion, that, arg, idx, lineNum, false, symbols.excludeSymbols());
                    } else {
                        // operand of a following '##': inserted unexpanded
                        expansion += arguments.at(index);
                    }
                } else {
                    // ordinary replacement-list symbol
                    expansion += s;
                }
            } else if (mode == Hash) {
                if (index < 0) {
                    that->error("'#' is not followed by a macro parameter");
                    continue;
                } else if (index >= arguments.size()) {
                    that->error("Macro invoked with too few parameters for a use of '#'");
                    continue;
                }

                // stringify: concatenate the argument's lexems, escape double
                // quotes and wrap the result in double quotes
                const Symbols &arg = arguments.at(index);
                QByteArray stringified;
                for (int i = 0; i < arg.size(); ++i) {
                    stringified += arg.at(i).lexem();
                }
                stringified.replace('"', "\\\"");
                stringified.prepend('"');
                stringified.append('"');
                expansion += Symbol(lineNum, STRING_LITERAL, stringified);
            } else if (mode == HashHash){
                // whitespace after '##' is skipped; the operator stays pending
                if (s.token == WHITESPACE)
                    continue;

                // drop whitespace in front of the '##'
                while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
                    expansion.pop_back();

                // `next` is the symbol to paste onto the end of the expansion:
                // the first symbol of the argument when s is a parameter,
                // otherwise s itself
                Symbol next = s;
                if (index >= 0 && index < arguments.size()) {
                    const Symbols &arg = arguments.at(index);
                    if (arg.size() == 0) {
                        // empty argument: nothing to paste
                        mode = Normal;
                        continue;
                    }
                    next = arg.at(0);
                }

                // the lexems are merged into one symbol only when the last
                // expansion symbol has the same token type as s and is not a
                // string literal; otherwise `next` is simply appended
                if (!expansion.isEmpty() && expansion.constLast().token == s.token
                    && expansion.constLast().token != STRING_LITERAL) {
                    Symbol last = expansion.takeLast();

                    QByteArray lexem = last.lexem() + next.lexem();
                    expansion += Symbol(lineNum, last.token, lexem);
                } else {
                    expansion += next;
                }

                // the remaining symbols of the argument follow unchanged
                if (index >= 0 && index < arguments.size()) {
                    const Symbols &arg = arguments.at(index);
                    for (int i = 1; i < arg.size(); ++i)
                        expansion += arg.at(i);
                }
            }
            mode = Normal;
        }
        // an operator without a right-hand operand is left over
        if (mode != Normal)
            that->error("'#' or '##' found at the end of a macro argument");

    }

    return expansion;
}
725
726void Preprocessor::substituteUntilNewline(Symbols &substituted)
727{
728 while (hasNext()) {
729 Token token = next();
730 if (token == PP_IDENTIFIER) {
731 macroExpand(&substituted, this, symbols, index, symbol().lineNum, true);
732 } else if (token == PP_DEFINED) {
733 bool braces = test(PP_LPAREN);
734 next(PP_IDENTIFIER);
735 Symbol definedOrNotDefined = symbol();
736 definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
737 substituted += definedOrNotDefined;
738 if (braces)
739 test(PP_RPAREN);
740 continue;
741 } else if (token == PP_NEWLINE) {
742 substituted += symbol();
743 break;
744 } else {
745 substituted += symbol();
746 }
747 }
748}
749
750
751class PP_Expression : public Parser
752{
753public:
754 int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; }
755
756 int conditional_expression();
757 int logical_OR_expression();
758 int logical_AND_expression();
759 int inclusive_OR_expression();
760 int exclusive_OR_expression();
761 int AND_expression();
762 int equality_expression();
763 int relational_expression();
764 int shift_expression();
765 int additive_expression();
766 int multiplicative_expression();
767 int unary_expression();
768 bool unary_expression_lookup();
769 int primary_expression();
770 bool primary_expression_lookup();
771};
772
773int PP_Expression::conditional_expression()
774{
775 int value = logical_OR_expression();
776 if (test(PP_QUESTION)) {
777 int alt1 = conditional_expression();
778 int alt2 = test(PP_COLON) ? conditional_expression() : 0;
779 return value ? alt1 : alt2;
780 }
781 return value;
782}
783
784int PP_Expression::logical_OR_expression()
785{
786 int value = logical_AND_expression();
787 if (test(PP_OROR))
788 return logical_OR_expression() || value;
789 return value;
790}
791
792int PP_Expression::logical_AND_expression()
793{
794 int value = inclusive_OR_expression();
795 if (test(PP_ANDAND))
796 return logical_AND_expression() && value;
797 return value;
798}
799
800int PP_Expression::inclusive_OR_expression()
801{
802 int value = exclusive_OR_expression();
803 if (test(PP_OR))
804 return value | inclusive_OR_expression();
805 return value;
806}
807
808int PP_Expression::exclusive_OR_expression()
809{
810 int value = AND_expression();
811 if (test(PP_HAT))
812 return value ^ exclusive_OR_expression();
813 return value;
814}
815
816int PP_Expression::AND_expression()
817{
818 int value = equality_expression();
819 if (test(PP_AND))
820 return value & AND_expression();
821 return value;
822}
823
824int PP_Expression::equality_expression()
825{
826 int value = relational_expression();
827 switch (next()) {
828 case PP_EQEQ:
829 return value == equality_expression();
830 case PP_NE:
831 return value != equality_expression();
832 default:
833 prev();
834 return value;
835 }
836}
837
838int PP_Expression::relational_expression()
839{
840 int value = shift_expression();
841 switch (next()) {
842 case PP_LANGLE:
843 return value < relational_expression();
844 case PP_RANGLE:
845 return value > relational_expression();
846 case PP_LE:
847 return value <= relational_expression();
848 case PP_GE:
849 return value >= relational_expression();
850 default:
851 prev();
852 return value;
853 }
854}
855
856int PP_Expression::shift_expression()
857{
858 int value = additive_expression();
859 switch (next()) {
860 case PP_LTLT:
861 return value << shift_expression();
862 case PP_GTGT:
863 return value >> shift_expression();
864 default:
865 prev();
866 return value;
867 }
868}
869
870int PP_Expression::additive_expression()
871{
872 int value = multiplicative_expression();
873 switch (next()) {
874 case PP_PLUS:
875 return value + additive_expression();
876 case PP_MINUS:
877 return value - additive_expression();
878 default:
879 prev();
880 return value;
881 }
882}
883
884int PP_Expression::multiplicative_expression()
885{
886 int value = unary_expression();
887 switch (next()) {
888 case PP_STAR:
889 return value * multiplicative_expression();
890 case PP_PERCENT:
891 {
892 int remainder = multiplicative_expression();
893 return remainder ? value % remainder : 0;
894 }
895 case PP_SLASH:
896 {
897 int div = multiplicative_expression();
898 return div ? value / div : 0;
899 }
900 default:
901 prev();
902 return value;
903 };
904}
905
906int PP_Expression::unary_expression()
907{
908 switch (next()) {
909 case PP_PLUS:
910 return unary_expression();
911 case PP_MINUS:
912 return -unary_expression();
913 case PP_NOT:
914 return !unary_expression();
915 case PP_TILDE:
916 return ~unary_expression();
917 case PP_MOC_TRUE:
918 return 1;
919 case PP_MOC_FALSE:
920 return 0;
921 default:
922 prev();
923 return primary_expression();
924 }
925}
926
927bool PP_Expression::unary_expression_lookup()
928{
929 Token t = lookup();
930 return (primary_expression_lookup()
931 || t == PP_PLUS
932 || t == PP_MINUS
933 || t == PP_NOT
934 || t == PP_TILDE
935 || t == PP_DEFINED);
936}
937
938int PP_Expression::primary_expression()
939{
940 int value;
941 if (test(PP_LPAREN)) {
942 value = conditional_expression();
943 test(PP_RPAREN);
944 } else {
945 next();
946 value = lexem().toInt(nullptr, 0);
947 }
948 return value;
949}
950
951bool PP_Expression::primary_expression_lookup()
952{
953 Token t = lookup();
954 return (t == PP_IDENTIFIER
955 || t == PP_INTEGER_LITERAL
956 || t == PP_FLOATING_LITERAL
957 || t == PP_MOC_TRUE
958 || t == PP_MOC_FALSE
959 || t == PP_LPAREN);
960}
961
962int Preprocessor::evaluateCondition()
963{
964 PP_Expression expression;
965 expression.currentFilenames = currentFilenames;
966
967 substituteUntilNewline(expression.symbols);
968
969 return expression.value();
970}
971
972static QByteArray readOrMapFile(QFile *file)
973{
974 const qint64 size = file->size();
975 char *rawInput = reinterpret_cast<char*>(file->map(0, size));
976 return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
977}
978
979static void mergeStringLiterals(Symbols *_symbols)
980{
981 Symbols &symbols = *_symbols;
982 for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
983 if (i->token == STRING_LITERAL) {
984 Symbols::Iterator mergeSymbol = i;
985 int literalsLength = mergeSymbol->len;
986 while (++i != symbols.end() && i->token == STRING_LITERAL)
987 literalsLength += i->len - 2; // no quotes
988
989 if (literalsLength != mergeSymbol->len) {
990 QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem();
991 QByteArray &mergeSymbolLexem = mergeSymbol->lex;
992 mergeSymbolLexem.resize(0);
993 mergeSymbolLexem.reserve(literalsLength);
994 mergeSymbolLexem.append('"');
995 mergeSymbolLexem.append(mergeSymbolOriginalLexem);
996 for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j)
997 mergeSymbolLexem.append(j->lex.constData() + j->from + 1, j->len - 2); // append j->unquotedLexem()
998 mergeSymbolLexem.append('"');
999 mergeSymbol->len = mergeSymbol->lex.length();
1000 mergeSymbol->from = 0;
1001 i = symbols.erase(mergeSymbol + 1, i);
1002 }
1003 if (i == symbols.end())
1004 break;
1005 }
1006 }
1007}
1008
1009static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1010 const QByteArray &include)
1011{
1012 QFileInfo fi;
1013 for (int j = 0; j < includepaths.size() && !fi.exists(); ++j) {
1014 const Parser::IncludePath &p = includepaths.at(j);
1015 if (p.isFrameworkPath) {
1016 const int slashPos = include.indexOf('/');
1017 if (slashPos == -1)
1018 continue;
1019 fi.setFile(QString::fromLocal8Bit(p.path + '/' + include.left(slashPos) + ".framework/Headers/"),
1020 QString::fromLocal8Bit(include.mid(slashPos + 1)));
1021 } else {
1022 fi.setFile(QString::fromLocal8Bit(p.path), QString::fromLocal8Bit(include));
1023 }
1024 // try again, maybe there's a file later in the include paths with the same name
1025 // (186067)
1026 if (fi.isDir()) {
1027 fi = QFileInfo();
1028 continue;
1029 }
1030 }
1031
1032 if (!fi.exists() || fi.isDir())
1033 return QByteArray();
1034 return fi.canonicalFilePath().toLocal8Bit();
1035}
1036
1037QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1038{
1039 if (!relativeTo.isEmpty()) {
1040 QFileInfo fi;
1041 fi.setFile(QFileInfo(QString::fromLocal8Bit(relativeTo)).dir(), QString::fromLocal8Bit(include));
1042 if (fi.exists() && !fi.isDir())
1043 return fi.canonicalFilePath().toLocal8Bit();
1044 }
1045
1046 auto it = nonlocalIncludePathResolutionCache.find(include);
1047 if (it == nonlocalIncludePathResolutionCache.end())
1048 it = nonlocalIncludePathResolutionCache.insert(include, searchIncludePaths(includes, include));
1049 return it.value();
1050}
1051
// Runs the preprocessor over this->symbols, starting at the current index,
// and appends the resulting symbols to `preprocessed`.
//
// Directives are interpreted on the way: #include files are tokenized and
// preprocessed recursively (each resolved file only once), #define / #undef
// update `macros`, conditionals select which symbols are kept, and
// identifiers are macro-expanded. `filename` is pushed on currentFilenames
// for the duration of the call and is the file that quoted includes are
// resolved relative to.
void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
{
    currentFilenames.push(filename);
    preprocessed.reserve(preprocessed.size() + symbols.size());
    while (hasNext()) {
        Token token = next();

        switch (token) {
        case PP_INCLUDE:
        {
            int lineNum = symbol().lineNum;
            QByteArray include;
            bool local = false; // true for #include "...", false for #include <...>
            if (test(PP_STRING_LITERAL)) {
                local = lexem().startsWith('\"');
                include = unquotedLexem();
            } else
                continue;
            until(PP_NEWLINE);

            // only quoted includes are looked up relative to the current file
            include = resolveInclude(include, local ? filename : QByteArray());
            if (include.isNull())
                continue;

            // every file is preprocessed at most once
            if (Preprocessor::preprocessedIncludes.contains(include))
                continue;
            Preprocessor::preprocessedIncludes.insert(include);

            QFile file(QString::fromLocal8Bit(include.constData()));
            if (!file.open(QFile::ReadOnly))
                continue;

            QByteArray input = readOrMapFile(&file);

            file.close();
            if (input.isEmpty())
                continue;

            // remember where we are in the including file
            Symbols saveSymbols = symbols;
            int saveIndex = index;

            // phase 1: get rid of backslash-newlines
            input = cleaned(input);

            // phase 2: tokenize for the preprocessor
            symbols = tokenize(input);
            input.clear();

            index = 0;

            // phase 3: preprocess conditions and substitute macros
            // the included symbols are bracketed by begin/end markers that
            // carry the resolved file name
            preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
            preprocess(include, preprocessed);
            preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);

            // continue with the including file
            symbols = saveSymbols;
            index = saveIndex;
            continue;
        }
        case PP_DEFINE:
        {
            next();
            QByteArray name = lexem();
            if (name.isEmpty() || !is_ident_start(name[0]))
                error();
            Macro macro;
            macro.isVariadic = false;
            if (test(LPAREN)) {
                // we have a function macro
                macro.isFunction = true;
                parseDefineArguments(&macro);
            } else {
                macro.isFunction = false;
            }
            // the replacement list is everything up to the end of the line
            int start = index;
            until(PP_NEWLINE);
            macro.symbols.reserve(index - start - 1);

            // remove whitespace where there shouldn't be any:
            // Before and after the macro, after a # and around ##
            Token lastToken = HASH; // skip whitespace at the beginning
            for (int i = start; i < index - 1; ++i) {
                Token token = symbols.at(i).token;
                if (token == WHITESPACE) {
                    if (lastToken == PP_HASH || lastToken == HASH ||
                        lastToken == PP_HASHHASH ||
                        lastToken == WHITESPACE)
                        continue;
                } else if (token == PP_HASHHASH) {
                    // drop the whitespace that was stored just before the ##
                    if (!macro.symbols.isEmpty() &&
                        lastToken == WHITESPACE)
                        macro.symbols.pop_back();
                }
                macro.symbols.append(symbols.at(i));
                lastToken = token;
            }
            // remove trailing whitespace
            while (!macro.symbols.isEmpty() &&
                   (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
                macro.symbols.pop_back();

            if (!macro.symbols.isEmpty()) {
                if (macro.symbols.constFirst().token == PP_HASHHASH ||
                    macro.symbols.constLast().token == PP_HASHHASH) {
                    error("'##' cannot appear at either end of a macro expansion");
                }
            }
            macros.insert(name, macro);
            continue;
        }
        case PP_UNDEF: {
            next();
            QByteArray name = lexem();
            until(PP_NEWLINE);
            macros.remove(name);
            continue;
        }
        case PP_IDENTIFIER: {
            // substitute macros
            macroExpand(&preprocessed, this, symbols, index, symbol().lineNum, true);
            continue;
        }
        case PP_HASH:
            until(PP_NEWLINE);
            continue; // skip unknown preprocessor statement
        case PP_IFDEF:
        case PP_IFNDEF:
        case PP_IF:
            // evaluateCondition() consumes the condition line. While the
            // condition is false, skip the branch; an #elif supplies the next
            // condition to test, anything else (#else, #endif) ends the search
            while (!evaluateCondition()) {
                if (!skipBranch())
                    break;
                if (test(PP_ELIF)) {
                } else {
                    until(PP_NEWLINE);
                    break;
                }
            }
            continue;
        case PP_ELIF:
        case PP_ELSE:
            // reached while emitting a branch that was taken: the remaining
            // branches of this conditional are skipped
            skipUntilEndif();
            Q_FALLTHROUGH();
        case PP_ENDIF:
            until(PP_NEWLINE);
            continue;
        case PP_NEWLINE:
            continue;
        case SIGNALS:
        case SLOTS: {
            // "signals" / "slots" are plain identifiers when QT_NO_KEYWORDS
            // is defined, otherwise they become the Q_SIGNALS / Q_SLOTS tokens
            Symbol sym = symbol();
            if (macros.contains("QT_NO_KEYWORDS"))
                sym.token = IDENTIFIER;
            else
                sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
            preprocessed += sym;
        } continue;
        default:
            break;
        }
        // everything else is passed through unchanged
        preprocessed += symbol();
    }

    currentFilenames.pop();
}
1216
1217Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1218{
1219 QByteArray input = readOrMapFile(file);
1220
1221 if (input.isEmpty())
1222 return symbols;
1223
1224 // phase 1: get rid of backslash-newlines
1225 input = cleaned(input);
1226
1227 // phase 2: tokenize for the preprocessor
1228 index = 0;
1229 symbols = tokenize(input);
1230
1231#if 0
1232 for (int j = 0; j < symbols.size(); ++j)
1233 fprintf(stderr, "line %d: %s(%s)\n",
1234 symbols[j].lineNum,
1235 symbols[j].lexem().constData(),
1236 tokenTypeName(symbols[j].token));
1237#endif
1238
1239 // phase 3: preprocess conditions and substitute macros
1240 Symbols result;
1241 // Preallocate some space to speed up the code below.
1242 // The magic value was found by logging the final size
1243 // and calculating an average when running moc over FOSS projects.
1244 result.reserve(file->size() / 300000);
1245 preprocess(filename, result);
1246 mergeStringLiterals(&result);
1247
1248#if 0
1249 for (int j = 0; j < result.size(); ++j)
1250 fprintf(stderr, "line %d: %s(%s)\n",
1251 result[j].lineNum,
1252 result[j].lexem().constData(),
1253 tokenTypeName(result[j].token));
1254#endif
1255
1256 return result;
1257}
1258
1259void Preprocessor::parseDefineArguments(Macro *m)
1260{
1261 Symbols arguments;
1262 while (hasNext()) {
1263 while (test(PP_WHITESPACE)) {}
1264 Token t = next();
1265 if (t == PP_RPAREN)
1266 break;
1267 if (t != PP_IDENTIFIER) {
1268 QByteArray l = lexem();
1269 if (l == "...") {
1270 m->isVariadic = true;
1271 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1272 while (test(PP_WHITESPACE)) {}
1273 if (!test(PP_RPAREN))
1274 error("missing ')' in macro argument list");
1275 break;
1276 } else if (!is_identifier(l.constData(), l.length())) {
1277 error("Unexpected character in macro argument list.");
1278 }
1279 }
1280
1281 Symbol arg = symbol();
1282 if (arguments.contains(arg))
1283 error("Duplicate macro parameter.");
1284 arguments += symbol();
1285
1286 while (test(PP_WHITESPACE)) {}
1287 t = next();
1288 if (t == PP_RPAREN)
1289 break;
1290 if (t == PP_COMMA)
1291 continue;
1292 if (lexem() == "...") {
1293 //GCC extension: #define FOO(x, y...) x(y)
1294 // The last argument was already parsed. Just mark the macro as variadic.
1295 m->isVariadic = true;
1296 while (test(PP_WHITESPACE)) {}
1297 if (!test(PP_RPAREN))
1298 error("missing ')' in macro argument list");
1299 break;
1300 }
1301 error("Unexpected character in macro argument list.");
1302 }
1303 m->arguments = arguments;
1304 while (test(PP_WHITESPACE)) {}
1305}
1306
1307void Preprocessor::until(Token t)
1308{
1309 while(hasNext() && next() != t)
1310 ;
1311}
1312
1313QT_END_NAMESPACE
1314