preprocessor.cpp source code [Qt/src/tools/moc/preprocessor.cpp]

1	/****************************************************************************
2	**
3	** Copyright (C) 2016 The Qt Company Ltd.
4	** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
5	** Contact: https://www.qt.io/licensing/
6	**
7	** This file is part of the tools applications of the Qt Toolkit.
8	**
9	** $QT_BEGIN_LICENSE:GPL-EXCEPT$
10	** Commercial License Usage
11	** Licensees holding valid commercial Qt licenses may use this file in
12	** accordance with the commercial license agreement provided with the
13	** Software or, alternatively, in accordance with the terms contained in
14	** a written agreement between you and The Qt Company. For licensing terms
15	** and conditions see https://www.qt.io/terms-conditions. For further
16	** information use the contact form at https://www.qt.io/contact-us.
17	**
18	** GNU General Public License Usage
19	** Alternatively, this file may be used under the terms of the GNU
20	** General Public License version 3 as published by the Free Software
21	** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22	** included in the packaging of this file. Please review the following
23	** information to ensure the GNU General Public License requirements will
24	** be met: https://www.gnu.org/licenses/gpl-3.0.html.
25	**
26	** $QT_END_LICENSE$
27	**
28	****************************************************************************/
29
30	#include "preprocessor.h"
31	#include "utils.h"
32	#include <qstringlist.h>
33	#include <qfile.h>
34	#include <qdir.h>
35	#include <qfileinfo.h>
36
37	QT_BEGIN_NAMESPACE
38
39	#include "ppkeywords.cpp"
40	#include "keywords.cpp"
41
42	// transform \r\n into \n
43	// \r into \n (os9 style)
44	// backslash-newlines into newlines
45	static QByteArray cleaned(const QByteArray &input)
46	{
47	QByteArray result;
48	result.resize(input.size());
49	const char *data = input.constData();
50	const char *end = input.constData() + input.size();
51	char *output = result.data();
52
53	int newlines = `0`;
54	while (data != end) {
55	while (data != end && is_space(*data))
56	++data;
57	bool takeLine = (*data == `'#'`);
58	if (data == `'%'` && (data+`1`) == `':'`) {
59	takeLine = true;
60	++data;
61	}
62	if (takeLine) {
63	*output = `'#'`;
64	++output;
65	do ++data; while (data != end && is_space(*data));
66	}
67	while (data != end) {
68	// handle \\\n, \\\r\n and \\\r
69	if (*data == `'\\'`) {
70	if (*(data + `1`) == `'\r'`) {
71	++data;
72	}
73	if (data != end && ((data + `1`) == `'\n'` \|\| (data) == `'\r'`)) {
74	++newlines;
75	data += `1`;
76	if (data != end && *data != `'\r'`)
77	data += `1`;
78	continue;
79	}
80	} else if (data == `'\r'` && (data + `1`) == `'\n'`) { // reduce \r\n to \n
81	++data;
82	}
83	if (data == end)
84	break;
85
86	char ch = *data;
87	if (ch == `'\r'`) // os9: replace \r with \n
88	ch = `'\n'`;
89	*output = ch;
90	++output;
91
92	if (*data == `'\n'`) {
93	// output additional newlines to keep the correct line-numbering
94	// for the lines following the backslash-newline sequence(s)
95	while (newlines) {
96	*output = `'\n'`;
97	++output;
98	--newlines;
99	}
100	++data;
101	break;
102	}
103	++data;
104	}
105	}
106	result.resize(output - result.constData());
107	return result;
108	}
109
110	bool Preprocessor::preprocessOnly = false;
111	void Preprocessor::skipUntilEndif()
112	{
113	while(index < symbols.size() - `1` && symbols.at(index).token != PP_ENDIF){
114	switch (symbols.at(index).token) {
115	case PP_IF:
116	case PP_IFDEF:
117	case PP_IFNDEF:
118	++index;
119	skipUntilEndif();
120	break;
121	default:
122	;
123	}
124	++index;
125	}
126	}
127
128	bool Preprocessor::skipBranch()
129	{
130	while (index < symbols.size() - `1`
131	&& (symbols.at(index).token != PP_ENDIF
132	&& symbols.at(index).token != PP_ELIF
133	&& symbols.at(index).token != PP_ELSE)
134	){
135	switch (symbols.at(index).token) {
136	case PP_IF:
137	case PP_IFDEF:
138	case PP_IFNDEF:
139	++index;
140	skipUntilEndif();
141	break;
142	default:
143	;
144	}
145	++index;
146	}
147	return (index < symbols.size() - `1`);
148	}
149
150
151	Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
152	{
153	Symbols symbols;
154	// Preallocate some space to speed up the code below.
155	// The magic divisor value was found by calculating the average ratio between
156	// input size and the final size of symbols.
157	// This yielded a value of 16.x when compiling Qt Base.
158	symbols.reserve(input.size() / `16`);
159	const char *begin = input.constData();
160	const char *data = begin;
161	while (*data) {
162	if (mode == TokenizeCpp \|\| mode == TokenizeDefine) {
163	int column = `0`;
164
165	const char *lexem = data;
166	int state = `0`;
167	Token token = NOTOKEN;
168	for (;;) {
169	if (static_cast<signed char>(*data) < `0`) {
170	++data;
171	continue;
172	}
173	int nextindex = keywords[state].next;
174	int next = `0`;
175	if (*data == keywords[state].defchar)
176	next = keywords[state].defnext;
177	else if (!state \|\| nextindex)
178	next = keyword_trans[nextindex][(int)*data];
179	if (!next)
180	break;
181	state = next;
182	token = keywords[state].token;
183	++data;
184	}
185
186	// suboptimal, is_ident_char should use a table
187	if (keywords[state].ident && is_ident_char(*data))
188	token = keywords[state].ident;
189
190	if (token == NOTOKEN) {
191	if (*data)
192	++data;
193	// an error really, but let's ignore this input
194	// to not confuse moc later. However in pre-processor
195	// only mode let's continue.
196	if (!Preprocessor::preprocessOnly)
197	continue;
198	}
199
200	++column;
201
202	if (token > SPECIAL_TREATMENT_MARK) {
203	switch (token) {
204	case QUOTE:
205	data = skipQuote(data);
206	token = STRING_LITERAL;
207	// concatenate multi-line strings for easier
208	// STRING_LITERAL handling in moc
209	if (!Preprocessor::preprocessOnly
210	&& !symbols.isEmpty()
211	&& symbols.constLast().token == STRING_LITERAL) {
212
213	const QByteArray newString
214	= `'\"'`
215	+ symbols.constLast().unquotedLexem()
216	+ input.mid(lexem - begin + `1`, data - lexem - `2`)
217	+ `'\"'`;
218	symbols.last() = Symbol (symbols.constLast().lineNum,
219	STRING_LITERAL,
220	newString);
221	continue;
222	}
223	break;
224	case SINGLEQUOTE:
225	while (data && (data != `'\''`
226	\|\| (*(data-`1`)==`'\\'`
227	&& *(data-`2`)!=`'\\'`)))
228	++data;
229	if (*data)
230	++data;
231	token = CHARACTER_LITERAL;
232	break;
233	case LANGLE_SCOPE:
234	// split <:: into two tokens, < and ::
235	token = LANGLE;
236	data -= `2`;
237	break;
238	case DIGIT:
239	while (is_digit_char(data) \|\| data == `'\''`)
240	++data;
241	if (!data \|\| data != `'.'`) {
242	token = INTEGER_LITERAL;
243	if (data - lexem == `1` &&
244	(data == `'x'` \|\| data == `'X'`
245	\|\| data == `'b'` \|\| data == `'B'`)
246	&& *lexem == `'0'`) {
247	++data;
248	while (is_hex_char(data) \|\| data == `'\''`)
249	++data;
250	}
251	break;
252	}
253	token = FLOATING_LITERAL;
254	++data;
255	Q_FALLTHROUGH();
256	case FLOATING_LITERAL:
257	while (is_digit_char(data) \|\| data == `'\''`)
258	++data;
259	if (data == `'+'` \|\| data == `'-'`)
260	++data;
261	if (data == `'e'` \|\| data == `'E'`) {
262	++data;
263	while (is_digit_char(data) \|\| data == `'\''`)
264	++data;
265	}
266	if (data == `'f'` \|\| data == `'F'`
267	\|\| data == `'l'` \|\| data == `'L'`)
268	++data;
269	break;
270	case HASH:
271	if (column == `1` && mode == TokenizeCpp) {
272	mode = PreparePreprocessorStatement;
273	while (data && (data == `' '` \|\| *data == `'\t'`))
274	++data;
275	if (is_ident_char(*data))
276	mode = TokenizePreprocessorStatement;
277	continue;
278	}
279	break;
280	case PP_HASHHASH:
281	if (mode == TokenizeCpp)
282	continue;
283	break;
284	case NEWLINE:
285	++lineNum;
286	if (mode == TokenizeDefine) {
287	mode = TokenizeCpp;
288	// emit the newline token
289	break;
290	}
291	continue;
292	case BACKSLASH:
293	{
294	const char *rewind = data;
295	while (data && (data == `' '` \|\| *data == `'\t'`))
296	++data;
297	if (data && data == `'\n'`) {
298	++data;
299	continue;
300	}
301	data = rewind;
302	} break;
303	case CHARACTER:
304	while (is_ident_char(*data))
305	++data;
306	token = IDENTIFIER;
307	break;
308	case C_COMMENT:
309	if (*data) {
310	if (*data == `'\n'`)
311	++lineNum;
312	++data;
313	if (*data) {
314	if (*data == `'\n'`)
315	++lineNum;
316	++data;
317	}
318	}
319	while (data && ((data-`1`) != `'/'` \|\| (data-`2`) != `''`)) {
320	if (*data == `'\n'`)
321	++lineNum;
322	++data;
323	}
324	token = WHITESPACE; // one comment, one whitespace
325	Q_FALLTHROUGH();
326	case WHITESPACE:
327	if (column == `1`)
328	column = `0`;
329	while (data && (data == `' '` \|\| *data == `'\t'`))
330	++data;
331	if (Preprocessor::preprocessOnly) // tokenize whitespace
332	break;
333	continue;
334	case CPP_COMMENT:
335	while (data && data != `'\n'`)
336	++data;
337	continue; // ignore safely, the newline is a separator
338	default:
339	continue; //ignore
340	}
341	}
342	#ifdef USE_LEXEM_STORE
343	if (!Preprocessor::preprocessOnly
344	&& token != IDENTIFIER
345	&& token != STRING_LITERAL
346	&& token != FLOATING_LITERAL
347	&& token != INTEGER_LITERAL)
348	symbols += Symbol(lineNum, token);
349	else
350	#endif
351	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
352
353	} else { // Preprocessor
354
355	const char *lexem = data;
356	int state = `0`;
357	Token token = NOTOKEN;
358	if (mode == TokenizePreprocessorStatement) {
359	state = pp_keyword_trans[`0`][(int)`'#'`];
360	mode = TokenizePreprocessor;
361	}
362	for (;;) {
363	if (static_cast<signed char>(*data) < `0`) {
364	++data;
365	continue;
366	}
367	int nextindex = pp_keywords[state].next;
368	int next = `0`;
369	if (*data == pp_keywords[state].defchar)
370	next = pp_keywords[state].defnext;
371	else if (!state \|\| nextindex)
372	next = pp_keyword_trans[nextindex][(int)*data];
373	if (!next)
374	break;
375	state = next;
376	token = pp_keywords[state].token;
377	++data;
378	}
379	// suboptimal, is_ident_char should use a table
380	if (pp_keywords[state].ident && is_ident_char(*data))
381	token = pp_keywords[state].ident;
382
383	switch (token) {
384	case NOTOKEN:
385	if (*data)
386	++data;
387	break;
388	case PP_DEFINE:
389	mode = PrepareDefine;
390	break;
391	case PP_IFDEF:
392	symbols += Symbol (lineNum, PP_IF);
393	symbols += Symbol (lineNum, PP_DEFINED);
394	continue;
395	case PP_IFNDEF:
396	symbols += Symbol (lineNum, PP_IF);
397	symbols += Symbol (lineNum, PP_NOT);
398	symbols += Symbol (lineNum, PP_DEFINED);
399	continue;
400	case PP_INCLUDE:
401	mode = TokenizeInclude;
402	break;
403	case PP_QUOTE:
404	data = skipQuote(data);
405	token = PP_STRING_LITERAL;
406	break;
407	case PP_SINGLEQUOTE:
408	while (data && (data != `'\''`
409	\|\| (*(data-`1`)==`'\\'`
410	&& *(data-`2`)!=`'\\'`)))
411	++data;
412	if (*data)
413	++data;
414	token = PP_CHARACTER_LITERAL;
415	break;
416	case PP_DIGIT:
417	while (is_digit_char(data) \|\| data == `'\''`)
418	++data;
419	if (!data \|\| data != `'.'`) {
420	token = PP_INTEGER_LITERAL;
421	if (data - lexem == `1` &&
422	(data == `'x'` \|\| data == `'X'`)
423	&& *lexem == `'0'`) {
424	++data;
425	while (is_hex_char(data) \|\| data == `'\''`)
426	++data;
427	}
428	break;
429	}
430	token = PP_FLOATING_LITERAL;
431	++data;
432	Q_FALLTHROUGH();
433	case PP_FLOATING_LITERAL:
434	while (is_digit_char(data) \|\| data == `'\''`)
435	++data;
436	if (data == `'+'` \|\| data == `'-'`)
437	++data;
438	if (data == `'e'` \|\| data == `'E'`) {
439	++data;
440	while (is_digit_char(data) \|\| data == `'\''`)
441	++data;
442	}
443	if (data == `'f'` \|\| data == `'F'`
444	\|\| data == `'l'` \|\| data == `'L'`)
445	++data;
446	break;
447	case PP_CHARACTER:
448	if (mode == PreparePreprocessorStatement) {
449	// rewind entire token to begin
450	data = lexem;
451	mode = TokenizePreprocessorStatement;
452	continue;
453	}
454	while (is_ident_char(*data))
455	++data;
456	token = PP_IDENTIFIER;
457
458	if (mode == PrepareDefine) {
459	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
460	// make sure we explicitly add the whitespace here if the next char
461	// is not an opening brace, so we can distinguish correctly between
462	// regular and function macros
463	if (*data != `'('`)
464	symbols += Symbol (lineNum, WHITESPACE);
465	mode = TokenizeDefine;
466	continue;
467	}
468	break;
469	case PP_C_COMMENT:
470	if (*data) {
471	if (*data == `'\n'`)
472	++lineNum;
473	++data;
474	if (*data) {
475	if (*data == `'\n'`)
476	++lineNum;
477	++data;
478	}
479	}
480	while (data && ((data-`1`) != `'/'` \|\| (data-`2`) != `''`)) {
481	if (*data == `'\n'`)
482	++lineNum;
483	++data;
484	}
485	token = PP_WHITESPACE; // one comment, one whitespace
486	Q_FALLTHROUGH();
487	case PP_WHITESPACE:
488	while (data && (data == `' '` \|\| *data == `'\t'`))
489	++data;
490	continue; // the preprocessor needs no whitespace
491	case PP_CPP_COMMENT:
492	while (data && data != `'\n'`)
493	++data;
494	continue; // ignore safely, the newline is a separator
495	case PP_NEWLINE:
496	++lineNum;
497	mode = TokenizeCpp;
498	break;
499	case PP_BACKSLASH:
500	{
501	const char *rewind = data;
502	while (data && (data == `' '` \|\| *data == `'\t'`))
503	++data;
504	if (data && data == `'\n'`) {
505	++data;
506	continue;
507	}
508	data = rewind;
509	} break;
510	case PP_LANGLE:
511	if (mode != TokenizeInclude)
512	break;
513	token = PP_STRING_LITERAL;
514	while (data && data != `'\n'` && *(data-`1`) != `'>'`)
515	++data;
516	break;
517	default:
518	break;
519	}
520	if (mode == PreparePreprocessorStatement)
521	continue;
522	#ifdef USE_LEXEM_STORE
523	if (token != PP_IDENTIFIER
524	&& token != PP_STRING_LITERAL
525	&& token != PP_FLOATING_LITERAL
526	&& token != PP_INTEGER_LITERAL)
527	symbols += Symbol(lineNum, token);
528	else
529	#endif
530	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
531	}
532	}
533	symbols += Symbol (); // eof symbol
534	return symbols;
535	}
536
537	void Preprocessor::macroExpand(Symbols into, Preprocessor that, const Symbols &toExpand, int &index,
538	int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
539	{
540	SymbolStack symbols;
541	SafeSymbols sf;
542	sf.symbols = toExpand;
543	sf.index = index;
544	sf.excludedSymbols = excludeSymbols;
545	symbols.push(sf);
546
547	if (toExpand.isEmpty())
548	return;
549
550	for (;;) {
551	QByteArray macro;
552	Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, &macro);
553
554	if (macro.isEmpty()) {
555	// not a macro
556	Symbol s = symbols.symbol();
557	s.lineNum = lineNum;
558	*into += s;
559	} else {
560	SafeSymbols sf;
561	sf.symbols = newSyms;
562	sf.index = `0`;
563	sf.expandedMacro = macro;
564	symbols.push(sf);
565	}
566	if (!symbols.hasNext() \|\| (one && symbols.size() == `1`))
567	break;
568	symbols.next();
569	}
570
571	if (symbols.size())
572	index = symbols.top().index;
573	else
574	index = toExpand.size();
575	}
576
577
578	Symbols Preprocessor::macroExpandIdentifier(Preprocessor that, SymbolStack &symbols, int* lineNum, QByteArray *macroName)
579	{
580	Symbol s = symbols.symbol();
581
582	// not a macro
583	if (s.token != PP_IDENTIFIER \|\| !that->macros.contains(s) \|\| symbols.dontReplaceSymbol(s.lexem())) {
584	return Symbols ();
585	}
586
587	const Macro &macro = that->macros.value(s);
588	*macroName = s.lexem();
589
590	Symbols expansion;
591	if (!macro.isFunction) {
592	expansion = macro.symbols;
593	} else {
594	bool haveSpace = false;
595	while (symbols.test(PP_WHITESPACE)) { haveSpace = true; }
596	if (!symbols.test(PP_LPAREN)) {
597	*macroName = QByteArray ();
598	Symbols syms;
599	if (haveSpace)
600	syms += Symbol (lineNum, PP_WHITESPACE);
601	syms += s;
602	syms.last().lineNum = lineNum;
603	return syms;
604	}
605	QVarLengthArray<Symbols, `5`> arguments;
606	while (symbols.hasNext()) {
607	Symbols argument;
608	// strip leading space
609	while (symbols.test(PP_WHITESPACE)) {}
610	int nesting = `0`;
611	bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - `1`);
612	while (symbols.hasNext()) {
613	Token t = symbols.next();
614	if (t == PP_LPAREN) {
615	++nesting;
616	} else if (t == PP_RPAREN) {
617	--nesting;
618	if (nesting < `0`)
619	break;
620	} else if (t == PP_COMMA && nesting == `0`) {
621	if (!vararg)
622	break;
623	}
624	argument += symbols.symbol();
625	}
626	arguments += argument;
627
628	if (nesting < `0`)
629	break;
630	else if (!symbols.hasNext())
631	that->error("missing ')' in macro usage");
632	}
633
634	// empty VA_ARGS
635	if (macro.isVariadic && arguments.size() == macro.arguments.size() - `1`)
636	arguments += Symbols ();
637
638	// now replace the macro arguments with the expanded arguments
639	enum Mode {
640	Normal,
641	Hash,
642	HashHash
643	} mode = Normal;
644
645	for (int i = `0`; i < macro.symbols.size(); ++i) {
646	const Symbol &s = macro.symbols.at(i);
647	if (s.token == HASH \|\| s.token == PP_HASHHASH) {
648	mode = (s.token == HASH ? Hash : HashHash);
649	continue;
650	}
651	int index = macro.arguments.indexOf(s);
652	if (mode == Normal) {
653	if (index >= `0` && index < arguments.size()) {
654	// each argument undoergoes macro expansion if it's not used as part of a # or ##
655	if (i == macro.symbols.size() - `1` \|\| macro.symbols.at(i + `1`).token != PP_HASHHASH) {
656	Symbols arg = arguments.at(index);
657	int idx = `1`;
658	macroExpand(&expansion, that, arg, idx, lineNum, false, symbols.excludeSymbols());
659	} else {
660	expansion += arguments.at(index);
661	}
662	} else {
663	expansion += s;
664	}
665	} else if (mode == Hash) {
666	if (index < `0`) {
667	that->error("'#' is not followed by a macro parameter");
668	continue;
669	} else if (index >= arguments.size()) {
670	that->error("Macro invoked with too few parameters for a use of '#'");
671	continue;
672	}
673
674	const Symbols &arg = arguments.at(index);
675	QByteArray stringified;
676	for (int i = `0`; i < arg.size(); ++i) {
677	stringified += arg.at(i).lexem();
678	}
679	stringified.replace(`'"'`, "\\\"");
680	stringified.prepend(`'"'`);
681	stringified.append(`'"'`);
682	expansion += Symbol (lineNum, STRING_LITERAL, stringified);
683	} else if (mode == HashHash){
684	if (s.token == WHITESPACE)
685	continue;
686
687	while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
688	expansion.pop_back();
689
690	Symbol next = s;
691	if (index >= `0` && index < arguments.size()) {
692	const Symbols &arg = arguments.at(index);
693	if (arg.size() == `0`) {
694	mode = Normal;
695	continue;
696	}
697	next = arg.at(`0`);
698	}
699
700	if (!expansion.isEmpty() && expansion.constLast().token == s.token
701	&& expansion.constLast().token != STRING_LITERAL) {
702	Symbol last = expansion.takeLast();
703
704	QByteArray lexem = last.lexem() + next.lexem();
705	expansion += Symbol (lineNum, last.token, lexem);
706	} else {
707	expansion += next;
708	}
709
710	if (index >= `0` && index < arguments.size()) {
711	const Symbols &arg = arguments.at(index);
712	for (int i = `1`; i < arg.size(); ++i)
713	expansion += arg.at(i);
714	}
715	}
716	mode = Normal;
717	}
718	if (mode != Normal)
719	that->error("'#' or '##' found at the end of a macro argument");
720
721	}
722
723	return expansion;
724	}
725
726	void Preprocessor::substituteUntilNewline(Symbols &substituted)
727	{
728	while (hasNext()) {
729	Token token = next();
730	if (token == PP_IDENTIFIER) {
731	macroExpand(&substituted, this, symbols, index, symbol().lineNum, true);
732	} else if (token == PP_DEFINED) {
733	bool braces = test(PP_LPAREN);
734	next(PP_IDENTIFIER);
735	Symbol definedOrNotDefined = symbol();
736	definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
737	substituted += definedOrNotDefined;
738	if (braces)
739	test(PP_RPAREN);
740	continue;
741	} else if (token == PP_NEWLINE) {
742	substituted += symbol();
743	break;
744	} else {
745	substituted += symbol();
746	}
747	}
748	}
749
750
751	class PP_Expression : public Parser
752	{
753	public:
754	int value() { index = `0`; return unary_expression_lookup() ? conditional_expression() : `0`; }
755
756	int conditional_expression();
757	int logical_OR_expression();
758	int logical_AND_expression();
759	int inclusive_OR_expression();
760	int exclusive_OR_expression();
761	int AND_expression();
762	int equality_expression();
763	int relational_expression();
764	int shift_expression();
765	int additive_expression();
766	int multiplicative_expression();
767	int unary_expression();
768	bool unary_expression_lookup();
769	int primary_expression();
770	bool primary_expression_lookup();
771	};
772
773	int PP_Expression::conditional_expression()
774	{
775	int value = logical_OR_expression();
776	if (test(PP_QUESTION)) {
777	int alt1 = conditional_expression();
778	int alt2 = test(PP_COLON) ? conditional_expression() : `0`;
779	return value ? alt1 : alt2;
780	}
781	return value;
782	}
783
784	int PP_Expression::logical_OR_expression()
785	{
786	int value = logical_AND_expression();
787	if (test(PP_OROR))
788	return logical_OR_expression() \|\| value;
789	return value;
790	}
791
792	int PP_Expression::logical_AND_expression()
793	{
794	int value = inclusive_OR_expression();
795	if (test(PP_ANDAND))
796	return logical_AND_expression() && value;
797	return value;
798	}
799
800	int PP_Expression::inclusive_OR_expression()
801	{
802	int value = exclusive_OR_expression();
803	if (test(PP_OR))
804	return value \| inclusive_OR_expression();
805	return value;
806	}
807
808	int PP_Expression::exclusive_OR_expression()
809	{
810	int value = AND_expression();
811	if (test(PP_HAT))
812	return value ^ exclusive_OR_expression();
813	return value;
814	}
815
816	int PP_Expression::AND_expression()
817	{
818	int value = equality_expression();
819	if (test(PP_AND))
820	return value & AND_expression();
821	return value;
822	}
823
824	int PP_Expression::equality_expression()
825	{
826	int value = relational_expression();
827	switch (next()) {
828	case PP_EQEQ:
829	return value == equality_expression();
830	case PP_NE:
831	return value != equality_expression();
832	default:
833	prev();
834	return value;
835	}
836	}
837
838	int PP_Expression::relational_expression()
839	{
840	int value = shift_expression();
841	switch (next()) {
842	case PP_LANGLE:
843	return value < relational_expression();
844	case PP_RANGLE:
845	return value > relational_expression();
846	case PP_LE:
847	return value <= relational_expression();
848	case PP_GE:
849	return value >= relational_expression();
850	default:
851	prev();
852	return value;
853	}
854	}
855
856	int PP_Expression::shift_expression()
857	{
858	int value = additive_expression();
859	switch (next()) {
860	case PP_LTLT:
861	return value << shift_expression();
862	case PP_GTGT:
863	return value >> shift_expression();
864	default:
865	prev();
866	return value;
867	}
868	}
869
870	int PP_Expression::additive_expression()
871	{
872	int value = multiplicative_expression();
873	switch (next()) {
874	case PP_PLUS:
875	return value + additive_expression();
876	case PP_MINUS:
877	return value - additive_expression();
878	default:
879	prev();
880	return value;
881	}
882	}
883
884	int PP_Expression::multiplicative_expression()
885	{
886	int value = unary_expression();
887	switch (next()) {
888	case PP_STAR:
889	return value * multiplicative_expression();
890	case PP_PERCENT:
891	{
892	int remainder = multiplicative_expression();
893	return remainder ? value % remainder : `0`;
894	}
895	case PP_SLASH:
896	{
897	int div = multiplicative_expression();
898	return div ? value / div : `0`;
899	}
900	default:
901	prev();
902	return value;
903	};
904	}
905
906	int PP_Expression::unary_expression()
907	{
908	switch (next()) {
909	case PP_PLUS:
910	return unary_expression();
911	case PP_MINUS:
912	return -unary_expression();
913	case PP_NOT:
914	return !unary_expression();
915	case PP_TILDE:
916	return ~unary_expression();
917	case PP_MOC_TRUE:
918	return `1`;
919	case PP_MOC_FALSE:
920	return `0`;
921	default:
922	prev();
923	return primary_expression();
924	}
925	}
926
927	bool PP_Expression::unary_expression_lookup()
928	{
929	Token t = lookup();
930	return (primary_expression_lookup()
931	\|\| t == PP_PLUS
932	\|\| t == PP_MINUS
933	\|\| t == PP_NOT
934	\|\| t == PP_TILDE
935	\|\| t == PP_DEFINED);
936	}
937
938	int PP_Expression::primary_expression()
939	{
940	int value;
941	if (test(PP_LPAREN)) {
942	value = conditional_expression();
943	test(PP_RPAREN);
944	} else {
945	next();
946	value = lexem().toInt(nullptr, `0`);
947	}
948	return value;
949	}
950
951	bool PP_Expression::primary_expression_lookup()
952	{
953	Token t = lookup();
954	return (t == PP_IDENTIFIER
955	\|\| t == PP_INTEGER_LITERAL
956	\|\| t == PP_FLOATING_LITERAL
957	\|\| t == PP_MOC_TRUE
958	\|\| t == PP_MOC_FALSE
959	\|\| t == PP_LPAREN);
960	}
961
962	int Preprocessor::evaluateCondition()
963	{
964	PP_Expression expression;
965	expression.currentFilenames = currentFilenames;
966
967	substituteUntilNewline(expression.symbols);
968
969	return expression.value();
970	}
971
972	static QByteArray readOrMapFile(QFile *file)
973	{
974	const qint64 size = file->size();
975	char rawInput = reinterpret_cast<char**>(file->map(`0`, size));
976	return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
977	}
978
979	static void mergeStringLiterals(Symbols *_symbols)
980	{
981	Symbols &symbols = *_symbols;
982	for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
983	if (i ->token == STRING_LITERAL) {
984	Symbols::Iterator mergeSymbol = i;
985	int literalsLength = mergeSymbol ->len;
986	while (++i != symbols.end() && i ->token == STRING_LITERAL)
987	literalsLength += i ->len - `2`; // no quotes
988
989	if (literalsLength != mergeSymbol ->len) {
990	QByteArray mergeSymbolOriginalLexem = mergeSymbol ->unquotedLexem();
991	QByteArray &mergeSymbolLexem = mergeSymbol ->lex;
992	mergeSymbolLexem.resize(`0`);
993	mergeSymbolLexem.reserve(literalsLength);
994	mergeSymbolLexem.append(`'"'`);
995	mergeSymbolLexem.append(mergeSymbolOriginalLexem);
996	for (Symbols::iterator j = mergeSymbol + `1`; j != i; ++j)
997	mergeSymbolLexem.append(j ->lex.constData() + j ->from + `1`, j ->len - `2`); // append j->unquotedLexem()
998	mergeSymbolLexem.append(`'"'`);
999	mergeSymbol ->len = mergeSymbol ->lex.length();
1000	mergeSymbol ->from = `0`;
1001	i = symbols.erase(mergeSymbol + `1`, i);
1002	}
1003	if (i == symbols.end())
1004	break;
1005	}
1006	}
1007	}
1008
1009	static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1010	const QByteArray &include)
1011	{
1012	QFileInfo fi;
1013	for (int j = `0`; j < includepaths.size() && !fi.exists(); ++j) {
1014	const Parser::IncludePath &p = includepaths.at(j);
1015	if (p.isFrameworkPath) {
1016	const int slashPos = include.indexOf(`'/'`);
1017	if (slashPos == -`1`)
1018	continue;
1019	fi.setFile(QString::fromLocal8Bit(p.path + `'/'` + include.left(slashPos) + ".framework/Headers/"),
1020	QString::fromLocal8Bit(include.mid(slashPos + `1`)));
1021	} else {
1022	fi.setFile(QString::fromLocal8Bit(p.path), QString::fromLocal8Bit(include));
1023	}
1024	// try again, maybe there's a file later in the include paths with the same name
1025	// (186067)
1026	if (fi.isDir()) {
1027	fi = QFileInfo ();
1028	continue;
1029	}
1030	}
1031
1032	if (!fi.exists() \|\| fi.isDir())
1033	return QByteArray ();
1034	return fi.canonicalFilePath().toLocal8Bit();
1035	}
1036
1037	QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1038	{
1039	if (!relativeTo.isEmpty()) {
1040	QFileInfo fi;
1041	fi.setFile(QFileInfo (QString::fromLocal8Bit(relativeTo)).dir(), QString::fromLocal8Bit(include));
1042	if (fi.exists() && !fi.isDir())
1043	return fi.canonicalFilePath().toLocal8Bit();
1044	}
1045
1046	auto it = nonlocalIncludePathResolutionCache.find(include);
1047	if (it == nonlocalIncludePathResolutionCache.end())
1048	it = nonlocalIncludePathResolutionCache.insert(include, searchIncludePaths(includes, include));
1049	return it.value();
1050	}
1051
1052	void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1053	{
1054	currentFilenames.push(filename);
1055	preprocessed.reserve(preprocessed.size() + symbols.size());
1056	while (hasNext()) {
1057	Token token = next();
1058
1059	switch (token) {
1060	case PP_INCLUDE:
1061	{
1062	int lineNum = symbol().lineNum;
1063	QByteArray include;
1064	bool local = false;
1065	if (test(PP_STRING_LITERAL)) {
1066	local = lexem().startsWith(`'\"'`);
1067	include = unquotedLexem();
1068	} else
1069	continue;
1070	until(PP_NEWLINE);
1071
1072	include = resolveInclude(include, local ? filename : QByteArray ());
1073	if (include.isNull())
1074	continue;
1075
1076	if (Preprocessor::preprocessedIncludes.contains(include))
1077	continue;
1078	Preprocessor::preprocessedIncludes.insert(include);
1079
1080	QFile file(QString::fromLocal8Bit(include.constData()));
1081	if (!file.open(QFile::ReadOnly))
1082	continue;
1083
1084	QByteArray input = readOrMapFile(&file);
1085
1086	file.close();
1087	if (input.isEmpty())
1088	continue;
1089
1090	Symbols saveSymbols = symbols;
1091	int saveIndex = index;
1092
1093	// phase 1: get rid of backslash-newlines
1094	input = cleaned(input);
1095
1096	// phase 2: tokenize for the preprocessor
1097	symbols = tokenize(input);
1098	input.clear();
1099
1100	index = `0`;
1101
1102	// phase 3: preprocess conditions and substitute macros
1103	preprocessed += Symbol (`0`, MOC_INCLUDE_BEGIN, include);
1104	preprocess(include, preprocessed);
1105	preprocessed += Symbol (lineNum, MOC_INCLUDE_END, include);
1106
1107	symbols = saveSymbols;
1108	index = saveIndex;
1109	continue;
1110	}
1111	case PP_DEFINE:
1112	{
1113	next();
1114	QByteArray name = lexem();
1115	if (name.isEmpty() \|\| !is_ident_start(name [`0`]))
1116	error();
1117	Macro macro;
1118	macro.isVariadic = false;
1119	if (test(LPAREN)) {
1120	// we have a function macro
1121	macro.isFunction = true;
1122	parseDefineArguments(&macro);
1123	} else {
1124	macro.isFunction = false;
1125	}
1126	int start = index;
1127	until(PP_NEWLINE);
1128	macro.symbols.reserve(index - start - `1`);
1129
1130	// remove whitespace where there shouldn't be any:
1131	// Before and after the macro, after a # and around ##
1132	Token lastToken = HASH; // skip shitespace at the beginning
1133	for (int i = start; i < index - `1`; ++i) {
1134	Token token = symbols.at(i).token;
1135	if (token == WHITESPACE) {
1136	if (lastToken == PP_HASH \|\| lastToken == HASH \|\|
1137	lastToken == PP_HASHHASH \|\|
1138	lastToken == WHITESPACE)
1139	continue;
1140	} else if (token == PP_HASHHASH) {
1141	if (!macro.symbols.isEmpty() &&
1142	lastToken == WHITESPACE)
1143	macro.symbols.pop_back();
1144	}
1145	macro.symbols.append(symbols.at(i));
1146	lastToken = token;
1147	}
1148	// remove trailing whitespace
1149	while (!macro.symbols.isEmpty() &&
1150	(macro.symbols.constLast().token == PP_WHITESPACE \|\| macro.symbols.constLast().token == WHITESPACE))
1151	macro.symbols.pop_back();
1152
1153	if (!macro.symbols.isEmpty()) {
1154	if (macro.symbols.constFirst().token == PP_HASHHASH \|\|
1155	macro.symbols.constLast().token == PP_HASHHASH) {
1156	error("'##' cannot appear at either end of a macro expansion");
1157	}
1158	}
1159	macros.insert(name, macro);
1160	continue;
1161	}
1162	case PP_UNDEF: {
1163	next();
1164	QByteArray name = lexem();
1165	until(PP_NEWLINE);
1166	macros.remove(name);
1167	continue;
1168	}
1169	case PP_IDENTIFIER: {
1170	// substitute macros
1171	macroExpand(&preprocessed, this, symbols, index, symbol().lineNum, true);
1172	continue;
1173	}
1174	case PP_HASH:
1175	until(PP_NEWLINE);
1176	continue; // skip unknown preprocessor statement
1177	case PP_IFDEF:
1178	case PP_IFNDEF:
1179	case PP_IF:
1180	while (!evaluateCondition()) {
1181	if (!skipBranch())
1182	break;
1183	if (test(PP_ELIF)) {
1184	} else {
1185	until(PP_NEWLINE);
1186	break;
1187	}
1188	}
1189	continue;
1190	case PP_ELIF:
1191	case PP_ELSE:
1192	skipUntilEndif();
1193	Q_FALLTHROUGH();
1194	case PP_ENDIF:
1195	until(PP_NEWLINE);
1196	continue;
1197	case PP_NEWLINE:
1198	continue;
1199	case SIGNALS:
1200	case SLOTS: {
1201	Symbol sym = symbol();
1202	if (macros.contains("QT_NO_KEYWORDS"))
1203	sym.token = IDENTIFIER;
1204	else
1205	sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1206	preprocessed += sym;
1207	} continue;
1208	default:
1209	break;
1210	}
1211	preprocessed += symbol();
1212	}
1213
1214	currentFilenames.pop();
1215	}
1216
1217	Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1218	{
1219	QByteArray input = readOrMapFile(file);
1220
1221	if (input.isEmpty())
1222	return symbols;
1223
1224	// phase 1: get rid of backslash-newlines
1225	input = cleaned(input);
1226
1227	// phase 2: tokenize for the preprocessor
1228	index = `0`;
1229	symbols = tokenize(input);
1230
1231	#if 0
1232	for (int j = `0`; j < symbols.size(); ++j)
1233	fprintf(stderr, "line %d: %s(%s)\n",
1234	symbols[j].lineNum,
1235	symbols[j].lexem().constData(),
1236	tokenTypeName(symbols[j].token));
1237	#endif
1238
1239	// phase 3: preprocess conditions and substitute macros
1240	Symbols result;
1241	// Preallocate some space to speed up the code below.
1242	// The magic value was found by logging the final size
1243	// and calculating an average when running moc over FOSS projects.
1244	result.reserve(file->size() / `300000`);
1245	preprocess(filename, result);
1246	mergeStringLiterals(&result);
1247
1248	#if 0
1249	for (int j = `0`; j < result.size(); ++j)
1250	fprintf(stderr, "line %d: %s(%s)\n",
1251	result[j].lineNum,
1252	result[j].lexem().constData(),
1253	tokenTypeName(result[j].token));
1254	#endif
1255
1256	return result;
1257	}
1258
1259	void Preprocessor::parseDefineArguments(Macro *m)
1260	{
1261	Symbols arguments;
1262	while (hasNext()) {
1263	while (test(PP_WHITESPACE)) {}
1264	Token t = next();
1265	if (t == PP_RPAREN)
1266	break;
1267	if (t != PP_IDENTIFIER) {
1268	QByteArray l = lexem();
1269	if (l == "...") {
1270	m->isVariadic = true;
1271	arguments += Symbol (symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1272	while (test(PP_WHITESPACE)) {}
1273	if (!test(PP_RPAREN))
1274	error("missing ')' in macro argument list");
1275	break;
1276	} else if (!is_identifier(l.constData(), l.length())) {
1277	error("Unexpected character in macro argument list.");
1278	}
1279	}
1280
1281	Symbol arg = symbol();
1282	if (arguments.contains(arg))
1283	error("Duplicate macro parameter.");
1284	arguments += symbol();
1285
1286	while (test(PP_WHITESPACE)) {}
1287	t = next();
1288	if (t == PP_RPAREN)
1289	break;
1290	if (t == PP_COMMA)
1291	continue;
1292	if (lexem() == "...") {
1293	//GCC extension: #define FOO(x, y...) x(y)
1294	// The last argument was already parsed. Just mark the macro as variadic.
1295	m->isVariadic = true;
1296	while (test(PP_WHITESPACE)) {}
1297	if (!test(PP_RPAREN))
1298	error("missing ')' in macro argument list");
1299	break;
1300	}
1301	error("Unexpected character in macro argument list.");
1302	}
1303	m->arguments = arguments;
1304	while (test(PP_WHITESPACE)) {}
1305	}
1306
1307	void Preprocessor::until(Token t)
1308	{
1309	while(hasNext() && next() != t)
1310	;
1311	}
1312
1313	QT_END_NAMESPACE
1314

Browse the source code of Qt/src/tools/moc/preprocessor.cpp