gdscript_tokenizer.cpp source code [Godot/modules/gdscript/gdscript_tokenizer.cpp]

/**************************************************************************/
/*  gdscript_tokenizer.cpp                                                */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/
30
31	#include "gdscript_tokenizer.h"
32
33	#include "core/error/error_macros.h"
34	#include "core/string/char_utils.h"
35
36	#ifdef DEBUG_ENABLED
37	#include "servers/text_server.h"
38	#endif
39
40	#ifdef TOOLS_ENABLED
41	#include "editor/editor_settings.h"
42	#endif
43
// Display name of every token type, indexed by the token type value.
// The trailing comment on each entry names the enumerator it belongs to;
// entries must stay in exactly that order because lookups are by index.
static const char *token_names[] = {
	"Empty", // EMPTY,
	// Basic
	"Annotation", // ANNOTATION
	"Identifier", // IDENTIFIER,
	"Literal", // LITERAL,
	// Comparison
	"<", // LESS,
	"<=", // LESS_EQUAL,
	">", // GREATER,
	">=", // GREATER_EQUAL,
	"==", // EQUAL_EQUAL,
	"!=", // BANG_EQUAL,
	// Logical
	"and", // AND,
	"or", // OR,
	"not", // NOT,
	"&&", // AMPERSAND_AMPERSAND,
	"||", // PIPE_PIPE,
	"!", // BANG,
	// Bitwise
	"&", // AMPERSAND,
	"|", // PIPE,
	"~", // TILDE,
	"^", // CARET,
	"<<", // LESS_LESS,
	">>", // GREATER_GREATER,
	// Math
	"+", // PLUS,
	"-", // MINUS,
	"*", // STAR,
	"**", // STAR_STAR,
	"/", // SLASH,
	"%", // PERCENT,
	// Assignment
	"=", // EQUAL,
	"+=", // PLUS_EQUAL,
	"-=", // MINUS_EQUAL,
	"*=", // STAR_EQUAL,
	"**=", // STAR_STAR_EQUAL,
	"/=", // SLASH_EQUAL,
	"%=", // PERCENT_EQUAL,
	"<<=", // LESS_LESS_EQUAL,
	">>=", // GREATER_GREATER_EQUAL,
	"&=", // AMPERSAND_EQUAL,
	"|=", // PIPE_EQUAL,
	"^=", // CARET_EQUAL,
	// Control flow
	"if", // IF,
	"elif", // ELIF,
	"else", // ELSE,
	"for", // FOR,
	"while", // WHILE,
	"break", // BREAK,
	"continue", // CONTINUE,
	"pass", // PASS,
	"return", // RETURN,
	"match", // MATCH,
	// Keywords
	"as", // AS,
	"assert", // ASSERT,
	"await", // AWAIT,
	"breakpoint", // BREAKPOINT,
	"class", // CLASS,
	"class_name", // CLASS_NAME,
	"const", // CONST,
	"enum", // ENUM,
	"extends", // EXTENDS,
	"func", // FUNC,
	"in", // IN,
	"is", // IS,
	"namespace", // NAMESPACE
	"preload", // PRELOAD,
	"self", // SELF,
	"signal", // SIGNAL,
	"static", // STATIC,
	"super", // SUPER,
	"trait", // TRAIT,
	"var", // VAR,
	"void", // VOID,
	"yield", // YIELD,
	// Punctuation
	"[", // BRACKET_OPEN,
	"]", // BRACKET_CLOSE,
	"{", // BRACE_OPEN,
	"}", // BRACE_CLOSE,
	"(", // PARENTHESIS_OPEN,
	")", // PARENTHESIS_CLOSE,
	",", // COMMA,
	";", // SEMICOLON,
	".", // PERIOD,
	"..", // PERIOD_PERIOD,
	":", // COLON,
	"$", // DOLLAR,
	"->", // FORWARD_ARROW,
	"_", // UNDERSCORE,
	// Whitespace
	"Newline", // NEWLINE,
	"Indent", // INDENT,
	"Dedent", // DEDENT,
	// Constants
	"PI", // CONST_PI,
	"TAU", // CONST_TAU,
	"INF", // CONST_INF,
	"NaN", // CONST_NAN,
	// Error message improvement
	"VCS conflict marker", // VCS_CONFLICT_MARKER,
	"`", // BACKTICK,
	"?", // QUESTION_MARK,
	// Special
	"Error", // ERROR,
	"End of file", // EOF,
};
157
158	// Avoid desync.
159	static_assert(sizeof(token_names) / sizeof(token_names[`0`]) == GDScriptTokenizer::Token::TK_MAX, "Amount of token names don't match the amount of token types.");
160
161	const char GDScriptTokenizer::Token::get_name() const* {
162	ERR_FAIL_INDEX_V_MSG(type, TK_MAX, "<error>", "Using token type out of the enum.");
163	return token_names[type];
164	}
165
166	bool GDScriptTokenizer::Token::can_precede_bin_op() const {
167	switch (type) {
168	case IDENTIFIER:
169	case LITERAL:
170	case SELF:
171	case BRACKET_CLOSE:
172	case BRACE_CLOSE:
173	case PARENTHESIS_CLOSE:
174	case CONST_PI:
175	case CONST_TAU:
176	case CONST_INF:
177	case CONST_NAN:
178	return true;
179	default:
180	return false;
181	}
182	}
183
184	bool GDScriptTokenizer::Token::is_identifier() const {
185	// Note: Most keywords should not be recognized as identifiers.
186	// These are only exceptions for stuff that already is on the engine's API.
187	switch (type) {
188	case IDENTIFIER:
189	case MATCH: // Used in String.match().
190	// Allow constants to be treated as regular identifiers.
191	case CONST_PI:
192	case CONST_INF:
193	case CONST_NAN:
194	case CONST_TAU:
195	return true;
196	default:
197	return false;
198	}
199	}
200
201	bool GDScriptTokenizer::Token::is_node_name() const {
202	// This is meant to allow keywords with the $ notation, but not as general identifiers.
203	switch (type) {
204	case IDENTIFIER:
205	case AND:
206	case AS:
207	case ASSERT:
208	case AWAIT:
209	case BREAK:
210	case BREAKPOINT:
211	case CLASS_NAME:
212	case CLASS:
213	case CONST:
214	case CONST_PI:
215	case CONST_INF:
216	case CONST_NAN:
217	case CONST_TAU:
218	case CONTINUE:
219	case ELIF:
220	case ELSE:
221	case ENUM:
222	case EXTENDS:
223	case FOR:
224	case FUNC:
225	case IF:
226	case IN:
227	case IS:
228	case MATCH:
229	case NAMESPACE:
230	case NOT:
231	case OR:
232	case PASS:
233	case PRELOAD:
234	case RETURN:
235	case SELF:
236	case SIGNAL:
237	case STATIC:
238	case SUPER:
239	case TRAIT:
240	case UNDERSCORE:
241	case VAR:
242	case VOID:
243	case WHILE:
244	case YIELD:
245	return true;
246	default:
247	return false;
248	}
249	}
250
251	String GDScriptTokenizer::get_token_name(Token::Type p_token_type) {
252	ERR_FAIL_INDEX_V_MSG(p_token_type, Token::TK_MAX, "<error>", "Using token type out of the enum.");
253	return token_names[p_token_type];
254	}
255
256	void GDScriptTokenizer::set_source_code(const String &p_source_code) {
257	source = p_source_code;
258	if (source.is_empty()) {
259	_source = U"";
260	} else {
261	_source = source.ptr();
262	}
263	_current = _source;
264	line = `1`;
265	column = `1`;
266	length = p_source_code.length();
267	position = `0`;
268	}
269
270	void GDScriptTokenizer::set_cursor_position(int p_line, int p_column) {
271	cursor_line = p_line;
272	cursor_column = p_column;
273	}
274
275	void GDScriptTokenizer::set_multiline_mode(bool p_state) {
276	multiline_mode = p_state;
277	}
278
279	void GDScriptTokenizer::push_expression_indented_block() {
280	indent_stack_stack.push_back(indent_stack);
281	}
282
283	void GDScriptTokenizer::pop_expression_indented_block() {
284	ERR_FAIL_COND(indent_stack_stack.size() == `0`);
285	indent_stack = indent_stack_stack.back()->get();
286	indent_stack_stack.pop_back();
287	}
288
289	int GDScriptTokenizer::get_cursor_line() const {
290	return cursor_line;
291	}
292
293	int GDScriptTokenizer::get_cursor_column() const {
294	return cursor_column;
295	}
296
297	bool GDScriptTokenizer::is_past_cursor() const {
298	if (line < cursor_line) {
299	return false;
300	}
301	if (line > cursor_line) {
302	return true;
303	}
304	if (column < cursor_column) {
305	return false;
306	}
307	return true;
308	}
309
310	char32_t GDScriptTokenizer::_advance() {
311	if (unlikely(_is_at_end())) {
312	return `'\0'`;
313	}
314	_current++;
315	column++;
316	position++;
317	if (column > rightmost_column) {
318	rightmost_column = column;
319	}
320	if (unlikely(_is_at_end())) {
321	// Add extra newline even if it's not there, to satisfy the parser.
322	newline(true);
323	// Also add needed unindent.
324	check_indent();
325	}
326	return _peek(-`1`);
327	}
328
329	void GDScriptTokenizer::push_paren(char32_t p_char) {
330	paren_stack.push_back(p_char);
331	}
332
333	bool GDScriptTokenizer::pop_paren(char32_t p_expected) {
334	if (paren_stack.is_empty()) {
335	return false;
336	}
337	char32_t actual = paren_stack.back()->get();
338	paren_stack.pop_back();
339
340	return actual == p_expected;
341	}
342
343	GDScriptTokenizer::Token GDScriptTokenizer::pop_error() {
344	Token error = error_stack.back()->get();
345	error_stack.pop_back();
346	return error;
347	}
348
349	GDScriptTokenizer::Token GDScriptTokenizer::make_token(Token::Type p_type) {
350	Token token(p_type);
351	token.start_line = start_line;
352	token.end_line = line;
353	token.start_column = start_column;
354	token.end_column = column;
355	token.leftmost_column = leftmost_column;
356	token.rightmost_column = rightmost_column;
357	token.source = String (_start, _current - _start);
358
359	if (p_type != Token::ERROR && cursor_line > -`1`) {
360	// Also count whitespace after token.
361	int offset = `0`;
362	while (_peek(offset) == `' '` \|\| _peek(offset) == `'\t'`) {
363	offset++;
364	}
365	int last_column = column + offset;
366	// Check cursor position in token.
367	if (start_line == line) {
368	// Single line token.
369	if (cursor_line == start_line && cursor_column >= start_column && cursor_column <= last_column) {
370	token.cursor_position = cursor_column - start_column;
371	if (cursor_column == start_column) {
372	token.cursor_place = CURSOR_BEGINNING;
373	} else if (cursor_column < column) {
374	token.cursor_place = CURSOR_MIDDLE;
375	} else {
376	token.cursor_place = CURSOR_END;
377	}
378	}
379	} else {
380	// Multi line token.
381	if (cursor_line == start_line && cursor_column >= start_column) {
382	// Is in first line.
383	token.cursor_position = cursor_column - start_column;
384	if (cursor_column == start_column) {
385	token.cursor_place = CURSOR_BEGINNING;
386	} else {
387	token.cursor_place = CURSOR_MIDDLE;
388	}
389	} else if (cursor_line == line && cursor_column <= last_column) {
390	// Is in last line.
391	token.cursor_position = cursor_column - start_column;
392	if (cursor_column < column) {
393	token.cursor_place = CURSOR_MIDDLE;
394	} else {
395	token.cursor_place = CURSOR_END;
396	}
397	} else if (cursor_line > start_line && cursor_line < line) {
398	// Is in middle line.
399	token.cursor_position = CURSOR_MIDDLE;
400	}
401	}
402	}
403
404	last_token = token;
405	return token;
406	}
407
408	GDScriptTokenizer::Token GDScriptTokenizer::make_literal(const Variant &p_literal) {
409	Token token = make_token(Token::LITERAL);
410	token.literal = p_literal;
411	return token;
412	}
413
414	GDScriptTokenizer::Token GDScriptTokenizer::make_identifier(const StringName &p_identifier) {
415	Token identifier = make_token(Token::IDENTIFIER);
416	identifier.literal = p_identifier;
417	return identifier;
418	}
419
420	GDScriptTokenizer::Token GDScriptTokenizer::make_error(const String &p_message) {
421	Token error = make_token(Token::ERROR);
422	error.literal = p_message;
423
424	return error;
425	}
426
427	void GDScriptTokenizer::push_error(const String &p_message) {
428	Token error = make_error(p_message);
429	error_stack.push_back(error);
430	}
431
432	void GDScriptTokenizer::push_error(const Token &p_error) {
433	error_stack.push_back(p_error);
434	}
435
436	GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(char32_t p_paren) {
437	if (paren_stack.is_empty()) {
438	return make_error(vformat("Closing \"%c\" doesn't have an opening counterpart.", p_paren));
439	}
440	Token error = make_error(vformat("Closing \"%c\" doesn't match the opening \"%c\".", p_paren, paren_stack.back()->get()));
441	paren_stack.pop_back(); // Remove opening one anyway.
442	return error;
443	}
444
445	GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, Token::Type p_double_type) {
446	const char32_t *next = _current + `1`;
447	int chars = `2`; // Two already matched.
448
449	// Test before consuming characters, since we don't want to consume more than needed.
450	while (*next == p_test) {
451	chars++;
452	next++;
453	}
454	if (chars >= `7`) {
455	// It is a VCS conflict marker.
456	while (chars > `1`) {
457	// Consume all characters (first was already consumed by scan()).
458	_advance();
459	chars--;
460	}
461	return make_token(Token::VCS_CONFLICT_MARKER);
462	} else {
463	// It is only a regular double character token, so we consume the second character.
464	_advance();
465	return make_token(p_double_type);
466	}
467	}
468
469	GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
470	if (is_unicode_identifier_start(_peek())) {
471	_advance(); // Consume start character.
472	} else {
473	push_error("Expected annotation identifier after \"@\".");
474	}
475	while (is_unicode_identifier_continue(_peek())) {
476	// Consume all identifier characters.
477	_advance();
478	}
479	Token annotation = make_token(Token::ANNOTATION);
480	annotation.literal = StringName (annotation.source);
481	return annotation;
482	}
483
// X-macro listing every keyword together with its token type.
// KEYWORD_GROUP(c) is emitted once before the keywords that start with character c;
// KEYWORD(text, token) is emitted once per keyword. Callers pass their own two macros
// to generate either a plain list or a switch over the first character.
#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
	KEYWORD_GROUP('a') \
	KEYWORD("as", Token::AS) \
	KEYWORD("and", Token::AND) \
	KEYWORD("assert", Token::ASSERT) \
	KEYWORD("await", Token::AWAIT) \
	KEYWORD_GROUP('b') \
	KEYWORD("break", Token::BREAK) \
	KEYWORD("breakpoint", Token::BREAKPOINT) \
	KEYWORD_GROUP('c') \
	KEYWORD("class", Token::CLASS) \
	KEYWORD("class_name", Token::CLASS_NAME) \
	KEYWORD("const", Token::CONST) \
	KEYWORD("continue", Token::CONTINUE) \
	KEYWORD_GROUP('e') \
	KEYWORD("elif", Token::ELIF) \
	KEYWORD("else", Token::ELSE) \
	KEYWORD("enum", Token::ENUM) \
	KEYWORD("extends", Token::EXTENDS) \
	KEYWORD_GROUP('f') \
	KEYWORD("for", Token::FOR) \
	KEYWORD("func", Token::FUNC) \
	KEYWORD_GROUP('i') \
	KEYWORD("if", Token::IF) \
	KEYWORD("in", Token::IN) \
	KEYWORD("is", Token::IS) \
	KEYWORD_GROUP('m') \
	KEYWORD("match", Token::MATCH) \
	KEYWORD_GROUP('n') \
	KEYWORD("namespace", Token::NAMESPACE) \
	KEYWORD("not", Token::NOT) \
	KEYWORD_GROUP('o') \
	KEYWORD("or", Token::OR) \
	KEYWORD_GROUP('p') \
	KEYWORD("pass", Token::PASS) \
	KEYWORD("preload", Token::PRELOAD) \
	KEYWORD_GROUP('r') \
	KEYWORD("return", Token::RETURN) \
	KEYWORD_GROUP('s') \
	KEYWORD("self", Token::SELF) \
	KEYWORD("signal", Token::SIGNAL) \
	KEYWORD("static", Token::STATIC) \
	KEYWORD("super", Token::SUPER) \
	KEYWORD_GROUP('t') \
	KEYWORD("trait", Token::TRAIT) \
	KEYWORD_GROUP('v') \
	KEYWORD("var", Token::VAR) \
	KEYWORD("void", Token::VOID) \
	KEYWORD_GROUP('w') \
	KEYWORD("while", Token::WHILE) \
	KEYWORD_GROUP('y') \
	KEYWORD("yield", Token::YIELD) \
	KEYWORD_GROUP('I') \
	KEYWORD("INF", Token::CONST_INF) \
	KEYWORD_GROUP('N') \
	KEYWORD("NAN", Token::CONST_NAN) \
	KEYWORD_GROUP('P') \
	KEYWORD("PI", Token::CONST_PI) \
	KEYWORD_GROUP('T') \
	KEYWORD("TAU", Token::CONST_TAU)

// Length bounds of the keywords listed above, used to reject non-keywords early.
#define MIN_KEYWORD_LENGTH 2
#define MAX_KEYWORD_LENGTH 10
547
#ifdef DEBUG_ENABLED
// Fills keyword_list with the text of every keyword from the KEYWORDS table.
// The list is what potential_identifier() checks non-ASCII identifiers against.
void GDScriptTokenizer::make_keyword_list() {
// Each keyword contributes its text followed by a comma; group markers contribute nothing.
#define KEYWORD_TEXT_ENTRY(keyword, token_type) keyword,
#define KEYWORD_GROUP_SKIP(group)
	keyword_list = {
		KEYWORDS(KEYWORD_GROUP_SKIP, KEYWORD_TEXT_ENTRY)
	};
#undef KEYWORD_TEXT_ENTRY
#undef KEYWORD_GROUP_SKIP
}
#endif // DEBUG_ENABLED
559
560	GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
561	bool only_ascii = _peek(-`1`) < `128`;
562
563	// Consume all identifier characters.
564	while (is_unicode_identifier_continue(_peek())) {
565	char32_t c = _advance();
566	only_ascii = only_ascii && c < `128`;
567	}
568
569	int len = _current - _start;
570
571	if (len == `1` && _peek(-`1`) == `'_'`) {
572	// Lone underscore.
573	return make_token(Token::UNDERSCORE);
574	}
575
576	String name(_start, len);
577	if (len < MIN_KEYWORD_LENGTH \|\| len > MAX_KEYWORD_LENGTH) {
578	// Cannot be a keyword, as the length doesn't match any.
579	return make_identifier(name);
580	}
581
582	if (!only_ascii) {
583	// Kept here in case the order with push_error matters.
584	Token id = make_identifier(name);
585
586	#ifdef DEBUG_ENABLED
587	// Additional checks for identifiers but only in debug and if it's available in TextServer.
588	if (TS ->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
589	int64_t confusable = TS ->is_confusable(name, keyword_list);
590	if (confusable >= `0`) {
591	push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list [confusable]));
592	}
593	}
594	#endif // DEBUG_ENABLED
595
596	// Cannot be a keyword, as keywords are ASCII only.
597	return id;
598	}
599
600	// Define some helper macros for the switch case.
601	#define KEYWORD_GROUP_CASE(char) \
602	break; \
603	case char:
604	#define KEYWORD(keyword, token_type) \
605	{ \
606	const int keyword_length = sizeof(keyword) - 1; \
607	static_assert(keyword_length <= MAX_KEYWORD_LENGTH, "There's a keyword longer than the defined maximum length"); \
608	static_assert(keyword_length >= MIN_KEYWORD_LENGTH, "There's a keyword shorter than the defined minimum length"); \
609	if (keyword_length == len && name == keyword) { \
610	return make_token(token_type); \
611	} \
612	}
613
614	// Find if it's a keyword.
615	switch (_start[`0`]) {
616	default:
617	KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
618	break;
619	}
620
621	// Check if it's a special literal
622	if (len == `4`) {
623	if (name == "true") {
624	return make_literal(true);
625	} else if (name == "null") {
626	return make_literal(Variant ());
627	}
628	} else if (len == `5`) {
629	if (name == "false") {
630	return make_literal(false);
631	}
632	}
633
634	// Not a keyword, so must be an identifier.
635	return make_identifier(name);
636
637	#undef KEYWORD_GROUP_CASE
638	#undef KEYWORD
639	}
640
641	#undef MAX_KEYWORD_LENGTH
642	#undef MIN_KEYWORD_LENGTH
643	#undef KEYWORDS
644
645	void GDScriptTokenizer::newline(bool p_make_token) {
646	// Don't overwrite previous newline, nor create if we want a line continuation.
647	if (p_make_token && !pending_newline && !line_continuation) {
648	Token newline(Token::NEWLINE);
649	newline.start_line = line;
650	newline.end_line = line;
651	newline.start_column = column - `1`;
652	newline.end_column = column;
653	newline.leftmost_column = newline.start_column;
654	newline.rightmost_column = newline.end_column;
655	pending_newline = true;
656	last_token = newline;
657	last_newline = newline;
658	}
659
660	// Increment line/column counters.
661	line++;
662	column = `1`;
663	leftmost_column = `1`;
664	}
665
666	GDScriptTokenizer::Token GDScriptTokenizer::number() {
667	int base = `10`;
668	bool has_decimal = false;
669	bool has_exponent = false;
670	bool has_error = false;
671	bool (digit_check_func)(char32_t*) = is_digit;
672
673	// Sign before hexadecimal or binary.
674	if ((_peek(-`1`) == `'+'` \|\| _peek(-`1`) == `'-'`) && _peek() == `'0'`) {
675	_advance();
676	}
677
678	if (_peek(-`1`) == `'.'`) {
679	has_decimal = true;
680	} else if (_peek(-`1`) == `'0'`) {
681	if (_peek() == `'x'`) {
682	// Hexadecimal.
683	base = `16`;
684	digit_check_func = is_hex_digit;
685	_advance();
686	} else if (_peek() == `'b'`) {
687	// Binary.
688	base = `2`;
689	digit_check_func = is_binary_digit;
690	_advance();
691	}
692	}
693
694	if (base != `10` && is_underscore(_peek())) { // Disallow `0x_` and `0b_`.
695	Token error = make_error(vformat(R"(Unexpected underscore after "0%c".)", _peek(-`1`)));
696	error.start_column = column;
697	error.leftmost_column = column;
698	error.end_column = column + `1`;
699	error.rightmost_column = column + `1`;
700	push_error(error);
701	has_error = true;
702	}
703	bool previous_was_underscore = false; // Allow `_` to be used in a number, for readability.
704	while (digit_check_func(_peek()) \|\| is_underscore(_peek())) {
705	if (is_underscore(_peek())) {
706	if (previous_was_underscore) {
707	Token error = make_error(R"(Multiple underscores cannot be adjacent in a numeric literal.)");
708	error.start_column = column;
709	error.leftmost_column = column;
710	error.end_column = column + `1`;
711	error.rightmost_column = column + `1`;
712	push_error(error);
713	}
714	previous_was_underscore = true;
715	} else {
716	previous_was_underscore = false;
717	}
718	_advance();
719	}
720
721	// It might be a ".." token (instead of decimal point) so we check if it's not.
722	if (_peek() == `'.'` && _peek(`1`) != `'.'`) {
723	if (base == `10` && !has_decimal) {
724	has_decimal = true;
725	} else if (base == `10`) {
726	Token error = make_error("Cannot use a decimal point twice in a number.");
727	error.start_column = column;
728	error.leftmost_column = column;
729	error.end_column = column + `1`;
730	error.rightmost_column = column + `1`;
731	push_error(error);
732	has_error = true;
733	} else if (base == `16`) {
734	Token error = make_error("Cannot use a decimal point in a hexadecimal number.");
735	error.start_column = column;
736	error.leftmost_column = column;
737	error.end_column = column + `1`;
738	error.rightmost_column = column + `1`;
739	push_error(error);
740	has_error = true;
741	} else {
742	Token error = make_error("Cannot use a decimal point in a binary number.");
743	error.start_column = column;
744	error.leftmost_column = column;
745	error.end_column = column + `1`;
746	error.rightmost_column = column + `1`;
747	push_error(error);
748	has_error = true;
749	}
750	if (!has_error) {
751	_advance();
752
753	// Consume decimal digits.
754	if (is_underscore(_peek())) { // Disallow `10._`, but allow `10.`.
755	Token error = make_error(R"(Unexpected underscore after decimal point.)");
756	error.start_column = column;
757	error.leftmost_column = column;
758	error.end_column = column + `1`;
759	error.rightmost_column = column + `1`;
760	push_error(error);
761	has_error = true;
762	}
763	previous_was_underscore = false;
764	while (is_digit(_peek()) \|\| is_underscore(_peek())) {
765	if (is_underscore(_peek())) {
766	if (previous_was_underscore) {
767	Token error = make_error(R"(Multiple underscores cannot be adjacent in a numeric literal.)");
768	error.start_column = column;
769	error.leftmost_column = column;
770	error.end_column = column + `1`;
771	error.rightmost_column = column + `1`;
772	push_error(error);
773	}
774	previous_was_underscore = true;
775	} else {
776	previous_was_underscore = false;
777	}
778	_advance();
779	}
780	}
781	}
782	if (base == `10`) {
783	if (_peek() == `'e'` \|\| _peek() == `'E'`) {
784	has_exponent = true;
785	_advance();
786	if (_peek() == `'+'` \|\| _peek() == `'-'`) {
787	// Exponent sign.
788	_advance();
789	}
790	// Consume exponent digits.
791	if (!is_digit(_peek())) {
792	Token error = make_error(R"(Expected exponent value after "e".)");
793	error.start_column = column;
794	error.leftmost_column = column;
795	error.end_column = column + `1`;
796	error.rightmost_column = column + `1`;
797	push_error(error);
798	}
799	previous_was_underscore = false;
800	while (is_digit(_peek()) \|\| is_underscore(_peek())) {
801	if (is_underscore(_peek())) {
802	if (previous_was_underscore) {
803	Token error = make_error(R"(Multiple underscores cannot be adjacent in a numeric literal.)");
804	error.start_column = column;
805	error.leftmost_column = column;
806	error.end_column = column + `1`;
807	error.rightmost_column = column + `1`;
808	push_error(error);
809	}
810	previous_was_underscore = true;
811	} else {
812	previous_was_underscore = false;
813	}
814	_advance();
815	}
816	}
817	}
818
819	// Detect extra decimal point.
820	if (!has_error && has_decimal && _peek() == `'.'` && _peek(`1`) != `'.'`) {
821	Token error = make_error("Cannot use a decimal point twice in a number.");
822	error.start_column = column;
823	error.leftmost_column = column;
824	error.end_column = column + `1`;
825	error.rightmost_column = column + `1`;
826	push_error(error);
827	has_error = true;
828	} else if (is_unicode_identifier_start(_peek()) \|\| is_unicode_identifier_continue(_peek())) {
829	// Letter at the end of the number.
830	push_error("Invalid numeric notation.");
831	}
832
833	// Create a string with the whole number.
834	int len = _current - _start;
835	String number = String (_start, len).replace("_", "");
836
837	// Convert to the appropriate literal type.
838	if (base == `16`) {
839	int64_t value = number.hex_to_int();
840	return make_literal(value);
841	} else if (base == `2`) {
842	int64_t value = number.bin_to_int();
843	return make_literal(value);
844	} else if (has_decimal \|\| has_exponent) {
845	double value = number.to_float();
846	return make_literal(value);
847	} else {
848	int64_t value = number.to_int();
849	return make_literal(value);
850	}
851	}
852
853	GDScriptTokenizer::Token GDScriptTokenizer::string() {
854	enum StringType {
855	STRING_REGULAR,
856	STRING_NAME,
857	STRING_NODEPATH,
858	};
859
860	bool is_multiline = false;
861	StringType type = STRING_REGULAR;
862
863	if (_peek(-`1`) == `'&'`) {
864	type = STRING_NAME;
865	_advance();
866	} else if (_peek(-`1`) == `'^'`) {
867	type = STRING_NODEPATH;
868	_advance();
869	}
870
871	char32_t quote_char = _peek(-`1`);
872
873	if (_peek() == quote_char && _peek(`1`) == quote_char) {
874	is_multiline = true;
875	// Consume all quotes.
876	_advance();
877	_advance();
878	}
879
880	String result;
881	char32_t prev = `0`;
882	int prev_pos = `0`;
883
884	for (;;) {
885	// Consume actual string.
886	if (_is_at_end()) {
887	return make_error("Unterminated string.");
888	}
889
890	char32_t ch = _peek();
891
892	if (ch == `0x200E` \|\| ch == `0x200F` \|\| (ch >= `0x202A` && ch <= `0x202E`) \|\| (ch >= `0x2066` && ch <= `0x2069`)) {
893	Token error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, `16`) + "\") to avoid confusion.");
894	error.start_column = column;
895	error.leftmost_column = error.start_column;
896	error.end_column = column + `1`;
897	error.rightmost_column = error.end_column;
898	push_error(error);
899	}
900
901	if (ch == `'\\'`) {
902	// Escape pattern.
903	_advance();
904	if (_is_at_end()) {
905	return make_error("Unterminated string.");
906	}
907
908	// Grab escape character.
909	char32_t code = _peek();
910	_advance();
911	if (_is_at_end()) {
912	return make_error("Unterminated string.");
913	}
914
915	char32_t escaped = `0`;
916	bool valid_escape = true;
917
918	switch (code) {
919	case `'a'`:
920	escaped = `'\a'`;
921	break;
922	case `'b'`:
923	escaped = `'\b'`;
924	break;
925	case `'f'`:
926	escaped = `'\f'`;
927	break;
928	case `'n'`:
929	escaped = `'\n'`;
930	break;
931	case `'r'`:
932	escaped = `'\r'`;
933	break;
934	case `'t'`:
935	escaped = `'\t'`;
936	break;
937	case `'v'`:
938	escaped = `'\v'`;
939	break;
940	case `'\''`:
941	escaped = `'\''`;
942	break;
943	case `'\"'`:
944	escaped = `'\"'`;
945	break;
946	case `'\\'`:
947	escaped = `'\\'`;
948	break;
949	case `'U'`:
950	case `'u'`: {
951	// Hexadecimal sequence.
952	int hex_len = (code == `'U'`) ? `6` : `4`;
953	for (int j = `0`; j < hex_len; j++) {
954	if (_is_at_end()) {
955	return make_error("Unterminated string.");
956	}
957
958	char32_t digit = _peek();
959	char32_t value = `0`;
960	if (is_digit(digit)) {
961	value = digit - `'0'`;
962	} else if (digit >= `'a'` && digit <= `'f'`) {
963	value = digit - `'a'`;
964	value += `10`;
965	} else if (digit >= `'A'` && digit <= `'F'`) {
966	value = digit - `'A'`;
967	value += `10`;
968	} else {
969	// Make error, but keep parsing the string.
970	Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
971	error.start_column = column;
972	error.leftmost_column = error.start_column;
973	error.end_column = column + `1`;
974	error.rightmost_column = error.end_column;
975	push_error(error);
976	valid_escape = false;
977	break;
978	}
979
980	escaped <<= `4`;
981	escaped \|= value;
982
983	_advance();
984	}
985	} break;
986	case `'\r'`:
987	if (_peek() != `'\n'`) {
988	// Carriage return without newline in string. (???)
989	// Just add it to the string and keep going.
990	result += ch;
991	_advance();
992	break;
993	}
994	[[fallthrough]];
995	case `'\n'`:
996	// Escaping newline.
997	newline(false);
998	valid_escape = false; // Don't add to the string.
999	break;
1000	default:
1001	Token error = make_error("Invalid escape in string.");
1002	error.start_column = column - `2`;
1003	error.leftmost_column = error.start_column;
1004	push_error(error);
1005	valid_escape = false;
1006	break;
1007	}
1008	// Parse UTF-16 pair.
1009	if (valid_escape) {
1010	if ((escaped & `0xfffffc00`) == `0xd800`) {
1011	if (prev == `0`) {
1012	prev = escaped;
1013	prev_pos = column - `2`;
1014	continue;
1015	} else {
1016	Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
1017	error.start_column = column - `2`;
1018	error.leftmost_column = error.start_column;
1019	push_error(error);
1020	valid_escape = false;
1021	prev = `0`;
1022	}
1023	} else if ((escaped & `0xfffffc00`) == `0xdc00`) {
1024	if (prev == `0`) {
1025	Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
1026	error.start_column = column - `2`;
1027	error.leftmost_column = error.start_column;
1028	push_error(error);
1029	valid_escape = false;
1030	} else {
1031	escaped = (prev << `10UL`) + escaped - ((`0xd800` << `10UL`) + `0xdc00` - `0x10000`);
1032	prev = `0`;
1033	}
1034	}
1035	if (prev != `0`) {
1036	Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
1037	error.start_column = prev_pos;
1038	error.leftmost_column = error.start_column;
1039	push_error(error);
1040	prev = `0`;
1041	}
1042	}
1043
1044	if (valid_escape) {
1045	result += escaped;
1046	}
1047	} else if (ch == quote_char) {
1048	if (prev != `0`) {
1049	Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
1050	error.start_column = prev_pos;
1051	error.leftmost_column = error.start_column;
1052	push_error(error);
1053	prev = `0`;
1054	}
1055	_advance();
1056	if (is_multiline) {
1057	if (_peek() == quote_char && _peek(`1`) == quote_char) {
1058	// Ended the multiline string. Consume all quotes.
1059	_advance();
1060	_advance();
1061	break;
1062	} else {
1063	// Not a multiline string termination, add consumed quote.
1064	result += quote_char;
1065	}
1066	} else {
1067	// Ended single-line string.
1068	break;
1069	}
1070	} else {
1071	if (prev != `0`) {
1072	Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
1073	error.start_column = prev_pos;
1074	error.leftmost_column = error.start_column;
1075	push_error(error);
1076	prev = `0`;
1077	}
1078	result += ch;
1079	_advance();
1080	if (ch == `'\n'`) {
1081	newline(false);
1082	}
1083	}
1084	}
1085	if (prev != `0`) {
1086	Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
1087	error.start_column = prev_pos;
1088	error.leftmost_column = error.start_column;
1089	push_error(error);
1090	prev = `0`;
1091	}
1092
1093	// Make the literal.
1094	Variant string;
1095	switch (type) {
1096	case STRING_NAME:
1097	string = StringName (result);
1098	break;
1099	case STRING_NODEPATH:
1100	string = NodePath (result);
1101	break;
1102	case STRING_REGULAR:
1103	string = result;
1104	break;
1105	}
1106
1107	return make_literal(string);
1108	}
1109
1110	void GDScriptTokenizer::check_indent() {
1111	ERR_FAIL_COND_MSG(column != `1`, "Checking tokenizer indentation in the middle of a line.");
1112
1113	if (_is_at_end()) {
1114	// Send dedents for every indent level.
1115	pending_indents -= indent_level();
1116	indent_stack.clear();
1117	return;
1118	}
1119
1120	for (;;) {
1121	char32_t current_indent_char = _peek();
1122	int indent_count = `0`;
1123
1124	if (current_indent_char != `' '` && current_indent_char != `'\t'` && current_indent_char != `'\r'` && current_indent_char != `'\n'` && current_indent_char != `'#'`) {
1125	// First character of the line is not whitespace, so we clear all indentation levels.
1126	// Unless we are in a continuation or in multiline mode (inside expression).
1127	if (line_continuation \|\| multiline_mode) {
1128	return;
1129	}
1130	pending_indents -= indent_level();
1131	indent_stack.clear();
1132	return;
1133	}
1134
1135	if (_peek() == `'\r'`) {
1136	_advance();
1137	if (_peek() != `'\n'`) {
1138	push_error("Stray carriage return character in source code.");
1139	}
1140	}
1141	if (_peek() == `'\n'`) {
1142	// Empty line, keep going.
1143	_advance();
1144	newline(false);
1145	continue;
1146	}
1147
1148	// Check indent level.
1149	bool mixed = false;
1150	while (!_is_at_end()) {
1151	char32_t space = _peek();
1152	if (space == `'\t'`) {
1153	// Consider individual tab columns.
1154	column += tab_size - `1`;
1155	indent_count += tab_size;
1156	} else if (space == `' '`) {
1157	indent_count += `1`;
1158	} else {
1159	break;
1160	}
1161	mixed = mixed \|\| space != current_indent_char;
1162	_advance();
1163	}
1164
1165	if (_is_at_end()) {
1166	// Reached the end with an empty line, so just dedent as much as needed.
1167	pending_indents -= indent_level();
1168	indent_stack.clear();
1169	return;
1170	}
1171
1172	if (_peek() == `'\r'`) {
1173	_advance();
1174	if (_peek() != `'\n'`) {
1175	push_error("Stray carriage return character in source code.");
1176	}
1177	}
1178	if (_peek() == `'\n'`) {
1179	// Empty line, keep going.
1180	_advance();
1181	newline(false);
1182	continue;
1183	}
1184	if (_peek() == `'#'`) {
1185	// Comment. Advance to the next line.
1186	#ifdef TOOLS_ENABLED
1187	String comment;
1188	while (_peek() != `'\n'` && !_is_at_end()) {
1189	comment += _advance();
1190	}
1191	comments [line] = CommentData (comment, true);
1192	#else
1193	while (_peek() != `'\n'` && !_is_at_end()) {
1194	_advance();
1195	}
1196	#endif // TOOLS_ENABLED
1197	if (_is_at_end()) {
1198	// Reached the end with an empty line, so just dedent as much as needed.
1199	pending_indents -= indent_level();
1200	indent_stack.clear();
1201	return;
1202	}
1203	_advance(); // Consume '\n'.
1204	newline(false);
1205	continue;
1206	}
1207
1208	if (mixed && !line_continuation && !multiline_mode) {
1209	Token error = make_error("Mixed use of tabs and spaces for indentation.");
1210	error.start_line = line;
1211	error.start_column = `1`;
1212	error.leftmost_column = `1`;
1213	error.rightmost_column = column;
1214	push_error(error);
1215	}
1216
1217	if (line_continuation \|\| multiline_mode) {
1218	// We cleared up all the whitespace at the beginning of the line.
1219	// But if this is a continuation or multiline mode and we don't want any indentation change.
1220	return;
1221	}
1222
1223	// Check if indentation character is consistent.
1224	if (indent_char == `'\0'`) {
1225	// First time indenting, choose character now.
1226	indent_char = current_indent_char;
1227	} else if (current_indent_char != indent_char) {
1228	Token error = make_error(vformat("Used %s character for indentation instead of %s as used before in the file.",
1229	_get_indent_char_name(current_indent_char), _get_indent_char_name(indent_char)));
1230	error.start_line = line;
1231	error.start_column = `1`;
1232	error.leftmost_column = `1`;
1233	error.rightmost_column = column;
1234	push_error(error);
1235	}
1236
1237	// Now we can do actual indentation changes.
1238
1239	// Check if indent or dedent.
1240	int previous_indent = `0`;
1241	if (indent_level() > `0`) {
1242	previous_indent = indent_stack.back()->get();
1243	}
1244	if (indent_count == previous_indent) {
1245	// No change in indentation.
1246	return;
1247	}
1248	if (indent_count > previous_indent) {
1249	// Indentation increased.
1250	indent_stack.push_back(indent_count);
1251	pending_indents++;
1252	} else {
1253	// Indentation decreased (dedent).
1254	if (indent_level() == `0`) {
1255	push_error("Tokenizer bug: trying to dedent without previous indent.");
1256	return;
1257	}
1258	while (indent_level() > `0` && indent_stack.back()->get() > indent_count) {
1259	indent_stack.pop_back();
1260	pending_indents--;
1261	}
1262	if ((indent_level() > `0` && indent_stack.back()->get() != indent_count) \|\| (indent_level() == `0` && indent_count != `0`)) {
1263	// Mismatched indentation alignment.
1264	Token error = make_error("Unindent doesn't match the previous indentation level.");
1265	error.start_line = line;
1266	error.start_column = `1`;
1267	error.leftmost_column = `1`;
1268	error.end_column = column + `1`;
1269	error.rightmost_column = column + `1`;
1270	push_error(error);
1271	// Still, we'll be lenient and keep going, so keep this level in the stack.
1272	indent_stack.push_back(indent_count);
1273	}
1274	}
1275	break; // Get out of the loop in any case.
1276	}
1277	}
1278
1279	String GDScriptTokenizer::_get_indent_char_name(char32_t ch) {
1280	ERR_FAIL_COND_V(ch != `' '` && ch != `'\t'`, String (&ch, `1`).c_escape());
1281
1282	return ch == `' '` ? "space" : "tab";
1283	}
1284
1285	void GDScriptTokenizer::_skip_whitespace() {
1286	if (pending_indents != `0`) {
1287	// Still have some indent/dedent tokens to give.
1288	return;
1289	}
1290
1291	bool is_bol = column == `1`; // Beginning of line.
1292
1293	if (is_bol) {
1294	check_indent();
1295	return;
1296	}
1297
1298	for (;;) {
1299	char32_t c = _peek();
1300	switch (c) {
1301	case `' '`:
1302	_advance();
1303	break;
1304	case `'\t'`:
1305	_advance();
1306	// Consider individual tab columns.
1307	column += tab_size - `1`;
1308	break;
1309	case `'\r'`:
1310	_advance(); // Consume either way.
1311	if (_peek() != `'\n'`) {
1312	push_error("Stray carriage return character in source code.");
1313	return;
1314	}
1315	break;
1316	case `'\n'`:
1317	_advance();
1318	newline(!is_bol); // Don't create new line token if line is empty.
1319	check_indent();
1320	break;
1321	case `'#'`: {
1322	// Comment.
1323	#ifdef TOOLS_ENABLED
1324	String comment;
1325	while (_peek() != `'\n'` && !_is_at_end()) {
1326	comment += _advance();
1327	}
1328	comments [line] = CommentData (comment, is_bol);
1329	#else
1330	while (_peek() != `'\n'` && !_is_at_end()) {
1331	_advance();
1332	}
1333	#endif // TOOLS_ENABLED
1334	if (_is_at_end()) {
1335	return;
1336	}
1337	_advance(); // Consume '\n'
1338	newline(!is_bol);
1339	check_indent();
1340	} break;
1341	default:
1342	return;
1343	}
1344	}
1345	}
1346
1347	GDScriptTokenizer::Token GDScriptTokenizer::scan() {
1348	if (has_error()) {
1349	return pop_error();
1350	}
1351
1352	_skip_whitespace();
1353
1354	if (pending_newline) {
1355	pending_newline = false;
1356	if (!multiline_mode) {
1357	// Don't return newline tokens on multiline mode.
1358	return last_newline;
1359	}
1360	}
1361
1362	// Check for potential errors after skipping whitespace().
1363	if (has_error()) {
1364	return pop_error();
1365	}
1366
1367	_start = _current;
1368	start_line = line;
1369	start_column = column;
1370	leftmost_column = column;
1371	rightmost_column = column;
1372
1373	if (pending_indents != `0`) {
1374	// Adjust position for indent.
1375	_start -= start_column - `1`;
1376	start_column = `1`;
1377	leftmost_column = `1`;
1378	if (pending_indents > `0`) {
1379	// Indents.
1380	pending_indents--;
1381	return make_token(Token::INDENT);
1382	} else {
1383	// Dedents.
1384	pending_indents++;
1385	Token dedent = make_token(Token::DEDENT);
1386	dedent.end_column += `1`;
1387	dedent.rightmost_column += `1`;
1388	return dedent;
1389	}
1390	}
1391
1392	if (_is_at_end()) {
1393	return make_token(Token::TK_EOF);
1394	}
1395
1396	const char32_t c = _advance();
1397
1398	if (c == `'\\'`) {
1399	// Line continuation with backslash.
1400	if (_peek() == `'\r'`) {
1401	if (_peek(`1`) != `'\n'`) {
1402	return make_error("Unexpected carriage return character.");
1403	}
1404	_advance();
1405	}
1406	if (_peek() != `'\n'`) {
1407	return make_error("Expected new line after \"\\\".");
1408	}
1409	_advance();
1410	newline(false);
1411	line_continuation = true;
1412	return scan(); // Recurse to get next token.
1413	}
1414
1415	line_continuation = false;
1416
1417	if (is_digit(c)) {
1418	return number();
1419	} else if (is_unicode_identifier_start(c)) {
1420	return potential_identifier();
1421	}
1422
1423	switch (c) {
1424	// String literals.
1425	case `'"'`:
1426	case `'\''`:
1427	return string();
1428
1429	// Annotation.
1430	case `'@'`:
1431	return annotation();
1432
1433	// Single characters.
1434	case `'~'`:
1435	return make_token(Token::TILDE);
1436	case `','`:
1437	return make_token(Token::COMMA);
1438	case `':'`:
1439	return make_token(Token::COLON);
1440	case `';'`:
1441	return make_token(Token::SEMICOLON);
1442	case `'$'`:
1443	return make_token(Token::DOLLAR);
1444	case `'?'`:
1445	return make_token(Token::QUESTION_MARK);
1446	case '`':
1447	return make_token(Token::BACKTICK);
1448
1449	// Parens.
1450	case `'('`:
1451	push_paren(`'('`);
1452	return make_token(Token::PARENTHESIS_OPEN);
1453	case `'['`:
1454	push_paren(`'['`);
1455	return make_token(Token::BRACKET_OPEN);
1456	case `'{'`:
1457	push_paren(`'{'`);
1458	return make_token(Token::BRACE_OPEN);
1459	case `')'`:
1460	if (!pop_paren(`'('`)) {
1461	return make_paren_error(c);
1462	}
1463	return make_token(Token::PARENTHESIS_CLOSE);
1464	case `']'`:
1465	if (!pop_paren(`'['`)) {
1466	return make_paren_error(c);
1467	}
1468	return make_token(Token::BRACKET_CLOSE);
1469	case `'}'`:
1470	if (!pop_paren(`'{'`)) {
1471	return make_paren_error(c);
1472	}
1473	return make_token(Token::BRACE_CLOSE);
1474
1475	// Double characters.
1476	case `'!'`:
1477	if (_peek() == `'='`) {
1478	_advance();
1479	return make_token(Token::BANG_EQUAL);
1480	} else {
1481	return make_token(Token::BANG);
1482	}
1483	case `'.'`:
1484	if (_peek() == `'.'`) {
1485	_advance();
1486	return make_token(Token::PERIOD_PERIOD);
1487	} else if (is_digit(_peek())) {
1488	// Number starting with '.'.
1489	return number();
1490	} else {
1491	return make_token(Token::PERIOD);
1492	}
1493	case `'+'`:
1494	if (_peek() == `'='`) {
1495	_advance();
1496	return make_token(Token::PLUS_EQUAL);
1497	} else if (is_digit(_peek()) && !last_token.can_precede_bin_op()) {
1498	// Number starting with '+'.
1499	return number();
1500	} else {
1501	return make_token(Token::PLUS);
1502	}
1503	case `'-'`:
1504	if (_peek() == `'='`) {
1505	_advance();
1506	return make_token(Token::MINUS_EQUAL);
1507	} else if (is_digit(_peek()) && !last_token.can_precede_bin_op()) {
1508	// Number starting with '-'.
1509	return number();
1510	} else if (_peek() == `'>'`) {
1511	_advance();
1512	return make_token(Token::FORWARD_ARROW);
1513	} else {
1514	return make_token(Token::MINUS);
1515	}
1516	case `'*'`:
1517	if (_peek() == `'='`) {
1518	_advance();
1519	return make_token(Token::STAR_EQUAL);
1520	} else if (_peek() == `'*'`) {
1521	if (_peek(`1`) == `'='`) {
1522	_advance();
1523	_advance(); // Advance both '' and '='*
1524	return make_token(Token::STAR_STAR_EQUAL);
1525	}
1526	_advance();
1527	return make_token(Token::STAR_STAR);
1528	} else {
1529	return make_token(Token::STAR);
1530	}
1531	case `'/'`:
1532	if (_peek() == `'='`) {
1533	_advance();
1534	return make_token(Token::SLASH_EQUAL);
1535	} else {
1536	return make_token(Token::SLASH);
1537	}
1538	case `'%'`:
1539	if (_peek() == `'='`) {
1540	_advance();
1541	return make_token(Token::PERCENT_EQUAL);
1542	} else {
1543	return make_token(Token::PERCENT);
1544	}
1545	case `'^'`:
1546	if (_peek() == `'='`) {
1547	_advance();
1548	return make_token(Token::CARET_EQUAL);
1549	} else if (_peek() == `'"'` \|\| _peek() == `'\''`) {
1550	// Node path
1551	return string();
1552	} else {
1553	return make_token(Token::CARET);
1554	}
1555	case `'&'`:
1556	if (_peek() == `'&'`) {
1557	_advance();
1558	return make_token(Token::AMPERSAND_AMPERSAND);
1559	} else if (_peek() == `'='`) {
1560	_advance();
1561	return make_token(Token::AMPERSAND_EQUAL);
1562	} else if (_peek() == `'"'` \|\| _peek() == `'\''`) {
1563	// String Name
1564	return string();
1565	} else {
1566	return make_token(Token::AMPERSAND);
1567	}
1568	case `'\|'`:
1569	if (_peek() == `'\|'`) {
1570	_advance();
1571	return make_token(Token::PIPE_PIPE);
1572	} else if (_peek() == `'='`) {
1573	_advance();
1574	return make_token(Token::PIPE_EQUAL);
1575	} else {
1576	return make_token(Token::PIPE);
1577	}
1578
1579	// Potential VCS conflict markers.
1580	case `'='`:
1581	if (_peek() == `'='`) {
1582	return check_vcs_marker(`'='`, Token::EQUAL_EQUAL);
1583	} else {
1584	return make_token(Token::EQUAL);
1585	}
1586	case `'<'`:
1587	if (_peek() == `'='`) {
1588	_advance();
1589	return make_token(Token::LESS_EQUAL);
1590	} else if (_peek() == `'<'`) {
1591	if (_peek(`1`) == `'='`) {
1592	_advance();
1593	_advance(); // Advance both '<' and '='
1594	return make_token(Token::LESS_LESS_EQUAL);
1595	} else {
1596	return check_vcs_marker(`'<'`, Token::LESS_LESS);
1597	}
1598	} else {
1599	return make_token(Token::LESS);
1600	}
1601	case `'>'`:
1602	if (_peek() == `'='`) {
1603	_advance();
1604	return make_token(Token::GREATER_EQUAL);
1605	} else if (_peek() == `'>'`) {
1606	if (_peek(`1`) == `'='`) {
1607	_advance();
1608	_advance(); // Advance both '>' and '='
1609	return make_token(Token::GREATER_GREATER_EQUAL);
1610	} else {
1611	return check_vcs_marker(`'>'`, Token::GREATER_GREATER);
1612	}
1613	} else {
1614	return make_token(Token::GREATER);
1615	}
1616
1617	default:
1618	if (is_whitespace(c)) {
1619	return make_error(vformat(R"(Invalid white space character U+%04X.)", static_cast<int32_t>(c)));
1620	} else {
1621	return make_error(vformat(R"(Invalid character "%c" (U+%04X).)", c, static_cast<int32_t>(c)));
1622	}
1623	}
1624	}
1625
1626	GDScriptTokenizer::GDScriptTokenizer() {
1627	#ifdef TOOLS_ENABLED
1628	if (EditorSettings::get_singleton()) {
1629	tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size");
1630	}
1631	#endif // TOOLS_ENABLED
1632	#ifdef DEBUG_ENABLED
1633	make_keyword_list();
1634	#endif // DEBUG_ENABLED
1635	}
1636

Browse the source code of Godot/modules/gdscript/gdscript_tokenizer.cpp