1/**************************************************************************/
2/* gdscript_tokenizer.h */
3/**************************************************************************/
4/* This file is part of: */
5/* GODOT ENGINE */
6/* https://godotengine.org */
7/**************************************************************************/
8/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10/* */
11/* Permission is hereby granted, free of charge, to any person obtaining */
12/* a copy of this software and associated documentation files (the */
13/* "Software"), to deal in the Software without restriction, including */
14/* without limitation the rights to use, copy, modify, merge, publish, */
15/* distribute, sublicense, and/or sell copies of the Software, and to */
16/* permit persons to whom the Software is furnished to do so, subject to */
17/* the following conditions: */
18/* */
19/* The above copyright notice and this permission notice shall be */
20/* included in all copies or substantial portions of the Software. */
21/* */
22/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29/**************************************************************************/
30
31#ifndef GDSCRIPT_TOKENIZER_H
32#define GDSCRIPT_TOKENIZER_H
33
34#include "core/templates/hash_map.h"
35#include "core/templates/hash_set.h"
36#include "core/templates/list.h"
37#include "core/templates/vector.h"
38#include "core/variant/variant.h"
39
40class GDScriptTokenizer {
41public:
42 enum CursorPlace {
43 CURSOR_NONE,
44 CURSOR_BEGINNING,
45 CURSOR_MIDDLE,
46 CURSOR_END,
47 };
48
49 struct Token {
50 enum Type {
51 EMPTY,
52 // Basic
53 ANNOTATION,
54 IDENTIFIER,
55 LITERAL,
56 // Comparison
57 LESS,
58 LESS_EQUAL,
59 GREATER,
60 GREATER_EQUAL,
61 EQUAL_EQUAL,
62 BANG_EQUAL,
63 // Logical
64 AND,
65 OR,
66 NOT,
67 AMPERSAND_AMPERSAND,
68 PIPE_PIPE,
69 BANG,
70 // Bitwise
71 AMPERSAND,
72 PIPE,
73 TILDE,
74 CARET,
75 LESS_LESS,
76 GREATER_GREATER,
77 // Math
78 PLUS,
79 MINUS,
80 STAR,
81 STAR_STAR,
82 SLASH,
83 PERCENT,
84 // Assignment
85 EQUAL,
86 PLUS_EQUAL,
87 MINUS_EQUAL,
88 STAR_EQUAL,
89 STAR_STAR_EQUAL,
90 SLASH_EQUAL,
91 PERCENT_EQUAL,
92 LESS_LESS_EQUAL,
93 GREATER_GREATER_EQUAL,
94 AMPERSAND_EQUAL,
95 PIPE_EQUAL,
96 CARET_EQUAL,
97 // Control flow
98 IF,
99 ELIF,
100 ELSE,
101 FOR,
102 WHILE,
103 BREAK,
104 CONTINUE,
105 PASS,
106 RETURN,
107 MATCH,
108 // Keywords
109 AS,
110 ASSERT,
111 AWAIT,
112 BREAKPOINT,
113 CLASS,
114 CLASS_NAME,
115 CONST,
116 ENUM,
117 EXTENDS,
118 FUNC,
119 IN,
120 IS,
121 NAMESPACE,
122 PRELOAD,
123 SELF,
124 SIGNAL,
125 STATIC,
126 SUPER,
127 TRAIT,
128 VAR,
129 VOID,
130 YIELD,
131 // Punctuation
132 BRACKET_OPEN,
133 BRACKET_CLOSE,
134 BRACE_OPEN,
135 BRACE_CLOSE,
136 PARENTHESIS_OPEN,
137 PARENTHESIS_CLOSE,
138 COMMA,
139 SEMICOLON,
140 PERIOD,
141 PERIOD_PERIOD,
142 COLON,
143 DOLLAR,
144 FORWARD_ARROW,
145 UNDERSCORE,
146 // Whitespace
147 NEWLINE,
148 INDENT,
149 DEDENT,
150 // Constants
151 CONST_PI,
152 CONST_TAU,
153 CONST_INF,
154 CONST_NAN,
155 // Error message improvement
156 VCS_CONFLICT_MARKER,
157 BACKTICK,
158 QUESTION_MARK,
159 // Special
160 ERROR,
161 TK_EOF, // "EOF" is reserved
162 TK_MAX
163 };
164
165 Type type = EMPTY;
166 Variant literal;
167 int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
168 int leftmost_column = 0, rightmost_column = 0; // Column span for multiline tokens.
169 int cursor_position = -1;
170 CursorPlace cursor_place = CURSOR_NONE;
171 String source;
172
173 const char *get_name() const;
174 bool can_precede_bin_op() const;
175 bool is_identifier() const;
176 bool is_node_name() const;
177 StringName get_identifier() const { return source; }
178
179 Token(Type p_type) {
180 type = p_type;
181 }
182
183 Token() {
184 }
185 };
186
187#ifdef TOOLS_ENABLED
188 struct CommentData {
189 String comment;
190 bool new_line = false;
191 CommentData() {}
192 CommentData(const String &p_comment, bool p_new_line) {
193 comment = p_comment;
194 new_line = p_new_line;
195 }
196 };
197 const HashMap<int, CommentData> &get_comments() const {
198 return comments;
199 }
200#endif // TOOLS_ENABLED
201
202private:
203 String source;
204 const char32_t *_source = nullptr;
205 const char32_t *_current = nullptr;
206 int line = -1, column = -1;
207 int cursor_line = -1, cursor_column = -1;
208 int tab_size = 4;
209
210 // Keep track of multichar tokens.
211 const char32_t *_start = nullptr;
212 int start_line = 0, start_column = 0;
213 int leftmost_column = 0, rightmost_column = 0;
214
215 // Info cache.
216 bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
217 bool multiline_mode = false;
218 List<Token> error_stack;
219 bool pending_newline = false;
220 Token last_token;
221 Token last_newline;
222 int pending_indents = 0;
223 List<int> indent_stack;
224 List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.
225 List<char32_t> paren_stack;
226 char32_t indent_char = '\0';
227 int position = 0;
228 int length = 0;
229#ifdef DEBUG_ENABLED
230 Vector<String> keyword_list;
231#endif // DEBUG_ENABLED
232
233#ifdef TOOLS_ENABLED
234 HashMap<int, CommentData> comments;
235#endif // TOOLS_ENABLED
236
237 _FORCE_INLINE_ bool _is_at_end() { return position >= length; }
238 _FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
239 int indent_level() const { return indent_stack.size(); }
240 bool has_error() const { return !error_stack.is_empty(); }
241 Token pop_error();
242 char32_t _advance();
243 String _get_indent_char_name(char32_t ch);
244 void _skip_whitespace();
245 void check_indent();
246
247#ifdef DEBUG_ENABLED
248 void make_keyword_list();
249#endif // DEBUG_ENABLED
250
251 Token make_error(const String &p_message);
252 void push_error(const String &p_message);
253 void push_error(const Token &p_error);
254 Token make_paren_error(char32_t p_paren);
255 Token make_token(Token::Type p_type);
256 Token make_literal(const Variant &p_literal);
257 Token make_identifier(const StringName &p_identifier);
258 Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);
259 void push_paren(char32_t p_char);
260 bool pop_paren(char32_t p_expected);
261
262 void newline(bool p_make_token);
263 Token number();
264 Token potential_identifier();
265 Token string();
266 Token annotation();
267
268public:
269 Token scan();
270
271 void set_source_code(const String &p_source_code);
272
273 int get_cursor_line() const;
274 int get_cursor_column() const;
275 void set_cursor_position(int p_line, int p_column);
276 void set_multiline_mode(bool p_state);
277 bool is_past_cursor() const;
278 static String get_token_name(Token::Type p_token_type);
279 void push_expression_indented_block(); // For lambdas, or blocks inside expressions.
280 void pop_expression_indented_block(); // For lambdas, or blocks inside expressions.
281
282 GDScriptTokenizer();
283};
284
285#endif // GDSCRIPT_TOKENIZER_H
286