1 | /**************************************************************************/ |
2 | /* gdscript_tokenizer.h */ |
3 | /**************************************************************************/ |
4 | /* This file is part of: */ |
5 | /* GODOT ENGINE */ |
6 | /* https://godotengine.org */ |
7 | /**************************************************************************/ |
8 | /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ |
9 | /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ |
10 | /* */ |
11 | /* Permission is hereby granted, free of charge, to any person obtaining */ |
12 | /* a copy of this software and associated documentation files (the */ |
13 | /* "Software"), to deal in the Software without restriction, including */ |
14 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
15 | /* distribute, sublicense, and/or sell copies of the Software, and to */ |
16 | /* permit persons to whom the Software is furnished to do so, subject to */ |
17 | /* the following conditions: */ |
18 | /* */ |
19 | /* The above copyright notice and this permission notice shall be */ |
20 | /* included in all copies or substantial portions of the Software. */ |
21 | /* */ |
22 | /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ |
23 | /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ |
24 | /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ |
25 | /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ |
26 | /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ |
27 | /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ |
28 | /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
29 | /**************************************************************************/ |
30 | |
31 | #ifndef GDSCRIPT_TOKENIZER_H |
32 | #define GDSCRIPT_TOKENIZER_H |
33 | |
34 | #include "core/templates/hash_map.h" |
35 | #include "core/templates/hash_set.h" |
36 | #include "core/templates/list.h" |
37 | #include "core/templates/vector.h" |
38 | #include "core/variant/variant.h" |
39 | |
40 | class GDScriptTokenizer { |
41 | public: |
42 | enum CursorPlace { |
43 | CURSOR_NONE, |
44 | CURSOR_BEGINNING, |
45 | CURSOR_MIDDLE, |
46 | CURSOR_END, |
47 | }; |
48 | |
49 | struct Token { |
50 | enum Type { |
51 | EMPTY, |
52 | // Basic |
53 | ANNOTATION, |
54 | IDENTIFIER, |
55 | LITERAL, |
56 | // Comparison |
57 | LESS, |
58 | LESS_EQUAL, |
59 | GREATER, |
60 | GREATER_EQUAL, |
61 | EQUAL_EQUAL, |
62 | BANG_EQUAL, |
63 | // Logical |
64 | AND, |
65 | OR, |
66 | NOT, |
67 | AMPERSAND_AMPERSAND, |
68 | PIPE_PIPE, |
69 | BANG, |
70 | // Bitwise |
71 | AMPERSAND, |
72 | PIPE, |
73 | TILDE, |
74 | CARET, |
75 | LESS_LESS, |
76 | GREATER_GREATER, |
77 | // Math |
78 | PLUS, |
79 | MINUS, |
80 | STAR, |
81 | STAR_STAR, |
82 | SLASH, |
83 | PERCENT, |
84 | // Assignment |
85 | EQUAL, |
86 | PLUS_EQUAL, |
87 | MINUS_EQUAL, |
88 | STAR_EQUAL, |
89 | STAR_STAR_EQUAL, |
90 | SLASH_EQUAL, |
91 | PERCENT_EQUAL, |
92 | LESS_LESS_EQUAL, |
93 | GREATER_GREATER_EQUAL, |
94 | AMPERSAND_EQUAL, |
95 | PIPE_EQUAL, |
96 | CARET_EQUAL, |
97 | // Control flow |
98 | IF, |
99 | ELIF, |
100 | ELSE, |
101 | FOR, |
102 | WHILE, |
103 | BREAK, |
104 | CONTINUE, |
105 | PASS, |
106 | RETURN, |
107 | MATCH, |
108 | // Keywords |
109 | AS, |
110 | ASSERT, |
111 | AWAIT, |
112 | BREAKPOINT, |
113 | CLASS, |
114 | CLASS_NAME, |
115 | CONST, |
116 | ENUM, |
117 | EXTENDS, |
118 | FUNC, |
119 | IN, |
120 | IS, |
121 | NAMESPACE, |
122 | PRELOAD, |
123 | SELF, |
124 | SIGNAL, |
125 | STATIC, |
126 | SUPER, |
127 | TRAIT, |
128 | VAR, |
129 | VOID, |
130 | YIELD, |
131 | // Punctuation |
132 | BRACKET_OPEN, |
133 | BRACKET_CLOSE, |
134 | BRACE_OPEN, |
135 | BRACE_CLOSE, |
136 | PARENTHESIS_OPEN, |
137 | PARENTHESIS_CLOSE, |
138 | COMMA, |
139 | SEMICOLON, |
140 | PERIOD, |
141 | PERIOD_PERIOD, |
142 | COLON, |
143 | DOLLAR, |
144 | FORWARD_ARROW, |
145 | UNDERSCORE, |
146 | // Whitespace |
147 | NEWLINE, |
148 | INDENT, |
149 | DEDENT, |
150 | // Constants |
151 | CONST_PI, |
152 | CONST_TAU, |
153 | CONST_INF, |
154 | CONST_NAN, |
155 | // Error message improvement |
156 | VCS_CONFLICT_MARKER, |
157 | BACKTICK, |
158 | QUESTION_MARK, |
159 | // Special |
160 | ERROR, |
161 | TK_EOF, // "EOF" is reserved |
162 | TK_MAX |
163 | }; |
164 | |
165 | Type type = EMPTY; |
166 | Variant literal; |
167 | int start_line = 0, end_line = 0, start_column = 0, end_column = 0; |
168 | int leftmost_column = 0, rightmost_column = 0; // Column span for multiline tokens. |
169 | int cursor_position = -1; |
170 | CursorPlace cursor_place = CURSOR_NONE; |
171 | String source; |
172 | |
173 | const char *get_name() const; |
174 | bool can_precede_bin_op() const; |
175 | bool is_identifier() const; |
176 | bool is_node_name() const; |
177 | StringName get_identifier() const { return source; } |
178 | |
179 | Token(Type p_type) { |
180 | type = p_type; |
181 | } |
182 | |
183 | Token() { |
184 | } |
185 | }; |
186 | |
187 | #ifdef TOOLS_ENABLED |
188 | struct { |
189 | String ; |
190 | bool = false; |
191 | () {} |
192 | (const String &, bool p_new_line) { |
193 | comment = p_comment; |
194 | new_line = p_new_line; |
195 | } |
196 | }; |
197 | const HashMap<int, CommentData> &() const { |
198 | return comments; |
199 | } |
200 | #endif // TOOLS_ENABLED |
201 | |
202 | private: |
203 | String source; |
204 | const char32_t *_source = nullptr; |
205 | const char32_t *_current = nullptr; |
206 | int line = -1, column = -1; |
207 | int cursor_line = -1, cursor_column = -1; |
208 | int tab_size = 4; |
209 | |
210 | // Keep track of multichar tokens. |
211 | const char32_t *_start = nullptr; |
212 | int start_line = 0, start_column = 0; |
213 | int leftmost_column = 0, rightmost_column = 0; |
214 | |
215 | // Info cache. |
216 | bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'. |
217 | bool multiline_mode = false; |
218 | List<Token> error_stack; |
219 | bool pending_newline = false; |
220 | Token last_token; |
221 | Token last_newline; |
222 | int pending_indents = 0; |
223 | List<int> indent_stack; |
224 | List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point. |
225 | List<char32_t> paren_stack; |
226 | char32_t indent_char = '\0'; |
227 | int position = 0; |
228 | int length = 0; |
229 | #ifdef DEBUG_ENABLED |
230 | Vector<String> keyword_list; |
231 | #endif // DEBUG_ENABLED |
232 | |
233 | #ifdef TOOLS_ENABLED |
234 | HashMap<int, CommentData> ; |
235 | #endif // TOOLS_ENABLED |
236 | |
237 | _FORCE_INLINE_ bool _is_at_end() { return position >= length; } |
238 | _FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; } |
239 | int indent_level() const { return indent_stack.size(); } |
240 | bool has_error() const { return !error_stack.is_empty(); } |
241 | Token pop_error(); |
242 | char32_t _advance(); |
243 | String _get_indent_char_name(char32_t ch); |
244 | void _skip_whitespace(); |
245 | void check_indent(); |
246 | |
247 | #ifdef DEBUG_ENABLED |
248 | void make_keyword_list(); |
249 | #endif // DEBUG_ENABLED |
250 | |
251 | Token make_error(const String &p_message); |
252 | void push_error(const String &p_message); |
253 | void push_error(const Token &p_error); |
254 | Token make_paren_error(char32_t p_paren); |
255 | Token make_token(Token::Type p_type); |
256 | Token make_literal(const Variant &p_literal); |
257 | Token make_identifier(const StringName &p_identifier); |
258 | Token check_vcs_marker(char32_t p_test, Token::Type p_double_type); |
259 | void push_paren(char32_t p_char); |
260 | bool pop_paren(char32_t p_expected); |
261 | |
262 | void newline(bool p_make_token); |
263 | Token number(); |
264 | Token potential_identifier(); |
265 | Token string(); |
266 | Token annotation(); |
267 | |
268 | public: |
269 | Token scan(); |
270 | |
271 | void set_source_code(const String &p_source_code); |
272 | |
273 | int get_cursor_line() const; |
274 | int get_cursor_column() const; |
275 | void set_cursor_position(int p_line, int p_column); |
276 | void set_multiline_mode(bool p_state); |
277 | bool is_past_cursor() const; |
278 | static String get_token_name(Token::Type p_token_type); |
279 | void push_expression_indented_block(); // For lambdas, or blocks inside expressions. |
280 | void pop_expression_indented_block(); // For lambdas, or blocks inside expressions. |
281 | |
282 | GDScriptTokenizer(); |
283 | }; |
284 | |
285 | #endif // GDSCRIPT_TOKENIZER_H |
286 | |