1 | /* |
2 | * This file is part of the MicroPython project, http://micropython.org/ |
3 | * |
4 | * The MIT License (MIT) |
5 | * |
6 | * Copyright (c) 2013, 2014 Damien P. George |
7 | * |
8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | * of this software and associated documentation files (the "Software"), to deal |
10 | * in the Software without restriction, including without limitation the rights |
11 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
12 | * copies of the Software, and to permit persons to whom the Software is |
13 | * furnished to do so, subject to the following conditions: |
14 | * |
15 | * The above copyright notice and this permission notice shall be included in |
16 | * all copies or substantial portions of the Software. |
17 | * |
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
24 | * THE SOFTWARE. |
25 | */ |
26 | #ifndef MICROPY_INCLUDED_PY_LEXER_H |
27 | #define MICROPY_INCLUDED_PY_LEXER_H |
28 | |
29 | #include <stdint.h> |
30 | |
31 | #include "py/mpconfig.h" |
32 | #include "py/qstr.h" |
33 | #include "py/reader.h" |
34 | |
/* lexer.h -- simple tokeniser for MicroPython
 *
 * Source text is handled with an explicit (byte) length instead of relying
 * on null termination.  Token data is handled the same way: UTF-8 with a
 * (byte) length.
 */
40 | |
// Kinds of token produced by the lexer.
//
// Enumerator values are assigned implicitly, so the position of each entry
// is significant.  Several groups below are documented as matching the order
// of the corresponding mp_binary_op_t operators; keep the two in step when
// editing this list.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,

    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,

    // Layout tokens
    MP_TOKEN_NEWLINE,
    MP_TOKEN_INDENT,
    MP_TOKEN_DEDENT,

    // Names and literals
    MP_TOKEN_NAME,
    MP_TOKEN_INTEGER,
    MP_TOKEN_FLOAT_OR_IMAG,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // Keywords (MP_TOKEN_KW_*)
    MP_TOKEN_KW_FALSE,
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW___DEBUG__,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    // These two exist only when MICROPY_PY_ASYNC_AWAIT is enabled; enabling it
    // therefore shifts the numeric value of every enumerator that follows.
    #if MICROPY_PY_ASYNC_AWAIT
    MP_TOKEN_KW_ASYNC,
    MP_TOKEN_KW_AWAIT,
    #endif
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // Operators (MP_TOKEN_OP_*)
    MP_TOKEN_OP_ASSIGN,
    MP_TOKEN_OP_TILDE,

    // Comparison operators.
    // Order of these 6 matches the corresponding mp_binary_op_t operators.
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // Binary bitwise/arithmetic operators.
    // Order of these 13 matches the corresponding mp_binary_op_t operators.
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_DBL_MORE,
    MP_TOKEN_OP_PLUS,
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_AT,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_DBL_STAR,

    // Augmented-assignment delimiters, listed in the same relative order as
    // the 13 operator tokens directly above.
    // Order of these 13 matches the corresponding mp_binary_op_t operators.
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_AT_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,

    // Remaining delimiters (MP_TOKEN_DEL_*)
    MP_TOKEN_DEL_PAREN_OPEN,
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;
153 | |
154 | // this data structure is exposed for efficiency |
155 | // public members are: source_name, tok_line, tok_column, tok_kind, vstr |
156 | typedef struct _mp_lexer_t { |
157 | qstr source_name; // name of source |
158 | mp_reader_t reader; // stream source |
159 | |
160 | unichar chr0, chr1, chr2; // current cached characters from source |
161 | |
162 | size_t line; // current source line |
163 | size_t column; // current source column |
164 | |
165 | mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit |
166 | mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines |
167 | |
168 | size_t alloc_indent_level; |
169 | size_t num_indent_level; |
170 | uint16_t *indent_level; |
171 | |
172 | size_t tok_line; // token source line |
173 | size_t tok_column; // token source column |
174 | mp_token_kind_t tok_kind; // token kind |
175 | vstr_t vstr; // token data |
176 | } mp_lexer_t; |
177 | |
178 | mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader); |
179 | mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len); |
180 | |
181 | void mp_lexer_free(mp_lexer_t *lex); |
182 | void mp_lexer_to_next(mp_lexer_t *lex); |
183 | |
/******************************************************************/
// Platform-specific import functions; each port must provide its own implementation.
// TODO: tidy up, rename, or move elsewhere.
187 | |
// Result type of mp_import_stat().
// The enum carries a tag so it follows the same `_name` / `name` typedef
// pattern as the other types declared in this header.
typedef enum _mp_import_stat_t {
    MP_IMPORT_STAT_NO_EXIST,    // by its name: nothing exists at the path
    MP_IMPORT_STAT_DIR,         // by its name: the path is a directory
    MP_IMPORT_STAT_FILE,        // by its name: the path is a file
} mp_import_stat_t;
193 | |
194 | mp_import_stat_t mp_import_stat(const char *path); |
195 | mp_lexer_t *mp_lexer_new_from_file(const char *filename); |
196 | |
197 | #if MICROPY_HELPER_LEXER_UNIX |
198 | mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd); |
199 | #endif |
200 | |
201 | #endif // MICROPY_INCLUDED_PY_LEXER_H |
202 | |