1/*
2 * This file is part of the MicroPython project, http://micropython.org/
3 *
4 * The MIT License (MIT)
5 *
6 * Copyright (c) 2013, 2014 Damien P. George
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26#ifndef MICROPY_INCLUDED_PY_LEXER_H
27#define MICROPY_INCLUDED_PY_LEXER_H
28
29#include <stdint.h>
30
31#include "py/mpconfig.h"
32#include "py/qstr.h"
33#include "py/reader.h"
34
35/* lexer.h -- simple tokeniser for MicroPython
36 *
37 * Uses (byte) length instead of null termination.
38 * Tokens are the same - UTF-8 with (byte) length.
39 */
40
41typedef enum _mp_token_kind_t {
42 MP_TOKEN_END,
43
44 MP_TOKEN_INVALID,
45 MP_TOKEN_DEDENT_MISMATCH,
46 MP_TOKEN_LONELY_STRING_OPEN,
47
48 MP_TOKEN_NEWLINE,
49 MP_TOKEN_INDENT,
50 MP_TOKEN_DEDENT,
51
52 MP_TOKEN_NAME,
53 MP_TOKEN_INTEGER,
54 MP_TOKEN_FLOAT_OR_IMAG,
55 MP_TOKEN_STRING,
56 MP_TOKEN_BYTES,
57
58 MP_TOKEN_ELLIPSIS,
59
60 MP_TOKEN_KW_FALSE,
61 MP_TOKEN_KW_NONE,
62 MP_TOKEN_KW_TRUE,
63 MP_TOKEN_KW___DEBUG__,
64 MP_TOKEN_KW_AND,
65 MP_TOKEN_KW_AS,
66 MP_TOKEN_KW_ASSERT,
67 #if MICROPY_PY_ASYNC_AWAIT
68 MP_TOKEN_KW_ASYNC,
69 MP_TOKEN_KW_AWAIT,
70 #endif
71 MP_TOKEN_KW_BREAK,
72 MP_TOKEN_KW_CLASS,
73 MP_TOKEN_KW_CONTINUE,
74 MP_TOKEN_KW_DEF,
75 MP_TOKEN_KW_DEL,
76 MP_TOKEN_KW_ELIF,
77 MP_TOKEN_KW_ELSE,
78 MP_TOKEN_KW_EXCEPT,
79 MP_TOKEN_KW_FINALLY,
80 MP_TOKEN_KW_FOR,
81 MP_TOKEN_KW_FROM,
82 MP_TOKEN_KW_GLOBAL,
83 MP_TOKEN_KW_IF,
84 MP_TOKEN_KW_IMPORT,
85 MP_TOKEN_KW_IN,
86 MP_TOKEN_KW_IS,
87 MP_TOKEN_KW_LAMBDA,
88 MP_TOKEN_KW_NONLOCAL,
89 MP_TOKEN_KW_NOT,
90 MP_TOKEN_KW_OR,
91 MP_TOKEN_KW_PASS,
92 MP_TOKEN_KW_RAISE,
93 MP_TOKEN_KW_RETURN,
94 MP_TOKEN_KW_TRY,
95 MP_TOKEN_KW_WHILE,
96 MP_TOKEN_KW_WITH,
97 MP_TOKEN_KW_YIELD,
98
99 MP_TOKEN_OP_ASSIGN,
100 MP_TOKEN_OP_TILDE,
101
102 // Order of these 6 matches corresponding mp_binary_op_t operator
103 MP_TOKEN_OP_LESS,
104 MP_TOKEN_OP_MORE,
105 MP_TOKEN_OP_DBL_EQUAL,
106 MP_TOKEN_OP_LESS_EQUAL,
107 MP_TOKEN_OP_MORE_EQUAL,
108 MP_TOKEN_OP_NOT_EQUAL,
109
110 // Order of these 13 matches corresponding mp_binary_op_t operator
111 MP_TOKEN_OP_PIPE,
112 MP_TOKEN_OP_CARET,
113 MP_TOKEN_OP_AMPERSAND,
114 MP_TOKEN_OP_DBL_LESS,
115 MP_TOKEN_OP_DBL_MORE,
116 MP_TOKEN_OP_PLUS,
117 MP_TOKEN_OP_MINUS,
118 MP_TOKEN_OP_STAR,
119 MP_TOKEN_OP_AT,
120 MP_TOKEN_OP_DBL_SLASH,
121 MP_TOKEN_OP_SLASH,
122 MP_TOKEN_OP_PERCENT,
123 MP_TOKEN_OP_DBL_STAR,
124
125 // Order of these 13 matches corresponding mp_binary_op_t operator
126 MP_TOKEN_DEL_PIPE_EQUAL,
127 MP_TOKEN_DEL_CARET_EQUAL,
128 MP_TOKEN_DEL_AMPERSAND_EQUAL,
129 MP_TOKEN_DEL_DBL_LESS_EQUAL,
130 MP_TOKEN_DEL_DBL_MORE_EQUAL,
131 MP_TOKEN_DEL_PLUS_EQUAL,
132 MP_TOKEN_DEL_MINUS_EQUAL,
133 MP_TOKEN_DEL_STAR_EQUAL,
134 MP_TOKEN_DEL_AT_EQUAL,
135 MP_TOKEN_DEL_DBL_SLASH_EQUAL,
136 MP_TOKEN_DEL_SLASH_EQUAL,
137 MP_TOKEN_DEL_PERCENT_EQUAL,
138 MP_TOKEN_DEL_DBL_STAR_EQUAL,
139
140 MP_TOKEN_DEL_PAREN_OPEN,
141 MP_TOKEN_DEL_PAREN_CLOSE,
142 MP_TOKEN_DEL_BRACKET_OPEN,
143 MP_TOKEN_DEL_BRACKET_CLOSE,
144 MP_TOKEN_DEL_BRACE_OPEN,
145 MP_TOKEN_DEL_BRACE_CLOSE,
146 MP_TOKEN_DEL_COMMA,
147 MP_TOKEN_DEL_COLON,
148 MP_TOKEN_DEL_PERIOD,
149 MP_TOKEN_DEL_SEMICOLON,
150 MP_TOKEN_DEL_EQUAL,
151 MP_TOKEN_DEL_MINUS_MORE,
152} mp_token_kind_t;
153
154// this data structure is exposed for efficiency
155// public members are: source_name, tok_line, tok_column, tok_kind, vstr
156typedef struct _mp_lexer_t {
157 qstr source_name; // name of source
158 mp_reader_t reader; // stream source
159
160 unichar chr0, chr1, chr2; // current cached characters from source
161
162 size_t line; // current source line
163 size_t column; // current source column
164
165 mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
166 mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
167
168 size_t alloc_indent_level;
169 size_t num_indent_level;
170 uint16_t *indent_level;
171
172 size_t tok_line; // token source line
173 size_t tok_column; // token source column
174 mp_token_kind_t tok_kind; // token kind
175 vstr_t vstr; // token data
176} mp_lexer_t;
177
178mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
179mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len);
180
181void mp_lexer_free(mp_lexer_t *lex);
182void mp_lexer_to_next(mp_lexer_t *lex);
183
184/******************************************************************/
185// platform specific import function; must be implemented for a specific port
186// TODO tidy up, rename, or put elsewhere
187
188typedef enum {
189 MP_IMPORT_STAT_NO_EXIST,
190 MP_IMPORT_STAT_DIR,
191 MP_IMPORT_STAT_FILE,
192} mp_import_stat_t;
193
194mp_import_stat_t mp_import_stat(const char *path);
195mp_lexer_t *mp_lexer_new_from_file(const char *filename);
196
197#if MICROPY_HELPER_LEXER_UNIX
198mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd);
199#endif
200
201#endif // MICROPY_INCLUDED_PY_LEXER_H
202