1 | /* Copyright JS Foundation and other contributors, http://js.foundation |
2 | * |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); |
4 | * you may not use this file except in compliance with the License. |
5 | * You may obtain a copy of the License at |
6 | * |
7 | * http://www.apache.org/licenses/LICENSE-2.0 |
8 | * |
9 | * Unless required by applicable law or agreed to in writing, software |
10 | * distributed under the License is distributed on an "AS IS" BASIS |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | * See the License for the specific language governing permissions and |
13 | * limitations under the License. |
14 | */ |
15 | |
16 | #ifndef LIT_CHAR_HELPERS_H |
17 | #define LIT_CHAR_HELPERS_H |
18 | |
19 | #include "lit-globals.h" |
20 | |
21 | /** |
22 | * Sentinel value denoting an invalid character code point (all 32 bits set, i.e. above the Unicode maximum of 0x10FFFF) |
23 | */ |
24 | #define LIT_INVALID_CP 0xFFFFFFFF |
25 | |
26 | /** |
27 | * Marks that the result of lit_char_to_lower_case/lit_char_to_upper_case consists of more than a single code unit |
28 | */ |
29 | #define LIT_MULTIPLE_CU 0xFFFFFFFE |
30 | |
31 | /* |
32 | * Format control characters (ECMA-262 v5, Table 1), each given as an ecma_char_t constant |
33 | */ |
34 | #define LIT_CHAR_ZWNJ ((ecma_char_t) 0x200C) /* zero width non-joiner (U+200C) */ |
35 | #define LIT_CHAR_ZWJ ((ecma_char_t) 0x200D) /* zero width joiner (U+200D) */ |
36 | #define LIT_CHAR_BOM ((ecma_char_t) 0xFEFF) /* byte order mark (U+FEFF) */ |
37 | |
38 | /* |
39 | * Whitespace characters (ECMA-262 v5, Table 2), each given as an ecma_char_t constant |
40 | */ |
41 | #define LIT_CHAR_TAB ((ecma_char_t) 0x0009) /* tab */ |
42 | #define LIT_CHAR_VTAB ((ecma_char_t) 0x000B) /* vertical tab */ |
43 | #define LIT_CHAR_FF ((ecma_char_t) 0x000C) /* form feed */ |
44 | #define LIT_CHAR_SP ((ecma_char_t) 0x0020) /* space */ |
45 | #define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */ |
46 | #define LIT_CHAR_MVS ((ecma_char_t) 0x180E) /* mongolian vowel separator */ |
47 | /* LIT_CHAR_BOM also belongs to this group; it is defined above with the format control characters */ |
48 | |
49 | bool lit_char_is_white_space (lit_code_point_t c); /* note: takes a lit_code_point_t, not an ecma_char_t */ |
50 | |
51 | /* |
52 | * Line terminator characters (ECMA-262 v5, Table 3): LF, CR, LS and PS |
53 | */ |
54 | #define LIT_CHAR_LF ((ecma_char_t) 0x000A) /* line feed */ |
55 | #define LIT_CHAR_CR ((ecma_char_t) 0x000D) /* carriage return */ |
56 | #define LIT_CHAR_LS ((ecma_char_t) 0x2028) /* line separator */ |
57 | #define LIT_CHAR_PS ((ecma_char_t) 0x2029) /* paragraph separator */ |
58 | |
59 | bool lit_char_is_line_terminator (ecma_char_t c); /* note: takes a single ecma_char_t */ |
60 | |
61 | /* |
62 | * Characters of the string single character escape sequences (ECMA-262 v5, Table 4) |
63 | */ |
64 | #define LIT_CHAR_BS ((ecma_char_t) 0x0008) /* backspace */ |
65 | /* LIT_CHAR_TAB is defined above (whitespace characters) */ |
66 | /* LIT_CHAR_LF is defined above (line terminator characters) */ |
67 | /* LIT_CHAR_VTAB is defined above (whitespace characters) */ |
68 | /* LIT_CHAR_FF is defined above (whitespace characters) */ |
69 | /* LIT_CHAR_CR is defined above (line terminator characters) */ |
70 | #define LIT_CHAR_DOUBLE_QUOTE ((ecma_char_t) '"') /* double quote */ |
71 | #define LIT_CHAR_SINGLE_QUOTE ((ecma_char_t) '\'') /* single quote */ |
72 | #define LIT_CHAR_BACKSLASH ((ecma_char_t) '\\') /* reverse solidus (backslash) */ |
73 | |
74 | /* |
75 | * Characters that delimit comments (ECMA-262 v5, 7.4) |
76 | */ |
77 | #define LIT_CHAR_SLASH ((ecma_char_t) '/') /* solidus */ |
78 | #define LIT_CHAR_ASTERISK ((ecma_char_t) '*') /* asterisk */ |
79 | |
80 | /* |
81 | * Identifier name characters (ECMA-262 v5, 7.6) and the identifier start / part predicates |
82 | */ |
83 | #define LIT_CHAR_DOLLAR_SIGN ((ecma_char_t) '$') /* dollar sign */ |
84 | #define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */ |
85 | /* LIT_CHAR_BACKSLASH is defined above (escape sequence characters) */ |
86 | |
87 | bool lit_code_point_is_identifier_start (lit_code_point_t code_point); /* both predicates take a lit_code_point_t */ |
88 | bool lit_code_point_is_identifier_part (lit_code_point_t code_point); |
89 | |
90 | /* |
91 | * Punctuator characters (ECMA-262 v5, 7.7); only single-character constants are defined in this group |
92 | */ |
93 | #define LIT_CHAR_LEFT_BRACE ((ecma_char_t) '{') /* left curly bracket */ |
94 | #define LIT_CHAR_RIGHT_BRACE ((ecma_char_t) '}') /* right curly bracket */ |
95 | #define LIT_CHAR_LEFT_PAREN ((ecma_char_t) '(') /* left parenthesis */ |
96 | #define LIT_CHAR_RIGHT_PAREN ((ecma_char_t) ')') /* right parenthesis */ |
97 | #define LIT_CHAR_LEFT_SQUARE ((ecma_char_t) '[') /* left square bracket */ |
98 | #define LIT_CHAR_RIGHT_SQUARE ((ecma_char_t) ']') /* right square bracket */ |
99 | #define LIT_CHAR_DOT ((ecma_char_t) '.') /* dot */ |
100 | #define LIT_CHAR_SEMICOLON ((ecma_char_t) ';') /* semicolon */ |
101 | #define LIT_CHAR_COMMA ((ecma_char_t) ',') /* comma */ |
102 | #define LIT_CHAR_LESS_THAN ((ecma_char_t) '<') /* less-than sign */ |
103 | #define LIT_CHAR_GREATER_THAN ((ecma_char_t) '>') /* greater-than sign */ |
104 | #define LIT_CHAR_EQUALS ((ecma_char_t) '=') /* equals sign */ |
105 | #define LIT_CHAR_PLUS ((ecma_char_t) '+') /* plus sign */ |
106 | #define LIT_CHAR_MINUS ((ecma_char_t) '-') /* hyphen-minus */ |
107 | /* LIT_CHAR_ASTERISK is defined above (comment characters) */ |
108 | #define LIT_CHAR_PERCENT ((ecma_char_t) '%') /* percent sign */ |
109 | #define LIT_CHAR_AMPERSAND ((ecma_char_t) '&') /* ampersand */ |
110 | #define LIT_CHAR_VLINE ((ecma_char_t) '|') /* vertical line */ |
111 | #define LIT_CHAR_CIRCUMFLEX ((ecma_char_t) '^') /* circumflex accent */ |
112 | #define LIT_CHAR_EXCLAMATION ((ecma_char_t) '!') /* exclamation mark */ |
113 | #define LIT_CHAR_TILDE ((ecma_char_t) '~') /* tilde */ |
114 | #define LIT_CHAR_QUESTION ((ecma_char_t) '?') /* question mark */ |
115 | #define LIT_CHAR_COLON ((ecma_char_t) ':') /* colon */ |
116 | |
117 | /* |
118 | * Special characters for String.prototype.replace (only the grave accent is defined in this group). |
119 | */ |
120 | #define LIT_CHAR_GRAVE_ACCENT ((ecma_char_t) '`') /* grave accent */ |
121 | |
122 | /** |
123 | * Uppercase ASCII letters 'A' to 'Z', one ecma_char_t constant per letter |
124 | */ |
125 | #define LIT_CHAR_UPPERCASE_A ((ecma_char_t) 'A') |
126 | #define LIT_CHAR_UPPERCASE_B ((ecma_char_t) 'B') |
127 | #define LIT_CHAR_UPPERCASE_C ((ecma_char_t) 'C') |
128 | #define LIT_CHAR_UPPERCASE_D ((ecma_char_t) 'D') |
129 | #define LIT_CHAR_UPPERCASE_E ((ecma_char_t) 'E') |
130 | #define LIT_CHAR_UPPERCASE_F ((ecma_char_t) 'F') |
131 | #define LIT_CHAR_UPPERCASE_G ((ecma_char_t) 'G') |
132 | #define LIT_CHAR_UPPERCASE_H ((ecma_char_t) 'H') |
133 | #define LIT_CHAR_UPPERCASE_I ((ecma_char_t) 'I') |
134 | #define LIT_CHAR_UPPERCASE_J ((ecma_char_t) 'J') |
135 | #define LIT_CHAR_UPPERCASE_K ((ecma_char_t) 'K') |
136 | #define LIT_CHAR_UPPERCASE_L ((ecma_char_t) 'L') |
137 | #define LIT_CHAR_UPPERCASE_M ((ecma_char_t) 'M') |
138 | #define LIT_CHAR_UPPERCASE_N ((ecma_char_t) 'N') |
139 | #define LIT_CHAR_UPPERCASE_O ((ecma_char_t) 'O') |
140 | #define LIT_CHAR_UPPERCASE_P ((ecma_char_t) 'P') |
141 | #define LIT_CHAR_UPPERCASE_Q ((ecma_char_t) 'Q') |
142 | #define LIT_CHAR_UPPERCASE_R ((ecma_char_t) 'R') |
143 | #define LIT_CHAR_UPPERCASE_S ((ecma_char_t) 'S') |
144 | #define LIT_CHAR_UPPERCASE_T ((ecma_char_t) 'T') |
145 | #define LIT_CHAR_UPPERCASE_U ((ecma_char_t) 'U') |
146 | #define LIT_CHAR_UPPERCASE_V ((ecma_char_t) 'V') |
147 | #define LIT_CHAR_UPPERCASE_W ((ecma_char_t) 'W') |
148 | #define LIT_CHAR_UPPERCASE_X ((ecma_char_t) 'X') |
149 | #define LIT_CHAR_UPPERCASE_Y ((ecma_char_t) 'Y') |
150 | #define LIT_CHAR_UPPERCASE_Z ((ecma_char_t) 'Z') |
151 | |
152 | /** |
153 | * Lowercase ASCII letters 'a' to 'z', one ecma_char_t constant per letter |
154 | */ |
155 | #define LIT_CHAR_LOWERCASE_A ((ecma_char_t) 'a') |
156 | #define LIT_CHAR_LOWERCASE_B ((ecma_char_t) 'b') |
157 | #define LIT_CHAR_LOWERCASE_C ((ecma_char_t) 'c') |
158 | #define LIT_CHAR_LOWERCASE_D ((ecma_char_t) 'd') |
159 | #define LIT_CHAR_LOWERCASE_E ((ecma_char_t) 'e') |
160 | #define LIT_CHAR_LOWERCASE_F ((ecma_char_t) 'f') |
161 | #define LIT_CHAR_LOWERCASE_G ((ecma_char_t) 'g') |
162 | #define LIT_CHAR_LOWERCASE_H ((ecma_char_t) 'h') |
163 | #define LIT_CHAR_LOWERCASE_I ((ecma_char_t) 'i') |
164 | #define LIT_CHAR_LOWERCASE_J ((ecma_char_t) 'j') |
165 | #define LIT_CHAR_LOWERCASE_K ((ecma_char_t) 'k') |
166 | #define LIT_CHAR_LOWERCASE_L ((ecma_char_t) 'l') |
167 | #define LIT_CHAR_LOWERCASE_M ((ecma_char_t) 'm') |
168 | #define LIT_CHAR_LOWERCASE_N ((ecma_char_t) 'n') |
169 | #define LIT_CHAR_LOWERCASE_O ((ecma_char_t) 'o') |
170 | #define LIT_CHAR_LOWERCASE_P ((ecma_char_t) 'p') |
171 | #define LIT_CHAR_LOWERCASE_Q ((ecma_char_t) 'q') |
172 | #define LIT_CHAR_LOWERCASE_R ((ecma_char_t) 'r') |
173 | #define LIT_CHAR_LOWERCASE_S ((ecma_char_t) 's') |
174 | #define LIT_CHAR_LOWERCASE_T ((ecma_char_t) 't') |
175 | #define LIT_CHAR_LOWERCASE_U ((ecma_char_t) 'u') |
176 | #define LIT_CHAR_LOWERCASE_V ((ecma_char_t) 'v') |
177 | #define LIT_CHAR_LOWERCASE_W ((ecma_char_t) 'w') |
178 | #define LIT_CHAR_LOWERCASE_X ((ecma_char_t) 'x') |
179 | #define LIT_CHAR_LOWERCASE_Y ((ecma_char_t) 'y') |
180 | #define LIT_CHAR_LOWERCASE_Z ((ecma_char_t) 'z') |
181 | |
182 | /** |
183 | * ASCII decimal digits '0' to '9', one ecma_char_t constant per digit |
184 | */ |
185 | #define LIT_CHAR_0 ((ecma_char_t) '0') |
186 | #define LIT_CHAR_1 ((ecma_char_t) '1') |
187 | #define LIT_CHAR_2 ((ecma_char_t) '2') |
188 | #define LIT_CHAR_3 ((ecma_char_t) '3') |
189 | #define LIT_CHAR_4 ((ecma_char_t) '4') |
190 | #define LIT_CHAR_5 ((ecma_char_t) '5') |
191 | #define LIT_CHAR_6 ((ecma_char_t) '6') |
192 | #define LIT_CHAR_7 ((ecma_char_t) '7') |
193 | #define LIT_CHAR_8 ((ecma_char_t) '8') |
194 | #define LIT_CHAR_9 ((ecma_char_t) '9') |
195 | |
196 | /** |
197 | * ASCII character ranges; each pair names the first (_BEGIN) and the last (_END) character belonging to the range |
198 | */ |
199 | #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters range: 'A' to 'Z' */ |
200 | #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_END LIT_CHAR_UPPERCASE_Z |
201 | |
202 | #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters range: 'a' to 'z' */ |
203 | #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_END LIT_CHAR_LOWERCASE_Z |
204 | |
205 | #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters for |
206 | * hexadecimal digits range: 'A' to 'F' */ |
207 | #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END LIT_CHAR_UPPERCASE_F |
208 | |
209 | #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters for |
210 | * hexadecimal digits range: 'a' to 'f' */ |
211 | #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END LIT_CHAR_LOWERCASE_F |
212 | |
213 | #define LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN LIT_CHAR_0 /* octal digits range: '0' to '7' */ |
214 | #define LIT_CHAR_ASCII_OCTAL_DIGITS_END LIT_CHAR_7 |
215 | |
216 | #define LIT_CHAR_ASCII_DIGITS_BEGIN LIT_CHAR_0 /* decimal digits range: '0' to '9' */ |
217 | #define LIT_CHAR_ASCII_DIGITS_END LIT_CHAR_9 |
218 | |
219 | #define LEXER_TO_ASCII_LOWERCASE(character) ((character) | LIT_CHAR_SP) /* ORs in LIT_CHAR_SP (0x0020), which maps ASCII 'A'-'Z' to 'a'-'z'; the argument is not checked to be a letter, so other inputs just get that bit set */ |
220 | |
221 | bool lit_char_is_octal_digit (ecma_char_t c); /* digit predicates: each takes one ecma_char_t and returns bool; definitions are not in this header */ |
222 | bool lit_char_is_decimal_digit (ecma_char_t c); |
223 | bool lit_char_is_hex_digit (ecma_char_t c); |
224 | #if ENABLED (JERRY_ESNEXT) |
225 | bool lit_char_is_binary_digit (ecma_char_t c); /* declared only when JERRY_ESNEXT is enabled */ |
226 | #endif /* ENABLED (JERRY_ESNEXT) */ |
227 | void lit_char_unicode_escape (ecma_stringbuilder_t *builder_p, ecma_char_t c); /* returns nothing; NOTE(review): presumably appends its output to builder_p - confirm in the definition */ |
228 | uint32_t lit_char_hex_to_int (ecma_char_t c); |
229 | size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point); /* NOTE(review): returned size_t is presumably the number of bytes written to dst_p - confirm */ |
230 | size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point); |
231 | void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p); /* source_p is const; dst_p is the only writable buffer */ |
232 | uint32_t lit_char_hex_lookup (const lit_utf8_byte_t *buf_p, const lit_utf8_byte_t *const buf_end_p, uint32_t lookup); |
233 | uint32_t lit_parse_decimal (const lit_utf8_byte_t **buffer_p, const lit_utf8_byte_t *const buffer_end_p); /* buffer_p is a pointer to the caller's cursor; NOTE(review): presumably advanced past the parsed digits - confirm */ |
234 | bool lit_find_char_in_string (ecma_string_t *str_p, lit_utf8_byte_t c); |
235 | |
236 | /** |
237 | * Null character (value 0), given as an ecma_char_t constant |
238 | */ |
239 | #define LIT_CHAR_NULL ((ecma_char_t) '\0') |
240 | |
241 | /* |
242 | * Part of the IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3); the predicate takes a lit_code_point_t |
243 | */ |
244 | bool lit_char_is_word_char (lit_code_point_t c); |
245 | |
246 | /* |
247 | * Utility functions for uppercasing / lowercasing; both conversions take a code point plus an ecma_stringbuilder_t pointer and return a lit_code_point_t |
248 | */ |
249 | |
250 | lit_code_point_t lit_char_to_lower_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p); |
251 | lit_code_point_t lit_char_to_upper_case (lit_code_point_t cp, ecma_stringbuilder_t *builder_p); |
252 | |
253 | #if ENABLED (JERRY_ESNEXT) |
254 | bool lit_char_fold_to_lower (lit_code_point_t cp); /* fold predicates are declared only when JERRY_ESNEXT is enabled */ |
255 | bool lit_char_fold_to_upper (lit_code_point_t cp); |
256 | #endif /* ENABLED (JERRY_ESNEXT) */ |
257 | |
258 | #endif /* !LIT_CHAR_HELPERS_H */ |
259 | |