1 | // Scintilla source code edit control |
2 | /** @file LexAda.cxx |
3 | ** Lexer for Ada 95 |
4 | **/ |
5 | // Copyright 2002 by Sergey Koshcheyev <sergey.k@seznam.cz> |
6 | // The License.txt file describes the conditions under which this software may be distributed. |
7 | |
8 | #include <stdlib.h> |
9 | #include <string.h> |
10 | #include <stdio.h> |
11 | #include <stdarg.h> |
12 | #include <assert.h> |
13 | #include <ctype.h> |
14 | |
15 | #include <string> |
16 | #include <string_view> |
17 | |
18 | #include "ILexer.h" |
19 | #include "Scintilla.h" |
20 | #include "SciLexer.h" |
21 | |
22 | #include "WordList.h" |
23 | #include "LexAccessor.h" |
24 | #include "Accessor.h" |
25 | #include "StyleContext.h" |
26 | #include "CharacterSet.h" |
27 | #include "LexerModule.h" |
28 | |
29 | using namespace Lexilla; |
30 | |
31 | /* |
32 | * Interface |
33 | */ |
34 | |
35 | static void ColouriseDocument( |
36 | Sci_PositionU startPos, |
37 | Sci_Position length, |
38 | int initStyle, |
39 | WordList *keywordlists[], |
40 | Accessor &styler); |
41 | |
// Names of the word lists of this lexer; the array ends with a null entry.
static const char * const adaWordListDesc[] = {
	"Keywords",
	nullptr
};
46 | |
47 | LexerModule lmAda(SCLEX_ADA, ColouriseDocument, "ada" , NULL, adaWordListDesc); |
48 | |
49 | /* |
50 | * Implementation |
51 | */ |
52 | |
53 | // Functions that have apostropheStartsAttribute as a parameter set it according to whether |
54 | // an apostrophe encountered after processing the current token will start an attribute or |
55 | // a character literal. |
56 | static void ColouriseCharacter(StyleContext& sc, bool& apostropheStartsAttribute); |
57 | static void ColouriseComment(StyleContext& sc, bool& apostropheStartsAttribute); |
58 | static void ColouriseContext(StyleContext& sc, char chEnd, int stateEOL); |
59 | static void ColouriseDelimiter(StyleContext& sc, bool& apostropheStartsAttribute); |
60 | static void ColouriseLabel(StyleContext& sc, WordList& keywords, bool& apostropheStartsAttribute); |
61 | static void ColouriseNumber(StyleContext& sc, bool& apostropheStartsAttribute); |
62 | static void ColouriseString(StyleContext& sc, bool& apostropheStartsAttribute); |
63 | static void ColouriseWhiteSpace(StyleContext& sc, bool& apostropheStartsAttribute); |
64 | static void ColouriseWord(StyleContext& sc, WordList& keywords, bool& apostropheStartsAttribute); |
65 | |
66 | static inline bool IsDelimiterCharacter(int ch); |
67 | static inline bool IsSeparatorOrDelimiterCharacter(int ch); |
68 | static bool IsValidIdentifier(const std::string& identifier); |
69 | static bool IsValidNumber(const std::string& number); |
70 | static inline bool IsWordStartCharacter(int ch); |
71 | static inline bool IsWordCharacter(int ch); |
72 | |
73 | static void ColouriseCharacter(StyleContext& sc, bool& apostropheStartsAttribute) { |
74 | apostropheStartsAttribute = true; |
75 | |
76 | sc.SetState(SCE_ADA_CHARACTER); |
77 | |
78 | // Skip the apostrophe and one more character (so that '' is shown as non-terminated and ''' |
79 | // is handled correctly) |
80 | sc.Forward(); |
81 | sc.Forward(); |
82 | |
83 | ColouriseContext(sc, '\'', SCE_ADA_CHARACTEREOL); |
84 | } |
85 | |
86 | static void ColouriseContext(StyleContext& sc, char chEnd, int stateEOL) { |
87 | while (!sc.atLineEnd && !sc.Match(chEnd)) { |
88 | sc.Forward(); |
89 | } |
90 | |
91 | if (!sc.atLineEnd) { |
92 | sc.ForwardSetState(SCE_ADA_DEFAULT); |
93 | } else { |
94 | sc.ChangeState(stateEOL); |
95 | } |
96 | } |
97 | |
98 | static void (StyleContext& sc, bool& /*apostropheStartsAttribute*/) { |
99 | // Apostrophe meaning is not changed, but the parameter is present for uniformity |
100 | |
101 | sc.SetState(SCE_ADA_COMMENTLINE); |
102 | |
103 | while (!sc.atLineEnd) { |
104 | sc.Forward(); |
105 | } |
106 | } |
107 | |
108 | static void ColouriseDelimiter(StyleContext& sc, bool& apostropheStartsAttribute) { |
109 | apostropheStartsAttribute = sc.Match (')'); |
110 | sc.SetState(SCE_ADA_DELIMITER); |
111 | sc.ForwardSetState(SCE_ADA_DEFAULT); |
112 | } |
113 | |
114 | static void ColouriseLabel(StyleContext& sc, WordList& keywords, bool& apostropheStartsAttribute) { |
115 | apostropheStartsAttribute = false; |
116 | |
117 | sc.SetState(SCE_ADA_LABEL); |
118 | |
119 | // Skip "<<" |
120 | sc.Forward(); |
121 | sc.Forward(); |
122 | |
123 | std::string identifier; |
124 | |
125 | while (!sc.atLineEnd && !IsSeparatorOrDelimiterCharacter(sc.ch)) { |
126 | identifier += static_cast<char>(tolower(sc.ch)); |
127 | sc.Forward(); |
128 | } |
129 | |
130 | // Skip ">>" |
131 | if (sc.Match('>', '>')) { |
132 | sc.Forward(); |
133 | sc.Forward(); |
134 | } else { |
135 | sc.ChangeState(SCE_ADA_ILLEGAL); |
136 | } |
137 | |
138 | // If the name is an invalid identifier or a keyword, then make it invalid label |
139 | if (!IsValidIdentifier(identifier) || keywords.InList(identifier.c_str())) { |
140 | sc.ChangeState(SCE_ADA_ILLEGAL); |
141 | } |
142 | |
143 | sc.SetState(SCE_ADA_DEFAULT); |
144 | |
145 | } |
146 | |
147 | static void ColouriseNumber(StyleContext& sc, bool& apostropheStartsAttribute) { |
148 | apostropheStartsAttribute = true; |
149 | |
150 | std::string number; |
151 | sc.SetState(SCE_ADA_NUMBER); |
152 | |
153 | // Get all characters up to a delimiter or a separator, including points, but excluding |
154 | // double points (ranges). |
155 | while (!IsSeparatorOrDelimiterCharacter(sc.ch) || (sc.ch == '.' && sc.chNext != '.')) { |
156 | number += static_cast<char>(sc.ch); |
157 | sc.Forward(); |
158 | } |
159 | |
160 | // Special case: exponent with sign |
161 | if ((sc.chPrev == 'e' || sc.chPrev == 'E') && |
162 | (sc.ch == '+' || sc.ch == '-')) { |
163 | number += static_cast<char>(sc.ch); |
164 | sc.Forward (); |
165 | |
166 | while (!IsSeparatorOrDelimiterCharacter(sc.ch)) { |
167 | number += static_cast<char>(sc.ch); |
168 | sc.Forward(); |
169 | } |
170 | } |
171 | |
172 | if (!IsValidNumber(number)) { |
173 | sc.ChangeState(SCE_ADA_ILLEGAL); |
174 | } |
175 | |
176 | sc.SetState(SCE_ADA_DEFAULT); |
177 | } |
178 | |
179 | static void ColouriseString(StyleContext& sc, bool& apostropheStartsAttribute) { |
180 | apostropheStartsAttribute = true; |
181 | |
182 | sc.SetState(SCE_ADA_STRING); |
183 | sc.Forward(); |
184 | |
185 | ColouriseContext(sc, '"', SCE_ADA_STRINGEOL); |
186 | } |
187 | |
188 | static void ColouriseWhiteSpace(StyleContext& sc, bool& /*apostropheStartsAttribute*/) { |
189 | // Apostrophe meaning is not changed, but the parameter is present for uniformity |
190 | sc.SetState(SCE_ADA_DEFAULT); |
191 | sc.ForwardSetState(SCE_ADA_DEFAULT); |
192 | } |
193 | |
194 | static void ColouriseWord(StyleContext& sc, WordList& keywords, bool& apostropheStartsAttribute) { |
195 | apostropheStartsAttribute = true; |
196 | sc.SetState(SCE_ADA_IDENTIFIER); |
197 | |
198 | std::string word; |
199 | |
200 | while (!sc.atLineEnd && !IsSeparatorOrDelimiterCharacter(sc.ch)) { |
201 | word += static_cast<char>(tolower(sc.ch)); |
202 | sc.Forward(); |
203 | } |
204 | |
205 | if (!IsValidIdentifier(word)) { |
206 | sc.ChangeState(SCE_ADA_ILLEGAL); |
207 | |
208 | } else if (keywords.InList(word.c_str())) { |
209 | sc.ChangeState(SCE_ADA_WORD); |
210 | |
211 | if (word != "all" ) { |
212 | apostropheStartsAttribute = false; |
213 | } |
214 | } |
215 | |
216 | sc.SetState(SCE_ADA_DEFAULT); |
217 | } |
218 | |
219 | // |
220 | // ColouriseDocument |
221 | // |
222 | |
223 | static void ColouriseDocument( |
224 | Sci_PositionU startPos, |
225 | Sci_Position length, |
226 | int initStyle, |
227 | WordList *keywordlists[], |
228 | Accessor &styler) { |
229 | WordList &keywords = *keywordlists[0]; |
230 | |
231 | StyleContext sc(startPos, length, initStyle, styler); |
232 | |
233 | Sci_Position lineCurrent = styler.GetLine(startPos); |
234 | bool apostropheStartsAttribute = (styler.GetLineState(lineCurrent) & 1) != 0; |
235 | |
236 | while (sc.More()) { |
237 | if (sc.atLineEnd) { |
238 | // Go to the next line |
239 | sc.Forward(); |
240 | lineCurrent++; |
241 | |
242 | // Remember the line state for future incremental lexing |
243 | styler.SetLineState(lineCurrent, apostropheStartsAttribute); |
244 | |
245 | // Don't continue any styles on the next line |
246 | sc.SetState(SCE_ADA_DEFAULT); |
247 | } |
248 | |
249 | // Comments |
250 | if (sc.Match('-', '-')) { |
251 | ColouriseComment(sc, apostropheStartsAttribute); |
252 | |
253 | // Strings |
254 | } else if (sc.Match('"')) { |
255 | ColouriseString(sc, apostropheStartsAttribute); |
256 | |
257 | // Characters |
258 | } else if (sc.Match('\'') && !apostropheStartsAttribute) { |
259 | ColouriseCharacter(sc, apostropheStartsAttribute); |
260 | |
261 | // Labels |
262 | } else if (sc.Match('<', '<')) { |
263 | ColouriseLabel(sc, keywords, apostropheStartsAttribute); |
264 | |
265 | // Whitespace |
266 | } else if (IsASpace(sc.ch)) { |
267 | ColouriseWhiteSpace(sc, apostropheStartsAttribute); |
268 | |
269 | // Delimiters |
270 | } else if (IsDelimiterCharacter(sc.ch)) { |
271 | ColouriseDelimiter(sc, apostropheStartsAttribute); |
272 | |
273 | // Numbers |
274 | } else if (IsADigit(sc.ch) || sc.ch == '#') { |
275 | ColouriseNumber(sc, apostropheStartsAttribute); |
276 | |
277 | // Keywords or identifiers |
278 | } else { |
279 | ColouriseWord(sc, keywords, apostropheStartsAttribute); |
280 | } |
281 | } |
282 | |
283 | sc.Complete(); |
284 | } |
285 | |
// Returns true when ch is one of the delimiter characters recognised by this lexer:
// & ' ( ) * + , - . / : ; < = > |
static inline bool IsDelimiterCharacter(int ch) {
	constexpr std::string_view delimiters = "&'()*+,-./:;<=>|";

	// Values outside the ASCII range can't be delimiters; rejecting them first also
	// keeps the narrowing below from mapping a large value onto a delimiter.
	if (ch < 0 || ch > 0x7F) {
		return false;
	}

	return delimiters.find(static_cast<char>(ch)) != std::string_view::npos;
}
309 | |
310 | static inline bool IsSeparatorOrDelimiterCharacter(int ch) { |
311 | return IsASpace(ch) || IsDelimiterCharacter(ch); |
312 | } |
313 | |
314 | static bool IsValidIdentifier(const std::string& identifier) { |
315 | // First character can't be '_', so initialize the flag to true |
316 | bool lastWasUnderscore = true; |
317 | |
318 | size_t length = identifier.length(); |
319 | |
320 | // Zero-length identifiers are not valid (these can occur inside labels) |
321 | if (length == 0) { |
322 | return false; |
323 | } |
324 | |
325 | // Check for valid character at the start |
326 | if (!IsWordStartCharacter(identifier[0])) { |
327 | return false; |
328 | } |
329 | |
330 | // Check for only valid characters and no double underscores |
331 | for (size_t i = 0; i < length; i++) { |
332 | if (!IsWordCharacter(identifier[i]) || |
333 | (identifier[i] == '_' && lastWasUnderscore)) { |
334 | return false; |
335 | } |
336 | lastWasUnderscore = identifier[i] == '_'; |
337 | } |
338 | |
339 | // Check for underscore at the end |
340 | if (lastWasUnderscore == true) { |
341 | return false; |
342 | } |
343 | |
344 | // All checks passed |
345 | return true; |
346 | } |
347 | |
348 | static bool IsValidNumber(const std::string& number) { |
349 | size_t hashPos = number.find("#" ); |
350 | bool seenDot = false; |
351 | |
352 | size_t i = 0; |
353 | size_t length = number.length(); |
354 | |
355 | if (length == 0) |
356 | return false; // Just in case |
357 | |
358 | // Decimal number |
359 | if (hashPos == std::string::npos) { |
360 | bool canBeSpecial = false; |
361 | |
362 | for (; i < length; i++) { |
363 | if (number[i] == '_') { |
364 | if (!canBeSpecial) { |
365 | return false; |
366 | } |
367 | canBeSpecial = false; |
368 | } else if (number[i] == '.') { |
369 | if (!canBeSpecial || seenDot) { |
370 | return false; |
371 | } |
372 | canBeSpecial = false; |
373 | seenDot = true; |
374 | } else if (IsADigit(number[i])) { |
375 | canBeSpecial = true; |
376 | } else { |
377 | break; |
378 | } |
379 | } |
380 | |
381 | if (!canBeSpecial) |
382 | return false; |
383 | } else { |
384 | // Based number |
385 | bool canBeSpecial = false; |
386 | int base = 0; |
387 | |
388 | // Parse base |
389 | for (; i < length; i++) { |
390 | int ch = number[i]; |
391 | if (ch == '_') { |
392 | if (!canBeSpecial) |
393 | return false; |
394 | canBeSpecial = false; |
395 | } else if (IsADigit(ch)) { |
396 | base = base * 10 + (ch - '0'); |
397 | if (base > 16) |
398 | return false; |
399 | canBeSpecial = true; |
400 | } else if (ch == '#' && canBeSpecial) { |
401 | break; |
402 | } else { |
403 | return false; |
404 | } |
405 | } |
406 | |
407 | if (base < 2) |
408 | return false; |
409 | if (i == length) |
410 | return false; |
411 | |
412 | i++; // Skip over '#' |
413 | |
414 | // Parse number |
415 | canBeSpecial = false; |
416 | |
417 | for (; i < length; i++) { |
418 | int ch = tolower(number[i]); |
419 | |
420 | if (ch == '_') { |
421 | if (!canBeSpecial) { |
422 | return false; |
423 | } |
424 | canBeSpecial = false; |
425 | |
426 | } else if (ch == '.') { |
427 | if (!canBeSpecial || seenDot) { |
428 | return false; |
429 | } |
430 | canBeSpecial = false; |
431 | seenDot = true; |
432 | |
433 | } else if (IsADigit(ch)) { |
434 | if (ch - '0' >= base) { |
435 | return false; |
436 | } |
437 | canBeSpecial = true; |
438 | |
439 | } else if (ch >= 'a' && ch <= 'f') { |
440 | if (ch - 'a' + 10 >= base) { |
441 | return false; |
442 | } |
443 | canBeSpecial = true; |
444 | |
445 | } else if (ch == '#' && canBeSpecial) { |
446 | break; |
447 | |
448 | } else { |
449 | return false; |
450 | } |
451 | } |
452 | |
453 | if (i == length) { |
454 | return false; |
455 | } |
456 | |
457 | i++; |
458 | } |
459 | |
460 | // Exponent (optional) |
461 | if (i < length) { |
462 | if (number[i] != 'e' && number[i] != 'E') |
463 | return false; |
464 | |
465 | i++; // Move past 'E' |
466 | |
467 | if (i == length) { |
468 | return false; |
469 | } |
470 | |
471 | if (number[i] == '+') |
472 | i++; |
473 | else if (number[i] == '-') { |
474 | if (seenDot) { |
475 | i++; |
476 | } else { |
477 | return false; // Integer literals should not have negative exponents |
478 | } |
479 | } |
480 | |
481 | if (i == length) { |
482 | return false; |
483 | } |
484 | |
485 | bool canBeSpecial = false; |
486 | |
487 | for (; i < length; i++) { |
488 | if (number[i] == '_') { |
489 | if (!canBeSpecial) { |
490 | return false; |
491 | } |
492 | canBeSpecial = false; |
493 | } else if (IsADigit(number[i])) { |
494 | canBeSpecial = true; |
495 | } else { |
496 | return false; |
497 | } |
498 | } |
499 | |
500 | if (!canBeSpecial) |
501 | return false; |
502 | } |
503 | |
504 | // if i == length, number was parsed successfully. |
505 | return i == length; |
506 | } |
507 | |
508 | static inline bool IsWordCharacter(int ch) { |
509 | return IsWordStartCharacter(ch) || IsADigit(ch); |
510 | } |
511 | |
512 | static inline bool IsWordStartCharacter(int ch) { |
513 | return (IsASCII(ch) && isalpha(ch)) || ch == '_'; |
514 | } |
515 | |