1// Scintilla source code edit control
2// Encoding: UTF-8
3// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
4// The License.txt file describes the conditions under which this software may be distributed.
5/** @file LexErlang.cxx
6 ** Lexer for Erlang.
7 ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com)
 ** Originally written by Peter-Henry Mander,
9 ** based on Matlab lexer by José Fonseca.
10 **/
11
12#include <stdlib.h>
13#include <string.h>
14#include <stdio.h>
15#include <stdarg.h>
16#include <assert.h>
17#include <ctype.h>
18
19#include <string>
20#include <string_view>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25
26#include "WordList.h"
27#include "LexAccessor.h"
28#include "Accessor.h"
29#include "StyleContext.h"
30#include "CharacterSet.h"
31#include "LexerModule.h"
32
33using namespace Lexilla;
34
/**
 * Test whether @a ch is a valid digit for a number written in base @a radix.
 *
 * Digits are '0'-'9' followed by the letters 'A'-'Z' (case-insensitive)
 * standing for the values 10-35.
 *
 * Classification is done with plain ASCII range checks rather than the
 * <ctype.h> functions: @a ch may be any int (for instance a code point
 * above 255, or a negative value), for which isdigit()/isalnum() have
 * undefined behaviour, and their result also depends on the C locale.
 *
 * @param radix  Numeric base; only 2..36 is supported.
 * @param ch     Character to classify.
 * @return 1 if @a ch is a digit whose value is below @a radix, 0 otherwise
 *         (including when @a radix is outside 2..36).
 */
static int is_radix(int radix, int ch) {
	if (radix < 2 || radix > 36)
		return 0;

	int digit;
	if (ch >= '0' && ch <= '9') {
		digit = ch - '0';
	} else if (ch >= 'a' && ch <= 'z') {
		digit = ch - 'a' + 10;
	} else if (ch >= 'A' && ch <= 'Z') {
		digit = ch - 'A' + 10;
	} else {
		// Not an ASCII digit or letter: never part of a based integer.
		return 0;
	}

	return (digit < radix);
}
51
/**
 * Sub-state of the Erlang colouriser while it is inside a token whose
 * final style is only decided once the token ends.
 * STATE_NULL means no such token is in progress.
 */
enum atom_parse_state_t {
	STATE_NULL,             // no pending token

	// Comments
	COMMENT,                // '%' comment
	COMMENT_FUNCTION,       // '%%' comment
	COMMENT_MODULE,         // '%%%' comment
	COMMENT_DOC,            // '@' documentation tag inside a comment
	COMMENT_DOC_MACRO,      // '{@' documentation macro inside a comment

	// Atoms and node names
	ATOM_UNQUOTED,
	ATOM_QUOTED,
	NODE_NAME_UNQUOTED,
	NODE_NAME_QUOTED,

	// Macros ('?' prefix)
	MACRO_START,
	MACRO_UNQUOTED,
	MACRO_QUOTED,

	// Records ('#' prefix)
	RECORD_START,
	RECORD_UNQUOTED,
	RECORD_QUOTED,

	// Numbers
	NUMERAL_START,          // leading decimal digits (may become a radix)
	NUMERAL_BASE_VALUE,     // digits after "radix#"
	NUMERAL_FLOAT,          // fractional part
	NUMERAL_EXPONENT,       // exponent part

	// '-' introduced attribute / directive
	PREPROCESSOR
};
75
/**
 * True when @a ch can appear inside an unquoted word: an alphanumeric
 * character below 0x80, or an underscore. A space is never a word character.
 */
static inline bool IsAWordChar(const int ch) {
	if (ch >= 0x80 || ch == ' ')
		return false;
	return ch == '_' || isalnum(ch);
}
79
80static void ColouriseErlangDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
81 WordList *keywordlists[], Accessor &styler) {
82
83 StyleContext sc(startPos, length, initStyle, styler);
84 WordList &reservedWords = *keywordlists[0];
85 WordList &erlangBIFs = *keywordlists[1];
86 WordList &erlangPreproc = *keywordlists[2];
87 WordList &erlangModulesAtt = *keywordlists[3];
88 WordList &erlangDoc = *keywordlists[4];
89 WordList &erlangDocMacro = *keywordlists[5];
90 int radix_digits = 0;
91 int exponent_digits = 0;
92 atom_parse_state_t parse_state = STATE_NULL;
93 atom_parse_state_t old_parse_state = STATE_NULL;
94 bool to_late_to_comment = false;
95 char cur[100];
96 int old_style = SCE_ERLANG_DEFAULT;
97
98 styler.StartAt(startPos);
99
100 for (; sc.More(); sc.Forward()) {
101 int style = SCE_ERLANG_DEFAULT;
102 if (STATE_NULL != parse_state) {
103
104 switch (parse_state) {
105
106 case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break;
107
108 /* COMMENTS ------------------------------------------------------*/
109 case COMMENT : {
110 if (sc.ch != '%') {
111 to_late_to_comment = true;
112 } else if (!to_late_to_comment && sc.ch == '%') {
113 // Switch to comment level 2 (Function)
114 sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION);
115 old_style = SCE_ERLANG_COMMENT_FUNCTION;
116 parse_state = COMMENT_FUNCTION;
117 sc.Forward();
118 }
119 }
120 // V--- Falling through!
121 // Falls through.
122 case COMMENT_FUNCTION : {
123 if (sc.ch != '%') {
124 to_late_to_comment = true;
125 } else if (!to_late_to_comment && sc.ch == '%') {
126 // Switch to comment level 3 (Module)
127 sc.ChangeState(SCE_ERLANG_COMMENT_MODULE);
128 old_style = SCE_ERLANG_COMMENT_MODULE;
129 parse_state = COMMENT_MODULE;
130 sc.Forward();
131 }
132 }
133 // V--- Falling through!
134 // Falls through.
135 case COMMENT_MODULE : {
136 if (parse_state != COMMENT) {
137 // Search for comment documentation
138 if (sc.chNext == '@') {
139 old_parse_state = parse_state;
140 parse_state = ('{' == sc.ch)
141 ? COMMENT_DOC_MACRO
142 : COMMENT_DOC;
143 sc.ForwardSetState(sc.state);
144 }
145 }
146
147 // All comments types fall here.
148 if (sc.MatchLineEnd()) {
149 to_late_to_comment = false;
150 sc.SetState(SCE_ERLANG_DEFAULT);
151 parse_state = STATE_NULL;
152 }
153 } break;
154
155 case COMMENT_DOC :
156 // V--- Falling through!
157 case COMMENT_DOC_MACRO : {
158
159 if (!isalnum(sc.ch)) {
160 // Try to match documentation comment
161 sc.GetCurrent(cur, sizeof(cur));
162
163 if (parse_state == COMMENT_DOC_MACRO
164 && erlangDocMacro.InList(cur)) {
165 sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO);
166 while (sc.ch != '}' && !sc.atLineEnd)
167 sc.Forward();
168 } else if (erlangDoc.InList(cur)) {
169 sc.ChangeState(SCE_ERLANG_COMMENT_DOC);
170 } else {
171 sc.ChangeState(old_style);
172 }
173
174 // Switch back to old state
175 sc.SetState(old_style);
176 parse_state = old_parse_state;
177 }
178
179 if (sc.MatchLineEnd()) {
180 to_late_to_comment = false;
181 sc.ChangeState(old_style);
182 sc.SetState(SCE_ERLANG_DEFAULT);
183 parse_state = STATE_NULL;
184 }
185 } break;
186
187 /* -------------------------------------------------------------- */
188 /* Atoms ---------------------------------------------------------*/
189 case ATOM_UNQUOTED : {
190 if ('@' == sc.ch){
191 parse_state = NODE_NAME_UNQUOTED;
192 } else if (sc.ch == ':') {
193 // Searching for module name
194 if (sc.chNext == ' ') {
195 // error
196 sc.ChangeState(SCE_ERLANG_UNKNOWN);
197 parse_state = STATE_NULL;
198 } else {
199 sc.Forward();
200 if (isalnum(sc.ch) || (sc.ch == '\'')) {
201 sc.GetCurrent(cur, sizeof(cur));
202 sc.ChangeState(SCE_ERLANG_MODULES);
203 sc.SetState(SCE_ERLANG_MODULES);
204 }
205 if (sc.ch == '\'') {
206 parse_state = ATOM_QUOTED;
207 }
208
209 }
210 } else if (!IsAWordChar(sc.ch)) {
211
212 sc.GetCurrent(cur, sizeof(cur));
213 if (reservedWords.InList(cur)) {
214 style = SCE_ERLANG_KEYWORD;
215 } else if (erlangBIFs.InList(cur)
216 && strcmp(cur,"erlang:")){
217 style = SCE_ERLANG_BIFS;
218 } else if (sc.ch == '(' || '/' == sc.ch){
219 style = SCE_ERLANG_FUNCTION_NAME;
220 } else {
221 style = SCE_ERLANG_ATOM;
222 }
223
224 sc.ChangeState(style);
225 sc.SetState(SCE_ERLANG_DEFAULT);
226 parse_state = STATE_NULL;
227 }
228
229 } break;
230
231 case ATOM_QUOTED : {
232 if ( '@' == sc.ch ){
233 parse_state = NODE_NAME_QUOTED;
234 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
235 sc.ChangeState(SCE_ERLANG_ATOM_QUOTED);
236 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
237 parse_state = STATE_NULL;
238 }
239 } break;
240
241 /* -------------------------------------------------------------- */
242 /* Node names ----------------------------------------------------*/
243 case NODE_NAME_UNQUOTED : {
244 if ('@' == sc.ch) {
245 sc.SetState(SCE_ERLANG_DEFAULT);
246 parse_state = STATE_NULL;
247 } else if (!IsAWordChar(sc.ch)) {
248 sc.ChangeState(SCE_ERLANG_NODE_NAME);
249 sc.SetState(SCE_ERLANG_DEFAULT);
250 parse_state = STATE_NULL;
251 }
252 } break;
253
254 case NODE_NAME_QUOTED : {
255 if ('@' == sc.ch) {
256 sc.SetState(SCE_ERLANG_DEFAULT);
257 parse_state = STATE_NULL;
258 } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
259 sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED);
260 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
261 parse_state = STATE_NULL;
262 }
263 } break;
264
265 /* -------------------------------------------------------------- */
266 /* Records -------------------------------------------------------*/
267 case RECORD_START : {
268 if ('\'' == sc.ch) {
269 parse_state = RECORD_QUOTED;
270 } else if (isalpha(sc.ch) && islower(sc.ch)) {
271 parse_state = RECORD_UNQUOTED;
272 } else { // error
273 sc.SetState(SCE_ERLANG_DEFAULT);
274 parse_state = STATE_NULL;
275 }
276 } break;
277
278 case RECORD_UNQUOTED : {
279 if (!IsAWordChar(sc.ch)) {
280 sc.ChangeState(SCE_ERLANG_RECORD);
281 sc.SetState(SCE_ERLANG_DEFAULT);
282 parse_state = STATE_NULL;
283 }
284 } break;
285
286 case RECORD_QUOTED : {
287 if ('\'' == sc.ch && '\\' != sc.chPrev) {
288 sc.ChangeState(SCE_ERLANG_RECORD_QUOTED);
289 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
290 parse_state = STATE_NULL;
291 }
292 } break;
293
294 /* -------------------------------------------------------------- */
295 /* Macros --------------------------------------------------------*/
296 case MACRO_START : {
297 if ('\'' == sc.ch) {
298 parse_state = MACRO_QUOTED;
299 } else if (isalpha(sc.ch)) {
300 parse_state = MACRO_UNQUOTED;
301 } else { // error
302 sc.SetState(SCE_ERLANG_DEFAULT);
303 parse_state = STATE_NULL;
304 }
305 } break;
306
307 case MACRO_UNQUOTED : {
308 if (!IsAWordChar(sc.ch)) {
309 sc.ChangeState(SCE_ERLANG_MACRO);
310 sc.SetState(SCE_ERLANG_DEFAULT);
311 parse_state = STATE_NULL;
312 }
313 } break;
314
315 case MACRO_QUOTED : {
316 if ('\'' == sc.ch && '\\' != sc.chPrev) {
317 sc.ChangeState(SCE_ERLANG_MACRO_QUOTED);
318 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
319 parse_state = STATE_NULL;
320 }
321 } break;
322
323 /* -------------------------------------------------------------- */
324 /* Numerics ------------------------------------------------------*/
325 /* Simple integer */
326 case NUMERAL_START : {
327 if (isdigit(sc.ch)) {
328 radix_digits *= 10;
329 radix_digits += sc.ch - '0'; // Assuming ASCII here!
330 } else if ('#' == sc.ch) {
331 if (2 > radix_digits || 36 < radix_digits) {
332 sc.SetState(SCE_ERLANG_DEFAULT);
333 parse_state = STATE_NULL;
334 } else {
335 parse_state = NUMERAL_BASE_VALUE;
336 }
337 } else if ('.' == sc.ch && isdigit(sc.chNext)) {
338 radix_digits = 0;
339 parse_state = NUMERAL_FLOAT;
340 } else if ('e' == sc.ch || 'E' == sc.ch) {
341 exponent_digits = 0;
342 parse_state = NUMERAL_EXPONENT;
343 } else {
344 radix_digits = 0;
345 sc.ChangeState(SCE_ERLANG_NUMBER);
346 sc.SetState(SCE_ERLANG_DEFAULT);
347 parse_state = STATE_NULL;
348 }
349 } break;
350
351 /* Integer in other base than 10 (x#yyy) */
352 case NUMERAL_BASE_VALUE : {
353 if (!is_radix(radix_digits,sc.ch)) {
354 radix_digits = 0;
355
356 if (!isalnum(sc.ch))
357 sc.ChangeState(SCE_ERLANG_NUMBER);
358
359 sc.SetState(SCE_ERLANG_DEFAULT);
360 parse_state = STATE_NULL;
361 }
362 } break;
363
364 /* Float (x.yyy) */
365 case NUMERAL_FLOAT : {
366 if ('e' == sc.ch || 'E' == sc.ch) {
367 exponent_digits = 0;
368 parse_state = NUMERAL_EXPONENT;
369 } else if (!isdigit(sc.ch)) {
370 sc.ChangeState(SCE_ERLANG_NUMBER);
371 sc.SetState(SCE_ERLANG_DEFAULT);
372 parse_state = STATE_NULL;
373 }
374 } break;
375
376 /* Exponent, either integer or float (xEyy, x.yyEzzz) */
377 case NUMERAL_EXPONENT : {
378 if (('-' == sc.ch || '+' == sc.ch)
379 && (isdigit(sc.chNext))) {
380 sc.Forward();
381 } else if (!isdigit(sc.ch)) {
382 if (0 < exponent_digits)
383 sc.ChangeState(SCE_ERLANG_NUMBER);
384 sc.SetState(SCE_ERLANG_DEFAULT);
385 parse_state = STATE_NULL;
386 } else {
387 ++exponent_digits;
388 }
389 } break;
390
391 /* -------------------------------------------------------------- */
392 /* Preprocessor --------------------------------------------------*/
393 case PREPROCESSOR : {
394 if (!IsAWordChar(sc.ch)) {
395
396 sc.GetCurrent(cur, sizeof(cur));
397 if (erlangPreproc.InList(cur)) {
398 style = SCE_ERLANG_PREPROC;
399 } else if (erlangModulesAtt.InList(cur)) {
400 style = SCE_ERLANG_MODULES_ATT;
401 }
402
403 sc.ChangeState(style);
404 sc.SetState(SCE_ERLANG_DEFAULT);
405 parse_state = STATE_NULL;
406 }
407 } break;
408
409 }
410
411 } /* End of : STATE_NULL != parse_state */
412 else
413 {
414 switch (sc.state) {
415 case SCE_ERLANG_VARIABLE : {
416 if (!IsAWordChar(sc.ch))
417 sc.SetState(SCE_ERLANG_DEFAULT);
418 } break;
419 case SCE_ERLANG_STRING : {
420 if (sc.ch == '\"' && sc.chPrev != '\\')
421 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
422 } break;
423 case SCE_ERLANG_COMMENT : {
424 if (sc.atLineEnd)
425 sc.SetState(SCE_ERLANG_DEFAULT);
426 } break;
427 case SCE_ERLANG_CHARACTER : {
428 if (sc.chPrev == '\\') {
429 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
430 } else if (sc.ch != '\\') {
431 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
432 }
433 } break;
434 case SCE_ERLANG_OPERATOR : {
435 if (sc.chPrev == '.') {
436 if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'
437 || sc.ch == '^') {
438 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
439 } else if (sc.ch == '\'') {
440 sc.ForwardSetState(SCE_ERLANG_DEFAULT);
441 } else {
442 sc.SetState(SCE_ERLANG_DEFAULT);
443 }
444 } else {
445 sc.SetState(SCE_ERLANG_DEFAULT);
446 }
447 } break;
448 }
449 }
450
451 if (sc.state == SCE_ERLANG_DEFAULT) {
452 bool no_new_state = false;
453
454 switch (sc.ch) {
455 case '\"' : sc.SetState(SCE_ERLANG_STRING); break;
456 case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break;
457 case '%' : {
458 parse_state = COMMENT;
459 sc.SetState(SCE_ERLANG_COMMENT);
460 } break;
461 case '#' : {
462 parse_state = RECORD_START;
463 sc.SetState(SCE_ERLANG_UNKNOWN);
464 } break;
465 case '?' : {
466 parse_state = MACRO_START;
467 sc.SetState(SCE_ERLANG_UNKNOWN);
468 } break;
469 case '\'' : {
470 parse_state = ATOM_QUOTED;
471 sc.SetState(SCE_ERLANG_UNKNOWN);
472 } break;
473 case '+' :
474 case '-' : {
475 if (IsADigit(sc.chNext)) {
476 parse_state = NUMERAL_START;
477 radix_digits = 0;
478 sc.SetState(SCE_ERLANG_UNKNOWN);
479 } else if (sc.ch != '+') {
480 parse_state = PREPROCESSOR;
481 sc.SetState(SCE_ERLANG_UNKNOWN);
482 }
483 } break;
484 default : no_new_state = true;
485 }
486
487 if (no_new_state) {
488 if (isdigit(sc.ch)) {
489 parse_state = NUMERAL_START;
490 radix_digits = sc.ch - '0';
491 sc.SetState(SCE_ERLANG_UNKNOWN);
492 } else if (isupper(sc.ch) || '_' == sc.ch) {
493 sc.SetState(SCE_ERLANG_VARIABLE);
494 } else if (isalpha(sc.ch)) {
495 parse_state = ATOM_UNQUOTED;
496 sc.SetState(SCE_ERLANG_UNKNOWN);
497 } else if (isoperator(static_cast<char>(sc.ch))
498 || sc.ch == '\\') {
499 sc.SetState(SCE_ERLANG_OPERATOR);
500 }
501 }
502 }
503
504 }
505 sc.Complete();
506}
507
508static int ClassifyErlangFoldPoint(
509 Accessor &styler,
510 int styleNext,
511 Sci_Position keyword_start
512) {
513 int lev = 0;
514 if (styler.Match(keyword_start,"case")
515 || (
516 styler.Match(keyword_start,"fun")
517 && (SCE_ERLANG_FUNCTION_NAME != styleNext)
518 )
519 || styler.Match(keyword_start,"if")
520 || styler.Match(keyword_start,"query")
521 || styler.Match(keyword_start,"receive")
522 ) {
523 ++lev;
524 } else if (styler.Match(keyword_start,"end")) {
525 --lev;
526 }
527
528 return lev;
529}
530
531static void FoldErlangDoc(
532 Sci_PositionU startPos, Sci_Position length, int initStyle,
533 WordList** /*keywordlists*/, Accessor &styler
534) {
535 Sci_PositionU endPos = startPos + length;
536 Sci_Position currentLine = styler.GetLine(startPos);
537 int lev;
538 int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK;
539 int currentLevel = previousLevel;
540 int styleNext = styler.StyleAt(startPos);
541 int style = initStyle;
542 int stylePrev;
543 Sci_Position keyword_start = 0;
544 char ch;
545 char chNext = styler.SafeGetCharAt(startPos);
546 bool atEOL;
547
548 for (Sci_PositionU i = startPos; i < endPos; i++) {
549 ch = chNext;
550 chNext = styler.SafeGetCharAt(i + 1);
551
552 // Get styles
553 stylePrev = style;
554 style = styleNext;
555 styleNext = styler.StyleAt(i + 1);
556 atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n');
557
558 if (stylePrev != SCE_ERLANG_KEYWORD
559 && style == SCE_ERLANG_KEYWORD) {
560 keyword_start = i;
561 }
562
563 // Fold on keywords
564 if (stylePrev == SCE_ERLANG_KEYWORD
565 && style != SCE_ERLANG_KEYWORD
566 && style != SCE_ERLANG_ATOM
567 ) {
568 currentLevel += ClassifyErlangFoldPoint(styler,
569 styleNext,
570 keyword_start);
571 }
572
573 // Fold on comments
574 if (style == SCE_ERLANG_COMMENT
575 || style == SCE_ERLANG_COMMENT_MODULE
576 || style == SCE_ERLANG_COMMENT_FUNCTION) {
577
578 if (ch == '%' && chNext == '{') {
579 currentLevel++;
580 } else if (ch == '%' && chNext == '}') {
581 currentLevel--;
582 }
583 }
584
585 // Fold on braces
586 if (style == SCE_ERLANG_OPERATOR) {
587 if (ch == '{' || ch == '(' || ch == '[') {
588 currentLevel++;
589 } else if (ch == '}' || ch == ')' || ch == ']') {
590 currentLevel--;
591 }
592 }
593
594
595 if (atEOL) {
596 lev = previousLevel;
597
598 if (currentLevel > previousLevel)
599 lev |= SC_FOLDLEVELHEADERFLAG;
600
601 if (lev != styler.LevelAt(currentLine))
602 styler.SetLevel(currentLine, lev);
603
604 currentLine++;
605 previousLevel = currentLevel;
606 }
607
608 }
609
610 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
611 styler.SetLevel(currentLine,
612 previousLevel
613 | (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK));
614}
615
/**
 * Descriptions of the keyword lists used by the Erlang lexer, in list-index
 * order. The array ends with a null pointer.
 */
static const char * const erlangWordListDesc[] = {
	"Erlang Reserved words",       // list 0
	"Erlang BIFs",                 // list 1
	"Erlang Preprocessor",         // list 2
	"Erlang Module Attributes",    // list 3
	"Erlang Documentation",        // list 4
	"Erlang Documentation Macro",  // list 5
	nullptr
};
625
626LexerModule lmErlang(
627 SCLEX_ERLANG,
628 ColouriseErlangDoc,
629 "erlang",
630 FoldErlangDoc,
631 erlangWordListDesc);
632