1// -*- coding: utf-8 -*-
2// Scintilla source code edit control
3/**
4 * @file LexModula.cxx
 * @author Dariusz "DKnoto" Knociński
6 * @date 2011/02/03
7 * @brief Lexer for Modula-2/3 documents.
8 */
9// The License.txt file describes the conditions under which this software may
10// be distributed.
11
12#include <stdlib.h>
13#include <string.h>
14#include <stdio.h>
15#include <stdarg.h>
16#include <assert.h>
17#include <ctype.h>
18
19#include <string>
20#include <string_view>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25
26#include "PropSetSimple.h"
27#include "WordList.h"
28#include "LexAccessor.h"
29#include "Accessor.h"
30#include "StyleContext.h"
31#include "CharacterSet.h"
32#include "LexerModule.h"
33
34using namespace Lexilla;
35
// Diagnostic hook for characters the lexer cannot classify.
// With DEBUG_LEX_MODULA defined, DEBUG_STATE( p, c ) prints the position and
// the character to stderr; otherwise it expands to nothing.
#if defined( DEBUG_LEX_MODULA )
#  define DEBUG_STATE( p, c ) \
	fprintf( stderr, "Unknown state: currentPos = %u, char = '%c'\n", static_cast<unsigned int>(p), c );
#else
#  define DEBUG_STATE( p, c )
#endif
42
/**
 * Tells whether a character is a valid digit in the given numeric base.
 *
 * Only characters in the range '0'..'f' are considered at all. For bases up
 * to 10 the digit must be below '0' + base; for larger bases the decimal
 * digits are always accepted and letters are accepted, in either case, up to
 * the (base - 10)-th letter of the alphabet.
 *
 * @param ch    character value to test.
 * @param base  numeric base.
 * @return true when ch is a digit of that base.
 */
static inline bool IsDigitOfBase( unsigned ch, unsigned base ) {
	if( ch < '0' || ch > 'f' )
		return false;

	if( base <= 10 )
		return ch < ( '0' + base );

	if( ch <= '9' )
		return true;

	// Letters: count of letter digits available in this base.
	const unsigned letters = base - 10;
	const bool isUpperDigit = ( ch >= 'A' ) && ( ch < ( 'A' + letters ) );
	const bool isLowerDigit = ( ch >= 'a' ) && ( ch < ( 'a' + letters ) );
	return isUpperDigit || isLowerDigit;
}
59
60static inline unsigned IsOperator( StyleContext & sc, WordList & op ) {
61 int i;
62 char s[3];
63
64 s[0] = sc.ch;
65 s[1] = sc.chNext;
66 s[2] = 0;
67 for( i = 0; i < op.Length(); i++ ) {
68 if( ( strlen( op.WordAt(i) ) == 2 ) &&
69 ( s[0] == op.WordAt(i)[0] && s[1] == op.WordAt(i)[1] ) ) {
70 return 2;
71 }
72 }
73 s[1] = 0;
74 for( i = 0; i < op.Length(); i++ ) {
75 if( ( strlen( op.WordAt(i) ) == 1 ) &&
76 ( s[0] == op.WordAt(i)[0] ) ) {
77 return 1;
78 }
79 }
80 return 0;
81}
82
83static inline bool IsEOL( Accessor &styler, Sci_PositionU curPos ) {
84 unsigned ch = styler.SafeGetCharAt( curPos );
85 if( ( ch == '\r' && styler.SafeGetCharAt( curPos + 1 ) == '\n' ) ||
86 ( ch == '\n' ) ) {
87 return true;
88 }
89 return false;
90}
91
92static inline bool checkStatement(
93 Accessor &styler,
94 Sci_Position &curPos,
95 const char *stt, bool spaceAfter = true ) {
96 int len = static_cast<int>(strlen( stt ));
97 int i;
98 for( i = 0; i < len; i++ ) {
99 if( styler.SafeGetCharAt( curPos + i ) != stt[i] ) {
100 return false;
101 }
102 }
103 if( spaceAfter ) {
104 if( ! isspace( styler.SafeGetCharAt( curPos + i ) ) ) {
105 return false;
106 }
107 }
108 curPos += ( len - 1 );
109 return true;
110}
111
112static inline bool checkEndSemicolon(
113 Accessor &styler,
114 Sci_Position &curPos, Sci_Position endPos )
115{
116 const char *stt = "END";
117 int len = static_cast<int>(strlen( stt ));
118 int i;
119 for( i = 0; i < len; i++ ) {
120 if( styler.SafeGetCharAt( curPos + i ) != stt[i] ) {
121 return false;
122 }
123 }
124 while( isspace( styler.SafeGetCharAt( curPos + i ) ) ) {
125 i++;
126 if( ( curPos + i ) >= endPos ) return false;
127 }
128 if( styler.SafeGetCharAt( curPos + i ) != ';' ) {
129 return false;
130 }
131 curPos += ( i - 1 );
132 return true;
133}
134
135static inline bool checkKeyIdentOper(
136
137 Accessor &styler,
138 Sci_Position &curPos, Sci_Position endPos,
139 const char *stt, const char etk ) {
140 Sci_Position newPos = curPos;
141 if( ! checkStatement( styler, newPos, stt ) )
142 return false;
143 newPos++;
144 if( newPos >= endPos )
145 return false;
146 if( ! isspace( styler.SafeGetCharAt( newPos ) ) )
147 return false;
148 newPos++;
149 if( newPos >= endPos )
150 return false;
151 while( isspace( styler.SafeGetCharAt( newPos ) ) ) {
152 newPos++;
153 if( newPos >= endPos )
154 return false;
155 }
156 if( ! isalpha( styler.SafeGetCharAt( newPos ) ) )
157 return false;
158 newPos++;
159 if( newPos >= endPos )
160 return false;
161 char ch;
162 ch = styler.SafeGetCharAt( newPos );
163 while( isalpha( ch ) || isdigit( ch ) || ch == '_' ) {
164 newPos++;
165 if( newPos >= endPos ) return false;
166 ch = styler.SafeGetCharAt( newPos );
167 }
168 while( isspace( styler.SafeGetCharAt( newPos ) ) ) {
169 newPos++;
170 if( newPos >= endPos ) return false;
171 }
172 if( styler.SafeGetCharAt( newPos ) != etk )
173 return false;
174 curPos = newPos;
175 return true;
176}
177
178static void FoldModulaDoc( Sci_PositionU startPos,
179 Sci_Position length,
180 int , WordList *[],
181 Accessor &styler)
182{
183 Sci_Position curLine = styler.GetLine(startPos);
184 int curLevel = SC_FOLDLEVELBASE;
185 Sci_Position endPos = startPos + length;
186 if( curLine > 0 )
187 curLevel = styler.LevelAt( curLine - 1 ) >> 16;
188 Sci_Position curPos = startPos;
189 int style = styler.StyleAt( curPos );
190 int visChars = 0;
191 int nextLevel = curLevel;
192
193 while( curPos < endPos ) {
194 if( ! isspace( styler.SafeGetCharAt( curPos ) ) ) visChars++;
195
196 switch( style ) {
197 case SCE_MODULA_COMMENT:
198 if( checkStatement( styler, curPos, "(*" ) )
199 nextLevel++;
200 else
201 if( checkStatement( styler, curPos, "*)" ) )
202 nextLevel--;
203 break;
204
205 case SCE_MODULA_DOXYCOMM:
206 if( checkStatement( styler, curPos, "(**", false ) )
207 nextLevel++;
208 else
209 if( checkStatement( styler, curPos, "*)" ) )
210 nextLevel--;
211 break;
212
213 case SCE_MODULA_KEYWORD:
214 if( checkStatement( styler, curPos, "IF" ) )
215 nextLevel++;
216 else
217 if( checkStatement( styler, curPos, "BEGIN" ) )
218 nextLevel++;
219 else
220 if( checkStatement( styler, curPos, "TRY" ) )
221 nextLevel++;
222 else
223 if( checkStatement( styler, curPos, "LOOP" ) )
224 nextLevel++;
225 else
226 if( checkStatement( styler, curPos, "FOR" ) )
227 nextLevel++;
228 else
229 if( checkStatement( styler, curPos, "WHILE" ) )
230 nextLevel++;
231 else
232 if( checkStatement( styler, curPos, "REPEAT" ) )
233 nextLevel++;
234 else
235 if( checkStatement( styler, curPos, "UNTIL" ) )
236 nextLevel--;
237 else
238 if( checkStatement( styler, curPos, "WITH" ) )
239 nextLevel++;
240 else
241 if( checkStatement( styler, curPos, "CASE" ) )
242 nextLevel++;
243 else
244 if( checkStatement( styler, curPos, "TYPECASE" ) )
245 nextLevel++;
246 else
247 if( checkStatement( styler, curPos, "LOCK" ) )
248 nextLevel++;
249 else
250 if( checkKeyIdentOper( styler, curPos, endPos, "PROCEDURE", '(' ) )
251 nextLevel++;
252 else
253 if( checkKeyIdentOper( styler, curPos, endPos, "END", ';' ) ) {
254 Sci_Position cln = curLine;
255 int clv_old = curLevel;
256 Sci_Position pos;
257 char ch;
258 int clv_new;
259 while( cln > 0 ) {
260 clv_new = styler.LevelAt( cln - 1 ) >> 16;
261 if( clv_new < clv_old ) {
262 nextLevel--;
263 pos = styler.LineStart( cln );
264 while( ( ch = styler.SafeGetCharAt( pos ) ) != '\n' ) {
265 if( ch == 'P' ) {
266 if( styler.StyleAt(pos) == SCE_MODULA_KEYWORD ) {
267 if( checkKeyIdentOper( styler, pos, endPos,
268 "PROCEDURE", '(' ) ) {
269 break;
270 }
271 }
272 }
273 pos++;
274 }
275 clv_old = clv_new;
276 }
277 cln--;
278 }
279 }
280 else
281 if( checkKeyIdentOper( styler, curPos, endPos, "END", '.' ) )
282 nextLevel--;
283 else
284 if( checkEndSemicolon( styler, curPos, endPos ) )
285 nextLevel--;
286 else {
287 while( styler.StyleAt( curPos + 1 ) == SCE_MODULA_KEYWORD )
288 curPos++;
289 }
290 break;
291
292 default:
293 break;
294 }
295
296 if( IsEOL( styler, curPos ) || ( curPos == endPos - 1 ) ) {
297 int efectiveLevel = curLevel | nextLevel << 16;
298 if( visChars == 0 )
299 efectiveLevel |= SC_FOLDLEVELWHITEFLAG;
300 if( curLevel < nextLevel )
301 efectiveLevel |= SC_FOLDLEVELHEADERFLAG;
302 if( efectiveLevel != styler.LevelAt(curLine) ) {
303 styler.SetLevel(curLine, efectiveLevel );
304 }
305 curLine++;
306 curLevel = nextLevel;
307 if( IsEOL( styler, curPos ) && ( curPos == endPos - 1 ) ) {
308 styler.SetLevel( curLine, ( curLevel | curLevel << 16)
309 | SC_FOLDLEVELWHITEFLAG);
310 }
311 visChars = 0;
312 }
313 curPos++;
314 style = styler.StyleAt( curPos );
315 }
316}
317
318static inline bool skipWhiteSpaces( StyleContext & sc ) {
319 while( isspace( sc.ch ) ) {
320 sc.SetState( SCE_MODULA_DEFAULT );
321 if( sc.More() )
322 sc.Forward();
323 else
324 return false;
325 }
326 return true;
327}
328
329static void ColouriseModulaDoc( Sci_PositionU startPos,
330 Sci_Position length,
331 int initStyle,
332 WordList *wl[],
333 Accessor &styler ) {
334 WordList& keyWords = *wl[0];
335 WordList& reservedWords = *wl[1];
336 WordList& operators = *wl[2];
337 WordList& pragmaWords = *wl[3];
338 WordList& escapeCodes = *wl[4];
339 WordList& doxyKeys = *wl[5];
340
341 const int BUFLEN = 128;
342
343 char buf[BUFLEN];
344 int i, kl;
345
346 Sci_Position charPos = 0;
347
348 StyleContext sc( startPos, length, initStyle, styler );
349
350 while( sc.More() ) {
351 switch( sc.state ) {
352 case SCE_MODULA_DEFAULT:
353 if( ! skipWhiteSpaces( sc ) ) break;
354
355 if( sc.ch == '(' && sc.chNext == '*' ) {
356 if( sc.GetRelative(2) == '*' ) {
357 sc.SetState( SCE_MODULA_DOXYCOMM );
358 sc.Forward();
359 } else {
360 sc.SetState( SCE_MODULA_COMMENT );
361 }
362 sc.Forward();
363 }
364 else
365 if( isalpha( sc.ch ) ) {
366 if( isupper( sc.ch ) && isupper( sc.chNext ) ) {
367 for( i = 0; i < BUFLEN - 1; i++ ) {
368 buf[i] = sc.GetRelative(i);
369 if( !isalpha( buf[i] ) && !(buf[i] == '_') )
370 break;
371 }
372 kl = i;
373 buf[kl] = 0;
374
375 if( keyWords.InList( buf ) ) {
376 sc.SetState( SCE_MODULA_KEYWORD );
377 sc.Forward( kl );
378 sc.SetState( SCE_MODULA_DEFAULT );
379 continue;
380 }
381 else
382 if( reservedWords.InList( buf ) ) {
383 sc.SetState( SCE_MODULA_RESERVED );
384 sc.Forward( kl );
385 sc.SetState( SCE_MODULA_DEFAULT );
386 continue;
387 } else {
388 /** check procedure identifier */
389 }
390 } else {
391 for( i = 0; i < BUFLEN - 1; i++ ) {
392 buf[i] = sc.GetRelative(i);
393 if( !isalpha( buf[i] ) &&
394 !isdigit( buf[i] ) &&
395 !(buf[i] == '_') )
396 break;
397 }
398 kl = i;
399 buf[kl] = 0;
400
401 sc.SetState( SCE_MODULA_DEFAULT );
402 sc.Forward( kl );
403 continue;
404 }
405 }
406 else
407 if( isdigit( sc.ch ) ) {
408 sc.SetState( SCE_MODULA_NUMBER );
409 continue;
410 }
411 else
412 if( sc.ch == '\"' ) {
413 sc.SetState( SCE_MODULA_STRING );
414 }
415 else
416 if( sc.ch == '\'' ) {
417 charPos = sc.currentPos;
418 sc.SetState( SCE_MODULA_CHAR );
419 }
420 else
421 if( sc.ch == '<' && sc.chNext == '*' ) {
422 sc.SetState( SCE_MODULA_PRAGMA );
423 sc.Forward();
424 } else {
425 unsigned len = IsOperator( sc, operators );
426 if( len > 0 ) {
427 sc.SetState( SCE_MODULA_OPERATOR );
428 sc.Forward( len );
429 sc.SetState( SCE_MODULA_DEFAULT );
430 continue;
431 } else {
432 DEBUG_STATE( sc.currentPos, sc.ch );
433 }
434 }
435 break;
436
437 case SCE_MODULA_COMMENT:
438 if( sc.ch == '*' && sc.chNext == ')' ) {
439 sc.Forward( 2 );
440 sc.SetState( SCE_MODULA_DEFAULT );
441 continue;
442 }
443 break;
444
445 case SCE_MODULA_DOXYCOMM:
446 switch( sc.ch ) {
447 case '*':
448 if( sc.chNext == ')' ) {
449 sc.Forward( 2 );
450 sc.SetState( SCE_MODULA_DEFAULT );
451 continue;
452 }
453 break;
454
455 case '@':
456 if( islower( sc.chNext ) ) {
457 for( i = 0; i < BUFLEN - 1; i++ ) {
458 buf[i] = sc.GetRelative(i+1);
459 if( isspace( buf[i] ) ) break;
460 }
461 buf[i] = 0;
462 kl = i;
463
464 if( doxyKeys.InList( buf ) ) {
465 sc.SetState( SCE_MODULA_DOXYKEY );
466 sc.Forward( kl + 1 );
467 sc.SetState( SCE_MODULA_DOXYCOMM );
468 }
469 }
470 break;
471
472 default:
473 break;
474 }
475 break;
476
477 case SCE_MODULA_NUMBER:
478 {
479 buf[0] = sc.ch;
480 for( i = 1; i < BUFLEN - 1; i++ ) {
481 buf[i] = sc.GetRelative(i);
482 if( ! isdigit( buf[i] ) )
483 break;
484 }
485 kl = i;
486 buf[kl] = 0;
487
488 switch( sc.GetRelative(kl) ) {
489 case '_':
490 {
491 int base = atoi( buf );
492 if( base < 2 || base > 16 ) {
493 sc.SetState( SCE_MODULA_BADSTR );
494 } else {
495 int imax;
496
497 kl++;
498 for( i = 0; i < BUFLEN - 1; i++ ) {
499 buf[i] = sc.GetRelative(kl+i);
500 if( ! IsDigitOfBase( buf[i], 16 ) ) {
501 break;
502 }
503 }
504 imax = i;
505 for( i = 0; i < imax; i++ ) {
506 if( ! IsDigitOfBase( buf[i], base ) ) {
507 sc.SetState( SCE_MODULA_BADSTR );
508 break;
509 }
510 }
511 kl += imax;
512 }
513 sc.SetState( SCE_MODULA_BASENUM );
514 for( i = 0; i < kl; i++ ) {
515 sc.Forward();
516 }
517 sc.SetState( SCE_MODULA_DEFAULT );
518 continue;
519 }
520 break;
521
522 case '.':
523 if( sc.GetRelative(kl+1) == '.' ) {
524 kl--;
525 for( i = 0; i < kl; i++ ) {
526 sc.Forward();
527 }
528 sc.Forward();
529 sc.SetState( SCE_MODULA_DEFAULT );
530 continue;
531 } else {
532 bool doNext = false;
533
534 kl++;
535
536 buf[0] = sc.GetRelative(kl);
537 if( isdigit( buf[0] ) ) {
538 for( i = 0;; i++ ) {
539 if( !isdigit(sc.GetRelative(kl+i)) )
540 break;
541 }
542 kl += i;
543 buf[0] = sc.GetRelative(kl);
544
545 switch( buf[0] )
546 {
547 case 'E':
548 case 'e':
549 case 'D':
550 case 'd':
551 case 'X':
552 case 'x':
553 kl++;
554 buf[0] = sc.GetRelative(kl);
555 if( buf[0] == '-' || buf[0] == '+' ) {
556 kl++;
557 }
558 buf[0] = sc.GetRelative(kl);
559 if( isdigit( buf[0] ) ) {
560 for( i = 0;; i++ ) {
561 if( !isdigit(sc.GetRelative(kl+i)) ) {
562 buf[0] = sc.GetRelative(kl+i);
563 break;
564 }
565 }
566 kl += i;
567 doNext = true;
568 } else {
569 sc.SetState( SCE_MODULA_BADSTR );
570 }
571 break;
572
573 default:
574 doNext = true;
575 break;
576 }
577 } else {
578 sc.SetState( SCE_MODULA_BADSTR );
579 }
580
581 if( doNext ) {
582 if( ! isspace( buf[0] ) &&
583 buf[0] != ')' &&
584 buf[0] != '>' &&
585 buf[0] != '<' &&
586 buf[0] != '=' &&
587 buf[0] != '#' &&
588 buf[0] != '+' &&
589 buf[0] != '-' &&
590 buf[0] != '*' &&
591 buf[0] != '/' &&
592 buf[0] != ',' &&
593 buf[0] != ';'
594 ) {
595 sc.SetState( SCE_MODULA_BADSTR );
596 } else {
597 kl--;
598 }
599 }
600 }
601 sc.SetState( SCE_MODULA_FLOAT );
602 for( i = 0; i < kl; i++ ) {
603 sc.Forward();
604 }
605 sc.SetState( SCE_MODULA_DEFAULT );
606 continue;
607 break;
608
609 default:
610 for( i = 0; i < kl; i++ ) {
611 sc.Forward();
612 }
613 break;
614 }
615 sc.SetState( SCE_MODULA_DEFAULT );
616 continue;
617 }
618 break;
619
620 case SCE_MODULA_STRING:
621 if( sc.ch == '\"' ) {
622 sc.Forward();
623 sc.SetState( SCE_MODULA_DEFAULT );
624 continue;
625 } else {
626 if( sc.ch == '\\' ) {
627 i = 1;
628 if( IsDigitOfBase( sc.chNext, 8 ) ) {
629 for( i = 1; i < BUFLEN - 1; i++ ) {
630 if( ! IsDigitOfBase(sc.GetRelative(i+1), 8 ) )
631 break;
632 }
633 if( i == 3 ) {
634 sc.SetState( SCE_MODULA_STRSPEC );
635 } else {
636 sc.SetState( SCE_MODULA_BADSTR );
637 }
638 } else {
639 buf[0] = sc.chNext;
640 buf[1] = 0;
641
642 if( escapeCodes.InList( buf ) ) {
643 sc.SetState( SCE_MODULA_STRSPEC );
644 } else {
645 sc.SetState( SCE_MODULA_BADSTR );
646 }
647 }
648 sc.Forward(i+1);
649 sc.SetState( SCE_MODULA_STRING );
650 continue;
651 }
652 }
653 break;
654
655 case SCE_MODULA_CHAR:
656 if( sc.ch == '\'' ) {
657 sc.Forward();
658 sc.SetState( SCE_MODULA_DEFAULT );
659 continue;
660 }
661 else
662 if( ( sc.currentPos - charPos ) == 1 ) {
663 if( sc.ch == '\\' ) {
664 i = 1;
665 if( IsDigitOfBase( sc.chNext, 8 ) ) {
666 for( i = 1; i < BUFLEN - 1; i++ ) {
667 if( ! IsDigitOfBase(sc.GetRelative(i+1), 8 ) )
668 break;
669 }
670 if( i == 3 ) {
671 sc.SetState( SCE_MODULA_CHARSPEC );
672 } else {
673 sc.SetState( SCE_MODULA_BADSTR );
674 }
675 } else {
676 buf[0] = sc.chNext;
677 buf[1] = 0;
678
679 if( escapeCodes.InList( buf ) ) {
680 sc.SetState( SCE_MODULA_CHARSPEC );
681 } else {
682 sc.SetState( SCE_MODULA_BADSTR );
683 }
684 }
685 sc.Forward(i+1);
686 sc.SetState( SCE_MODULA_CHAR );
687 continue;
688 }
689 } else {
690 sc.SetState( SCE_MODULA_BADSTR );
691 sc.Forward();
692 sc.SetState( SCE_MODULA_CHAR );
693 continue;
694 }
695 break;
696
697 case SCE_MODULA_PRAGMA:
698 if( sc.ch == '*' && sc.chNext == '>' ) {
699 sc.Forward();
700 sc.Forward();
701 sc.SetState( SCE_MODULA_DEFAULT );
702 continue;
703 }
704 else
705 if( isupper( sc.ch ) && isupper( sc.chNext ) ) {
706 buf[0] = sc.ch;
707 buf[1] = sc.chNext;
708 for( i = 2; i < BUFLEN - 1; i++ ) {
709 buf[i] = sc.GetRelative(i);
710 if( !isupper( buf[i] ) )
711 break;
712 }
713 kl = i;
714 buf[kl] = 0;
715 if( pragmaWords.InList( buf ) ) {
716 sc.SetState( SCE_MODULA_PRGKEY );
717 sc.Forward( kl );
718 sc.SetState( SCE_MODULA_PRAGMA );
719 continue;
720 }
721 }
722 break;
723
724 default:
725 break;
726 }
727 sc.Forward();
728 }
729 sc.Complete();
730}
731
// Names of the word-list slots, in the order in which ColouriseModulaDoc
// reads them from its wl[] argument. The table ends with a null pointer.
static const char *const modulaWordListDesc[] = {
	"Keywords",			// wl[0]
	"ReservedKeywords",	// wl[1]
	"Operators",		// wl[2]
	"PragmaKeyswords",	// wl[3]
	"EscapeCodes",		// wl[4]
	"DoxygeneKeywords",	// wl[5]
	nullptr
};
742
743LexerModule lmModula( SCLEX_MODULA, ColouriseModulaDoc, "modula", FoldModulaDoc,
744 modulaWordListDesc);
745