1/******************************************************************
2 * LexHaskell.cxx
3 *
4 * A haskell lexer for the scintilla code control.
5 * Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6 * External lexer stuff inspired from the caml external lexer.
7 * Folder copied from Python's.
8 *
9 * Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10 *
11 * Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12 *
13 * Improved by kudah <kudahkukarek@gmail.com>
14 *
15 * TODO:
16 * * A proper lexical folder to fold group declarations, comments, pragmas,
17 * #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18 * etc.
19 *
20 *****************************************************************/
21#include <stdlib.h>
22#include <string.h>
23#include <stdio.h>
24#include <stdarg.h>
25#include <assert.h>
26#include <ctype.h>
27
28#include <string>
29#include <string_view>
30#include <vector>
31#include <map>
32#include <functional>
33
34#include "ILexer.h"
35#include "Scintilla.h"
36#include "SciLexer.h"
37
38#include "PropSetSimple.h"
39#include "WordList.h"
40#include "LexAccessor.h"
41#include "Accessor.h"
42#include "StyleContext.h"
43#include "CharacterSet.h"
44#include "CharacterCategory.h"
45#include "LexerModule.h"
46#include "OptionSet.h"
47#include "DefaultLexer.h"
48
49using namespace Scintilla;
50using namespace Lexilla;
51
52// See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
53// Note, letter modifiers are prohibited.
54
55static int u_iswupper (int ch) {
56 CharacterCategory c = CategoriseCharacter(ch);
57 return c == ccLu || c == ccLt;
58}
59
60static int u_iswalpha (int ch) {
61 CharacterCategory c = CategoriseCharacter(ch);
62 return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
63}
64
65static int u_iswalnum (int ch) {
66 CharacterCategory c = CategoriseCharacter(ch);
67 return c == ccLl || c == ccLu || c == ccLt || c == ccLo
68 || c == ccNd || c == ccNo;
69}
70
71static int u_IsHaskellSymbol(int ch) {
72 CharacterCategory c = CategoriseCharacter(ch);
73 return c == ccPc || c == ccPd || c == ccPo
74 || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
75}
76
77static inline bool IsHaskellLetter(const int ch) {
78 if (IsASCII(ch)) {
79 return (ch >= 'a' && ch <= 'z')
80 || (ch >= 'A' && ch <= 'Z');
81 } else {
82 return u_iswalpha(ch) != 0;
83 }
84}
85
86static inline bool IsHaskellAlphaNumeric(const int ch) {
87 if (IsASCII(ch)) {
88 return IsAlphaNumeric(ch);
89 } else {
90 return u_iswalnum(ch) != 0;
91 }
92}
93
94static inline bool IsHaskellUpperCase(const int ch) {
95 if (IsASCII(ch)) {
96 return ch >= 'A' && ch <= 'Z';
97 } else {
98 return u_iswupper(ch) != 0;
99 }
100}
101
102static inline bool IsAnHaskellOperatorChar(const int ch) {
103 if (IsASCII(ch)) {
104 return
105 ( ch == '!' || ch == '#' || ch == '$' || ch == '%'
106 || ch == '&' || ch == '*' || ch == '+' || ch == '-'
107 || ch == '.' || ch == '/' || ch == ':' || ch == '<'
108 || ch == '=' || ch == '>' || ch == '?' || ch == '@'
109 || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
110 } else {
111 return u_IsHaskellSymbol(ch) != 0;
112 }
113}
114
115static inline bool IsAHaskellWordStart(const int ch) {
116 return IsHaskellLetter(ch) || ch == '_';
117}
118
119static inline bool IsAHaskellWordChar(const int ch) {
120 return ( IsHaskellAlphaNumeric(ch)
121 || ch == '_'
122 || ch == '\'');
123}
124
125static inline bool IsCommentBlockStyle(int style) {
126 return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
127}
128
129static inline bool IsCommentStyle(int style) {
130 return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
131 || ( style == SCE_HA_LITERATE_COMMENT
132 || style == SCE_HA_LITERATE_CODEDELIM);
133}
134
135// styles which do not belong to Haskell, but to external tools
136static inline bool IsExternalStyle(int style) {
137 return ( style == SCE_HA_PREPROCESSOR
138 || style == SCE_HA_LITERATE_COMMENT
139 || style == SCE_HA_LITERATE_CODEDELIM);
140}
141
142static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
143 return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
144}
145
146// Mangled version of lexlib/Accessor.cxx IndentAmount.
147// Modified to treat comment blocks as whitespace
148// plus special case for commentline/preprocessor.
149static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
150
151 // Determines the indentation level of the current line
152 // Comment blocks are treated as whitespace
153
154 Sci_Position pos = styler.LineStart(line);
155 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
156
157 char ch = styler[pos];
158 int style = styler.StyleAt(pos);
159
160 int indent = 0;
161 bool inPrevPrefix = line > 0;
162
163 Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
164
165 while (( ch == ' ' || ch == '\t'
166 || IsCommentBlockStyle(style)
167 || style == SCE_HA_LITERATE_CODEDELIM)
168 && (pos < eol_pos)) {
169 if (inPrevPrefix) {
170 char chPrev = styler[posPrev++];
171 if (chPrev != ' ' && chPrev != '\t') {
172 inPrevPrefix = false;
173 }
174 }
175 if (ch == '\t') {
176 indent = (indent / 8 + 1) * 8;
177 } else { // Space or comment block
178 indent++;
179 }
180 pos++;
181 ch = styler[pos];
182 style = styler.StyleAt(pos);
183 }
184
185 indent += SC_FOLDLEVELBASE;
186 // if completely empty line or the start of a comment or preprocessor...
187 if ( styler.LineStart(line) == styler.Length()
188 || ch == ' '
189 || ch == '\t'
190 || ch == '\n'
191 || ch == '\r'
192 || IsCommentStyle(style)
193 || style == SCE_HA_PREPROCESSOR)
194 return indent | SC_FOLDLEVELWHITEFLAG;
195 else
196 return indent;
197}
198
199struct OptionsHaskell {
200 bool magicHash;
201 bool allowQuotes;
202 bool implicitParams;
203 bool highlightSafe;
204 bool cpp;
205 bool stylingWithinPreprocessor;
206 bool fold;
207 bool foldComment;
208 bool foldCompact;
209 bool foldImports;
210 OptionsHaskell() {
211 magicHash = true; // Widespread use, enabled by default.
212 allowQuotes = true; // Widespread use, enabled by default.
213 implicitParams = false; // Fell out of favor, seldom used, disabled.
214 highlightSafe = true; // Moderately used, doesn't hurt to enable.
215 cpp = true; // Widespread use, enabled by default;
216 stylingWithinPreprocessor = false;
217 fold = false;
218 foldComment = false;
219 foldCompact = false;
220 foldImports = false;
221 }
222};
223
224static const char * const haskellWordListDesc[] = {
225 "Keywords",
226 "FFI",
227 "Reserved operators",
228 0
229};
230
231struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
232 OptionSetHaskell() {
233 DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
234 "Set to 0 to disallow the '#' character at the end of identifiers and "
235 "literals with the haskell lexer "
236 "(GHC -XMagicHash extension)");
237
238 DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
239 "Set to 0 to disable highlighting of Template Haskell name quotations "
240 "and promoted constructors "
241 "(GHC -XTemplateHaskell and -XDataKinds extensions)");
242
243 DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
244 "Set to 1 to allow the '?' character at the start of identifiers "
245 "with the haskell lexer "
246 "(GHC & Hugs -XImplicitParams extension)");
247
248 DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
249 "Set to 0 to disallow \"safe\" keyword in imports "
250 "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
251
252 DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
253 "Set to 0 to disable C-preprocessor highlighting "
254 "(-XCPP extension)");
255
256 DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
257 "For Haskell code, determines whether all preprocessor code is styled in the "
258 "preprocessor style (0, the default) or only from the initial # to the end "
259 "of the command word(1)."
260 );
261
262 DefineProperty("fold", &OptionsHaskell::fold);
263
264 DefineProperty("fold.comment", &OptionsHaskell::foldComment);
265
266 DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
267
268 DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
269 "Set to 1 to enable folding of import declarations");
270
271 DefineWordListSets(haskellWordListDesc);
272 }
273};
274
275class LexerHaskell : public DefaultLexer {
276 bool literate;
277 Sci_Position firstImportLine;
278 int firstImportIndent;
279 WordList keywords;
280 WordList ffi;
281 WordList reserved_operators;
282 OptionsHaskell options;
283 OptionSetHaskell osHaskell;
284
285 enum HashCount {
286 oneHash
287 ,twoHashes
288 ,unlimitedHashes
289 };
290
291 enum KeywordMode {
292 HA_MODE_DEFAULT = 0
293 ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
294 ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
295 ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
296 ,HA_MODE_MODULE = 4 // after "module", before module name.
297 ,HA_MODE_FFI = 5 // after "foreign", before FFI keywords
298 ,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
299 };
300
301 enum LiterateMode {
302 LITERATE_BIRD = 0 // if '>' is the first character on the line,
303 // color '>' as a codedelim and the rest of
304 // the line as code.
305 // else if "\begin{code}" is the only word on the
306 // line except whitespace, switch to LITERATE_BLOCK
307 // otherwise color the line as a literate comment.
308 ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
309 // 0 ignoring all later characters, color the line
310 // as a codedelim and switch to LITERATE_BIRD
311 // otherwise color the line as code.
312 };
313
314 struct HaskellLineInfo {
315 unsigned int nestLevel; // 22 bits ought to be enough for anybody
316 unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
317 // beyond 31.
318 bool pragma;
319 LiterateMode lmode;
320 KeywordMode mode;
321
322 HaskellLineInfo(int state) :
323 nestLevel (state >> 10)
324 , nonexternalStyle ((state >> 5) & 0x1F)
325 , pragma ((state >> 4) & 0x1)
326 , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
327 , mode (static_cast<KeywordMode>(state & 0x7))
328 {}
329
330 int ToLineState() {
331 return
332 (nestLevel << 10)
333 | (nonexternalStyle << 5)
334 | (pragma << 4)
335 | (lmode << 3)
336 | mode;
337 }
338 };
339
340 inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
341 if (options.magicHash && sc.ch == '#') {
342 sc.Forward();
343 if (hashes == twoHashes && sc.ch == '#') {
344 sc.Forward();
345 } else if (hashes == unlimitedHashes) {
346 while (sc.ch == '#') {
347 sc.Forward();
348 }
349 }
350 }
351 }
352
353 bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
354 if (options.foldImports) {
355 Sci_Position currentPos = styler.LineStart(line);
356 int style = styler.StyleAt(currentPos);
357
358 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
359
360 while (currentPos < eol_pos) {
361 int ch = styler[currentPos];
362 style = styler.StyleAt(currentPos);
363
364 if (ch == ' ' || ch == '\t'
365 || IsCommentBlockStyle(style)
366 || style == SCE_HA_LITERATE_CODEDELIM) {
367 currentPos++;
368 } else {
369 break;
370 }
371 }
372
373 return (style == SCE_HA_KEYWORD
374 && styler.Match(currentPos, "import"));
375 } else {
376 return false;
377 }
378 }
379
380 inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
381 const int indent = HaskellIndentAmount(styler, line);
382 const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
383 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
384 ? indent
385 : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
386 }
387
388 inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
389 return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
390 ? indentLevel
391 : indentLevel - firstImportIndent;
392 }
393
394public:
395 LexerHaskell(bool literate_)
396 : DefaultLexer(literate_ ? "literatehaskell" : "haskell", literate_ ? SCLEX_LITERATEHASKELL : SCLEX_HASKELL)
397 , literate(literate_)
398 , firstImportLine(-1)
399 , firstImportIndent(0)
400 {}
401 virtual ~LexerHaskell() {}
402
403 void SCI_METHOD Release() override {
404 delete this;
405 }
406
407 int SCI_METHOD Version() const override {
408 return lvRelease5;
409 }
410
411 const char * SCI_METHOD PropertyNames() override {
412 return osHaskell.PropertyNames();
413 }
414
415 int SCI_METHOD PropertyType(const char *name) override {
416 return osHaskell.PropertyType(name);
417 }
418
419 const char * SCI_METHOD DescribeProperty(const char *name) override {
420 return osHaskell.DescribeProperty(name);
421 }
422
423 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
424
425 const char * SCI_METHOD PropertyGet(const char *key) override {
426 return osHaskell.PropertyGet(key);
427 }
428
429 const char * SCI_METHOD DescribeWordListSets() override {
430 return osHaskell.DescribeWordListSets();
431 }
432
433 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
434
435 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
436
437 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
438
439 void * SCI_METHOD PrivateCall(int, void *) override {
440 return 0;
441 }
442
443 static ILexer5 *LexerFactoryHaskell() {
444 return new LexerHaskell(false);
445 }
446
447 static ILexer5 *LexerFactoryLiterateHaskell() {
448 return new LexerHaskell(true);
449 }
450};
451
452Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
453 if (osHaskell.PropertySet(&options, key, val)) {
454 return 0;
455 }
456 return -1;
457}
458
459Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
460 WordList *wordListN = 0;
461 switch (n) {
462 case 0:
463 wordListN = &keywords;
464 break;
465 case 1:
466 wordListN = &ffi;
467 break;
468 case 2:
469 wordListN = &reserved_operators;
470 break;
471 }
472 Sci_Position firstModification = -1;
473 if (wordListN) {
474 WordList wlNew;
475 wlNew.Set(wl);
476 if (*wordListN != wlNew) {
477 wordListN->Set(wl);
478 firstModification = 0;
479 }
480 }
481 return firstModification;
482}
483
484void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
485 ,IDocument *pAccess) {
486 LexAccessor styler(pAccess);
487
488 Sci_Position lineCurrent = styler.GetLine(startPos);
489
490 HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
491
492 // Do not leak onto next line
493 if (initStyle == SCE_HA_STRINGEOL)
494 initStyle = SCE_HA_DEFAULT;
495 else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
496 initStyle = hs.nonexternalStyle;
497
498 StyleContext sc(startPos, length, initStyle, styler);
499
500 int base = 10;
501 bool dot = false;
502
503 bool inDashes = false;
504 bool alreadyInTheMiddleOfOperator = false;
505
506 assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
507
508 while (sc.More()) {
509 // Check for state end
510
511 if (!IsExternalStyle(sc.state)) {
512 hs.nonexternalStyle = sc.state;
513 }
514
515 // For lexer to work, states should unconditionally forward at least one
516 // character.
517 // If they don't, they should still check if they are at line end and
518 // forward if so.
519 // If a state forwards more than one character, it should check every time
520 // that it is not a line end and cease forwarding otherwise.
521 if (sc.atLineEnd) {
522 // Remember the line state for future incremental lexing
523 styler.SetLineState(lineCurrent, hs.ToLineState());
524 lineCurrent++;
525 }
526
527 // Handle line continuation generically.
528 if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
529 && ( sc.state == SCE_HA_STRING
530 || sc.state == SCE_HA_PREPROCESSOR)) {
531 // Remember the line state for future incremental lexing
532 styler.SetLineState(lineCurrent, hs.ToLineState());
533 lineCurrent++;
534
535 sc.Forward();
536 if (sc.ch == '\r' && sc.chNext == '\n') {
537 sc.Forward();
538 }
539 sc.Forward();
540
541 continue;
542 }
543
544 if (sc.atLineStart) {
545
546 if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
547 // Prevent SCE_HA_STRINGEOL from leaking back to previous line
548 sc.SetState(sc.state);
549 }
550
551 if (literate && hs.lmode == LITERATE_BIRD) {
552 if (!IsExternalStyle(sc.state)) {
553 sc.SetState(SCE_HA_LITERATE_COMMENT);
554 }
555 }
556 }
557
558 // External
559 // Literate
560 if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
561 && sc.ch == '>') {
562 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
563 sc.ForwardSetState(hs.nonexternalStyle);
564 }
565 else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
566 && ( sc.ch == ' ' || sc.ch == '\t'
567 || sc.Match("\\begin{code}"))) {
568 sc.SetState(sc.state);
569
570 while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
571 sc.Forward();
572
573 if (sc.Match("\\begin{code}")) {
574 sc.Forward(static_cast<int>(strlen("\\begin{code}")));
575
576 bool correct = true;
577
578 while (!sc.atLineEnd && sc.More()) {
579 if (sc.ch != ' ' && sc.ch != '\t') {
580 correct = false;
581 }
582 sc.Forward();
583 }
584
585 if (correct) {
586 sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
587 hs.lmode = LITERATE_BLOCK;
588 }
589 }
590 }
591 else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
592 && sc.Match("\\end{code}")) {
593 sc.SetState(SCE_HA_LITERATE_CODEDELIM);
594
595 sc.Forward(static_cast<int>(strlen("\\end{code}")));
596
597 while (!sc.atLineEnd && sc.More()) {
598 sc.Forward();
599 }
600
601 sc.SetState(SCE_HA_LITERATE_COMMENT);
602 hs.lmode = LITERATE_BIRD;
603 }
604 // Preprocessor
605 else if (sc.atLineStart && sc.ch == '#' && options.cpp
606 && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
607 sc.SetState(SCE_HA_PREPROCESSOR);
608 sc.Forward();
609 }
610 // Literate
611 else if (sc.state == SCE_HA_LITERATE_COMMENT) {
612 sc.Forward();
613 }
614 else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
615 sc.ForwardSetState(hs.nonexternalStyle);
616 }
617 // Preprocessor
618 else if (sc.state == SCE_HA_PREPROCESSOR) {
619 if (sc.atLineEnd) {
620 sc.SetState(options.stylingWithinPreprocessor
621 ? SCE_HA_DEFAULT
622 : hs.nonexternalStyle);
623 sc.Forward(); // prevent double counting a line
624 } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
625 sc.SetState(SCE_HA_DEFAULT);
626 } else {
627 sc.Forward();
628 }
629 }
630 // Haskell
631 // Operator
632 else if (sc.state == SCE_HA_OPERATOR) {
633 int style = SCE_HA_OPERATOR;
634
635 if ( sc.ch == ':'
636 && !alreadyInTheMiddleOfOperator
637 // except "::"
638 && !( sc.chNext == ':'
639 && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
640 style = SCE_HA_CAPITAL;
641 }
642
643 alreadyInTheMiddleOfOperator = false;
644
645 while (IsAnHaskellOperatorChar(sc.ch))
646 sc.Forward();
647
648 char s[100];
649 sc.GetCurrent(s, sizeof(s));
650
651 if (reserved_operators.InList(s))
652 style = SCE_HA_RESERVED_OPERATOR;
653
654 sc.ChangeState(style);
655 sc.SetState(SCE_HA_DEFAULT);
656 }
657 // String
658 else if (sc.state == SCE_HA_STRING) {
659 if (sc.atLineEnd) {
660 sc.ChangeState(SCE_HA_STRINGEOL);
661 sc.ForwardSetState(SCE_HA_DEFAULT);
662 } else if (sc.ch == '\"') {
663 sc.Forward();
664 skipMagicHash(sc, oneHash);
665 sc.SetState(SCE_HA_DEFAULT);
666 } else if (sc.ch == '\\') {
667 sc.Forward(2);
668 } else {
669 sc.Forward();
670 }
671 }
672 // Char
673 else if (sc.state == SCE_HA_CHARACTER) {
674 if (sc.atLineEnd) {
675 sc.ChangeState(SCE_HA_STRINGEOL);
676 sc.ForwardSetState(SCE_HA_DEFAULT);
677 } else if (sc.ch == '\'') {
678 sc.Forward();
679 skipMagicHash(sc, oneHash);
680 sc.SetState(SCE_HA_DEFAULT);
681 } else if (sc.ch == '\\') {
682 sc.Forward(2);
683 } else {
684 sc.Forward();
685 }
686 }
687 // Number
688 else if (sc.state == SCE_HA_NUMBER) {
689 if (sc.atLineEnd) {
690 sc.SetState(SCE_HA_DEFAULT);
691 sc.Forward(); // prevent double counting a line
692 } else if (IsADigit(sc.ch, base)) {
693 sc.Forward();
694 } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
695 sc.Forward(2);
696 dot = false;
697 } else if ((base == 10) &&
698 (sc.ch == 'e' || sc.ch == 'E') &&
699 (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
700 sc.Forward();
701 if (sc.ch == '+' || sc.ch == '-')
702 sc.Forward();
703 } else {
704 skipMagicHash(sc, twoHashes);
705 sc.SetState(SCE_HA_DEFAULT);
706 }
707 }
708 // Keyword or Identifier
709 else if (sc.state == SCE_HA_IDENTIFIER) {
710 int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
711
712 assert(IsAHaskellWordStart(sc.ch));
713
714 sc.Forward();
715
716 while (sc.More()) {
717 if (IsAHaskellWordChar(sc.ch)) {
718 sc.Forward();
719 } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
720 if (IsHaskellUpperCase(sc.chNext)) {
721 sc.Forward();
722 style = SCE_HA_CAPITAL;
723 } else if (IsAHaskellWordStart(sc.chNext)) {
724 sc.Forward();
725 style = SCE_HA_IDENTIFIER;
726 } else if (IsAnHaskellOperatorChar(sc.chNext)) {
727 sc.Forward();
728 style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
729 while (IsAnHaskellOperatorChar(sc.ch))
730 sc.Forward();
731 break;
732 } else {
733 break;
734 }
735 } else {
736 break;
737 }
738 }
739
740 skipMagicHash(sc, unlimitedHashes);
741
742 char s[100];
743 sc.GetCurrent(s, sizeof(s));
744
745 KeywordMode new_mode = HA_MODE_DEFAULT;
746
747 if (keywords.InList(s)) {
748 style = SCE_HA_KEYWORD;
749 } else if (style == SCE_HA_CAPITAL) {
750 if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
751 style = SCE_HA_MODULE;
752 new_mode = HA_MODE_IMPORT2;
753 } else if (hs.mode == HA_MODE_MODULE) {
754 style = SCE_HA_MODULE;
755 }
756 } else if (hs.mode == HA_MODE_IMPORT1 &&
757 strcmp(s,"qualified") == 0) {
758 style = SCE_HA_KEYWORD;
759 new_mode = HA_MODE_IMPORT1;
760 } else if (options.highlightSafe &&
761 hs.mode == HA_MODE_IMPORT1 &&
762 strcmp(s,"safe") == 0) {
763 style = SCE_HA_KEYWORD;
764 new_mode = HA_MODE_IMPORT1;
765 } else if (hs.mode == HA_MODE_IMPORT2) {
766 if (strcmp(s,"as") == 0) {
767 style = SCE_HA_KEYWORD;
768 new_mode = HA_MODE_IMPORT3;
769 } else if (strcmp(s,"hiding") == 0) {
770 style = SCE_HA_KEYWORD;
771 }
772 } else if (hs.mode == HA_MODE_TYPE) {
773 if (strcmp(s,"family") == 0)
774 style = SCE_HA_KEYWORD;
775 }
776
777 if (hs.mode == HA_MODE_FFI) {
778 if (ffi.InList(s)) {
779 style = SCE_HA_KEYWORD;
780 new_mode = HA_MODE_FFI;
781 }
782 }
783
784 sc.ChangeState(style);
785 sc.SetState(SCE_HA_DEFAULT);
786
787 if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
788 new_mode = HA_MODE_IMPORT1;
789 else if (strcmp(s,"module") == 0)
790 new_mode = HA_MODE_MODULE;
791 else if (strcmp(s,"foreign") == 0)
792 new_mode = HA_MODE_FFI;
793 else if (strcmp(s,"type") == 0
794 || strcmp(s,"data") == 0)
795 new_mode = HA_MODE_TYPE;
796
797 hs.mode = new_mode;
798 }
799
800 // Comments
801 // Oneliner
802 else if (sc.state == SCE_HA_COMMENTLINE) {
803 if (sc.atLineEnd) {
804 sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
805 sc.Forward(); // prevent double counting a line
806 } else if (inDashes && sc.ch != '-' && !hs.pragma) {
807 inDashes = false;
808 if (IsAnHaskellOperatorChar(sc.ch)) {
809 alreadyInTheMiddleOfOperator = true;
810 sc.ChangeState(SCE_HA_OPERATOR);
811 }
812 } else {
813 sc.Forward();
814 }
815 }
816 // Nested
817 else if (IsCommentBlockStyle(sc.state)) {
818 if (sc.Match('{','-')) {
819 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
820 sc.Forward(2);
821 hs.nestLevel++;
822 } else if (sc.Match('-','}')) {
823 sc.Forward(2);
824 assert(hs.nestLevel > 0);
825 if (hs.nestLevel > 0)
826 hs.nestLevel--;
827 sc.SetState(
828 hs.nestLevel == 0
829 ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
830 : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
831 } else {
832 sc.Forward();
833 }
834 }
835 // Pragma
836 else if (sc.state == SCE_HA_PRAGMA) {
837 if (sc.Match("#-}")) {
838 hs.pragma = false;
839 sc.Forward(3);
840 sc.SetState(SCE_HA_DEFAULT);
841 } else if (sc.Match('-','-')) {
842 sc.SetState(SCE_HA_COMMENTLINE);
843 sc.Forward(2);
844 inDashes = false;
845 } else if (sc.Match('{','-')) {
846 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
847 sc.Forward(2);
848 hs.nestLevel = 1;
849 } else {
850 sc.Forward();
851 }
852 }
853 // New state?
854 else if (sc.state == SCE_HA_DEFAULT) {
855 // Digit
856 if (IsADigit(sc.ch)) {
857 hs.mode = HA_MODE_DEFAULT;
858
859 sc.SetState(SCE_HA_NUMBER);
860 if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
861 // Match anything starting with "0x" or "0X", too
862 sc.Forward(2);
863 base = 16;
864 dot = false;
865 } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
866 // Match anything starting with "0o" or "0O", too
867 sc.Forward(2);
868 base = 8;
869 dot = false;
870 } else {
871 sc.Forward();
872 base = 10;
873 dot = true;
874 }
875 }
876 // Pragma
877 else if (sc.Match("{-#")) {
878 hs.pragma = true;
879 sc.SetState(SCE_HA_PRAGMA);
880 sc.Forward(3);
881 }
882 // Comment line
883 else if (sc.Match('-','-')) {
884 sc.SetState(SCE_HA_COMMENTLINE);
885 sc.Forward(2);
886 inDashes = true;
887 }
888 // Comment block
889 else if (sc.Match('{','-')) {
890 sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
891 sc.Forward(2);
892 hs.nestLevel = 1;
893 }
894 // String
895 else if (sc.ch == '\"') {
896 sc.SetState(SCE_HA_STRING);
897 sc.Forward();
898 }
899 // Character or quoted name or promoted term
900 else if (sc.ch == '\'') {
901 hs.mode = HA_MODE_DEFAULT;
902
903 sc.SetState(SCE_HA_CHARACTER);
904 sc.Forward();
905
906 if (options.allowQuotes) {
907 // Quoted type ''T
908 if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
909 sc.Forward();
910 sc.ChangeState(SCE_HA_IDENTIFIER);
911 } else if (sc.chNext != '\'') {
912 // Quoted name 'n or promoted constructor 'N
913 if (IsAHaskellWordStart(sc.ch)) {
914 sc.ChangeState(SCE_HA_IDENTIFIER);
915 // Promoted constructor operator ':~>
916 } else if (sc.ch == ':') {
917 alreadyInTheMiddleOfOperator = false;
918 sc.ChangeState(SCE_HA_OPERATOR);
919 // Promoted list or tuple '[T]
920 } else if (sc.ch == '[' || sc.ch== '(') {
921 sc.ChangeState(SCE_HA_OPERATOR);
922 sc.ForwardSetState(SCE_HA_DEFAULT);
923 }
924 }
925 }
926 }
927 // Operator starting with '?' or an implicit parameter
928 else if (sc.ch == '?') {
929 hs.mode = HA_MODE_DEFAULT;
930
931 alreadyInTheMiddleOfOperator = false;
932 sc.SetState(SCE_HA_OPERATOR);
933
934 if ( options.implicitParams
935 && IsAHaskellWordStart(sc.chNext)
936 && !IsHaskellUpperCase(sc.chNext)) {
937 sc.Forward();
938 sc.ChangeState(SCE_HA_IDENTIFIER);
939 }
940 }
941 // Operator
942 else if (IsAnHaskellOperatorChar(sc.ch)) {
943 hs.mode = HA_MODE_DEFAULT;
944
945 sc.SetState(SCE_HA_OPERATOR);
946 }
947 // Braces and punctuation
948 else if (sc.ch == ',' || sc.ch == ';'
949 || sc.ch == '(' || sc.ch == ')'
950 || sc.ch == '[' || sc.ch == ']'
951 || sc.ch == '{' || sc.ch == '}') {
952 sc.SetState(SCE_HA_OPERATOR);
953 sc.ForwardSetState(SCE_HA_DEFAULT);
954 }
955 // Keyword or Identifier
956 else if (IsAHaskellWordStart(sc.ch)) {
957 sc.SetState(SCE_HA_IDENTIFIER);
958 // Something we don't care about
959 } else {
960 sc.Forward();
961 }
962 }
963 // This branch should never be reached.
964 else {
965 assert(false);
966 sc.Forward();
967 }
968 }
969 sc.Complete();
970}
971
972void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
973 ,IDocument *pAccess) {
974 if (!options.fold)
975 return;
976
977 Accessor styler(pAccess, NULL);
978
979 Sci_Position lineCurrent = styler.GetLine(startPos);
980
981 if (lineCurrent <= firstImportLine) {
982 firstImportLine = -1; // readjust first import position
983 firstImportIndent = 0;
984 }
985
986 const Sci_Position maxPos = startPos + length;
987 const Sci_Position maxLines =
988 maxPos == styler.Length()
989 ? styler.GetLine(maxPos)
990 : styler.GetLine(maxPos - 1); // Requested last line
991 const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
992
993 // Backtrack to previous non-blank line so we can determine indent level
994 // for any white space lines
995 // and so we can fix any preceding fold level (which is why we go back
996 // at least one line in all cases)
997 bool importHere = LineContainsImport(lineCurrent, styler);
998 int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
999
1000 while (lineCurrent > 0) {
1001 lineCurrent--;
1002 importHere = LineContainsImport(lineCurrent, styler);
1003 indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
1004 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
1005 break;
1006 }
1007
1008 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1009
1010 if (importHere) {
1011 indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1012 if (firstImportLine == -1) {
1013 firstImportLine = lineCurrent;
1014 firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1015 }
1016 if (firstImportLine != lineCurrent) {
1017 indentCurrentLevel++;
1018 }
1019 }
1020
1021 indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1022
1023 // Process all characters to end of requested range
1024 //that hangs over the end of the range. Cap processing in all cases
1025 // to end of document.
1026 while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1027
1028 // Gather info
1029 Sci_Position lineNext = lineCurrent + 1;
1030 importHere = false;
1031 int indentNext = indentCurrent;
1032
1033 if (lineNext <= docLines) {
1034 // Information about next line is only available if not at end of document
1035 importHere = LineContainsImport(lineNext, styler);
1036 indentNext = IndentAmountWithOffset(styler, lineNext);
1037 }
1038 if (indentNext & SC_FOLDLEVELWHITEFLAG)
1039 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1040
1041 // Skip past any blank lines for next indent level info; we skip also
1042 // comments (all comments, not just those starting in column 0)
1043 // which effectively folds them into surrounding code rather
1044 // than screwing up folding.
1045
1046 while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1047 lineNext++;
1048 importHere = LineContainsImport(lineNext, styler);
1049 indentNext = IndentAmountWithOffset(styler, lineNext);
1050 }
1051
1052 int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1053
1054 if (importHere) {
1055 indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1056 if (firstImportLine == -1) {
1057 firstImportLine = lineNext;
1058 firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1059 }
1060 if (firstImportLine != lineNext) {
1061 indentNextLevel++;
1062 }
1063 }
1064
1065 indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1066
1067 const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1068
1069 // Now set all the indent levels on the lines we skipped
1070 // Do this from end to start. Once we encounter one line
1071 // which is indented more than the line after the end of
1072 // the comment-block, use the level of the block before
1073
1074 Sci_Position skipLine = lineNext;
1075 int skipLevel = indentNextLevel;
1076
1077 while (--skipLine > lineCurrent) {
1078 int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1079
1080 if (options.foldCompact) {
1081 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1082 skipLevel = levelBeforeComments;
1083 }
1084
1085 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1086
1087 styler.SetLevel(skipLine, skipLevel | whiteFlag);
1088 } else {
1089 if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1090 && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1091 skipLevel = levelBeforeComments;
1092 }
1093
1094 styler.SetLevel(skipLine, skipLevel);
1095 }
1096 }
1097
1098 int lev = indentCurrent;
1099
1100 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1101 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1102 lev |= SC_FOLDLEVELHEADERFLAG;
1103 }
1104
1105 // Set fold level for this line and move to next line
1106 styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1107
1108 indentCurrent = indentNext;
1109 indentCurrentLevel = indentNextLevel;
1110 lineCurrent = lineNext;
1111 }
1112
1113 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1114 // header flag set; the loop above is crafted to take care of this case!
1115 //styler.SetLevel(lineCurrent, indentCurrent);
1116}
1117
1118LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1119LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1120