1 | /****************************************************************** |
2 | * LexHaskell.cxx |
3 | * |
4 | * A haskell lexer for the scintilla code control. |
5 | * Some stuff "lended" from LexPython.cxx and LexCPP.cxx. |
6 | * External lexer stuff inspired from the caml external lexer. |
7 | * Folder copied from Python's. |
8 | * |
9 | * Written by Tobias Engvall - tumm at dtek dot chalmers dot se |
10 | * |
11 | * Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com |
12 | * |
13 | * Improved by kudah <kudahkukarek@gmail.com> |
14 | * |
15 | * TODO: |
16 | * * A proper lexical folder to fold group declarations, comments, pragmas, |
17 | * #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc, |
18 | * etc. |
19 | * |
20 | *****************************************************************/ |
21 | #include <stdlib.h> |
22 | #include <string.h> |
23 | #include <stdio.h> |
24 | #include <stdarg.h> |
25 | #include <assert.h> |
26 | #include <ctype.h> |
27 | |
28 | #include <string> |
29 | #include <string_view> |
30 | #include <vector> |
31 | #include <map> |
32 | #include <functional> |
33 | |
34 | #include "ILexer.h" |
35 | #include "Scintilla.h" |
36 | #include "SciLexer.h" |
37 | |
38 | #include "PropSetSimple.h" |
39 | #include "WordList.h" |
40 | #include "LexAccessor.h" |
41 | #include "Accessor.h" |
42 | #include "StyleContext.h" |
43 | #include "CharacterSet.h" |
44 | #include "CharacterCategory.h" |
45 | #include "LexerModule.h" |
46 | #include "OptionSet.h" |
47 | #include "DefaultLexer.h" |
48 | |
49 | using namespace Scintilla; |
50 | using namespace Lexilla; |
51 | |
52 | // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682 |
53 | // Note, letter modifiers are prohibited. |
54 | |
55 | static int u_iswupper (int ch) { |
56 | CharacterCategory c = CategoriseCharacter(ch); |
57 | return c == ccLu || c == ccLt; |
58 | } |
59 | |
60 | static int u_iswalpha (int ch) { |
61 | CharacterCategory c = CategoriseCharacter(ch); |
62 | return c == ccLl || c == ccLu || c == ccLt || c == ccLo; |
63 | } |
64 | |
65 | static int u_iswalnum (int ch) { |
66 | CharacterCategory c = CategoriseCharacter(ch); |
67 | return c == ccLl || c == ccLu || c == ccLt || c == ccLo |
68 | || c == ccNd || c == ccNo; |
69 | } |
70 | |
71 | static int u_IsHaskellSymbol(int ch) { |
72 | CharacterCategory c = CategoriseCharacter(ch); |
73 | return c == ccPc || c == ccPd || c == ccPo |
74 | || c == ccSm || c == ccSc || c == ccSk || c == ccSo; |
75 | } |
76 | |
77 | static inline bool IsHaskellLetter(const int ch) { |
78 | if (IsASCII(ch)) { |
79 | return (ch >= 'a' && ch <= 'z') |
80 | || (ch >= 'A' && ch <= 'Z'); |
81 | } else { |
82 | return u_iswalpha(ch) != 0; |
83 | } |
84 | } |
85 | |
86 | static inline bool IsHaskellAlphaNumeric(const int ch) { |
87 | if (IsASCII(ch)) { |
88 | return IsAlphaNumeric(ch); |
89 | } else { |
90 | return u_iswalnum(ch) != 0; |
91 | } |
92 | } |
93 | |
94 | static inline bool IsHaskellUpperCase(const int ch) { |
95 | if (IsASCII(ch)) { |
96 | return ch >= 'A' && ch <= 'Z'; |
97 | } else { |
98 | return u_iswupper(ch) != 0; |
99 | } |
100 | } |
101 | |
102 | static inline bool IsAnHaskellOperatorChar(const int ch) { |
103 | if (IsASCII(ch)) { |
104 | return |
105 | ( ch == '!' || ch == '#' || ch == '$' || ch == '%' |
106 | || ch == '&' || ch == '*' || ch == '+' || ch == '-' |
107 | || ch == '.' || ch == '/' || ch == ':' || ch == '<' |
108 | || ch == '=' || ch == '>' || ch == '?' || ch == '@' |
109 | || ch == '^' || ch == '|' || ch == '~' || ch == '\\'); |
110 | } else { |
111 | return u_IsHaskellSymbol(ch) != 0; |
112 | } |
113 | } |
114 | |
115 | static inline bool IsAHaskellWordStart(const int ch) { |
116 | return IsHaskellLetter(ch) || ch == '_'; |
117 | } |
118 | |
119 | static inline bool IsAHaskellWordChar(const int ch) { |
120 | return ( IsHaskellAlphaNumeric(ch) |
121 | || ch == '_' |
122 | || ch == '\''); |
123 | } |
124 | |
125 | static inline bool (int style) { |
126 | return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3); |
127 | } |
128 | |
129 | static inline bool (int style) { |
130 | return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3) |
131 | || ( style == SCE_HA_LITERATE_COMMENT |
132 | || style == SCE_HA_LITERATE_CODEDELIM); |
133 | } |
134 | |
135 | // styles which do not belong to Haskell, but to external tools |
136 | static inline bool IsExternalStyle(int style) { |
137 | return ( style == SCE_HA_PREPROCESSOR |
138 | || style == SCE_HA_LITERATE_COMMENT |
139 | || style == SCE_HA_LITERATE_CODEDELIM); |
140 | } |
141 | |
142 | static inline int (const unsigned int nestLevel) { |
143 | return SCE_HA_COMMENTBLOCK + (nestLevel % 3); |
144 | } |
145 | |
146 | // Mangled version of lexlib/Accessor.cxx IndentAmount. |
147 | // Modified to treat comment blocks as whitespace |
148 | // plus special case for commentline/preprocessor. |
149 | static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) { |
150 | |
151 | // Determines the indentation level of the current line |
152 | // Comment blocks are treated as whitespace |
153 | |
154 | Sci_Position pos = styler.LineStart(line); |
155 | Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
156 | |
157 | char ch = styler[pos]; |
158 | int style = styler.StyleAt(pos); |
159 | |
160 | int indent = 0; |
161 | bool inPrevPrefix = line > 0; |
162 | |
163 | Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0; |
164 | |
165 | while (( ch == ' ' || ch == '\t' |
166 | || IsCommentBlockStyle(style) |
167 | || style == SCE_HA_LITERATE_CODEDELIM) |
168 | && (pos < eol_pos)) { |
169 | if (inPrevPrefix) { |
170 | char chPrev = styler[posPrev++]; |
171 | if (chPrev != ' ' && chPrev != '\t') { |
172 | inPrevPrefix = false; |
173 | } |
174 | } |
175 | if (ch == '\t') { |
176 | indent = (indent / 8 + 1) * 8; |
177 | } else { // Space or comment block |
178 | indent++; |
179 | } |
180 | pos++; |
181 | ch = styler[pos]; |
182 | style = styler.StyleAt(pos); |
183 | } |
184 | |
185 | indent += SC_FOLDLEVELBASE; |
186 | // if completely empty line or the start of a comment or preprocessor... |
187 | if ( styler.LineStart(line) == styler.Length() |
188 | || ch == ' ' |
189 | || ch == '\t' |
190 | || ch == '\n' |
191 | || ch == '\r' |
192 | || IsCommentStyle(style) |
193 | || style == SCE_HA_PREPROCESSOR) |
194 | return indent | SC_FOLDLEVELWHITEFLAG; |
195 | else |
196 | return indent; |
197 | } |
198 | |
199 | struct OptionsHaskell { |
200 | bool magicHash; |
201 | bool allowQuotes; |
202 | bool implicitParams; |
203 | bool highlightSafe; |
204 | bool cpp; |
205 | bool stylingWithinPreprocessor; |
206 | bool fold; |
207 | bool ; |
208 | bool foldCompact; |
209 | bool foldImports; |
210 | OptionsHaskell() { |
211 | magicHash = true; // Widespread use, enabled by default. |
212 | allowQuotes = true; // Widespread use, enabled by default. |
213 | implicitParams = false; // Fell out of favor, seldom used, disabled. |
214 | highlightSafe = true; // Moderately used, doesn't hurt to enable. |
215 | cpp = true; // Widespread use, enabled by default; |
216 | stylingWithinPreprocessor = false; |
217 | fold = false; |
218 | foldComment = false; |
219 | foldCompact = false; |
220 | foldImports = false; |
221 | } |
222 | }; |
223 | |
224 | static const char * const haskellWordListDesc[] = { |
225 | "Keywords" , |
226 | "FFI" , |
227 | "Reserved operators" , |
228 | 0 |
229 | }; |
230 | |
231 | struct OptionSetHaskell : public OptionSet<OptionsHaskell> { |
232 | OptionSetHaskell() { |
233 | DefineProperty("lexer.haskell.allow.hash" , &OptionsHaskell::magicHash, |
234 | "Set to 0 to disallow the '#' character at the end of identifiers and " |
235 | "literals with the haskell lexer " |
236 | "(GHC -XMagicHash extension)" ); |
237 | |
238 | DefineProperty("lexer.haskell.allow.quotes" , &OptionsHaskell::allowQuotes, |
239 | "Set to 0 to disable highlighting of Template Haskell name quotations " |
240 | "and promoted constructors " |
241 | "(GHC -XTemplateHaskell and -XDataKinds extensions)" ); |
242 | |
243 | DefineProperty("lexer.haskell.allow.questionmark" , &OptionsHaskell::implicitParams, |
244 | "Set to 1 to allow the '?' character at the start of identifiers " |
245 | "with the haskell lexer " |
246 | "(GHC & Hugs -XImplicitParams extension)" ); |
247 | |
248 | DefineProperty("lexer.haskell.import.safe" , &OptionsHaskell::highlightSafe, |
249 | "Set to 0 to disallow \"safe\" keyword in imports " |
250 | "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)" ); |
251 | |
252 | DefineProperty("lexer.haskell.cpp" , &OptionsHaskell::cpp, |
253 | "Set to 0 to disable C-preprocessor highlighting " |
254 | "(-XCPP extension)" ); |
255 | |
256 | DefineProperty("styling.within.preprocessor" , &OptionsHaskell::stylingWithinPreprocessor, |
257 | "For Haskell code, determines whether all preprocessor code is styled in the " |
258 | "preprocessor style (0, the default) or only from the initial # to the end " |
259 | "of the command word(1)." |
260 | ); |
261 | |
262 | DefineProperty("fold" , &OptionsHaskell::fold); |
263 | |
264 | DefineProperty("fold.comment" , &OptionsHaskell::foldComment); |
265 | |
266 | DefineProperty("fold.compact" , &OptionsHaskell::foldCompact); |
267 | |
268 | DefineProperty("fold.haskell.imports" , &OptionsHaskell::foldImports, |
269 | "Set to 1 to enable folding of import declarations" ); |
270 | |
271 | DefineWordListSets(haskellWordListDesc); |
272 | } |
273 | }; |
274 | |
275 | class LexerHaskell : public DefaultLexer { |
276 | bool literate; |
277 | Sci_Position firstImportLine; |
278 | int firstImportIndent; |
279 | WordList keywords; |
280 | WordList ffi; |
281 | WordList reserved_operators; |
282 | OptionsHaskell options; |
283 | OptionSetHaskell osHaskell; |
284 | |
285 | enum HashCount { |
286 | oneHash |
287 | ,twoHashes |
288 | ,unlimitedHashes |
289 | }; |
290 | |
291 | enum KeywordMode { |
292 | HA_MODE_DEFAULT = 0 |
293 | ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name. |
294 | ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding". |
295 | ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding" |
296 | ,HA_MODE_MODULE = 4 // after "module", before module name. |
297 | ,HA_MODE_FFI = 5 // after "foreign", before FFI keywords |
298 | ,HA_MODE_TYPE = 6 // after "type" or "data", before "family" |
299 | }; |
300 | |
301 | enum LiterateMode { |
302 | LITERATE_BIRD = 0 // if '>' is the first character on the line, |
303 | // color '>' as a codedelim and the rest of |
304 | // the line as code. |
305 | // else if "\begin{code}" is the only word on the |
306 | // line except whitespace, switch to LITERATE_BLOCK |
307 | // otherwise color the line as a literate comment. |
308 | ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column |
309 | // 0 ignoring all later characters, color the line |
310 | // as a codedelim and switch to LITERATE_BIRD |
311 | // otherwise color the line as code. |
312 | }; |
313 | |
314 | struct HaskellLineInfo { |
315 | unsigned int nestLevel; // 22 bits ought to be enough for anybody |
316 | unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes |
317 | // beyond 31. |
318 | bool pragma; |
319 | LiterateMode lmode; |
320 | KeywordMode mode; |
321 | |
322 | HaskellLineInfo(int state) : |
323 | nestLevel (state >> 10) |
324 | , nonexternalStyle ((state >> 5) & 0x1F) |
325 | , pragma ((state >> 4) & 0x1) |
326 | , lmode (static_cast<LiterateMode>((state >> 3) & 0x1)) |
327 | , mode (static_cast<KeywordMode>(state & 0x7)) |
328 | {} |
329 | |
330 | int ToLineState() { |
331 | return |
332 | (nestLevel << 10) |
333 | | (nonexternalStyle << 5) |
334 | | (pragma << 4) |
335 | | (lmode << 3) |
336 | | mode; |
337 | } |
338 | }; |
339 | |
340 | inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const { |
341 | if (options.magicHash && sc.ch == '#') { |
342 | sc.Forward(); |
343 | if (hashes == twoHashes && sc.ch == '#') { |
344 | sc.Forward(); |
345 | } else if (hashes == unlimitedHashes) { |
346 | while (sc.ch == '#') { |
347 | sc.Forward(); |
348 | } |
349 | } |
350 | } |
351 | } |
352 | |
353 | bool LineContainsImport(const Sci_Position line, Accessor &styler) const { |
354 | if (options.foldImports) { |
355 | Sci_Position currentPos = styler.LineStart(line); |
356 | int style = styler.StyleAt(currentPos); |
357 | |
358 | Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
359 | |
360 | while (currentPos < eol_pos) { |
361 | int ch = styler[currentPos]; |
362 | style = styler.StyleAt(currentPos); |
363 | |
364 | if (ch == ' ' || ch == '\t' |
365 | || IsCommentBlockStyle(style) |
366 | || style == SCE_HA_LITERATE_CODEDELIM) { |
367 | currentPos++; |
368 | } else { |
369 | break; |
370 | } |
371 | } |
372 | |
373 | return (style == SCE_HA_KEYWORD |
374 | && styler.Match(currentPos, "import" )); |
375 | } else { |
376 | return false; |
377 | } |
378 | } |
379 | |
380 | inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const { |
381 | const int indent = HaskellIndentAmount(styler, line); |
382 | const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK; |
383 | return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE) |
384 | ? indent |
385 | : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK); |
386 | } |
387 | |
388 | inline int IndentLevelRemoveIndentOffset(const int indentLevel) const { |
389 | return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE) |
390 | ? indentLevel |
391 | : indentLevel - firstImportIndent; |
392 | } |
393 | |
394 | public: |
395 | LexerHaskell(bool literate_) |
396 | : DefaultLexer(literate_ ? "literatehaskell" : "haskell" , literate_ ? SCLEX_LITERATEHASKELL : SCLEX_HASKELL) |
397 | , literate(literate_) |
398 | , firstImportLine(-1) |
399 | , firstImportIndent(0) |
400 | {} |
401 | virtual ~LexerHaskell() {} |
402 | |
403 | void SCI_METHOD Release() override { |
404 | delete this; |
405 | } |
406 | |
407 | int SCI_METHOD Version() const override { |
408 | return lvRelease5; |
409 | } |
410 | |
411 | const char * SCI_METHOD PropertyNames() override { |
412 | return osHaskell.PropertyNames(); |
413 | } |
414 | |
415 | int SCI_METHOD PropertyType(const char *name) override { |
416 | return osHaskell.PropertyType(name); |
417 | } |
418 | |
419 | const char * SCI_METHOD DescribeProperty(const char *name) override { |
420 | return osHaskell.DescribeProperty(name); |
421 | } |
422 | |
423 | Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
424 | |
425 | const char * SCI_METHOD PropertyGet(const char *key) override { |
426 | return osHaskell.PropertyGet(key); |
427 | } |
428 | |
429 | const char * SCI_METHOD DescribeWordListSets() override { |
430 | return osHaskell.DescribeWordListSets(); |
431 | } |
432 | |
433 | Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
434 | |
435 | void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
436 | |
437 | void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
438 | |
439 | void * SCI_METHOD PrivateCall(int, void *) override { |
440 | return 0; |
441 | } |
442 | |
443 | static ILexer5 *LexerFactoryHaskell() { |
444 | return new LexerHaskell(false); |
445 | } |
446 | |
447 | static ILexer5 *LexerFactoryLiterateHaskell() { |
448 | return new LexerHaskell(true); |
449 | } |
450 | }; |
451 | |
452 | Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) { |
453 | if (osHaskell.PropertySet(&options, key, val)) { |
454 | return 0; |
455 | } |
456 | return -1; |
457 | } |
458 | |
459 | Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) { |
460 | WordList *wordListN = 0; |
461 | switch (n) { |
462 | case 0: |
463 | wordListN = &keywords; |
464 | break; |
465 | case 1: |
466 | wordListN = &ffi; |
467 | break; |
468 | case 2: |
469 | wordListN = &reserved_operators; |
470 | break; |
471 | } |
472 | Sci_Position firstModification = -1; |
473 | if (wordListN) { |
474 | WordList wlNew; |
475 | wlNew.Set(wl); |
476 | if (*wordListN != wlNew) { |
477 | wordListN->Set(wl); |
478 | firstModification = 0; |
479 | } |
480 | } |
481 | return firstModification; |
482 | } |
483 | |
484 | void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle |
485 | ,IDocument *pAccess) { |
486 | LexAccessor styler(pAccess); |
487 | |
488 | Sci_Position lineCurrent = styler.GetLine(startPos); |
489 | |
490 | HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0); |
491 | |
492 | // Do not leak onto next line |
493 | if (initStyle == SCE_HA_STRINGEOL) |
494 | initStyle = SCE_HA_DEFAULT; |
495 | else if (initStyle == SCE_HA_LITERATE_CODEDELIM) |
496 | initStyle = hs.nonexternalStyle; |
497 | |
498 | StyleContext sc(startPos, length, initStyle, styler); |
499 | |
500 | int base = 10; |
501 | bool dot = false; |
502 | |
503 | bool inDashes = false; |
504 | bool alreadyInTheMiddleOfOperator = false; |
505 | |
506 | assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0)); |
507 | |
508 | while (sc.More()) { |
509 | // Check for state end |
510 | |
511 | if (!IsExternalStyle(sc.state)) { |
512 | hs.nonexternalStyle = sc.state; |
513 | } |
514 | |
515 | // For lexer to work, states should unconditionally forward at least one |
516 | // character. |
517 | // If they don't, they should still check if they are at line end and |
518 | // forward if so. |
519 | // If a state forwards more than one character, it should check every time |
520 | // that it is not a line end and cease forwarding otherwise. |
521 | if (sc.atLineEnd) { |
522 | // Remember the line state for future incremental lexing |
523 | styler.SetLineState(lineCurrent, hs.ToLineState()); |
524 | lineCurrent++; |
525 | } |
526 | |
527 | // Handle line continuation generically. |
528 | if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r') |
529 | && ( sc.state == SCE_HA_STRING |
530 | || sc.state == SCE_HA_PREPROCESSOR)) { |
531 | // Remember the line state for future incremental lexing |
532 | styler.SetLineState(lineCurrent, hs.ToLineState()); |
533 | lineCurrent++; |
534 | |
535 | sc.Forward(); |
536 | if (sc.ch == '\r' && sc.chNext == '\n') { |
537 | sc.Forward(); |
538 | } |
539 | sc.Forward(); |
540 | |
541 | continue; |
542 | } |
543 | |
544 | if (sc.atLineStart) { |
545 | |
546 | if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) { |
547 | // Prevent SCE_HA_STRINGEOL from leaking back to previous line |
548 | sc.SetState(sc.state); |
549 | } |
550 | |
551 | if (literate && hs.lmode == LITERATE_BIRD) { |
552 | if (!IsExternalStyle(sc.state)) { |
553 | sc.SetState(SCE_HA_LITERATE_COMMENT); |
554 | } |
555 | } |
556 | } |
557 | |
558 | // External |
559 | // Literate |
560 | if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart |
561 | && sc.ch == '>') { |
562 | sc.SetState(SCE_HA_LITERATE_CODEDELIM); |
563 | sc.ForwardSetState(hs.nonexternalStyle); |
564 | } |
565 | else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart |
566 | && ( sc.ch == ' ' || sc.ch == '\t' |
567 | || sc.Match("\\begin{code}" ))) { |
568 | sc.SetState(sc.state); |
569 | |
570 | while ((sc.ch == ' ' || sc.ch == '\t') && sc.More()) |
571 | sc.Forward(); |
572 | |
573 | if (sc.Match("\\begin{code}" )) { |
574 | sc.Forward(static_cast<int>(strlen("\\begin{code}" ))); |
575 | |
576 | bool correct = true; |
577 | |
578 | while (!sc.atLineEnd && sc.More()) { |
579 | if (sc.ch != ' ' && sc.ch != '\t') { |
580 | correct = false; |
581 | } |
582 | sc.Forward(); |
583 | } |
584 | |
585 | if (correct) { |
586 | sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end |
587 | hs.lmode = LITERATE_BLOCK; |
588 | } |
589 | } |
590 | } |
591 | else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart |
592 | && sc.Match("\\end{code}" )) { |
593 | sc.SetState(SCE_HA_LITERATE_CODEDELIM); |
594 | |
595 | sc.Forward(static_cast<int>(strlen("\\end{code}" ))); |
596 | |
597 | while (!sc.atLineEnd && sc.More()) { |
598 | sc.Forward(); |
599 | } |
600 | |
601 | sc.SetState(SCE_HA_LITERATE_COMMENT); |
602 | hs.lmode = LITERATE_BIRD; |
603 | } |
604 | // Preprocessor |
605 | else if (sc.atLineStart && sc.ch == '#' && options.cpp |
606 | && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) { |
607 | sc.SetState(SCE_HA_PREPROCESSOR); |
608 | sc.Forward(); |
609 | } |
610 | // Literate |
611 | else if (sc.state == SCE_HA_LITERATE_COMMENT) { |
612 | sc.Forward(); |
613 | } |
614 | else if (sc.state == SCE_HA_LITERATE_CODEDELIM) { |
615 | sc.ForwardSetState(hs.nonexternalStyle); |
616 | } |
617 | // Preprocessor |
618 | else if (sc.state == SCE_HA_PREPROCESSOR) { |
619 | if (sc.atLineEnd) { |
620 | sc.SetState(options.stylingWithinPreprocessor |
621 | ? SCE_HA_DEFAULT |
622 | : hs.nonexternalStyle); |
623 | sc.Forward(); // prevent double counting a line |
624 | } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) { |
625 | sc.SetState(SCE_HA_DEFAULT); |
626 | } else { |
627 | sc.Forward(); |
628 | } |
629 | } |
630 | // Haskell |
631 | // Operator |
632 | else if (sc.state == SCE_HA_OPERATOR) { |
633 | int style = SCE_HA_OPERATOR; |
634 | |
635 | if ( sc.ch == ':' |
636 | && !alreadyInTheMiddleOfOperator |
637 | // except "::" |
638 | && !( sc.chNext == ':' |
639 | && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) { |
640 | style = SCE_HA_CAPITAL; |
641 | } |
642 | |
643 | alreadyInTheMiddleOfOperator = false; |
644 | |
645 | while (IsAnHaskellOperatorChar(sc.ch)) |
646 | sc.Forward(); |
647 | |
648 | char s[100]; |
649 | sc.GetCurrent(s, sizeof(s)); |
650 | |
651 | if (reserved_operators.InList(s)) |
652 | style = SCE_HA_RESERVED_OPERATOR; |
653 | |
654 | sc.ChangeState(style); |
655 | sc.SetState(SCE_HA_DEFAULT); |
656 | } |
657 | // String |
658 | else if (sc.state == SCE_HA_STRING) { |
659 | if (sc.atLineEnd) { |
660 | sc.ChangeState(SCE_HA_STRINGEOL); |
661 | sc.ForwardSetState(SCE_HA_DEFAULT); |
662 | } else if (sc.ch == '\"') { |
663 | sc.Forward(); |
664 | skipMagicHash(sc, oneHash); |
665 | sc.SetState(SCE_HA_DEFAULT); |
666 | } else if (sc.ch == '\\') { |
667 | sc.Forward(2); |
668 | } else { |
669 | sc.Forward(); |
670 | } |
671 | } |
672 | // Char |
673 | else if (sc.state == SCE_HA_CHARACTER) { |
674 | if (sc.atLineEnd) { |
675 | sc.ChangeState(SCE_HA_STRINGEOL); |
676 | sc.ForwardSetState(SCE_HA_DEFAULT); |
677 | } else if (sc.ch == '\'') { |
678 | sc.Forward(); |
679 | skipMagicHash(sc, oneHash); |
680 | sc.SetState(SCE_HA_DEFAULT); |
681 | } else if (sc.ch == '\\') { |
682 | sc.Forward(2); |
683 | } else { |
684 | sc.Forward(); |
685 | } |
686 | } |
687 | // Number |
688 | else if (sc.state == SCE_HA_NUMBER) { |
689 | if (sc.atLineEnd) { |
690 | sc.SetState(SCE_HA_DEFAULT); |
691 | sc.Forward(); // prevent double counting a line |
692 | } else if (IsADigit(sc.ch, base)) { |
693 | sc.Forward(); |
694 | } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) { |
695 | sc.Forward(2); |
696 | dot = false; |
697 | } else if ((base == 10) && |
698 | (sc.ch == 'e' || sc.ch == 'E') && |
699 | (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) { |
700 | sc.Forward(); |
701 | if (sc.ch == '+' || sc.ch == '-') |
702 | sc.Forward(); |
703 | } else { |
704 | skipMagicHash(sc, twoHashes); |
705 | sc.SetState(SCE_HA_DEFAULT); |
706 | } |
707 | } |
708 | // Keyword or Identifier |
709 | else if (sc.state == SCE_HA_IDENTIFIER) { |
710 | int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER; |
711 | |
712 | assert(IsAHaskellWordStart(sc.ch)); |
713 | |
714 | sc.Forward(); |
715 | |
716 | while (sc.More()) { |
717 | if (IsAHaskellWordChar(sc.ch)) { |
718 | sc.Forward(); |
719 | } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) { |
720 | if (IsHaskellUpperCase(sc.chNext)) { |
721 | sc.Forward(); |
722 | style = SCE_HA_CAPITAL; |
723 | } else if (IsAHaskellWordStart(sc.chNext)) { |
724 | sc.Forward(); |
725 | style = SCE_HA_IDENTIFIER; |
726 | } else if (IsAnHaskellOperatorChar(sc.chNext)) { |
727 | sc.Forward(); |
728 | style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR; |
729 | while (IsAnHaskellOperatorChar(sc.ch)) |
730 | sc.Forward(); |
731 | break; |
732 | } else { |
733 | break; |
734 | } |
735 | } else { |
736 | break; |
737 | } |
738 | } |
739 | |
740 | skipMagicHash(sc, unlimitedHashes); |
741 | |
742 | char s[100]; |
743 | sc.GetCurrent(s, sizeof(s)); |
744 | |
745 | KeywordMode new_mode = HA_MODE_DEFAULT; |
746 | |
747 | if (keywords.InList(s)) { |
748 | style = SCE_HA_KEYWORD; |
749 | } else if (style == SCE_HA_CAPITAL) { |
750 | if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) { |
751 | style = SCE_HA_MODULE; |
752 | new_mode = HA_MODE_IMPORT2; |
753 | } else if (hs.mode == HA_MODE_MODULE) { |
754 | style = SCE_HA_MODULE; |
755 | } |
756 | } else if (hs.mode == HA_MODE_IMPORT1 && |
757 | strcmp(s,"qualified" ) == 0) { |
758 | style = SCE_HA_KEYWORD; |
759 | new_mode = HA_MODE_IMPORT1; |
760 | } else if (options.highlightSafe && |
761 | hs.mode == HA_MODE_IMPORT1 && |
762 | strcmp(s,"safe" ) == 0) { |
763 | style = SCE_HA_KEYWORD; |
764 | new_mode = HA_MODE_IMPORT1; |
765 | } else if (hs.mode == HA_MODE_IMPORT2) { |
766 | if (strcmp(s,"as" ) == 0) { |
767 | style = SCE_HA_KEYWORD; |
768 | new_mode = HA_MODE_IMPORT3; |
769 | } else if (strcmp(s,"hiding" ) == 0) { |
770 | style = SCE_HA_KEYWORD; |
771 | } |
772 | } else if (hs.mode == HA_MODE_TYPE) { |
773 | if (strcmp(s,"family" ) == 0) |
774 | style = SCE_HA_KEYWORD; |
775 | } |
776 | |
777 | if (hs.mode == HA_MODE_FFI) { |
778 | if (ffi.InList(s)) { |
779 | style = SCE_HA_KEYWORD; |
780 | new_mode = HA_MODE_FFI; |
781 | } |
782 | } |
783 | |
784 | sc.ChangeState(style); |
785 | sc.SetState(SCE_HA_DEFAULT); |
786 | |
787 | if (strcmp(s,"import" ) == 0 && hs.mode != HA_MODE_FFI) |
788 | new_mode = HA_MODE_IMPORT1; |
789 | else if (strcmp(s,"module" ) == 0) |
790 | new_mode = HA_MODE_MODULE; |
791 | else if (strcmp(s,"foreign" ) == 0) |
792 | new_mode = HA_MODE_FFI; |
793 | else if (strcmp(s,"type" ) == 0 |
794 | || strcmp(s,"data" ) == 0) |
795 | new_mode = HA_MODE_TYPE; |
796 | |
797 | hs.mode = new_mode; |
798 | } |
799 | |
800 | // Comments |
801 | // Oneliner |
802 | else if (sc.state == SCE_HA_COMMENTLINE) { |
803 | if (sc.atLineEnd) { |
804 | sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT); |
805 | sc.Forward(); // prevent double counting a line |
806 | } else if (inDashes && sc.ch != '-' && !hs.pragma) { |
807 | inDashes = false; |
808 | if (IsAnHaskellOperatorChar(sc.ch)) { |
809 | alreadyInTheMiddleOfOperator = true; |
810 | sc.ChangeState(SCE_HA_OPERATOR); |
811 | } |
812 | } else { |
813 | sc.Forward(); |
814 | } |
815 | } |
816 | // Nested |
817 | else if (IsCommentBlockStyle(sc.state)) { |
818 | if (sc.Match('{','-')) { |
819 | sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel)); |
820 | sc.Forward(2); |
821 | hs.nestLevel++; |
822 | } else if (sc.Match('-','}')) { |
823 | sc.Forward(2); |
824 | assert(hs.nestLevel > 0); |
825 | if (hs.nestLevel > 0) |
826 | hs.nestLevel--; |
827 | sc.SetState( |
828 | hs.nestLevel == 0 |
829 | ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT) |
830 | : CommentBlockStyleFromNestLevel(hs.nestLevel - 1)); |
831 | } else { |
832 | sc.Forward(); |
833 | } |
834 | } |
835 | // Pragma |
836 | else if (sc.state == SCE_HA_PRAGMA) { |
837 | if (sc.Match("#-}" )) { |
838 | hs.pragma = false; |
839 | sc.Forward(3); |
840 | sc.SetState(SCE_HA_DEFAULT); |
841 | } else if (sc.Match('-','-')) { |
842 | sc.SetState(SCE_HA_COMMENTLINE); |
843 | sc.Forward(2); |
844 | inDashes = false; |
845 | } else if (sc.Match('{','-')) { |
846 | sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel)); |
847 | sc.Forward(2); |
848 | hs.nestLevel = 1; |
849 | } else { |
850 | sc.Forward(); |
851 | } |
852 | } |
853 | // New state? |
854 | else if (sc.state == SCE_HA_DEFAULT) { |
855 | // Digit |
856 | if (IsADigit(sc.ch)) { |
857 | hs.mode = HA_MODE_DEFAULT; |
858 | |
859 | sc.SetState(SCE_HA_NUMBER); |
860 | if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) { |
861 | // Match anything starting with "0x" or "0X", too |
862 | sc.Forward(2); |
863 | base = 16; |
864 | dot = false; |
865 | } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) { |
866 | // Match anything starting with "0o" or "0O", too |
867 | sc.Forward(2); |
868 | base = 8; |
869 | dot = false; |
870 | } else { |
871 | sc.Forward(); |
872 | base = 10; |
873 | dot = true; |
874 | } |
875 | } |
876 | // Pragma |
877 | else if (sc.Match("{-#" )) { |
878 | hs.pragma = true; |
879 | sc.SetState(SCE_HA_PRAGMA); |
880 | sc.Forward(3); |
881 | } |
882 | // Comment line |
883 | else if (sc.Match('-','-')) { |
884 | sc.SetState(SCE_HA_COMMENTLINE); |
885 | sc.Forward(2); |
886 | inDashes = true; |
887 | } |
888 | // Comment block |
889 | else if (sc.Match('{','-')) { |
890 | sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel)); |
891 | sc.Forward(2); |
892 | hs.nestLevel = 1; |
893 | } |
894 | // String |
895 | else if (sc.ch == '\"') { |
896 | sc.SetState(SCE_HA_STRING); |
897 | sc.Forward(); |
898 | } |
899 | // Character or quoted name or promoted term |
900 | else if (sc.ch == '\'') { |
901 | hs.mode = HA_MODE_DEFAULT; |
902 | |
903 | sc.SetState(SCE_HA_CHARACTER); |
904 | sc.Forward(); |
905 | |
906 | if (options.allowQuotes) { |
907 | // Quoted type ''T |
908 | if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) { |
909 | sc.Forward(); |
910 | sc.ChangeState(SCE_HA_IDENTIFIER); |
911 | } else if (sc.chNext != '\'') { |
912 | // Quoted name 'n or promoted constructor 'N |
913 | if (IsAHaskellWordStart(sc.ch)) { |
914 | sc.ChangeState(SCE_HA_IDENTIFIER); |
915 | // Promoted constructor operator ':~> |
916 | } else if (sc.ch == ':') { |
917 | alreadyInTheMiddleOfOperator = false; |
918 | sc.ChangeState(SCE_HA_OPERATOR); |
919 | // Promoted list or tuple '[T] |
920 | } else if (sc.ch == '[' || sc.ch== '(') { |
921 | sc.ChangeState(SCE_HA_OPERATOR); |
922 | sc.ForwardSetState(SCE_HA_DEFAULT); |
923 | } |
924 | } |
925 | } |
926 | } |
927 | // Operator starting with '?' or an implicit parameter |
928 | else if (sc.ch == '?') { |
929 | hs.mode = HA_MODE_DEFAULT; |
930 | |
931 | alreadyInTheMiddleOfOperator = false; |
932 | sc.SetState(SCE_HA_OPERATOR); |
933 | |
934 | if ( options.implicitParams |
935 | && IsAHaskellWordStart(sc.chNext) |
936 | && !IsHaskellUpperCase(sc.chNext)) { |
937 | sc.Forward(); |
938 | sc.ChangeState(SCE_HA_IDENTIFIER); |
939 | } |
940 | } |
941 | // Operator |
942 | else if (IsAnHaskellOperatorChar(sc.ch)) { |
943 | hs.mode = HA_MODE_DEFAULT; |
944 | |
945 | sc.SetState(SCE_HA_OPERATOR); |
946 | } |
947 | // Braces and punctuation |
948 | else if (sc.ch == ',' || sc.ch == ';' |
949 | || sc.ch == '(' || sc.ch == ')' |
950 | || sc.ch == '[' || sc.ch == ']' |
951 | || sc.ch == '{' || sc.ch == '}') { |
952 | sc.SetState(SCE_HA_OPERATOR); |
953 | sc.ForwardSetState(SCE_HA_DEFAULT); |
954 | } |
955 | // Keyword or Identifier |
956 | else if (IsAHaskellWordStart(sc.ch)) { |
957 | sc.SetState(SCE_HA_IDENTIFIER); |
958 | // Something we don't care about |
959 | } else { |
960 | sc.Forward(); |
961 | } |
962 | } |
963 | // This branch should never be reached. |
964 | else { |
965 | assert(false); |
966 | sc.Forward(); |
967 | } |
968 | } |
969 | sc.Complete(); |
970 | } |
971 | |
972 | void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle |
973 | ,IDocument *pAccess) { |
974 | if (!options.fold) |
975 | return; |
976 | |
977 | Accessor styler(pAccess, NULL); |
978 | |
979 | Sci_Position lineCurrent = styler.GetLine(startPos); |
980 | |
981 | if (lineCurrent <= firstImportLine) { |
982 | firstImportLine = -1; // readjust first import position |
983 | firstImportIndent = 0; |
984 | } |
985 | |
986 | const Sci_Position maxPos = startPos + length; |
987 | const Sci_Position maxLines = |
988 | maxPos == styler.Length() |
989 | ? styler.GetLine(maxPos) |
990 | : styler.GetLine(maxPos - 1); // Requested last line |
991 | const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line |
992 | |
993 | // Backtrack to previous non-blank line so we can determine indent level |
994 | // for any white space lines |
995 | // and so we can fix any preceding fold level (which is why we go back |
996 | // at least one line in all cases) |
997 | bool importHere = LineContainsImport(lineCurrent, styler); |
998 | int indentCurrent = IndentAmountWithOffset(styler, lineCurrent); |
999 | |
1000 | while (lineCurrent > 0) { |
1001 | lineCurrent--; |
1002 | importHere = LineContainsImport(lineCurrent, styler); |
1003 | indentCurrent = IndentAmountWithOffset(styler, lineCurrent); |
1004 | if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) |
1005 | break; |
1006 | } |
1007 | |
1008 | int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK; |
1009 | |
1010 | if (importHere) { |
1011 | indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel); |
1012 | if (firstImportLine == -1) { |
1013 | firstImportLine = lineCurrent; |
1014 | firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE; |
1015 | } |
1016 | if (firstImportLine != lineCurrent) { |
1017 | indentCurrentLevel++; |
1018 | } |
1019 | } |
1020 | |
1021 | indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK); |
1022 | |
1023 | // Process all characters to end of requested range |
1024 | //that hangs over the end of the range. Cap processing in all cases |
1025 | // to end of document. |
1026 | while (lineCurrent <= docLines && lineCurrent <= maxLines) { |
1027 | |
1028 | // Gather info |
1029 | Sci_Position lineNext = lineCurrent + 1; |
1030 | importHere = false; |
1031 | int indentNext = indentCurrent; |
1032 | |
1033 | if (lineNext <= docLines) { |
1034 | // Information about next line is only available if not at end of document |
1035 | importHere = LineContainsImport(lineNext, styler); |
1036 | indentNext = IndentAmountWithOffset(styler, lineNext); |
1037 | } |
1038 | if (indentNext & SC_FOLDLEVELWHITEFLAG) |
1039 | indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel; |
1040 | |
1041 | // Skip past any blank lines for next indent level info; we skip also |
1042 | // comments (all comments, not just those starting in column 0) |
1043 | // which effectively folds them into surrounding code rather |
1044 | // than screwing up folding. |
1045 | |
1046 | while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) { |
1047 | lineNext++; |
1048 | importHere = LineContainsImport(lineNext, styler); |
1049 | indentNext = IndentAmountWithOffset(styler, lineNext); |
1050 | } |
1051 | |
1052 | int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK; |
1053 | |
1054 | if (importHere) { |
1055 | indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel); |
1056 | if (firstImportLine == -1) { |
1057 | firstImportLine = lineNext; |
1058 | firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE; |
1059 | } |
1060 | if (firstImportLine != lineNext) { |
1061 | indentNextLevel++; |
1062 | } |
1063 | } |
1064 | |
1065 | indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK); |
1066 | |
1067 | const int = Maximum(indentCurrentLevel,indentNextLevel); |
1068 | |
1069 | // Now set all the indent levels on the lines we skipped |
1070 | // Do this from end to start. Once we encounter one line |
1071 | // which is indented more than the line after the end of |
1072 | // the comment-block, use the level of the block before |
1073 | |
1074 | Sci_Position skipLine = lineNext; |
1075 | int skipLevel = indentNextLevel; |
1076 | |
1077 | while (--skipLine > lineCurrent) { |
1078 | int skipLineIndent = IndentAmountWithOffset(styler, skipLine); |
1079 | |
1080 | if (options.foldCompact) { |
1081 | if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) { |
1082 | skipLevel = levelBeforeComments; |
1083 | } |
1084 | |
1085 | int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG; |
1086 | |
1087 | styler.SetLevel(skipLine, skipLevel | whiteFlag); |
1088 | } else { |
1089 | if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel |
1090 | && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) { |
1091 | skipLevel = levelBeforeComments; |
1092 | } |
1093 | |
1094 | styler.SetLevel(skipLine, skipLevel); |
1095 | } |
1096 | } |
1097 | |
1098 | int lev = indentCurrent; |
1099 | |
1100 | if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) { |
1101 | if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK)) |
1102 | lev |= SC_FOLDLEVELHEADERFLAG; |
1103 | } |
1104 | |
1105 | // Set fold level for this line and move to next line |
1106 | styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG); |
1107 | |
1108 | indentCurrent = indentNext; |
1109 | indentCurrentLevel = indentNextLevel; |
1110 | lineCurrent = lineNext; |
1111 | } |
1112 | |
1113 | // NOTE: Cannot set level of last line here because indentCurrent doesn't have |
1114 | // header flag set; the loop above is crafted to take care of this case! |
1115 | //styler.SetLevel(lineCurrent, indentCurrent); |
1116 | } |
1117 | |
1118 | LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell" , haskellWordListDesc); |
1119 | LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell" , haskellWordListDesc); |
1120 | |