LexHaskell.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexHaskell.cxx]

1	/******************************************************************
2	* LexHaskell.cxx
3	*
4	* A haskell lexer for the scintilla code control.
5	* Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
6	* External lexer stuff inspired from the caml external lexer.
7	* Folder copied from Python's.
8	*
9	* Written by Tobias Engvall - tumm at dtek dot chalmers dot se
10	*
11	* Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
12	*
13	* Improved by kudah <kudahkukarek@gmail.com>
14	*
15	* TODO:
16	* * A proper lexical folder to fold group declarations, comments, pragmas,
17	* #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
18	* etc.
19	*
20	*****************************************************************/
21	#include <stdlib.h>
22	#include <string.h>
23	#include <stdio.h>
24	#include <stdarg.h>
25	#include <assert.h>
26	#include <ctype.h>
27
28	#include <string>
29	#include <string_view>
30	#include <vector>
31	#include <map>
32	#include <functional>
33
34	#include "ILexer.h"
35	#include "Scintilla.h"
36	#include "SciLexer.h"
37
38	#include "PropSetSimple.h"
39	#include "WordList.h"
40	#include "LexAccessor.h"
41	#include "Accessor.h"
42	#include "StyleContext.h"
43	#include "CharacterSet.h"
44	#include "CharacterCategory.h"
45	#include "LexerModule.h"
46	#include "OptionSet.h"
47	#include "DefaultLexer.h"
48
49	using namespace Scintilla;
50	using namespace Lexilla;
51
52	// See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
53	// Note, letter modifiers are prohibited.
54
55	static int u_iswupper (int ch) {
56	CharacterCategory c = CategoriseCharacter(ch);
57	return c == ccLu \|\| c == ccLt;
58	}
59
60	static int u_iswalpha (int ch) {
61	CharacterCategory c = CategoriseCharacter(ch);
62	return c == ccLl \|\| c == ccLu \|\| c == ccLt \|\| c == ccLo;
63	}
64
65	static int u_iswalnum (int ch) {
66	CharacterCategory c = CategoriseCharacter(ch);
67	return c == ccLl \|\| c == ccLu \|\| c == ccLt \|\| c == ccLo
68	\|\| c == ccNd \|\| c == ccNo;
69	}
70
71	static int u_IsHaskellSymbol(int ch) {
72	CharacterCategory c = CategoriseCharacter(ch);
73	return c == ccPc \|\| c == ccPd \|\| c == ccPo
74	\|\| c == ccSm \|\| c == ccSc \|\| c == ccSk \|\| c == ccSo;
75	}
76
77	static inline bool IsHaskellLetter(const int ch) {
78	if (IsASCII(ch)) {
79	return (ch >= `'a'` && ch <= `'z'`)
80	\|\| (ch >= `'A'` && ch <= `'Z'`);
81	} else {
82	return u_iswalpha(ch) != `0`;
83	}
84	}
85
86	static inline bool IsHaskellAlphaNumeric(const int ch) {
87	if (IsASCII(ch)) {
88	return IsAlphaNumeric(ch);
89	} else {
90	return u_iswalnum(ch) != `0`;
91	}
92	}
93
94	static inline bool IsHaskellUpperCase(const int ch) {
95	if (IsASCII(ch)) {
96	return ch >= `'A'` && ch <= `'Z'`;
97	} else {
98	return u_iswupper(ch) != `0`;
99	}
100	}
101
102	static inline bool IsAnHaskellOperatorChar(const int ch) {
103	if (IsASCII(ch)) {
104	return
105	( ch == `'!'` \|\| ch == `'#'` \|\| ch == `'$'` \|\| ch == `'%'`
106	\|\| ch == `'&'` \|\| ch == `'*'` \|\| ch == `'+'` \|\| ch == `'-'`
107	\|\| ch == `'.'` \|\| ch == `'/'` \|\| ch == `':'` \|\| ch == `'<'`
108	\|\| ch == `'='` \|\| ch == `'>'` \|\| ch == `'?'` \|\| ch == `'@'`
109	\|\| ch == `'^'` \|\| ch == `'\|'` \|\| ch == `'~'` \|\| ch == `'\\'`);
110	} else {
111	return u_IsHaskellSymbol(ch) != `0`;
112	}
113	}
114
115	static inline bool IsAHaskellWordStart(const int ch) {
116	return IsHaskellLetter(ch) \|\| ch == `'_'`;
117	}
118
119	static inline bool IsAHaskellWordChar(const int ch) {
120	return ( IsHaskellAlphaNumeric(ch)
121	\|\| ch == `'_'`
122	\|\| ch == `'\''`);
123	}
124
125	static inline bool IsCommentBlockStyle(int style) {
126	return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
127	}
128
129	static inline bool IsCommentStyle(int style) {
130	return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
131	\|\| ( style == SCE_HA_LITERATE_COMMENT
132	\|\| style == SCE_HA_LITERATE_CODEDELIM);
133	}
134
135	// styles which do not belong to Haskell, but to external tools
136	static inline bool IsExternalStyle(int style) {
137	return ( style == SCE_HA_PREPROCESSOR
138	\|\| style == SCE_HA_LITERATE_COMMENT
139	\|\| style == SCE_HA_LITERATE_CODEDELIM);
140	}
141
142	static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
143	return SCE_HA_COMMENTBLOCK + (nestLevel % `3`);
144	}
145
146	// Mangled version of lexlib/Accessor.cxx IndentAmount.
147	// Modified to treat comment blocks as whitespace
148	// plus special case for commentline/preprocessor.
149	static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
150
151	// Determines the indentation level of the current line
152	// Comment blocks are treated as whitespace
153
154	Sci_Position pos = styler.LineStart(line);
155	Sci_Position eol_pos = styler.LineStart(line + `1`) - `1`;
156
157	char ch = styler [pos];
158	int style = styler.StyleAt(pos);
159
160	int indent = `0`;
161	bool inPrevPrefix = line > `0`;
162
163	Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-`1`) : `0`;
164
165	while (( ch == `' '` \|\| ch == `'\t'`
166	\|\| IsCommentBlockStyle(style)
167	\|\| style == SCE_HA_LITERATE_CODEDELIM)
168	&& (pos < eol_pos)) {
169	if (inPrevPrefix) {
170	char chPrev = styler [posPrev++];
171	if (chPrev != `' '` && chPrev != `'\t'`) {
172	inPrevPrefix = false;
173	}
174	}
175	if (ch == `'\t'`) {
176	indent = (indent / `8` + `1`) * `8`;
177	} else { // Space or comment block
178	indent++;
179	}
180	pos++;
181	ch = styler [pos];
182	style = styler.StyleAt(pos);
183	}
184
185	indent += SC_FOLDLEVELBASE;
186	// if completely empty line or the start of a comment or preprocessor...
187	if ( styler.LineStart(line) == styler.Length()
188	\|\| ch == `' '`
189	\|\| ch == `'\t'`
190	\|\| ch == `'\n'`
191	\|\| ch == `'\r'`
192	\|\| IsCommentStyle(style)
193	\|\| style == SCE_HA_PREPROCESSOR)
194	return indent \| SC_FOLDLEVELWHITEFLAG;
195	else
196	return indent;
197	}
198
199	struct OptionsHaskell {
200	bool magicHash;
201	bool allowQuotes;
202	bool implicitParams;
203	bool highlightSafe;
204	bool cpp;
205	bool stylingWithinPreprocessor;
206	bool fold;
207	bool foldComment;
208	bool foldCompact;
209	bool foldImports;
210	OptionsHaskell() {
211	magicHash = true; // Widespread use, enabled by default.
212	allowQuotes = true; // Widespread use, enabled by default.
213	implicitParams = false; // Fell out of favor, seldom used, disabled.
214	highlightSafe = true; // Moderately used, doesn't hurt to enable.
215	cpp = true; // Widespread use, enabled by default;
216	stylingWithinPreprocessor = false;
217	fold = false;
218	foldComment = false;
219	foldCompact = false;
220	foldImports = false;
221	}
222	};
223
224	static const char * const haskellWordListDesc[] = {
225	"Keywords",
226	"FFI",
227	"Reserved operators",
228	`0`
229	};
230
231	struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
232	OptionSetHaskell() {
233	DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
234	"Set to 0 to disallow the '#' character at the end of identifiers and "
235	"literals with the haskell lexer "
236	"(GHC -XMagicHash extension)");
237
238	DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
239	"Set to 0 to disable highlighting of Template Haskell name quotations "
240	"and promoted constructors "
241	"(GHC -XTemplateHaskell and -XDataKinds extensions)");
242
243	DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
244	"Set to 1 to allow the '?' character at the start of identifiers "
245	"with the haskell lexer "
246	"(GHC & Hugs -XImplicitParams extension)");
247
248	DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
249	"Set to 0 to disallow \"safe\" keyword in imports "
250	"(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
251
252	DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
253	"Set to 0 to disable C-preprocessor highlighting "
254	"(-XCPP extension)");
255
256	DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
257	"For Haskell code, determines whether all preprocessor code is styled in the "
258	"preprocessor style (0, the default) or only from the initial # to the end "
259	"of the command word(1)."
260	);
261
262	DefineProperty("fold", &OptionsHaskell::fold);
263
264	DefineProperty("fold.comment", &OptionsHaskell::foldComment);
265
266	DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
267
268	DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
269	"Set to 1 to enable folding of import declarations");
270
271	DefineWordListSets(haskellWordListDesc);
272	}
273	};
274
275	class LexerHaskell : public DefaultLexer {
276	bool literate;
277	Sci_Position firstImportLine;
278	int firstImportIndent;
279	WordList keywords;
280	WordList ffi;
281	WordList reserved_operators;
282	OptionsHaskell options;
283	OptionSetHaskell osHaskell;
284
285	enum HashCount {
286	oneHash
287	,twoHashes
288	,unlimitedHashes
289	};
290
291	enum KeywordMode {
292	HA_MODE_DEFAULT = `0`
293	,HA_MODE_IMPORT1 = `1` // after "import", before "qualified" or "safe" or package name or module name.
294	,HA_MODE_IMPORT2 = `2` // after module name, before "as" or "hiding".
295	,HA_MODE_IMPORT3 = `3` // after "as", before "hiding"
296	,HA_MODE_MODULE = `4` // after "module", before module name.
297	,HA_MODE_FFI = `5` // after "foreign", before FFI keywords
298	,HA_MODE_TYPE = `6` // after "type" or "data", before "family"
299	};
300
301	enum LiterateMode {
302	LITERATE_BIRD = `0` // if '>' is the first character on the line,
303	// color '>' as a codedelim and the rest of
304	// the line as code.
305	// else if "\begin{code}" is the only word on the
306	// line except whitespace, switch to LITERATE_BLOCK
307	// otherwise color the line as a literate comment.
308	,LITERATE_BLOCK = `1` // if the string "\end{code}" is encountered at column
309	// 0 ignoring all later characters, color the line
310	// as a codedelim and switch to LITERATE_BIRD
311	// otherwise color the line as code.
312	};
313
314	struct HaskellLineInfo {
315	unsigned int nestLevel; // 22 bits ought to be enough for anybody
316	unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
317	// beyond 31.
318	bool pragma;
319	LiterateMode lmode;
320	KeywordMode mode;
321
322	HaskellLineInfo(int state) :
323	nestLevel (state >> `10`)
324	, nonexternalStyle ((state >> `5`) & `0x1F`)
325	, pragma ((state >> `4`) & `0x1`)
326	, lmode (static_cast<LiterateMode>((state >> `3`) & `0x1`))
327	, mode (static_cast<KeywordMode>(state & `0x7`))
328	{}
329
330	int ToLineState() {
331	return
332	(nestLevel << `10`)
333	\| (nonexternalStyle << `5`)
334	\| (pragma << `4`)
335	\| (lmode << `3`)
336	\| mode;
337	}
338	};
339
340	inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
341	if (options.magicHash && sc.ch == `'#'`) {
342	sc.Forward();
343	if (hashes == twoHashes && sc.ch == `'#'`) {
344	sc.Forward();
345	} else if (hashes == unlimitedHashes) {
346	while (sc.ch == `'#'`) {
347	sc.Forward();
348	}
349	}
350	}
351	}
352
353	bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
354	if (options.foldImports) {
355	Sci_Position currentPos = styler.LineStart(line);
356	int style = styler.StyleAt(currentPos);
357
358	Sci_Position eol_pos = styler.LineStart(line + `1`) - `1`;
359
360	while (currentPos < eol_pos) {
361	int ch = styler [currentPos];
362	style = styler.StyleAt(currentPos);
363
364	if (ch == `' '` \|\| ch == `'\t'`
365	\|\| IsCommentBlockStyle(style)
366	\|\| style == SCE_HA_LITERATE_CODEDELIM) {
367	currentPos++;
368	} else {
369	break;
370	}
371	}
372
373	return (style == SCE_HA_KEYWORD
374	&& styler.Match(currentPos, "import"));
375	} else {
376	return false;
377	}
378	}
379
380	inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
381	const int indent = HaskellIndentAmount(styler, line);
382	const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
383	return indentLevel <= ((firstImportIndent - `1`) + SC_FOLDLEVELBASE)
384	? indent
385	: (indentLevel + firstImportIndent) \| (indent & ~SC_FOLDLEVELNUMBERMASK);
386	}
387
388	inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
389	return indentLevel <= ((firstImportIndent - `1`) + SC_FOLDLEVELBASE)
390	? indentLevel
391	: indentLevel - firstImportIndent;
392	}
393
394	public:
395	LexerHaskell(bool literate_)
396	: DefaultLexer (literate_ ? "literatehaskell" : "haskell", literate_ ? SCLEX_LITERATEHASKELL : SCLEX_HASKELL)
397	, literate(literate_)
398	, firstImportLine(-`1`)
399	, firstImportIndent(`0`)
400	{}
401	virtual ~LexerHaskell() {}
402
403	void SCI_METHOD Release() override {
404	delete this;
405	}
406
407	int SCI_METHOD Version() const override {
408	return lvRelease5;
409	}
410
411	const char * SCI_METHOD PropertyNames() override {
412	return osHaskell.PropertyNames();
413	}
414
415	int SCI_METHOD PropertyType(const char *name) override {
416	return osHaskell.PropertyType(name);
417	}
418
419	const char * SCI_METHOD DescribeProperty(const char *name) override {
420	return osHaskell.DescribeProperty(name);
421	}
422
423	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override;
424
425	const char * SCI_METHOD PropertyGet(const char *key) override {
426	return osHaskell.PropertyGet(key);
427	}
428
429	const char * SCI_METHOD DescribeWordListSets() override {
430	return osHaskell.DescribeWordListSets();
431	}
432
433	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
434
435	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
436
437	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
438
439	void * SCI_METHOD PrivateCall(int, void *) override {
440	return `0`;
441	}
442
443	static ILexer5 *LexerFactoryHaskell() {
444	return new LexerHaskell (false);
445	}
446
447	static ILexer5 *LexerFactoryLiterateHaskell() {
448	return new LexerHaskell (true);
449	}
450	};
451
452	Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char key, const* char *val) {
453	if (osHaskell.PropertySet(&options, key, val)) {
454	return `0`;
455	}
456	return -`1`;
457	}
458
459	Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
460	WordList *wordListN = `0`;
461	switch (n) {
462	case `0`:
463	wordListN = &keywords;
464	break;
465	case `1`:
466	wordListN = &ffi;
467	break;
468	case `2`:
469	wordListN = &reserved_operators;
470	break;
471	}
472	Sci_Position firstModification = -`1`;
473	if (wordListN) {
474	WordList wlNew;
475	wlNew.Set(wl);
476	if (*wordListN != wlNew) {
477	wordListN->Set(wl);
478	firstModification = `0`;
479	}
480	}
481	return firstModification;
482	}
483
484	void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
485	,IDocument *pAccess) {
486	LexAccessor styler(pAccess);
487
488	Sci_Position lineCurrent = styler.GetLine(startPos);
489
490	HaskellLineInfo hs = HaskellLineInfo (lineCurrent ? styler.GetLineState(lineCurrent-`1`) : `0`);
491
492	// Do not leak onto next line
493	if (initStyle == SCE_HA_STRINGEOL)
494	initStyle = SCE_HA_DEFAULT;
495	else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
496	initStyle = hs.nonexternalStyle;
497
498	StyleContext sc(startPos, length, initStyle, styler);
499
500	int base = `10`;
501	bool dot = false;
502
503	bool inDashes = false;
504	bool alreadyInTheMiddleOfOperator = false;
505
506	assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == `0`));
507
508	while (sc.More()) {
509	// Check for state end
510
511	if (!IsExternalStyle(sc.state)) {
512	hs.nonexternalStyle = sc.state;
513	}
514
515	// For lexer to work, states should unconditionally forward at least one
516	// character.
517	// If they don't, they should still check if they are at line end and
518	// forward if so.
519	// If a state forwards more than one character, it should check every time
520	// that it is not a line end and cease forwarding otherwise.
521	if (sc.atLineEnd) {
522	// Remember the line state for future incremental lexing
523	styler.SetLineState(lineCurrent, hs.ToLineState());
524	lineCurrent++;
525	}
526
527	// Handle line continuation generically.
528	if (sc.ch == `'\\'` && (sc.chNext == `'\n'` \|\| sc.chNext == `'\r'`)
529	&& ( sc.state == SCE_HA_STRING
530	\|\| sc.state == SCE_HA_PREPROCESSOR)) {
531	// Remember the line state for future incremental lexing
532	styler.SetLineState(lineCurrent, hs.ToLineState());
533	lineCurrent++;
534
535	sc.Forward();
536	if (sc.ch == `'\r'` && sc.chNext == `'\n'`) {
537	sc.Forward();
538	}
539	sc.Forward();
540
541	continue;
542	}
543
544	if (sc.atLineStart) {
545
546	if (sc.state == SCE_HA_STRING \|\| sc.state == SCE_HA_CHARACTER) {
547	// Prevent SCE_HA_STRINGEOL from leaking back to previous line
548	sc.SetState(sc.state);
549	}
550
551	if (literate && hs.lmode == LITERATE_BIRD) {
552	if (!IsExternalStyle(sc.state)) {
553	sc.SetState(SCE_HA_LITERATE_COMMENT);
554	}
555	}
556	}
557
558	// External
559	// Literate
560	if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
561	&& sc.ch == `'>'`) {
562	sc.SetState(SCE_HA_LITERATE_CODEDELIM);
563	sc.ForwardSetState(hs.nonexternalStyle);
564	}
565	else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
566	&& ( sc.ch == `' '` \|\| sc.ch == `'\t'`
567	\|\| sc.Match("\\begin{code}"))) {
568	sc.SetState(sc.state);
569
570	while ((sc.ch == `' '` \|\| sc.ch == `'\t'`) && sc.More())
571	sc.Forward();
572
573	if (sc.Match("\\begin{code}")) {
574	sc.Forward(static_cast<int>(strlen("\\begin{code}")));
575
576	bool correct = true;
577
578	while (!sc.atLineEnd && sc.More()) {
579	if (sc.ch != `' '` && sc.ch != `'\t'`) {
580	correct = false;
581	}
582	sc.Forward();
583	}
584
585	if (correct) {
586	sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
587	hs.lmode = LITERATE_BLOCK;
588	}
589	}
590	}
591	else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
592	&& sc.Match("\\end{code}")) {
593	sc.SetState(SCE_HA_LITERATE_CODEDELIM);
594
595	sc.Forward(static_cast<int>(strlen("\\end{code}")));
596
597	while (!sc.atLineEnd && sc.More()) {
598	sc.Forward();
599	}
600
601	sc.SetState(SCE_HA_LITERATE_COMMENT);
602	hs.lmode = LITERATE_BIRD;
603	}
604	// Preprocessor
605	else if (sc.atLineStart && sc.ch == `'#'` && options.cpp
606	&& (!options.stylingWithinPreprocessor \|\| sc.state == SCE_HA_DEFAULT)) {
607	sc.SetState(SCE_HA_PREPROCESSOR);
608	sc.Forward();
609	}
610	// Literate
611	else if (sc.state == SCE_HA_LITERATE_COMMENT) {
612	sc.Forward();
613	}
614	else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
615	sc.ForwardSetState(hs.nonexternalStyle);
616	}
617	// Preprocessor
618	else if (sc.state == SCE_HA_PREPROCESSOR) {
619	if (sc.atLineEnd) {
620	sc.SetState(options.stylingWithinPreprocessor
621	? SCE_HA_DEFAULT
622	: hs.nonexternalStyle);
623	sc.Forward(); // prevent double counting a line
624	} else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
625	sc.SetState(SCE_HA_DEFAULT);
626	} else {
627	sc.Forward();
628	}
629	}
630	// Haskell
631	// Operator
632	else if (sc.state == SCE_HA_OPERATOR) {
633	int style = SCE_HA_OPERATOR;
634
635	if ( sc.ch == `':'`
636	&& !alreadyInTheMiddleOfOperator
637	// except "::"
638	&& !( sc.chNext == `':'`
639	&& !IsAnHaskellOperatorChar(sc.GetRelative(`2`)))) {
640	style = SCE_HA_CAPITAL;
641	}
642
643	alreadyInTheMiddleOfOperator = false;
644
645	while (IsAnHaskellOperatorChar(sc.ch))
646	sc.Forward();
647
648	char s[`100`];
649	sc.GetCurrent(s, sizeof(s));
650
651	if (reserved_operators.InList(s))
652	style = SCE_HA_RESERVED_OPERATOR;
653
654	sc.ChangeState(style);
655	sc.SetState(SCE_HA_DEFAULT);
656	}
657	// String
658	else if (sc.state == SCE_HA_STRING) {
659	if (sc.atLineEnd) {
660	sc.ChangeState(SCE_HA_STRINGEOL);
661	sc.ForwardSetState(SCE_HA_DEFAULT);
662	} else if (sc.ch == `'\"'`) {
663	sc.Forward();
664	skipMagicHash(sc, oneHash);
665	sc.SetState(SCE_HA_DEFAULT);
666	} else if (sc.ch == `'\\'`) {
667	sc.Forward(`2`);
668	} else {
669	sc.Forward();
670	}
671	}
672	// Char
673	else if (sc.state == SCE_HA_CHARACTER) {
674	if (sc.atLineEnd) {
675	sc.ChangeState(SCE_HA_STRINGEOL);
676	sc.ForwardSetState(SCE_HA_DEFAULT);
677	} else if (sc.ch == `'\''`) {
678	sc.Forward();
679	skipMagicHash(sc, oneHash);
680	sc.SetState(SCE_HA_DEFAULT);
681	} else if (sc.ch == `'\\'`) {
682	sc.Forward(`2`);
683	} else {
684	sc.Forward();
685	}
686	}
687	// Number
688	else if (sc.state == SCE_HA_NUMBER) {
689	if (sc.atLineEnd) {
690	sc.SetState(SCE_HA_DEFAULT);
691	sc.Forward(); // prevent double counting a line
692	} else if (IsADigit(sc.ch, base)) {
693	sc.Forward();
694	} else if (sc.ch==`'.'` && dot && IsADigit(sc.chNext, base)) {
695	sc.Forward(`2`);
696	dot = false;
697	} else if ((base == `10`) &&
698	(sc.ch == `'e'` \|\| sc.ch == `'E'`) &&
699	(IsADigit(sc.chNext) \|\| sc.chNext == `'+'` \|\| sc.chNext == `'-'`)) {
700	sc.Forward();
701	if (sc.ch == `'+'` \|\| sc.ch == `'-'`)
702	sc.Forward();
703	} else {
704	skipMagicHash(sc, twoHashes);
705	sc.SetState(SCE_HA_DEFAULT);
706	}
707	}
708	// Keyword or Identifier
709	else if (sc.state == SCE_HA_IDENTIFIER) {
710	int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
711
712	assert(IsAHaskellWordStart(sc.ch));
713
714	sc.Forward();
715
716	while (sc.More()) {
717	if (IsAHaskellWordChar(sc.ch)) {
718	sc.Forward();
719	} else if (sc.ch == `'.'` && style == SCE_HA_CAPITAL) {
720	if (IsHaskellUpperCase(sc.chNext)) {
721	sc.Forward();
722	style = SCE_HA_CAPITAL;
723	} else if (IsAHaskellWordStart(sc.chNext)) {
724	sc.Forward();
725	style = SCE_HA_IDENTIFIER;
726	} else if (IsAnHaskellOperatorChar(sc.chNext)) {
727	sc.Forward();
728	style = sc.ch == `':'` ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
729	while (IsAnHaskellOperatorChar(sc.ch))
730	sc.Forward();
731	break;
732	} else {
733	break;
734	}
735	} else {
736	break;
737	}
738	}
739
740	skipMagicHash(sc, unlimitedHashes);
741
742	char s[`100`];
743	sc.GetCurrent(s, sizeof(s));
744
745	KeywordMode new_mode = HA_MODE_DEFAULT;
746
747	if (keywords.InList(s)) {
748	style = SCE_HA_KEYWORD;
749	} else if (style == SCE_HA_CAPITAL) {
750	if (hs.mode == HA_MODE_IMPORT1 \|\| hs.mode == HA_MODE_IMPORT3) {
751	style = SCE_HA_MODULE;
752	new_mode = HA_MODE_IMPORT2;
753	} else if (hs.mode == HA_MODE_MODULE) {
754	style = SCE_HA_MODULE;
755	}
756	} else if (hs.mode == HA_MODE_IMPORT1 &&
757	strcmp(s,"qualified") == `0`) {
758	style = SCE_HA_KEYWORD;
759	new_mode = HA_MODE_IMPORT1;
760	} else if (options.highlightSafe &&
761	hs.mode == HA_MODE_IMPORT1 &&
762	strcmp(s,"safe") == `0`) {
763	style = SCE_HA_KEYWORD;
764	new_mode = HA_MODE_IMPORT1;
765	} else if (hs.mode == HA_MODE_IMPORT2) {
766	if (strcmp(s,"as") == `0`) {
767	style = SCE_HA_KEYWORD;
768	new_mode = HA_MODE_IMPORT3;
769	} else if (strcmp(s,"hiding") == `0`) {
770	style = SCE_HA_KEYWORD;
771	}
772	} else if (hs.mode == HA_MODE_TYPE) {
773	if (strcmp(s,"family") == `0`)
774	style = SCE_HA_KEYWORD;
775	}
776
777	if (hs.mode == HA_MODE_FFI) {
778	if (ffi.InList(s)) {
779	style = SCE_HA_KEYWORD;
780	new_mode = HA_MODE_FFI;
781	}
782	}
783
784	sc.ChangeState(style);
785	sc.SetState(SCE_HA_DEFAULT);
786
787	if (strcmp(s,"import") == `0` && hs.mode != HA_MODE_FFI)
788	new_mode = HA_MODE_IMPORT1;
789	else if (strcmp(s,"module") == `0`)
790	new_mode = HA_MODE_MODULE;
791	else if (strcmp(s,"foreign") == `0`)
792	new_mode = HA_MODE_FFI;
793	else if (strcmp(s,"type") == `0`
794	\|\| strcmp(s,"data") == `0`)
795	new_mode = HA_MODE_TYPE;
796
797	hs.mode = new_mode;
798	}
799
800	// Comments
801	// Oneliner
802	else if (sc.state == SCE_HA_COMMENTLINE) {
803	if (sc.atLineEnd) {
804	sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
805	sc.Forward(); // prevent double counting a line
806	} else if (inDashes && sc.ch != `'-'` && !hs.pragma) {
807	inDashes = false;
808	if (IsAnHaskellOperatorChar(sc.ch)) {
809	alreadyInTheMiddleOfOperator = true;
810	sc.ChangeState(SCE_HA_OPERATOR);
811	}
812	} else {
813	sc.Forward();
814	}
815	}
816	// Nested
817	else if (IsCommentBlockStyle(sc.state)) {
818	if (sc.Match(`'{'`,`'-'`)) {
819	sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
820	sc.Forward(`2`);
821	hs.nestLevel++;
822	} else if (sc.Match(`'-'`,`'}'`)) {
823	sc.Forward(`2`);
824	assert(hs.nestLevel > `0`);
825	if (hs.nestLevel > `0`)
826	hs.nestLevel--;
827	sc.SetState(
828	hs.nestLevel == `0`
829	? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
830	: CommentBlockStyleFromNestLevel(hs.nestLevel - `1`));
831	} else {
832	sc.Forward();
833	}
834	}
835	// Pragma
836	else if (sc.state == SCE_HA_PRAGMA) {
837	if (sc.Match("#-}")) {
838	hs.pragma = false;
839	sc.Forward(`3`);
840	sc.SetState(SCE_HA_DEFAULT);
841	} else if (sc.Match(`'-'`,`'-'`)) {
842	sc.SetState(SCE_HA_COMMENTLINE);
843	sc.Forward(`2`);
844	inDashes = false;
845	} else if (sc.Match(`'{'`,`'-'`)) {
846	sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
847	sc.Forward(`2`);
848	hs.nestLevel = `1`;
849	} else {
850	sc.Forward();
851	}
852	}
853	// New state?
854	else if (sc.state == SCE_HA_DEFAULT) {
855	// Digit
856	if (IsADigit(sc.ch)) {
857	hs.mode = HA_MODE_DEFAULT;
858
859	sc.SetState(SCE_HA_NUMBER);
860	if (sc.ch == `'0'` && (sc.chNext == `'X'` \|\| sc.chNext == `'x'`)) {
861	// Match anything starting with "0x" or "0X", too
862	sc.Forward(`2`);
863	base = `16`;
864	dot = false;
865	} else if (sc.ch == `'0'` && (sc.chNext == `'O'` \|\| sc.chNext == `'o'`)) {
866	// Match anything starting with "0o" or "0O", too
867	sc.Forward(`2`);
868	base = `8`;
869	dot = false;
870	} else {
871	sc.Forward();
872	base = `10`;
873	dot = true;
874	}
875	}
876	// Pragma
877	else if (sc.Match("{-#")) {
878	hs.pragma = true;
879	sc.SetState(SCE_HA_PRAGMA);
880	sc.Forward(`3`);
881	}
882	// Comment line
883	else if (sc.Match(`'-'`,`'-'`)) {
884	sc.SetState(SCE_HA_COMMENTLINE);
885	sc.Forward(`2`);
886	inDashes = true;
887	}
888	// Comment block
889	else if (sc.Match(`'{'`,`'-'`)) {
890	sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
891	sc.Forward(`2`);
892	hs.nestLevel = `1`;
893	}
894	// String
895	else if (sc.ch == `'\"'`) {
896	sc.SetState(SCE_HA_STRING);
897	sc.Forward();
898	}
899	// Character or quoted name or promoted term
900	else if (sc.ch == `'\''`) {
901	hs.mode = HA_MODE_DEFAULT;
902
903	sc.SetState(SCE_HA_CHARACTER);
904	sc.Forward();
905
906	if (options.allowQuotes) {
907	// Quoted type ''T
908	if (sc.ch==`'\''` && IsAHaskellWordStart(sc.chNext)) {
909	sc.Forward();
910	sc.ChangeState(SCE_HA_IDENTIFIER);
911	} else if (sc.chNext != `'\''`) {
912	// Quoted name 'n or promoted constructor 'N
913	if (IsAHaskellWordStart(sc.ch)) {
914	sc.ChangeState(SCE_HA_IDENTIFIER);
915	// Promoted constructor operator ':~>
916	} else if (sc.ch == `':'`) {
917	alreadyInTheMiddleOfOperator = false;
918	sc.ChangeState(SCE_HA_OPERATOR);
919	// Promoted list or tuple '[T]
920	} else if (sc.ch == `'['` \|\| sc.ch== `'('`) {
921	sc.ChangeState(SCE_HA_OPERATOR);
922	sc.ForwardSetState(SCE_HA_DEFAULT);
923	}
924	}
925	}
926	}
927	// Operator starting with '?' or an implicit parameter
928	else if (sc.ch == `'?'`) {
929	hs.mode = HA_MODE_DEFAULT;
930
931	alreadyInTheMiddleOfOperator = false;
932	sc.SetState(SCE_HA_OPERATOR);
933
934	if ( options.implicitParams
935	&& IsAHaskellWordStart(sc.chNext)
936	&& !IsHaskellUpperCase(sc.chNext)) {
937	sc.Forward();
938	sc.ChangeState(SCE_HA_IDENTIFIER);
939	}
940	}
941	// Operator
942	else if (IsAnHaskellOperatorChar(sc.ch)) {
943	hs.mode = HA_MODE_DEFAULT;
944
945	sc.SetState(SCE_HA_OPERATOR);
946	}
947	// Braces and punctuation
948	else if (sc.ch == `','` \|\| sc.ch == `';'`
949	\|\| sc.ch == `'('` \|\| sc.ch == `')'`
950	\|\| sc.ch == `'['` \|\| sc.ch == `']'`
951	\|\| sc.ch == `'{'` \|\| sc.ch == `'}'`) {
952	sc.SetState(SCE_HA_OPERATOR);
953	sc.ForwardSetState(SCE_HA_DEFAULT);
954	}
955	// Keyword or Identifier
956	else if (IsAHaskellWordStart(sc.ch)) {
957	sc.SetState(SCE_HA_IDENTIFIER);
958	// Something we don't care about
959	} else {
960	sc.Forward();
961	}
962	}
963	// This branch should never be reached.
964	else {
965	assert(false);
966	sc.Forward();
967	}
968	}
969	sc.Complete();
970	}
971
972	void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
973	,IDocument *pAccess) {
974	if (!options.fold)
975	return;
976
977	Accessor styler(pAccess, NULL);
978
979	Sci_Position lineCurrent = styler.GetLine(startPos);
980
981	if (lineCurrent <= firstImportLine) {
982	firstImportLine = -`1`; // readjust first import position
983	firstImportIndent = `0`;
984	}
985
986	const Sci_Position maxPos = startPos + length;
987	const Sci_Position maxLines =
988	maxPos == styler.Length()
989	? styler.GetLine(maxPos)
990	: styler.GetLine(maxPos - `1`); // Requested last line
991	const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
992
993	// Backtrack to previous non-blank line so we can determine indent level
994	// for any white space lines
995	// and so we can fix any preceding fold level (which is why we go back
996	// at least one line in all cases)
997	bool importHere = LineContainsImport(lineCurrent, styler);
998	int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
999
1000	while (lineCurrent > `0`) {
1001	lineCurrent--;
1002	importHere = LineContainsImport(lineCurrent, styler);
1003	indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
1004	if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
1005	break;
1006	}
1007
1008	int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1009
1010	if (importHere) {
1011	indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1012	if (firstImportLine == -`1`) {
1013	firstImportLine = lineCurrent;
1014	firstImportIndent = (`1` + indentCurrentLevel) - SC_FOLDLEVELBASE;
1015	}
1016	if (firstImportLine != lineCurrent) {
1017	indentCurrentLevel++;
1018	}
1019	}
1020
1021	indentCurrent = indentCurrentLevel \| (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1022
1023	// Process all characters to end of requested range
1024	//that hangs over the end of the range. Cap processing in all cases
1025	// to end of document.
1026	while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1027
1028	// Gather info
1029	Sci_Position lineNext = lineCurrent + `1`;
1030	importHere = false;
1031	int indentNext = indentCurrent;
1032
1033	if (lineNext <= docLines) {
1034	// Information about next line is only available if not at end of document
1035	importHere = LineContainsImport(lineNext, styler);
1036	indentNext = IndentAmountWithOffset(styler, lineNext);
1037	}
1038	if (indentNext & SC_FOLDLEVELWHITEFLAG)
1039	indentNext = SC_FOLDLEVELWHITEFLAG \| indentCurrentLevel;
1040
1041	// Skip past any blank lines for next indent level info; we skip also
1042	// comments (all comments, not just those starting in column 0)
1043	// which effectively folds them into surrounding code rather
1044	// than screwing up folding.
1045
1046	while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1047	lineNext++;
1048	importHere = LineContainsImport(lineNext, styler);
1049	indentNext = IndentAmountWithOffset(styler, lineNext);
1050	}
1051
1052	int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1053
1054	if (importHere) {
1055	indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1056	if (firstImportLine == -`1`) {
1057	firstImportLine = lineNext;
1058	firstImportIndent = (`1` + indentNextLevel) - SC_FOLDLEVELBASE;
1059	}
1060	if (firstImportLine != lineNext) {
1061	indentNextLevel++;
1062	}
1063	}
1064
1065	indentNext = indentNextLevel \| (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1066
1067	const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1068
1069	// Now set all the indent levels on the lines we skipped
1070	// Do this from end to start. Once we encounter one line
1071	// which is indented more than the line after the end of
1072	// the comment-block, use the level of the block before
1073
1074	Sci_Position skipLine = lineNext;
1075	int skipLevel = indentNextLevel;
1076
1077	while (--skipLine > lineCurrent) {
1078	int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1079
1080	if (options.foldCompact) {
1081	if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1082	skipLevel = levelBeforeComments;
1083	}
1084
1085	int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1086
1087	styler.SetLevel(skipLine, skipLevel \| whiteFlag);
1088	} else {
1089	if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1090	&& !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1091	skipLevel = levelBeforeComments;
1092	}
1093
1094	styler.SetLevel(skipLine, skipLevel);
1095	}
1096	}
1097
1098	int lev = indentCurrent;
1099
1100	if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1101	if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1102	lev \|= SC_FOLDLEVELHEADERFLAG;
1103	}
1104
1105	// Set fold level for this line and move to next line
1106	styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1107
1108	indentCurrent = indentNext;
1109	indentCurrentLevel = indentNextLevel;
1110	lineCurrent = lineNext;
1111	}
1112
1113	// NOTE: Cannot set level of last line here because indentCurrent doesn't have
1114	// header flag set; the loop above is crafted to take care of this case!
1115	//styler.SetLevel(lineCurrent, indentCurrent);
1116	}
1117
1118	LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1119	LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);
1120

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexHaskell.cxx