LexRaku.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexRaku.cxx]

/** @file LexRaku.cxx
2	** Lexer for Raku
3	**
4	** Copyright (c) 2019 Mark Reay <mark@reay.net.au>
5	**/
6	// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7	// The License.txt file describes the conditions under which this software may be distributed.
8
9	/*
10	* Raku (Perl6) Lexer for Scintilla
11	* ---------------------------------
12	* ---------------------------------
13	* 06-Dec-2019: More Unicode support:
14	* - Added a full scope of allowed numbers and letters
15	* 29-Nov-2019: More highlighting / implemented basic folding:
16	* - Operators (blanket cover, no sequence checking)
17	* - Class / Grammar name highlighting
18	* - Folding:
19	* - Comments: line / multi-line
20	* - POD sections
21	* - Code blocks {}
22	* 26-Nov-2019: Basic syntax highlighting covering the following:
23	* - Comments, both line and embedded (multi-line)
24	* - POD, no inline highlighting as yet...
25	* - Heredoc block string, with variable highlighting (with qq)
26	* - Strings, with variable highlighting (with ")
27	* - Q Language, including adverbs (also basic q and qq)
28	* - Regex, including adverbs
29	* - Numbers
30	* - Bareword / identifiers
31	* - Types
32	* - Variables: mu, positional, associative, callable
33	* TODO:
34	* - POD inline
35	* - Better operator sequence coverage
36	*/
37
38	#include <stdlib.h>
39	#include <string.h>
40	#include <stdio.h>
41	#include <stdarg.h>
42	#include <assert.h>
43	#include <ctype.h>
44
45	#include <string>
46	#include <string_view>
47	#include <vector>
48	#include <map>
49	#include <functional>
50
51	#include "ILexer.h"
52	#include "Scintilla.h"
53	#include "SciLexer.h"
54
55	#include "WordList.h"
56	#include "LexAccessor.h"
57	#include "StyleContext.h"
58	#include "CharacterSet.h"
59	#include "CharacterCategory.h"
60	#include "LexerModule.h"
61	#include "OptionSet.h"
62	#include "DefaultLexer.h"
63
64	using namespace Scintilla;
65	using namespace Lexilla;
66
67	namespace { // anonymous namespace to isolate any name clashes
/*----------------------------------------------------------------------------*
 * --- DEFINITIONS: OPTIONS / CONSTANTS ---
 *----------------------------------------------------------------------------*/
71
// Number types.
// The numeric order is significant: types 1-3 cannot contain a dot.
// Typed constants are used instead of macros so the names respect scope.
constexpr int RAKUNUM_BINARY = 1;
constexpr int RAKUNUM_OCTAL = 2;
constexpr int RAKUNUM_FLOAT_EXP = 3;	// exponent part only
constexpr int RAKUNUM_HEX = 4;		// may be a hex float
constexpr int RAKUNUM_DECIMAL = 5;	// 1-5 are numbers; 6-7 are strings
constexpr int RAKUNUM_VECTOR = 6;
constexpr int RAKUNUM_V_VECTOR = 7;
constexpr int RAKUNUM_VERSION = 8;	// can contain multiple '.'s
constexpr int RAKUNUM_BAD = 9;
82
// Regex / Q string types.
// The numeric order is significant: values below RAKUTYPE_QLANG are regex
// types, and regex values above RAKUTYPE_REGEX use two-character identifiers.
// Typed constants are used instead of macros so the names respect scope.
constexpr int RAKUTYPE_REGEX_NORM = 0;	// 0 char ident
constexpr int RAKUTYPE_REGEX_S = 1;	// 1 char ident
constexpr int RAKUTYPE_REGEX_M = 2;	// 1 char ident
constexpr int RAKUTYPE_REGEX_Y = 3;	// 1 char ident
constexpr int RAKUTYPE_REGEX = 4;	// > RAKUTYPE_REGEX == 2 char identifiers
constexpr int RAKUTYPE_REGEX_RX = 5;	// 2 char ident
constexpr int RAKUTYPE_REGEX_TR = 6;	// 2 char ident
constexpr int RAKUTYPE_QLANG = 7;	// < RAKUTYPE_QLANG == RAKUTYPE_REGEX_?
constexpr int RAKUTYPE_STR_WQ = 8;	// 0 char ident < word quote >
constexpr int RAKUTYPE_STR_Q = 9;	// 1 char ident
constexpr int RAKUTYPE_STR_QX = 10;	// 2 char ident
constexpr int RAKUTYPE_STR_QW = 11;	// 2 char ident
constexpr int RAKUTYPE_STR_QQ = 12;	// 2 char ident
constexpr int RAKUTYPE_STR_QQX = 13;	// 3 char ident
constexpr int RAKUTYPE_STR_QQW = 14;	// 3 char ident
constexpr int RAKUTYPE_STR_QQWW = 15;	// 4 char ident
100
// Delimiter types.
// Typed constants are used instead of macros so the names respect scope.
constexpr int RAKUDELIM_BRACKET = 0;	// bracket: regex, Q language
constexpr int RAKUDELIM_QUOTE = 1;	// quote: normal string
104
// rakuWordLists: descriptions of the keyword sets read from the configuration.
// The position in this array is the word list index; nullptr ends the list.
const char *const rakuWordLists[] = {
	"Keywords and identifiers",
	"Functions",
	"Types basic",
	"Types composite",
	"Types domain-specific",
	"Types exception",
	"Adverbs",
	nullptr,
};
116
// Lexer options together with their default values.
struct OptionsRaku {
	bool fold = true;			// folding enabled
	bool foldCompact = false;
	bool foldComment = true;
	bool foldCommentMultiline = true;	// multi-line comment folding
	bool foldCommentPOD = true;		// POD comment folding
	OptionsRaku() = default;
};
132
133	// init options and words
134	struct OptionSetRaku : public OptionSet<OptionsRaku> {
135	OptionSetRaku() {
136	DefineProperty("fold", &OptionsRaku::fold);
137	DefineProperty("fold.comment", &OptionsRaku::foldComment);
138	DefineProperty("fold.compact", &OptionsRaku::foldCompact);
139
140	DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline,
141	"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
142	DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD,
143	"Set this property to 0 to disable folding POD comments when fold.comment=1.");
144
145	// init word lists
146	DefineWordListSets(rakuWordLists);
147	}
148	};
149
// Delimiter pair: an opening character, up to two accepted closing
// characters, and how many times the delimiter is repeated.
struct DelimPair {
	int opener;		// opener char
	int closer[2];		// closer chars (0 == unused)
	bool interpol;		// can variables be interpolated?
	short count;		// delimiter char count

	// Starts with no delimiters set, no interpolation and a zero count.
	DelimPair() : opener(0), closer{0, 0}, interpol(false), count(0) {
	}

	// True when ch equals either stored closer value.
	bool isCloser(int ch) const {
		return closer[0] == ch || closer[1] == ch;
	}
};
167
/*----------------------------------------------------------------------------*
 * --- FUNCTIONS ---
 *----------------------------------------------------------------------------*/
171
/*
 * IsANewLine
 * - returns true if ch is a carriage return or a line feed
 */
constexpr bool IsANewLine(int ch) noexcept {
	return ch == '\n' || ch == '\r';
}
179
180	/*
181	* IsAWhitespace
182	* - returns true if this is a whitespace (or newline) char
183	*/
184	bool IsAWhitespace(int ch) noexcept {
185	return IsASpaceOrTab(ch) \|\| IsANewLine(ch);
186	}
187
/*
 * IsAlphabet
 * - returns true if ch is an ASCII letter (a-z or A-Z)
 */
constexpr bool IsAlphabet(int ch) noexcept {
	const bool isLower = ch >= 'a' && ch <= 'z';
	const bool isUpper = ch >= 'A' && ch <= 'Z';
	return isLower || isUpper;
}
195
196	/*
197	* IsCommentLine
198	* - returns true if this is a comment line
199	* - tests: SCE_RAKU_COMMENTLINE or SCE_RAKU_COMMENTEMBED
200	* modified from: LexPerl.cxx
201	*/
202	bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) {
203	Sci_Position pos = styler.LineStart(line);
204	Sci_Position eol_pos = styler.LineStart(line + `1`) - `1`;
205	for (Sci_Position i = pos; i < eol_pos; i++) {
206	char ch = styler [i];
207	int style = styler.StyleAt(i);
208	if (type == SCE_RAKU_COMMENTEMBED) {
209	if (i == (eol_pos - `1`) && style == type)
210	return true;
211	} else { // make sure the line is NOT a SCE_RAKU_COMMENTEMBED
212	if (ch == `'#'` && style == type && styler [i+`1`] != '`' )
213	return true;
214	else if (!IsASpaceOrTab(ch))
215	return false;
216	}
217	}
218	return false;
219	}
220
221	/*
222	* ContainsQTo
223	* - returns true if this range contains ":to" in style SCE_RAKU_ADVERB indicating the start
224	* of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ.
225	*/
226	bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) {
227	std::string adverb;
228	for (Sci_Position i = start; i < end; i++) {
229	if (styler.StyleAt(i) == SCE_RAKU_ADVERB) {
230	adverb.push_back(styler [i]);
231	}
232	}
233	return adverb.find(":to") != std::string::npos;
234	}
235
236	/*
237	* GetBracketCloseChar
238	* - returns the end bracket char: opposite of start
239	* - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section)
240	* - Categories are general matches for valid BiDi types
241	* - Most closer chars are opener + 1
242	*/
243	int GetBracketCloseChar(const int ch) noexcept {
244	const CharacterCategory cc = CategoriseCharacter(ch);
245	switch (cc) {
246	case ccSm:
247	switch (ch) {
248	case `0x3C`: return `0x3E`; // LESS-THAN SIGN
249	case `0x2208`: return `0x220B`; // ELEMENT OF
250	case `0x2209`: return `0x220C`; // NOT AN ELEMENT OF
251	case `0x220A`: return `0x220D`; // SMALL ELEMENT OF
252	case `0x2215`: return `0x29F5`; // DIVISION SLASH
253	case `0x2243`: return `0x22CD`; // ASYMPTOTICALLY EQUAL TO
254	case `0x2298`: return `0x29B8`; // CIRCLED DIVISION SLASH
255	case `0x22A6`: return `0x2ADE`; // ASSERTION
256	case `0x22A8`: return `0x2AE4`; // TRUE
257	case `0x22A9`: return `0x2AE3`; // FORCES
258	case `0x22AB`: return `0x2AE5`; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
259	case `0x22F2`: return `0x22FA`; // ELEMENT OF WITH LONG HORIZONTAL STROKE
260	case `0x22F3`: return `0x22FB`; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
261	case `0x22F4`: return `0x22FC`; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
262	case `0x22F6`: return `0x22FD`; // ELEMENT OF WITH OVERBAR
263	case `0x22F7`: return `0x22FE`; // SMALL ELEMENT OF WITH OVERBAR
264	case `0xFF1C`: return `0xFF1E`; // FULLWIDTH LESS-THAN SIGN
265	}
266	break;
267	case ccPs:
268	switch (ch) {
269	case `0x5B`: return `0x5D`; // LEFT SQUARE BRACKET
270	case `0x7B`: return `0x7D`; // LEFT CURLY BRACKET
271	case `0x298D`: return `0x2990`; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
272	case `0x298F`: return `0x298E`; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
273	case `0xFF3B`: return `0xFF3D`; // FULLWIDTH LEFT SQUARE BRACKET
274	case `0xFF5B`: return `0xFF5D`; // FULLWIDTH LEFT CURLY BRACKET
275	}
276	break;
277	case ccPi:
278	break;
279	default: return `0`;
280	}
281	return ch + `1`;
282	}
283
284	/*
285	* IsValidQuoteOpener
286	* -
287	*/
288	bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept {
289	dp.closer[`0`] = `0`;
290	dp.closer[`1`] = `0`;
291	dp.interpol = true;
292	if (type == RAKUDELIM_QUOTE) {
293	switch (ch) {
294	// Opener Closer Description
295	case `'\''`: dp.closer[`0`] = `'\''`; // APOSTROPHE
296	dp.interpol = false;
297	break;
298	case `'"'`: dp.closer[`0`] = `'"'`; // QUOTATION MARK
299	break;
300	case `0x2018`: dp.closer[`0`] = `0x2019`; // LEFT SINGLE QUOTATION MARK
301	dp.interpol = false;
302	break;
303	case `0x201C`: dp.closer[`0`] = `0x201D`; // LEFT DOUBLE QUOTATION MARK
304	break;
305	case `0x201D`: dp.closer[`0`] = `0x201C`; // RIGHT DOUBLE QUOTATION MARK
306	break;
307	case `0x201E`: dp.closer[`0`] = `0x201C`; // DOUBLE LOW-9 QUOTATION MARK
308	dp.closer[`1`] = `0x201D`;
309	break;
310	case `0xFF62`: dp.closer[`0`] = `0xFF63`; // HALFWIDTH LEFT CORNER BRACKET
311	dp.interpol = false;
312	break;
313	default: return false;
314	}
315	} else if (type == RAKUDELIM_BRACKET) {
316	dp.closer[`0`] = GetBracketCloseChar(ch);
317	}
318	dp.opener = ch;
319	dp.count = `1`;
320	return dp.closer[`0`] > `0`;
321	}
322
323	/*
324	* IsBracketOpenChar
325	* - true if this is a valid start bracket character
326	*/
327	bool IsBracketOpenChar(int ch) noexcept {
328	return GetBracketCloseChar(ch) > `0`;
329	}
330
331	/*
332	* IsValidRegOrQAdjacent
333	* - returns true if ch is a valid character to put directly after Q / q
334	* * ref: Q Language: https://docs.raku.org/language/quoting
335	*/
336	bool IsValidRegOrQAdjacent(int ch) noexcept {
337	return !(IsAlphaNumeric(ch) \|\| ch == `'_'` \|\| ch == `'('` \|\| ch == `')'` \|\| ch == `'\''` );
338	}
339
340	/*
341	* IsValidRegOrQPrecede
342	* - returns true if ch is a valid preceeding character to put directly before Q / q
343	* * ref: Q Language: https://docs.raku.org/language/quoting
344	*/
345	bool IsValidRegOrQPrecede(int ch) noexcept {
346	return !(IsAlphaNumeric(ch) \|\| ch == `'_'`);
347	}
348
349	/*
350	* MatchCharInRange
351	* - returns true if the mach character is found in range (of length)
352	* - ignoreDelim (default false)
353	*/
354	bool MatchCharInRange(StyleContext &sc, const Sci_Position length,
355	const int match, bool ignoreDelim = false) {
356	Sci_Position len = `0`;
357	int chPrev = sc.chPrev;
358	while (++len < length) {
359	const int ch = sc.GetRelativeCharacter(len);
360	if (ch == match && (ignoreDelim \|\| chPrev != `'\\'`))
361	return true;
362	}
363	return false;
364	}
365
366	/*
367	* PrevNonWhitespaceChar
368	* - returns the last non-whitespace char
369	*/
370	int PrevNonWhitespaceChar(StyleContext &sc) {
371	Sci_Position rel = `0`;
372	Sci_Position max_back = `0` - sc.currentPos;
373	while (--rel > max_back) {
374	const int ch = sc.GetRelativeCharacter(rel);
375	if (!IsAWhitespace(ch))
376	return ch;
377	}
378	return `0`; // no matching char
379	}
380
381	/*
382	* IsQLangStartAtScPos
383	* - returns true if this is a valid Q Language sc position
384	* - ref: https://docs.raku.org/language/quoting
385	* - Q :adverb :adverb //;
386	* - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /;
387	*/
388	bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) {
389	const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext);
390	const int chFw2 = sc.GetRelativeCharacter(`2`);
391	const int chFw3 = sc.GetRelativeCharacter(`3`);
392	type = -`1`;
393	if (IsValidRegOrQPrecede(sc.chPrev)) {
394	if (sc.ch == `'Q'` && valid_adj) {
395	type = RAKUTYPE_QLANG;
396	} else if (sc.ch == `'q'`) {
397	switch (sc.chNext) {
398	case `'x'`:
399	type = RAKUTYPE_STR_QX;
400	break;
401	case `'w'`:
402	type = RAKUTYPE_STR_QW;
403	break;
404	case `'q'`:
405	if (chFw2 == `'x'`) {
406	type = RAKUTYPE_STR_QQX;
407	} else if (chFw2 == `'w'`) {
408	if (chFw3 == `'w'`) {
409	type = RAKUTYPE_STR_QQWW;
410	} else {
411	type = RAKUTYPE_STR_QQW;
412	}
413	} else {
414	type = RAKUTYPE_STR_QQ;
415	}
416	break;
417	default:
418	type = RAKUTYPE_STR_Q;
419	}
420	} else if (sc.ch == `'<'` && MatchCharInRange(sc, length, `'>'`)) {
421	type = RAKUTYPE_STR_WQ; // < word quote >
422	}
423	}
424	return type >= `0`;
425	}
426
427	/*
428	* IsRegexStartAtScPos
429	* - returns true if this is a valid Regex sc position
430	* - ref: https://docs.raku.org/language/regexes
431	* - Regex: (rx/s/m/tr/y) :adverb /:adverb /;
432	* - regex R :adverb //;
433	* - /:adverb /;
434	*/
435	bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) {
436	const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext);
437	type = -`1`;
438	if (IsValidRegOrQPrecede(sc.chPrev)) {
439	switch (sc.ch) {
440	case `'r'`:
441	if (sc.chNext == `'x'`)
442	type = RAKUTYPE_REGEX_RX;
443	break;
444	case `'t'`:
445	case `'T'`:
446	if (sc.chNext == `'r'` \|\| sc.chNext == `'R'`)
447	type = RAKUTYPE_REGEX_TR;
448	break;
449	case `'m'`:
450	if (valid_adj)
451	type = RAKUTYPE_REGEX_M;
452	break;
453	case `'s'`:
454	case `'S'`:
455	if (valid_adj)
456	type = RAKUTYPE_REGEX_S;
457	break;
458	case `'y'`:
459	if (valid_adj)
460	type = RAKUTYPE_REGEX_Y;
461	break;
462	case `'/'`:
463	if (set.Contains(PrevNonWhitespaceChar(sc)))
464	type = RAKUTYPE_REGEX_NORM;
465	}
466	}
467	return type >= `0`;
468	}
469
470	/*
471	* IsValidIdentPrecede
472	* - returns if ch is a valid preceeding char to put directly before an identifier
473	*/
474	bool IsValidIdentPrecede(int ch) noexcept {
475	return !(IsAlphaNumeric(ch) \|\| ch == `'_'` \|\| ch == `'@'` \|\| ch == `'$'` \|\| ch == `'%'`);
476	}
477
478	/*
479	* IsValidDelimiter
480	* - returns if ch is a valid delimiter (most chars are valid)
481	* * ref: Q Language: https://docs.raku.org/language/quoting
482	*/
483	bool IsValidDelimiter(int ch) noexcept {
484	return !(IsAlphaNumeric(ch) \|\| ch == `':'`);
485	}
486
487	/*
488	* GetDelimiterCloseChar
489	* - returns the corrisponding close char for a given delimiter (could be the same char)
490	*/
491	int GetDelimiterCloseChar(int ch) noexcept {
492	int ch_end = GetBracketCloseChar(ch);
493	if (ch_end == `0` && IsValidDelimiter(ch)) {
494	ch_end = ch;
495	}
496	return ch_end;
497	}
498
499	/*
500	* GetRepeatCharCount
501	* - returns the occurence count of match
502	*/
503	Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) {
504	Sci_Position cnt = `0`;
505	while (cnt < length) {
506	if (sc.GetRelativeCharacter(cnt) != chMatch) {
507	break;
508	}
509	cnt++;
510	}
511	return cnt;
512	}
513
514	/*
515	* LengthToDelimiter
516	* - returns the length until the end of a delimited string section
517	* - Ignores nested delimiters (if opener != closer)
518	* - no trailing char after last closer (default false)
519	*/
520	Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp,
521	Sci_Position length, bool noTrailing = false) {
522	short cnt_open = `0`; // count open bracket
523	short cnt_close = `0`; // count close bracket
524	Sci_Position len = `0`; // count characters
525	int chOpener = dp.opener; // look for nested opener / closer
526	if (dp.opener == dp.closer[`0`])
527	chOpener = `0`; // no opening delimiter (no nesting possible)
528
529	while (len < length) {
530	const int chPrev = sc.GetRelativeCharacter(len - `1`);
531	const int ch = sc.GetRelativeCharacter(len);
532	const int chNext = sc.GetRelativeCharacter(len+`1`);
533
534	if (cnt_open == `0` && cnt_close == dp.count) {
535	return len; // end condition has been met
536	} else {
537	if (chPrev != `'\\'` && ch == chOpener) { // ignore escape sequence
538	cnt_open++; // open nested bracket
539	} else if (chPrev != `'\\'` && dp.isCloser(ch)) { // ignore escape sequence
540	if ( cnt_open > `0` ) {
541	cnt_open--; // close nested bracket
542	} else if (dp.count > `1` && cnt_close < (dp.count - `1`)) {
543	if (cnt_close > `1`) {
544	if (dp.isCloser(chPrev)) {
545	cnt_close++;
546	} else { // reset if previous char was not close
547	cnt_close = `0`;
548	}
549	} else {
550	cnt_close++;
551	}
552	} else if (!noTrailing \|\| (IsAWhitespace(chNext))) {
553	cnt_close++; // found last close
554	if (cnt_close > `1` && !dp.isCloser(chPrev)) {
555	cnt_close = `0`; // reset if previous char was not close
556	}
557	} else {
558	cnt_close = `0`; // non handled close: reset
559	}
560	} else if (IsANewLine(ch)) {
561	cnt_open = `0`; // reset after each line
562	cnt_close = `0`;
563	}
564	}
565	len++;
566	}
567	return -`1`; // end condition has NOT been met
568	}
569
570	/*
571	* LengthToEndHeredoc
572	* - returns the length until the end of a heredoc section
573	* - delimiter string MUST begin on a new line
574	*/
575	Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler,
576	const Sci_Position length, const char *delim) {
577	bool on_new_ln = false;
578	int i = `0`; // str index
579	for (int n = `0`; n < length; n++) {
580	const char ch = styler.SafeGetCharAt(sc.currentPos + n, `0`);
581	if (on_new_ln) {
582	if (delim[i] == `'\0'`)
583	return n; // at end of str, match found!
584	if (ch != delim[i++])
585	i = `0`; // no char match, reset 'i'ndex
586	}
587	if (i == `0`) // detect new line
588	on_new_ln = IsANewLine(ch);
589	}
590	return -`1`; // no match found
591	}
592
593	/*
594	* LengthToNextChar
595	* - returns the length until the next character
596	*/
597	Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) {
598	Sci_Position len = `0`;
599	while (++len < length) {
600	const int ch = sc.GetRelativeCharacter(len);
601	if (!IsASpaceOrTab(ch) && !IsANewLine(ch)) {
602	break;
603	}
604	}
605	return len;
606	}
607
608	/*
609	* GetRelativeString
610	* - gets a relitive string and sets it in &str
611	* - resets string before seting
612	*/
613	void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length,
614	std::string &str) {
615	Sci_Position pos = offset;
616	str.clear();
617	while (pos < length) {
618	str += sc.GetRelativeCharacter(pos++);
619	}
620	}
621
622	} // end anonymous namespace
623
/*----------------------------------------------------------------------------*
 * --- class: LexerRaku ---
 *----------------------------------------------------------------------------*/
627	//class LexerRaku : public ILexerWithMetaData {
628	class LexerRaku : public DefaultLexer {
629	CharacterSet setWord;
630	CharacterSet setSigil;
631	CharacterSet setTwigil;
632	CharacterSet setOperator;
633	CharacterSet setSpecialVar;
634	WordList regexIdent; // identifiers that specify a regex
635	OptionsRaku options; // Options from config
636	OptionSetRaku osRaku;
637	WordList keywords; // Word Lists from config
638	WordList functions;
639	WordList typesBasic;
640	WordList typesComposite;
641	WordList typesDomainSpecific;
642	WordList typesExceptions;
643	WordList adverbs;
644
645	public:
646	// Defined as explicit, so that constructor can not be copied
647	explicit LexerRaku() :
648	DefaultLexer ("raku", SCLEX_RAKU),
649	setWord (CharacterSet::setAlphaNum, "-_", `0x80`),
650	setSigil (CharacterSet::setNone, "$&%@"),
651	setTwigil (CharacterSet::setNone, "!*.:<=?^~"),
652	setOperator (CharacterSet::setNone, "^&\\()-+=\|{}[]:;<>,?!.~"),
653	setSpecialVar (CharacterSet::setNone, "_/!") {
654	regexIdent.Set("regex rule token");
655	}
656	// Deleted so LexerRaku objects can not be copied.
657	LexerRaku(const LexerRaku &) = delete;
658	LexerRaku(LexerRaku &&) = delete;
659	void operator=(const LexerRaku &) = delete;
660	void operator=(LexerRaku &&) = delete;
661	virtual ~LexerRaku() {
662	}
663	void SCI_METHOD Release() noexcept override {
664	delete this;
665	}
666	int SCI_METHOD Version() const noexcept override {
667	return lvRelease5;
668	}
669	const char *SCI_METHOD PropertyNames() override {
670	return osRaku.PropertyNames();
671	}
672	int SCI_METHOD PropertyType(const char *name) override {
673	return osRaku.PropertyType(name);
674	}
675	const char SCI_METHOD DescribeProperty(const* char *name) override {
676	return osRaku.DescribeProperty(name);
677	}
678	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override;
679	const char SCI_METHOD PropertyGet(const* char *key) override {
680	return osRaku.PropertyGet(key);
681	}
682	const char *SCI_METHOD DescribeWordListSets() override {
683	return osRaku.DescribeWordListSets();
684	}
685	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
686	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
687	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
688
689	static ILexer5 *LexerFactoryRaku() {
690	return new LexerRaku ();
691	}
692
693	protected:
694	bool IsOperatorChar(const int ch);
695	bool IsWordChar(const int ch, bool allowNumber = true);
696	bool IsWordStartChar(const int ch);
697	bool IsNumberChar(const int ch, int base = `10`);
698	bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
699	int &type, const DelimPair &dp);
700	void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState);
701	bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
702	WordList &wordsAdverbs, DelimPair &dp);
703	Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length,
704	char s, const* int size, Sci_Position offset = `0`);
705	};
706
/*----------------------------------------------------------------------------*
 * --- METHODS: LexerRaku ---
 *----------------------------------------------------------------------------*/
710
711	/*
712	* LexerRaku::IsOperatorChar
713	* - Test for both ASCII and Unicode operators
714	* see: https://docs.raku.org/language/unicode_entry
715	*/
716	bool LexerRaku::IsOperatorChar(const int ch) {
717	if (ch > `0x7F`) {
718	switch (ch) {
719	// Unicode ASCII Equiv.
720	case `0x2208`: // (elem)
721	case `0x2209`: // !(elem)
722	case `0x220B`: // (cont)
723	case `0x220C`: // !(cont)
724	case `0x2216`: // (-)
725	case `0x2229`: // (&)
726	case `0x222A`: // (\|)
727	case `0x2282`: // (<)
728	case `0x2283`: // (>)
729	case `0x2284`: // !(<)
730	case `0x2285`: // !(>)
731	case `0x2286`: // (<=)
732	case `0x2287`: // (>=)
733	case `0x2288`: // !(<=)
734	case `0x2289`: // !(>=)
735	case `0x228D`: // (.)
736	case `0x228E`: // (+)
737	case `0x2296`: // (^)
738	return true;
739	}
740	}
741	return setOperator.Contains(ch);
742	}
743
744	/*
745	* LexerRaku::IsWordChar
746	* - Test for both ASCII and Unicode identifier characters
747	* see: https://docs.raku.org/language/unicode_ascii
748	* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
749	* FIXME: still may not contain all valid characters
750	*/
751	bool LexerRaku::IsWordChar(const int ch, bool allowNumber) {
752	// Unicode numbers should not apear in word identifiers
753	if (ch > `0x7F`) {
754	const CharacterCategory cc = CategoriseCharacter(ch);
755	switch (cc) {
756	// Letters
757	case ccLu:
758	case ccLl:
759	case ccLt:
760	case ccLm:
761	case ccLo:
762	return true;
763	default:
764	return false;
765	}
766	} else if (allowNumber && IsADigit(ch)) {
767	return true; // an ASCII number type
768	}
769	return setWord.Contains(ch);
770	}
771
772	/*
773	* LexerRaku::IsWordStartChar
774	* - Test for both ASCII and Unicode identifier "start / first" characters
775	*/
776	bool LexerRaku::IsWordStartChar(const int ch) {
777	return ch != `'-'` && IsWordChar(ch, false); // no numbers allowed
778	}
779
780	/*
781	* LexerRaku::IsNumberChar
782	* - Test for both ASCII and Unicode identifier number characters
783	* see: https://docs.raku.org/language/unicode_ascii
784	* also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
785	* FILTERED by Unicode letters that are NUMBER
786	* and NOT PARENTHESIZED or CIRCLED
787	* FIXME: still may not contain all valid number characters
788	*/
789	bool LexerRaku::IsNumberChar(const int ch, int base) {
790	if (ch > `0x7F`) {
791	const CharacterCategory cc = CategoriseCharacter(ch);
792	switch (cc) {
793	// Numbers
794	case ccNd:
795	case ccNl:
796	case ccNo:
797	return true;
798	default:
799	return false;
800	}
801	}
802	return IsADigit(ch, base);
803	}
804
805	/*
806	* LexerRaku::PropertySet
807	* -
808	*/
809	Sci_Position SCI_METHOD LexerRaku::PropertySet(const char key, const* char *val) {
810	if (osRaku.PropertySet(&options, key, val))
811	return `0`;
812	return -`1`;
813	}
814
815	/*
816	* LexerRaku::WordListSet
817	* -
818	*/
819	Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) {
820	WordList wordListN = nullptr*;
821	switch (n) {
822	case `0`:
823	wordListN = &keywords;
824	break;
825	case `1`:
826	wordListN = &functions;
827	break;
828	case `2`:
829	wordListN = &typesBasic;
830	break;
831	case `3`:
832	wordListN = &typesComposite;
833	break;
834	case `4`:
835	wordListN = &typesDomainSpecific;
836	break;
837	case `5`:
838	wordListN = &typesExceptions;
839	break;
840	case `6`:
841	wordListN = &adverbs;
842	break;
843	}
844	Sci_Position firstModification = -`1`;
845	if (wordListN) {
846	WordList wlNew;
847	wlNew.Set(wl);
848	if (*wordListN != wlNew) {
849	wordListN->Set(wl);
850	firstModification = `0`;
851	}
852	}
853	return firstModification;
854	}
855
856	/*
857	* LexerRaku::ProcessRegexTwinCapture
858	* - processes the transition between a regex pair (two sets of delimiters)
859	* - moves to first new delimiter, if a bracket
860	* - returns true when valid delimiter start found (if bracket)
861	*/
862	bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length,
863	int &type, const DelimPair &dp) {
864
865	if (type == RAKUTYPE_REGEX_S \|\| type == RAKUTYPE_REGEX_TR \|\| type == RAKUTYPE_REGEX_Y) {
866	type = -`1`; // clear type
867
868	// move past chRegQClose if it was the previous char
869	if (dp.isCloser(sc.chPrev))
870	sc.Forward();
871
872	// no processing needed for non-bracket
873	if (dp.isCloser(dp.opener))
874	return true;
875
876	// move to next opening bracket
877	const Sci_Position len = LengthToNextChar(sc, length);
878	if (sc.GetRelativeCharacter(len) == dp.opener) {
879	sc.Forward(len);
880	return true;
881	}
882	}
883	return false;
884	}
885
886	/*
887	* LexerRaku::ProcessStringVars
888	* - processes a string and highlights any valid variables
889	*/
890	void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) {
891	const int state = sc.state;
892	for (Sci_Position pos = `0`; pos < length; pos++) {
893	if (sc.state == varState && !IsWordChar(sc.ch)) {
894	sc.SetState(state);
895	} else if (sc.chPrev != `'\\'`
896	&& (sc.ch == `'$'` \|\| sc.ch == `'@'`)
897	&& IsWordStartChar(sc.chNext)) {
898	sc.SetState(varState);
899	}
900	sc.Forward(); // Next character
901	}
902	}
903	/*
904	* LexerRaku::ProcessValidRegQlangStart
905	* - processes a section of the document range from after a Regex / Q delimiter
906	* - returns true on success
907	* - sets: adverbs, chOpen, chClose, chCount
908	* ref: https://docs.raku.org/language/regexes
909	*/
910	bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type,
911	WordList &wordsAdverbs, DelimPair &dp) {
912	Sci_Position startPos = sc.currentPos;
913	Sci_Position startLen = length;
914	const int target_state = sc.state;
915	int state = SCE_RAKU_DEFAULT;
916	std::string str;
917
918	// find our opening delimiter (and occurrences) / save any adverbs
919	dp.opener = `0`; // adverbs can be after the first delimiter
920	bool got_all_adverbs = false; // in Regex statements
921	bool got_ident = false; // regex can have an identifier: 'regex R'
922	sc.SetState(state); // set state default to avoid pre-highlights
923	while ((dp.opener == `0` \|\| !got_all_adverbs) && sc.More()) {
924
925	// move to the next non-space character
926	const bool was_space = IsAWhitespace(sc.ch);
927	if (!got_all_adverbs && was_space) {
928	sc.Forward(LengthToNextChar(sc, length));
929	}
930	length = startLen - (sc.currentPos - startPos); // update length remaining
931
932	// parse / eat an identifier (if type == RAKUTYPE_REGEX)
933	if (dp.opener == `0` && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) {
934
935	// eat identifier / account for special adverb :sym<name>
936	bool got_sym = false;
937	while (sc.More()) {
938	sc.SetState(SCE_RAKU_IDENTIFIER);
939	while (sc.More() && (IsAlphaNumeric(sc.chNext)
940	\|\| sc.chNext == `'_'` \|\| sc.chNext == `'-'`)) {
941	sc.Forward();
942	}
943	sc.Forward();
944	if (got_sym && sc.ch == `'>'`) {
945	sc.SetState(SCE_RAKU_OPERATOR); // '>'
946	sc.Forward();
947	break;
948	} else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) {
949	sc.SetState(SCE_RAKU_ADVERB); // ':sym'
950	sc.Forward(`4`);
951	sc.SetState(SCE_RAKU_OPERATOR); // '<'
952	sc.Forward();
953	got_sym = true;
954	} else {
955	break;
956	}
957	}
958	sc.SetState(state);
959	got_ident = true;
960	}
961
962	// parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim
963	// >= RAKUTYPE_QLANG only has adverbs before delim
964	else if (!got_all_adverbs && sc.ch == `':'` && (!(dp.opener == `0` && got_ident)
965	&& !(dp.opener > `0` && type >= RAKUTYPE_QLANG))) {
966	sc.SetState(SCE_RAKU_ADVERB);
967	while (IsAlphaNumeric(sc.chNext) && sc.More()) {
968	sc.Forward();
969	str += sc.ch;
970	}
971	str += `' '`;
972	sc.Forward();
973	sc.SetState(state);
974	}
975
976	// find starting delimiter
977	else if (dp.opener == `0` && (was_space \|\| IsValidRegOrQAdjacent(sc.ch))
978	&& IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are)
979	sc.SetState((state = target_state));// start state here...
980	dp.opener = sc.ch; // this is our delimiter, get count
981	if (type < RAKUTYPE_QLANG) // type is Regex
982	dp.count = `1`; // has only one delimiter
983	else
984	dp.count = GetRepeatCharCount(sc, dp.opener, length);
985	sc.Forward(dp.count);
986	}
987
988	// we must have all the adverbs by now...
989	else {
990	if (got_all_adverbs)
991	break; // prevent infinite loop: occurs on missing open char
992	got_all_adverbs = true;
993	}
994	}
995
996	// set word list / find a valid closing delimiter (or bomb!)
997	wordsAdverbs.Set(str.c_str());
998	dp.closer[`0`] = GetDelimiterCloseChar(dp.opener);
999	dp.closer[`1`] = `0`; // no other closer char
1000	return dp.closer[`0`] > `0`;
1001	}
1002
1003	/*
1004	* LexerRaku::LengthToNonWordChar
1005	* - returns the length until the next non "word" character: AlphaNum + '_'
1006	* - also sets all the parsed chars in 's'
1007	*/
1008	Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length,
1009	char s, const* int size, Sci_Position offset) {
1010	Sci_Position len = `0`;
1011	Sci_Position max_length = size < length ? size : length;
1012	while (len <= max_length) {
1013	const int ch = sc.GetRelativeCharacter(len + offset);
1014	if (!IsWordChar(ch)) {
1015	s[len] = `'\0'`;
1016	break;
1017	}
1018	s[len] = ch;
1019	len++;
1020	}
1021	s[len + `1`] = `'\0'`;
1022	return len;
1023	}
1024
1025	/*
1026	* LexerRaku::Lex
1027	* - Main lexer method
1028	*/
1029	void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1030	LexAccessor styler(pAccess);
1031	DelimPair dpEmbeded; // delimiter pair: embeded comments
1032	DelimPair dpString; // delimiter pair: string
1033	DelimPair dpRegQ; // delimiter pair: Regex / Q Lang
1034	std::string hereDelim; // heredoc delimiter (if in heredoc)
1035	int hereState = `0`; // heredoc state to use (Q / QQ)
1036	int numState = `0`; // number state / type
1037	short cntDecimal = `0`; // number decinal count
1038	std::string wordLast; // last word seen
1039	std::string identLast; // last identifier seen
1040	std::string adverbLast; // last (single) adverb seen
1041	WordList lastAdverbs; // last adverbs seen
1042	Sci_Position len; // temp length value
1043	char s[`100`]; // temp char string
1044	int typeDetect; // temp type detected (for regex and Q lang)
1045	Sci_Position lengthToEnd; // length until the end of range
1046
1047	// Backtrack to safe start position before complex quoted elements
1048
1049	Sci_PositionU newStartPos = startPos;
1050	if (initStyle != SCE_RAKU_DEFAULT) {
1051	// Backtrack to last SCE_RAKU_DEFAULT or 0
1052	while (newStartPos > `0`) {
1053	newStartPos--;
1054	if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT)
1055	break;
1056	}
1057	// Backtrack to start of line before SCE_RAKU_HEREDOC_Q?
1058	if (initStyle == SCE_RAKU_HEREDOC_Q \|\| initStyle == SCE_RAKU_HEREDOC_QQ) {
1059	if (newStartPos > `0`) {
1060	newStartPos = styler.LineStart(styler.GetLine(newStartPos));
1061	}
1062	}
1063	} else {
1064	const Sci_Position line = styler.GetLine(newStartPos);
1065	if (line > `0`) {
1066	// If the previous line is a start of a q or qq heredoc, backtrack to start of line
1067	const Sci_Position startPreviousLine = styler.LineStart(line-`1`);
1068	if (ContainsQTo(startPreviousLine, newStartPos, styler)) {
1069	newStartPos = startPreviousLine;
1070	}
1071	}
1072	}
1073
1074
1075	// Re-calculate (any) changed startPos, length and initStyle state
1076	if (newStartPos < startPos) {
1077	initStyle = SCE_RAKU_DEFAULT;
1078	length += startPos - newStartPos;
1079	startPos = newStartPos;
1080	}
1081
1082	// init StyleContext
1083	StyleContext sc(startPos, length, initStyle, styler);
1084
1085	// StyleContext Loop
1086	for (; sc.More(); sc.Forward()) {
1087	lengthToEnd = (length - (sc.currentPos - startPos)); // end of range
1088
1089	/* * Determine if the current state should terminate ************ *
1090	* Everything within the 'switch' statement processes characters up
1091	* until the end of a syntax highlight section / state.
1092	* ****************************************************************** */
1093	switch (sc.state) {
1094	case SCE_RAKU_OPERATOR:
1095	sc.SetState(SCE_RAKU_DEFAULT);
1096	break; // FIXME: better valid operator sequences needed?
1097	case SCE_RAKU_COMMENTLINE:
1098	if (IsANewLine(sc.ch)) {
1099	sc.SetState(SCE_RAKU_DEFAULT);
1100	}
1101	break;
1102	case SCE_RAKU_COMMENTEMBED:
1103	if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= `0`) {
1104	sc.Forward(len); // Move to end delimiter
1105	sc.SetState(SCE_RAKU_DEFAULT);
1106	} else {
1107	sc.Forward(lengthToEnd); // no end delimiter found
1108	}
1109	break;
1110	case SCE_RAKU_POD:
1111	if (sc.atLineStart && sc.Match("=end pod")) {
1112	sc.Forward(`8`);
1113	sc.SetState(SCE_RAKU_DEFAULT);
1114	}
1115	break;
1116	case SCE_RAKU_STRING:
1117
1118	// Process the string for variables: move to end delimiter
1119	if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= `0`) {
1120	if (dpString.interpol) {
1121	ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
1122	} else {
1123	sc.Forward(len);
1124	}
1125	sc.SetState(SCE_RAKU_DEFAULT);
1126	} else {
1127	sc.Forward(lengthToEnd); // no end delimiter found
1128	}
1129	break;
1130	case SCE_RAKU_STRING_Q:
1131	case SCE_RAKU_STRING_QQ:
1132	case SCE_RAKU_STRING_Q_LANG:
1133
1134	// No string: previous char was the delimiter
1135	if (dpRegQ.count == `1` && dpRegQ.isCloser(sc.chPrev)) {
1136	sc.SetState(SCE_RAKU_DEFAULT);
1137	}
1138
1139	// Process the string for variables: move to end delimiter
1140	else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= `0`) {
1141
1142	// set (any) heredoc delimiter string
1143	if (lastAdverbs.InList("to")) {
1144	GetRelativeString(sc, -`1`, len - dpRegQ.count, hereDelim);
1145	hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state
1146	}
1147
1148	// select variable identifiers
1149	if (sc.state == SCE_RAKU_STRING_QQ \|\| lastAdverbs.InList("qq")) {
1150	ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
1151	hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state
1152	} else {
1153	sc.Forward(len);
1154	}
1155	sc.SetState(SCE_RAKU_DEFAULT);
1156	} else {
1157	sc.Forward(lengthToEnd); // no end delimiter found
1158	}
1159	break;
1160	case SCE_RAKU_HEREDOC_Q:
1161	case SCE_RAKU_HEREDOC_QQ:
1162	if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= `0`) {
1163	// select variable identifiers
1164	if (sc.state == SCE_RAKU_HEREDOC_QQ) {
1165	ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR);
1166	} else {
1167	sc.Forward(len);
1168	}
1169	sc.SetState(SCE_RAKU_DEFAULT);
1170	} else {
1171	sc.Forward(lengthToEnd); // no end delimiter found
1172	}
1173	hereDelim.clear(); // clear heredoc delimiter
1174	break;
1175	case SCE_RAKU_REGEX:
1176	// account for typeDetect = RAKUTYPE_REGEX_S/TR/Y
1177	while (sc.state == SCE_RAKU_REGEX) {
1178
1179	// No string: previous char was the delimiter
1180	if (dpRegQ.count == `1` && dpRegQ.isCloser(sc.chPrev)) {
1181	if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
1182	continue;
1183	sc.SetState(SCE_RAKU_DEFAULT);
1184	break;
1185	}
1186
1187	// Process the string for variables: move to end delimiter
1188	else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= `0`) {
1189	ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR);
1190	if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ))
1191	continue;
1192	sc.SetState(SCE_RAKU_DEFAULT);
1193	break;
1194	} else {
1195	sc.Forward(lengthToEnd); // no end delimiter found
1196	break;
1197	}
1198	}
1199	break;
1200	case SCE_RAKU_NUMBER:
1201	if (sc.ch == `'.'`) {
1202	if (sc.chNext == `'.'`) { // '..' is an operator
1203	sc.SetState(SCE_RAKU_OPERATOR);
1204	sc.Forward();
1205	if (sc.chNext == `'.'`) // '...' is also an operator
1206	sc.Forward();
1207	break;
1208	} else if (numState > RAKUNUM_FLOAT_EXP
1209	&& (cntDecimal < `1` \|\| numState == RAKUNUM_VERSION)) {
1210	cntDecimal++;
1211	sc.Forward();
1212	} else {
1213	sc.SetState(SCE_RAKU_DEFAULT);
1214	break; // too many decinal places
1215	}
1216	}
1217	switch (numState) {
1218	case RAKUNUM_BINARY:
1219	if (!IsNumberChar(sc.ch, `2`))
1220	sc.SetState(SCE_RAKU_DEFAULT);
1221	break;
1222	case RAKUNUM_OCTAL:
1223	if (!IsNumberChar(sc.ch, `8`))
1224	sc.SetState(SCE_RAKU_DEFAULT);
1225	break;
1226	case RAKUNUM_HEX:
1227	if (!IsNumberChar(sc.ch, `16`))
1228	sc.SetState(SCE_RAKU_DEFAULT);
1229	break;
1230	case RAKUNUM_DECIMAL:
1231	case RAKUNUM_VERSION:
1232	if (!IsNumberChar(sc.ch))
1233	sc.SetState(SCE_RAKU_DEFAULT);
1234	}
1235	break;
1236	case SCE_RAKU_WORD:
1237	case SCE_RAKU_FUNCTION:
1238	case SCE_RAKU_TYPEDEF:
1239	case SCE_RAKU_ADVERB:
1240	sc.SetState(SCE_RAKU_DEFAULT);
1241	break;
1242	case SCE_RAKU_MU:
1243	case SCE_RAKU_POSITIONAL:
1244	case SCE_RAKU_ASSOCIATIVE:
1245	case SCE_RAKU_CALLABLE:
1246	case SCE_RAKU_IDENTIFIER:
1247	case SCE_RAKU_GRAMMAR:
1248	case SCE_RAKU_CLASS:
1249	sc.SetState(SCE_RAKU_DEFAULT);
1250	break;
1251	}
1252
1253	/* * Determine if a new state should be entered ***************** *
1254	* Everything below here identifies the beginning of a state, all or part
1255	* of the characters within this state are processed here, the rest are
1256	* completed above in the terminate state section.
1257	* ****************************************************************** */
1258	if (sc.state == SCE_RAKU_DEFAULT) {
1259
1260	// --- Single line comment
1261	if (sc.ch == `'#'`) {
1262	sc.SetState(SCE_RAKU_COMMENTLINE);
1263	}
1264
1265	// --- POD block
1266	else if (sc.atLineStart && sc.Match("=begin pod")) {
1267	sc.SetState(SCE_RAKU_POD);
1268	sc.Forward(`10`);
1269	}
1270
1271	// --- String (normal)
1272	else if (sc.chPrev != `'\\'` && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) {
1273	sc.SetState(SCE_RAKU_STRING);
1274	}
1275
1276	// --- String (Q Language) ----------------------------------------
1277	// - https://docs.raku.org/language/quoting
1278	// - Q :adverb :adverb //;
1279	// - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //;
1280	else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) {
1281	int state = SCE_RAKU_STRING_Q_LANG;
1282	Sci_Position forward = `1`; // single char ident (default)
1283	if (typeDetect > RAKUTYPE_QLANG) {
1284	state = SCE_RAKU_STRING_Q;
1285	if (typeDetect == RAKUTYPE_STR_WQ)
1286	forward = `0`; // no char ident
1287	}
1288	if (typeDetect > RAKUTYPE_STR_Q) {
1289	if (typeDetect == RAKUTYPE_STR_QQ)
1290	state = SCE_RAKU_STRING_QQ;
1291	forward++; // two char ident
1292	}
1293	if (typeDetect > RAKUTYPE_STR_QQ)
1294	forward++; // three char ident
1295	if (typeDetect == RAKUTYPE_STR_QQWW)
1296	forward++; // four char ident
1297
1298	// Proceed: check for a valid character after statement
1299	if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) \|\| typeDetect == RAKUTYPE_QLANG) {
1300	sc.SetState(state);
1301	sc.Forward(forward);
1302	lastAdverbs.Clear();
1303
1304	// Process: adverbs / opening delimiter / adverbs after delim
1305	if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
1306	lastAdverbs, dpRegQ))
1307	sc.SetState(state);
1308	}
1309	}
1310
1311	// --- Regex (rx/s/m/tr/y) ----------------------------------------
1312	// - https://docs.raku.org/language/regexes
1313	else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) \|\| regexIdent.InList(wordLast.c_str()))) {
1314	if (typeDetect == -`1`) { // must be a regex identifier word
1315	wordLast.clear();
1316	typeDetect = RAKUTYPE_REGEX;
1317	}
1318	Sci_Position forward = `0`; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM)
1319	if (typeDetect > `0` && typeDetect != RAKUTYPE_REGEX)
1320	forward++; // single char ident
1321	if (typeDetect > RAKUTYPE_REGEX)
1322	forward++; // two char ident
1323
1324	// Proceed: check for a valid character after statement
1325	if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) \|\| typeDetect == RAKUTYPE_REGEX_NORM) {
1326	sc.SetState(SCE_RAKU_REGEX);
1327	sc.Forward(forward);
1328	lastAdverbs.Clear();
1329
1330	// Process: adverbs / opening delimiter / adverbs after delim
1331	if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect,
1332	lastAdverbs, dpRegQ))
1333	sc.SetState(SCE_RAKU_REGEX);
1334	}
1335	}
1336
1337	// --- Numbers ----------------------------------------------------
1338	else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch)
1339	\|\| (sc.ch == `'v'` && IsNumberChar(sc.chNext) && wordLast == "use"))) {
1340	numState = RAKUNUM_DECIMAL; // default: decimal (base 10)
1341	cntDecimal = `0`;
1342	sc.SetState(SCE_RAKU_NUMBER);
1343	if (sc.ch == `'v'`) // forward past 'v'
1344	sc.Forward();
1345	if (wordLast == "use") { // package version number
1346	numState = RAKUNUM_VERSION;
1347	} else if (sc.ch == `'0'`) { // other type of number
1348	switch (sc.chNext) {
1349	case `'b'`: // binary (base 2)
1350	numState = RAKUNUM_BINARY;
1351	break;
1352	case `'o'`: // octal (base 8)
1353	numState = RAKUNUM_OCTAL;
1354	break;
1355	case `'x'`: // hexadecimal (base 16)
1356	numState = RAKUNUM_HEX;
1357	}
1358	if (numState != RAKUNUM_DECIMAL)
1359	sc.Forward(); // forward to number type char
1360	}
1361	}
1362
1363	// --- Keywords / functions / types / barewords -------------------
1364	else if ((sc.currentPos == `0` \|\| sc.atLineStart \|\| IsValidIdentPrecede(sc.chPrev))
1365	&& IsWordStartChar(sc.ch)) {
1366	len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s));
1367	if (keywords.InList(s)) {
1368	sc.SetState(SCE_RAKU_WORD); // Keywords
1369	} else if(functions.InList(s)) {
1370	sc.SetState(SCE_RAKU_FUNCTION); // Functions
1371	} else if(typesBasic.InList(s)) {
1372	sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic)
1373	} else if(typesComposite.InList(s)) {
1374	sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite)
1375	} else if(typesDomainSpecific.InList(s)) {
1376	sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific)
1377	} else if(typesExceptions.InList(s)) {
1378	sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions)
1379	} else {
1380	if (wordLast == "class")
1381	sc.SetState(SCE_RAKU_CLASS); // a Class ident
1382	else if (wordLast == "grammar")
1383	sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident
1384	else
1385	sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword
1386	identLast = s; // save identifier
1387	}
1388	if (adverbLast == "sym") { // special adverb ":sym"
1389	sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier
1390	identLast = s; // save identifier
1391	}
1392	if (sc.state != SCE_RAKU_IDENTIFIER)
1393	wordLast = s; // save word
1394	sc.Forward(len - `1`); // ...forward past word
1395	}
1396
1397	// --- Adverbs ----------------------------------------------------
1398	else if (sc.ch == `':'` && IsWordStartChar(sc.chNext)) {
1399	len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), `1`);
1400	if (adverbs.InList(s)) {
1401	sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':')
1402	adverbLast = s; // save word
1403	sc.Forward(len); // ...forward past word (less offset: 1)
1404	}
1405	}
1406
1407	// --- Identifiers: $mu / @positional / %associative / &callable --
1408	// see: https://docs.raku.org/language/variables
1409	else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext)
1410	\|\| setSpecialVar.Contains(sc.chNext)
1411	\|\| IsWordStartChar(sc.chNext))) {
1412
1413	// State based on sigil
1414	switch (sc.ch) {
1415	case `'$'`: sc.SetState(SCE_RAKU_MU);
1416	break;
1417	case `'@'`: sc.SetState(SCE_RAKU_POSITIONAL);
1418	break;
1419	case `'%'`: sc.SetState(SCE_RAKU_ASSOCIATIVE);
1420	break;
1421	case `'&'`: sc.SetState(SCE_RAKU_CALLABLE);
1422	}
1423	const int state = sc.state;
1424	sc.Forward();
1425	char ch_delim = `0`;
1426	if (setSpecialVar.Contains(sc.ch)
1427	&& !setWord.Contains(sc.chNext)) { // Process Special Var
1428	ch_delim = -`1`;
1429	} else if (setTwigil.Contains(sc.ch)) { // Process Twigil
1430	sc.SetState(SCE_RAKU_OPERATOR);
1431	if (sc.ch == `'<'` && setWord.Contains(sc.chNext))
1432	ch_delim = `'>'`;
1433	sc.Forward();
1434	sc.SetState(state);
1435	}
1436
1437	// Process (any) identifier
1438	if (ch_delim >= `0`) {
1439	sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - `1`);
1440	if (ch_delim > `0` && sc.chNext == ch_delim) {
1441	sc.Forward();
1442	sc.SetState(SCE_RAKU_OPERATOR);
1443	}
1444	identLast = s; // save identifier
1445	}
1446	}
1447
1448	// --- Operators --------------------------------------------------
1449	else if (IsOperatorChar(sc.ch)) {
1450	// FIXME: better valid operator sequences needed?
1451	sc.SetState(SCE_RAKU_OPERATOR);
1452	}
1453
1454	// --- Heredoc: begin ---------------------------------------------
1455	else if (!hereDelim.empty() && sc.atLineEnd) {
1456	if (IsANewLine(sc.ch))
1457	sc.Forward(); // skip a possible CRLF situation
1458	sc.SetState(hereState);
1459	}
1460
1461	// Reset words: on operator simi-colon OR '}' (end of statement)
1462	if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == `';'` \|\| sc.ch == `'}'`)) {
1463	wordLast.clear();
1464	identLast.clear();
1465	adverbLast.clear();
1466	}
1467	}
1468
1469	/* * Determine if an "embedded comment" is to be entered ******** *
1470	* This type of embedded comment section, or multi-line comment comes
1471	* after a normal comment has begun... e.g: #`[ ... ]
1472	* ****************************************************************** */
1473	else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == `'#'` && sc.ch == '`') {
1474	if (IsBracketOpenChar(sc.chNext)) {
1475	sc.Forward(); // Condition met for "embedded comment"
1476	dpEmbeded.opener = sc.ch;
1477
1478	// Find the opposite (termination) closeing bracket (if any)
1479	dpEmbeded.closer[`0`] = GetBracketCloseChar(dpEmbeded.opener);
1480	if (dpEmbeded.closer[`0`] > `0`) { // Enter "embedded comment"
1481
1482	// Find multiple opening character occurence
1483	dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd);
1484	sc.SetState(SCE_RAKU_COMMENTEMBED);
1485	sc.Forward(dpEmbeded.count - `1`); // incremented in the next loop
1486	}
1487	}
1488	}
1489	}
1490
1491	// And we're done...
1492	sc.Complete();
1493	}
1494
1495	/*
1496	* LexerRaku::Lex
1497	* - Main fold method
1498	* NOTE: although Raku uses and supports UNICODE characters, we're only looking
1499	* at normal chars here, using 'SafeGetCharAt' - for folding purposes
1500	* that is all we need.
1501	*/
1502	#define RAKU_HEADFOLD_SHIFT 4
1503	#define RAKU_HEADFOLD_MASK 0xF0
1504	void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int / initStyle /, IDocument *pAccess) {
1505
1506	// init LexAccessor / return if fold option is off
1507	if (!options.fold) return;
1508	LexAccessor styler(pAccess);
1509
1510	// init char and line positions
1511	const Sci_PositionU endPos = startPos + length;
1512	Sci_Position lineCurrent = styler.GetLine(startPos);
1513
1514	// Backtrack to last SCE_RAKU_DEFAULT line
1515	if (startPos > `0` && lineCurrent > `0`) {
1516	while (lineCurrent > `0` && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) {
1517	lineCurrent--;
1518	startPos = styler.LineStart(lineCurrent);
1519	}
1520	lineCurrent = styler.GetLine(startPos);
1521	}
1522	Sci_PositionU lineStart = startPos;
1523	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + `1`);
1524
1525	// init line folding level
1526	int levelPrev = SC_FOLDLEVELBASE;
1527	if (lineCurrent > `0`)
1528	levelPrev = styler.LevelAt(lineCurrent - `1`) >> `16`;
1529	int levelCurrent = levelPrev;
1530
1531	// init char and style variables
1532	char chNext = styler [startPos];
1533	int stylePrev = styler.StyleAt(startPos - `1`);
1534	int styleNext = styler.StyleAt(startPos);
1535	int styleNextStartLine = styler.StyleAt(lineStartNext);
1536	int visibleChars = `0`;
1537	bool wasCommentMulti = false;
1538
1539	// main loop
1540	for (Sci_PositionU i = startPos; i < endPos; i++) {
1541
1542	// next char, style and flags
1543	const char ch = chNext;
1544	chNext = styler.SafeGetCharAt(i + `1`);
1545	const int style = styleNext;
1546	styleNext = styler.StyleAt(i + `1`);
1547	const bool atEOL = i == (lineStartNext - `1`);
1548	const bool atLineStart = i == lineStart;
1549
1550	// --- Comments / Multi-line / POD ------------------------------------
1551	if (options.foldComment) {
1552
1553	// Multi-line
1554	if (options.foldCommentMultiline) {
1555	if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == `'#'` && chNext == '`'
1556	&& styleNextStartLine == SCE_RAKU_COMMENTEMBED) {
1557	levelCurrent++;
1558	wasCommentMulti = true; // don't confuse line comments
1559	} else if (style == SCE_RAKU_COMMENTEMBED && atLineStart
1560	&& styleNextStartLine != SCE_RAKU_COMMENTEMBED) {
1561	levelCurrent--;
1562	}
1563	}
1564
1565	// Line comments
1566	if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE
1567	&& IsCommentLine(lineCurrent, styler)) {
1568	if (!IsCommentLine(lineCurrent - `1`, styler)
1569	&& IsCommentLine(lineCurrent + `1`, styler))
1570	levelCurrent++;
1571	else if (IsCommentLine(lineCurrent - `1`, styler)
1572	&& !IsCommentLine(lineCurrent + `1`, styler))
1573	levelCurrent--;
1574	}
1575
1576	// POD
1577	if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) {
1578	if (styler.Match(i, "=begin"))
1579	levelCurrent++;
1580	else if (styler.Match(i, "=end"))
1581	levelCurrent--;
1582	}
1583	}
1584
1585	// --- Code block -----------------------------------------------------
1586	if (style == SCE_RAKU_OPERATOR) {
1587	if (ch == `'{'`) {
1588	if (levelCurrent < levelPrev) levelPrev--;
1589	levelCurrent++;
1590	} else if (ch == `'}'`) {
1591	levelCurrent--;
1592	}
1593	}
1594
1595	// --- at end of line / range / apply fold ----------------------------
1596	if (atEOL) {
1597	int level = levelPrev;
1598
1599	// set level flags
1600	level \|= levelCurrent << `16`;
1601	if (visibleChars == `0` && options.foldCompact)
1602	level \|= SC_FOLDLEVELWHITEFLAG;
1603	if ((levelCurrent > levelPrev) && (visibleChars > `0`))
1604	level \|= SC_FOLDLEVELHEADERFLAG;
1605	if (level != styler.LevelAt(lineCurrent)) {
1606	styler.SetLevel(lineCurrent, level);
1607	}
1608	lineCurrent++;
1609	lineStart = lineStartNext;
1610	lineStartNext = styler.LineStart(lineCurrent + `1`);
1611	styleNextStartLine = styler.StyleAt(lineStartNext);
1612	levelPrev = levelCurrent;
1613	visibleChars = `0`;
1614	wasCommentMulti = false;
1615	}
1616
1617	// increment visibleChars / set previous char
1618	if (!isspacechar(ch))
1619	visibleChars++;
1620	stylePrev = style;
1621	}
1622
1623	// Done: set real level of the next line
1624	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1625	styler.SetLevel(lineCurrent, levelPrev \| flagsNext);
1626	}
1627
/*----------------------------------------------------------------------------*
 * --- Scintilla: LexerModule ---
 *----------------------------------------------------------------------------*/
1631
// Module object for the Raku lexer: constructed from the SCLEX_RAKU id, the
// LexerRaku::LexerFactoryRaku factory, the name "raku" and rakuWordLists.
// NOTE(review): presumably this is what makes the lexer discoverable by name
// / id - confirm against the LexerModule declaration.
LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists);
1633

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexRaku.cxx