LexPerl.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexPerl.cxx]

// Scintilla source code edit control
/** @file LexPerl.cxx
 ** Lexer for Perl.
 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
// The License.txt file describes the conditions under which this software may be distributed.
9
10	#include <stdlib.h>
11	#include <string.h>
12	#include <stdio.h>
13	#include <stdarg.h>
14	#include <assert.h>
15	#include <ctype.h>
16
17	#include <string>
18	#include <string_view>
19	#include <map>
20	#include <functional>
21
22	#include "ILexer.h"
23	#include "Scintilla.h"
24	#include "SciLexer.h"
25
26	#include "WordList.h"
27	#include "LexAccessor.h"
28	#include "StyleContext.h"
29	#include "CharacterSet.h"
30	#include "LexerModule.h"
31	#include "OptionSet.h"
32	#include "DefaultLexer.h"
33
34	using namespace Scintilla;
35	using namespace Lexilla;
36
// Info for HERE document handling from perldata.pod (reformatted):
// ----------------------------------------------------------------
// A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
// Following a << you specify a string to terminate the quoted material, and
// all lines following the current line down to the terminating string are
// the value of the item.
// Prefixing the terminating string with a "~" specifies that you want to
// use "Indented Here-docs" (see below).
// * The terminating string may be either an identifier (a word), or some
//   quoted text.
// * If quoted, the type of quotes you use determines the treatment of the
//   text, just as in regular quoting.
// * An unquoted identifier works like double quotes.
// * There must be no space between the << and the identifier.
//   (If you put a space it will be treated as a null identifier,
//    which is valid, and matches the first empty line.)
//   (This is deprecated, -w warns of this syntax)
// * The terminating string must appear by itself (unquoted and
//   with no surrounding whitespace) on the terminating line.
//
// Indented Here-docs
// ------------------
// The here-doc modifier "~" allows you to indent your here-docs to
// make the code more readable.
// The delimiter is used to determine the exact whitespace to remove
// from the beginning of each line. All lines must have at least the
// same starting whitespace (except lines only containing a newline)
// or perl will croak. Tabs and spaces can be mixed, but are matched
// exactly. One tab will not be equal to 8 spaces!
// Additional beginning whitespace (beyond what preceded the
// delimiter) will be preserved.
68
69	#define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
70
71	#define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
72	#define PERLNUM_OCTAL 2
73	#define PERLNUM_FLOAT_EXP 3 // exponent part only
74	#define PERLNUM_HEX 4 // may be a hex float
75	#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
76	#define PERLNUM_VECTOR 6
77	#define PERLNUM_V_VECTOR 7
78	#define PERLNUM_BAD 8
79
80	#define BACK_NONE 0 // lookback state for bareword disambiguation:
81	#define BACK_OPERATOR 1 // whitespace/comments are insignificant
82	#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
83
84	#define SUB_BEGIN 0 // states for subroutine prototype scan:
85	#define SUB_HAS_PROTO 1 // only 'prototype' attribute allows prototypes
86	#define SUB_HAS_ATTRIB 2 // other attributes can exist leftward
87	#define SUB_HAS_MODULE 3 // sub name can have a ::identifier part
88	#define SUB_HAS_SUB 4 // 'sub' keyword
89
90	// all interpolated styles are different from their parent styles by a constant difference
91	// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
92	#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
93
94	static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
95	// old-style keyword matcher; needed because GetCurrent() needs
96	// current segment to be committed, but we may abandon early...
97	char s[`100`];
98	Sci_PositionU i, len = end - start;
99	if (len > `30`) { len = `30`; }
100	for (i = `0`; i < len; i++, start++) s[i] = styler [start];
101	s[i] = `'\0'`;
102	return keywords.InList(s);
103	}
104
105	static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
106	int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
107	// identifiers are recognized by Perl as barewords under some
108	// conditions, the following attempts to do the disambiguation
109	// by looking backward and forward; result in 2 LSB
110	int result = `0`;
111	bool moreback = false; // true if passed newline/comments
112	bool brace = false; // true if opening brace found
113	// if BACK_NONE, neither operator nor keyword, so skip test
114	if (backFlag == BACK_NONE)
115	return result;
116	// first look backwards past whitespace/comments to set EOL flag
117	// (some disambiguation patterns must be on a single line)
118	if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
119	moreback = true;
120	// look backwards at last significant lexed item for disambiguation
121	bk = backPos - `1`;
122	int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
123	if (ch == `'{'` && !moreback) {
124	// {bareword: possible variable spec
125	brace = true;
126	} else if ((ch == `'&'` && styler.SafeGetCharAt(bk - `1`) != `'&'`)
127	// &bareword: subroutine call
128	\|\| styler.Match(bk - `1`, "->")
129	// ->bareword: part of variable spec
130	\|\| styler.Match(bk - `1`, "::")
131	// ::bareword: part of module spec
132	\|\| styler.Match(bk - `2`, "sub")) {
133	// sub bareword: subroutine declaration
134	// (implied BACK_KEYWORD, no keywords end in 'sub'!)
135	result \|= `1`;
136	}
137	// next, scan forward after word past tab/spaces only;
138	// if ch isn't one of '[{(,' we can skip the test
139	if ((ch == `'{'` \|\| ch == `'('` \|\| ch == `'['`\|\| ch == `','`)
140	&& fw < endPos) {
141	while (IsASpaceOrTab(ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)))
142	&& fw < endPos) {
143	fw++;
144	}
145	if ((ch == `'}'` && brace)
146	// {bareword}: variable spec
147	\|\| styler.Match(fw, "=>")) {
148	// [{(, bareword=>: hash literal
149	result \|= `2`;
150	}
151	}
152	return result;
153	}
154
155	static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
156	// when backtracking, we need to skip whitespace and comments
157	while (p > `0`) {
158	const int style = styler.StyleAt(p);
159	if (style != SCE_PL_DEFAULT && style != SCE_PL_COMMENTLINE)
160	break;
161	p--;
162	}
163	}
164
165	static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
166	// scan backward past whitespace and comments to find a lexeme
167	skipWhitespaceComment(styler, bk);
168	if (bk == `0`)
169	return `0`;
170	int sz = `1`;
171	style = styler.StyleAt(bk);
172	while (bk > `0`) { // find extent of lexeme
173	if (styler.StyleAt(bk - `1`) == style) {
174	bk--; sz++;
175	} else
176	break;
177	}
178	return sz;
179	}
180
181	static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
182	// backtrack to find open '{' corresponding to a '}', balanced
183	// return significant style to be tested for '/' disambiguation
184	int braceCount = `1`;
185	if (bk == `0`)
186	return SCE_PL_DEFAULT;
187	while (--bk > `0`) {
188	if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
189	int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
190	if (bkch == `';'`) { // early out
191	break;
192	} else if (bkch == `'}'`) {
193	braceCount++;
194	} else if (bkch == `'{'`) {
195	if (--braceCount == `0`) break;
196	}
197	}
198	}
199	if (bk > `0` && braceCount == `0`) {
200	// balanced { found, bk > 0, skip more whitespace/comments
201	bk--;
202	skipWhitespaceComment(styler, bk);
203	return styler.StyleAt(bk);
204	}
205	return SCE_PL_DEFAULT;
206	}
207
208	static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
209	// backtrack to classify sub-styles of identifier under test
210	// return sub-style to be tested for '/' disambiguation
211	if (styler.SafeGetCharAt(bk) == `'>'`) // inputsymbol, like <foo>
212	return `1`;
213	// backtrack to check for possible "->" or "::" before identifier
214	while (bk > `0` && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
215	bk--;
216	}
217	while (bk > `0`) {
218	int bkstyle = styler.StyleAt(bk);
219	if (bkstyle == SCE_PL_DEFAULT
220	\|\| bkstyle == SCE_PL_COMMENTLINE) {
221	// skip whitespace, comments
222	} else if (bkstyle == SCE_PL_OPERATOR) {
223	// test for "->" and "::"
224	if (styler.Match(bk - `1`, "->") \|\| styler.Match(bk - `1`, "::"))
225	return `2`;
226	} else
227	return `3`; // bare identifier
228	bk--;
229	}
230	return `0`;
231	}
232
233	static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
234	// forward scan the current line to classify line for POD style
235	int state = -`1`;
236	while (pos < endPos) {
237	int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
238	if (ch == `'\n'` \|\| ch == `'\r'`) {
239	if (ch == `'\r'` && styler.SafeGetCharAt(pos + `1`) == `'\n'`) pos++;
240	break;
241	}
242	if (IsASpaceOrTab(ch)) { // whitespace, take note
243	if (state == -`1`)
244	state = SCE_PL_DEFAULT;
245	} else if (state == SCE_PL_DEFAULT) { // verbatim POD line
246	state = SCE_PL_POD_VERB;
247	} else if (state != SCE_PL_POD_VERB) { // regular POD line
248	state = SCE_PL_POD;
249	}
250	pos++;
251	}
252	if (state == -`1`)
253	state = SCE_PL_DEFAULT;
254	return state;
255	}
256
257	static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
258	// backtrack to identify if we're starting a subroutine prototype
259	// we also need to ignore whitespace/comments, format is like:
260	// sub abc::pqr :const :prototype(...)
261	// lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
262	// and a state machine generates legal subroutine syntax matches
263	styler.Flush();
264	int state = SUB_BEGIN;
265	do {
266	// find two lexemes, lexeme 2 follows lexeme 1
267	int style2 = SCE_PL_DEFAULT;
268	Sci_PositionU pos2 = bk;
269	int len2 = findPrevLexeme(styler, pos2, style2);
270	int style1 = SCE_PL_DEFAULT;
271	Sci_PositionU pos1 = pos2;
272	if (pos1 > `0`) pos1--;
273	int len1 = findPrevLexeme(styler, pos1, style1);
274	if (len1 == `0` \|\| len2 == `0`) // lexeme pair must exist
275	break;
276
277	// match parts of syntax, if invalid subroutine syntax, break off
278	if (style1 == SCE_PL_OPERATOR && len1 == `1` &&
279	styler.SafeGetCharAt(pos1) == `':'`) { // ':'
280	if (style2 == SCE_PL_IDENTIFIER \|\| style2 == SCE_PL_WORD) {
281	if (len2 == `9` && styler.Match(pos2, "prototype")) { // ':' 'prototype'
282	if (state == SUB_BEGIN) {
283	state = SUB_HAS_PROTO;
284	} else
285	break;
286	} else { // ':' <attribute>
287	if (state == SUB_HAS_PROTO \|\| state == SUB_HAS_ATTRIB) {
288	state = SUB_HAS_ATTRIB;
289	} else
290	break;
291	}
292	} else
293	break;
294	} else if (style1 == SCE_PL_OPERATOR && len1 == `2` &&
295	styler.Match(pos1, "::")) { // '::'
296	if (style2 == SCE_PL_IDENTIFIER) { // '::' <identifier>
297	state = SUB_HAS_MODULE;
298	} else
299	break;
300	} else if (style1 == SCE_PL_WORD && len1 == `3` &&
301	styler.Match(pos1, "sub")) { // 'sub'
302	if (style2 == SCE_PL_IDENTIFIER) { // 'sub' <identifier>
303	state = SUB_HAS_SUB;
304	} else
305	break;
306	} else
307	break;
308	bk = pos1; // set position for finding next lexeme pair
309	if (bk > `0`) bk--;
310	} while (state != SUB_HAS_SUB);
311	return (state == SUB_HAS_SUB);
312	}
313
314	static int actualNumStyle(int numberStyle) {
315	if (numberStyle == PERLNUM_VECTOR \|\| numberStyle == PERLNUM_V_VECTOR) {
316	return SCE_PL_STRING;
317	} else if (numberStyle == PERLNUM_BAD) {
318	return SCE_PL_ERROR;
319	}
320	return SCE_PL_NUMBER;
321	}
322
static int opposite(int ch) {
	// Return the closing delimiter that pairs with an opening bracket;
	// any other character is its own closing delimiter.
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
330
331	static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
332	Sci_Position pos = styler.LineStart(line);
333	Sci_Position eol_pos = styler.LineStart(line + `1`) - `1`;
334	for (Sci_Position i = pos; i < eol_pos; i++) {
335	char ch = styler [i];
336	int style = styler.StyleAt(i);
337	if (ch == `'#'` && style == SCE_PL_COMMENTLINE)
338	return true;
339	else if (!IsASpaceOrTab(ch))
340	return false;
341	}
342	return false;
343	}
344
345	static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
346	Sci_Position pos = styler.LineStart(line);
347	int style = styler.StyleAt(pos);
348	if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
349	return true;
350	}
351	return false;
352	}
353
354	static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
355	int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + `5`));
356	if (lvl >= `'1'` && lvl <= `'4'`) {
357	return lvl - `'0'`;
358	}
359	return `0`;
360	}
361
362	// An individual named option for use in an OptionSet
363
364	// Options used for LexerPerl
struct OptionsPerl {
	bool fold;			// exposed as the "fold" property
	bool foldComment;		// exposed as "fold.comment"
	bool foldCompact;		// exposed as "fold.compact"
	// Custom folding of POD and packages
	bool foldPOD;			// fold.perl.pod
	// Enable folding Pod blocks when using the Perl lexer.
	bool foldPackage;		// fold.perl.package
	// Enable folding packages when using the Perl lexer.

	bool foldCommentExplicit;	// exposed as "fold.perl.comment.explicit"

	bool foldAtElse;		// exposed as "fold.perl.at.else"

	// Defaults: folding itself, comment folding and fold-at-else start
	// disabled; compact, POD, package and explicit folding start enabled.
	OptionsPerl() :
		fold(false),
		foldComment(false),
		foldCompact(true),
		foldPOD(true),
		foldPackage(true),
		foldCommentExplicit(true),
		foldAtElse(false) {
	}
};
389
// Descriptions of the word lists understood by the Perl lexer;
// the list is terminated by a null pointer.
static const char *const perlWordListDesc[] = {
	"Keywords",
	nullptr
};
394
395	struct OptionSetPerl : public OptionSet<OptionsPerl> {
396	OptionSetPerl() {
397	DefineProperty("fold", &OptionsPerl::fold);
398
399	DefineProperty("fold.comment", &OptionsPerl::foldComment);
400
401	DefineProperty("fold.compact", &OptionsPerl::foldCompact);
402
403	DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
404	"Set to 0 to disable folding Pod blocks when using the Perl lexer.");
405
406	DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
407	"Set to 0 to disable folding packages when using the Perl lexer.");
408
409	DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
410	"Set to 0 to disable explicit folding.");
411
412	DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
413	"This option enables Perl folding on a \"} else {\" line of an if statement.");
414
415	DefineWordListSets(perlWordListDesc);
416	}
417	};
418
419	class LexerPerl : public DefaultLexer {
420	CharacterSet setWordStart;
421	CharacterSet setWord;
422	CharacterSet setSpecialVar;
423	CharacterSet setControlVar;
424	WordList keywords;
425	OptionsPerl options;
426	OptionSetPerl osPerl;
427	public:
428	LexerPerl() :
429	DefaultLexer ("perl", SCLEX_PERL),
430	setWordStart (CharacterSet::setAlpha, "_", `0x80`, true),
431	setWord (CharacterSet::setAlphaNum, "_", `0x80`, true),
432	setSpecialVar (CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
433	setControlVar (CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
434	}
435	virtual ~LexerPerl() {
436	}
437	void SCI_METHOD Release() override {
438	delete this;
439	}
440	int SCI_METHOD Version() const override {
441	return lvRelease5;
442	}
443	const char *SCI_METHOD PropertyNames() override {
444	return osPerl.PropertyNames();
445	}
446	int SCI_METHOD PropertyType(const char *name) override {
447	return osPerl.PropertyType(name);
448	}
449	const char SCI_METHOD DescribeProperty(const* char *name) override {
450	return osPerl.DescribeProperty(name);
451	}
452	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override;
453	const char * SCI_METHOD PropertyGet(const char *key) override {
454	return osPerl.PropertyGet(key);
455	}
456	const char *SCI_METHOD DescribeWordListSets() override {
457	return osPerl.DescribeWordListSets();
458	}
459	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
460	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
461	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
462
463	void SCI_METHOD PrivateCall(int, void* *) override {
464	return `0`;
465	}
466
467	static ILexer5 *LexerFactoryPerl() {
468	return new LexerPerl ();
469	}
470	int InputSymbolScan(StyleContext &sc);
471	void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
472	};
473
474	Sci_Position SCI_METHOD LexerPerl::PropertySet(const char key, const* char *val) {
475	if (osPerl.PropertySet(&options, key, val)) {
476	return `0`;
477	}
478	return -`1`;
479	}
480
481	Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
482	WordList *wordListN = `0`;
483	switch (n) {
484	case `0`:
485	wordListN = &keywords;
486	break;
487	}
488	Sci_Position firstModification = -`1`;
489	if (wordListN) {
490	WordList wlNew;
491	wlNew.Set(wl);
492	if (*wordListN != wlNew) {
493	wordListN->Set(wl);
494	firstModification = `0`;
495	}
496	}
497	return firstModification;
498	}
499
500	int LexerPerl::InputSymbolScan(StyleContext &sc) {
501	// forward scan for matching > on same line; file handles
502	int c, sLen = `0`;
503	while ((c = sc.GetRelativeCharacter(++sLen)) != `0`) {
504	if (c == `'\r'` \|\| c == `'\n'`) {
505	return `0`;
506	} else if (c == `'>'`) {
507	if (sc.Match("<=>")) // '<=>' case
508	return `0`;
509	return sLen;
510	}
511	}
512	return `0`;
513	}
514
515	void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
516	// interpolate a segment (with no active backslashes or delimiters within)
517	// switch in or out of an interpolation style or continue current style
518	// commit variable patterns if found, trim segment, repeat until done
519	while (maxSeg > `0`) {
520	bool isVar = false;
521	int sLen = `0`;
522	if ((maxSeg > `1`) && (sc.ch == `'$'` \|\| sc.ch == `'@'`)) {
523	// $#[$]word [$@][$]word (where word or {word} is always present)
524	bool braces = false;
525	sLen = `1`;
526	if (sc.ch == `'$'` && sc.chNext == `'#'`) { // starts with $#
527	sLen++;
528	}
529	while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == `'$'`)) // >0 $ dereference within
530	sLen++;
531	if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == `'{'`)) { // { start for {word}
532	sLen++;
533	braces = true;
534	}
535	if (maxSeg > sLen) {
536	int c = sc.GetRelativeCharacter(sLen);
537	if (setWordStart.Contains(c)) { // word (various)
538	sLen++;
539	isVar = true;
540	while (maxSeg > sLen) {
541	if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
542	break;
543	sLen++;
544	}
545	} else if (braces && IsADigit(c) && (sLen == `2`)) { // digit for ${digit}
546	sLen++;
547	isVar = true;
548	}
549	}
550	if (braces) {
551	if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == `'}'`)) { // } end for {word}
552	sLen++;
553	} else
554	isVar = false;
555	}
556	}
557	if (!isVar && (maxSeg > `1`)) { // $- or @-specific variable patterns
558	int c = sc.chNext;
559	if (sc.ch == `'$'`) {
560	sLen = `1`;
561	if (IsADigit(c)) { // $[0-9] and slurp trailing digits
562	sLen++;
563	isVar = true;
564	while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
565	sLen++;
566	} else if (setSpecialVar.Contains(c)) { // $ special variables
567	sLen++;
568	isVar = true;
569	} else if (!isPattern && ((c == `'('`) \|\| (c == `')'`) \|\| (c == `'\|'`))) { // $ additional
570	sLen++;
571	isVar = true;
572	} else if (c == `'^'`) { // $^A control-char style
573	sLen++;
574	if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
575	sLen++;
576	isVar = true;
577	}
578	}
579	} else if (sc.ch == `'@'`) {
580	sLen = `1`;
581	if (!isPattern && ((c == `'+'`) \|\| (c == `'-'`))) { // @ specials non-pattern
582	sLen++;
583	isVar = true;
584	}
585	}
586	}
587	if (isVar) { // commit as interpolated variable or normal character
588	if (sc.state < SCE_PL_STRING_VAR)
589	sc.SetState(sc.state + INTERPOLATE_SHIFT);
590	sc.Forward(sLen);
591	maxSeg -= sLen;
592	} else {
593	if (sc.state >= SCE_PL_STRING_VAR)
594	sc.SetState(sc.state - INTERPOLATE_SHIFT);
595	sc.Forward();
596	maxSeg--;
597	}
598	}
599	if (sc.state >= SCE_PL_STRING_VAR)
600	sc.SetState(sc.state - INTERPOLATE_SHIFT);
601	}
602
603	void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
604	LexAccessor styler(pAccess);
605
606	// keywords that forces /PATTERN/ at all times; should track vim's behaviour
607	WordList reWords;
608	reWords.Set("elsif if split while");
609
610	// charset classes
611	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
	// lexing of "%*</" operators is non-trivial; these are missing in the set below
613	CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=\|{}[]:;>,?!.~");
614	CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
615	CharacterSet setModifiers(CharacterSet::setAlpha);
616	CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
617	// setArray and setHash also accepts chars for special vars like $_,
618	// which are then truncated when the next char does not match setVar
619	CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", `0x80`, true);
620	CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", `0x80`, true);
621	CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", `0x80`, true);
622	CharacterSet &setPOD = setModifiers;
623	CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
624	CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
625	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
626	CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
627	// for format identifiers
628	CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
629	CharacterSet &setFormat = setHereDocDelim;
630
631	// Lexer for perl often has to backtrack to start of current style to determine
632	// which characters are being used as quotes, how deeply nested is the
633	// start position and what the termination string is for HERE documents.
634
635	class HereDocCls { // Class to manage HERE doc sequence
636	public:
637	int State;
638	// 0: '<<' encountered
639	// 1: collect the delimiter
640	// 2: here doc text (lines after the delimiter)
641	int Quote; // the char after '<<'
642	bool Quoted; // true if Quote in ('\'','"','`')
643	bool StripIndent; // true if '<<~' requested to strip leading whitespace
644	int DelimiterLength; // strlen(Delimiter)
645	char Delimiter[HERE_DELIM_MAX]; // the Delimiter
646	HereDocCls() {
647	State = `0`;
648	Quote = `0`;
649	Quoted = false;
650	StripIndent = false;
651	DelimiterLength = `0`;
652	Delimiter[`0`] = `'\0'`;
653	}
654	void Append(int ch) {
655	Delimiter[DelimiterLength++] = static_cast<char>(ch);
656	Delimiter[DelimiterLength] = `'\0'`;
657	}
658	~HereDocCls() {
659	}
660	};
661	HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
662
663	class QuoteCls { // Class to manage quote pairs
664	public:
665	int Rep;
666	int Count;
667	int Up, Down;
668	QuoteCls() {
669	New(`1`);
670	}
671	void New(int r = `1`) {
672	Rep = r;
673	Count = `0`;
674	Up = `'\0'`;
675	Down = `'\0'`;
676	}
677	void Open(int u) {
678	Count++;
679	Up = u;
680	Down = opposite(Up);
681	}
682	};
683	QuoteCls Quote;
684
685	// additional state for number lexing
686	int numState = PERLNUM_DECIMAL;
687	int dotCount = `0`;
688
689	Sci_PositionU endPos = startPos + length;
690
691	// Backtrack to beginning of style if required...
692	// If in a long distance lexical state, backtrack to find quote characters.
693	// Includes strings (may be multi-line), numbers (additional state), format
694	// bodies, as well as POD sections.
695	if (initStyle == SCE_PL_HERE_Q
696	\|\| initStyle == SCE_PL_HERE_QQ
697	\|\| initStyle == SCE_PL_HERE_QX
698	\|\| initStyle == SCE_PL_FORMAT
699	\|\| initStyle == SCE_PL_HERE_QQ_VAR
700	\|\| initStyle == SCE_PL_HERE_QX_VAR
701	) {
702	// backtrack through multiple styles to reach the delimiter start
703	int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
704	while ((startPos > `1`) && (styler.StyleAt(startPos) != delim)) {
705	startPos--;
706	}
707	startPos = styler.LineStart(styler.GetLine(startPos));
708	initStyle = styler.StyleAt(startPos - `1`);
709	}
710	if (initStyle == SCE_PL_STRING
711	\|\| initStyle == SCE_PL_STRING_QQ
712	\|\| initStyle == SCE_PL_BACKTICKS
713	\|\| initStyle == SCE_PL_STRING_QX
714	\|\| initStyle == SCE_PL_REGEX
715	\|\| initStyle == SCE_PL_STRING_QR
716	\|\| initStyle == SCE_PL_REGSUBST
717	\|\| initStyle == SCE_PL_STRING_VAR
718	\|\| initStyle == SCE_PL_STRING_QQ_VAR
719	\|\| initStyle == SCE_PL_BACKTICKS_VAR
720	\|\| initStyle == SCE_PL_STRING_QX_VAR
721	\|\| initStyle == SCE_PL_REGEX_VAR
722	\|\| initStyle == SCE_PL_STRING_QR_VAR
723	\|\| initStyle == SCE_PL_REGSUBST_VAR
724	) {
725	// for interpolation, must backtrack through a mix of two different styles
726	int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
727	initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
728	while (startPos > `1`) {
729	int st = styler.StyleAt(startPos - `1`);
730	if ((st != initStyle) && (st != otherStyle))
731	break;
732	startPos--;
733	}
734	initStyle = SCE_PL_DEFAULT;
735	} else if (initStyle == SCE_PL_STRING_Q
736	\|\| initStyle == SCE_PL_STRING_QW
737	\|\| initStyle == SCE_PL_XLAT
738	\|\| initStyle == SCE_PL_CHARACTER
739	\|\| initStyle == SCE_PL_NUMBER
740	\|\| initStyle == SCE_PL_IDENTIFIER
741	\|\| initStyle == SCE_PL_ERROR
742	\|\| initStyle == SCE_PL_SUB_PROTOTYPE
743	) {
744	while ((startPos > `1`) && (styler.StyleAt(startPos - `1`) == initStyle)) {
745	startPos--;
746	}
747	initStyle = SCE_PL_DEFAULT;
748	} else if (initStyle == SCE_PL_POD
749	\|\| initStyle == SCE_PL_POD_VERB
750	) {
751	// POD backtracking finds preceding blank lines and goes back past them
752	Sci_Position ln = styler.GetLine(startPos);
753	if (ln > `0`) {
754	initStyle = styler.StyleAt(styler.LineStart(--ln));
755	if (initStyle == SCE_PL_POD \|\| initStyle == SCE_PL_POD_VERB) {
756	while (ln > `0` && styler.GetLineState(ln) == SCE_PL_DEFAULT)
757	ln--;
758	}
759	startPos = styler.LineStart(++ln);
760	initStyle = styler.StyleAt(startPos - `1`);
761	} else {
762	startPos = `0`;
763	initStyle = SCE_PL_DEFAULT;
764	}
765	}
766
767	// backFlag, backPos are additional state to aid identifier corner cases.
768	// Look backwards past whitespace and comments in order to detect either
769	// operator or keyword. Later updated as we go along.
770	int backFlag = BACK_NONE;
771	Sci_PositionU backPos = startPos;
772	if (backPos > `0`) {
773	backPos--;
774	skipWhitespaceComment(styler, backPos);
775	if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
776	backFlag = BACK_OPERATOR;
777	else if (styler.StyleAt(backPos) == SCE_PL_WORD)
778	backFlag = BACK_KEYWORD;
779	backPos++;
780	}
781
782	StyleContext sc(startPos, endPos - startPos, initStyle, styler);
783
784	for (; sc.More(); sc.Forward()) {
785
786	// Determine if the current state should terminate.
787	switch (sc.state) {
788	case SCE_PL_OPERATOR:
789	sc.SetState(SCE_PL_DEFAULT);
790	backFlag = BACK_OPERATOR;
791	backPos = sc.currentPos;
792	break;
793	case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
794	if ((!setWord.Contains(sc.ch) && sc.ch != `'\''`)
795	\|\| sc.Match(`'.'`, `'.'`)
796	\|\| sc.chPrev == `'>'`) { // end of inputsymbol
797	sc.SetState(SCE_PL_DEFAULT);
798	}
799	break;
800	case SCE_PL_WORD: // keyword, plus special cases
801	if (!setWord.Contains(sc.ch)) {
802	char s[`100`];
803	sc.GetCurrent(s, sizeof(s));
804	if ((strcmp(s, "__DATA__") == `0`) \|\| (strcmp(s, "__END__") == `0`)) {
805	sc.ChangeState(SCE_PL_DATASECTION);
806	} else {
807	if ((strcmp(s, "format") == `0`)) {
808	sc.SetState(SCE_PL_FORMAT_IDENT);
809	HereDoc.State = `0`;
810	} else {
811	sc.SetState(SCE_PL_DEFAULT);
812	}
813	backFlag = BACK_KEYWORD;
814	backPos = sc.currentPos;
815	}
816	}
817	break;
818	case SCE_PL_SCALAR:
819	case SCE_PL_ARRAY:
820	case SCE_PL_HASH:
821	case SCE_PL_SYMBOLTABLE:
822	if (sc.Match(`':'`, `':'`)) { // skip ::
823	sc.Forward();
824	} else if (!setVar.Contains(sc.ch)) {
825	if (sc.LengthCurrent() == `1`) {
826	// Special variable: $(, $_ etc.
827	sc.Forward();
828	}
829	sc.SetState(SCE_PL_DEFAULT);
830	}
831	break;
832	case SCE_PL_NUMBER:
833	// if no early break, number style is terminated at "(go through)"
834	if (sc.ch == `'.'`) {
835	if (sc.chNext == `'.'`) {
836	// double dot is always an operator (go through)
837	} else if (numState <= PERLNUM_FLOAT_EXP) {
838	// non-decimal number or float exponent, consume next dot
839	sc.SetState(SCE_PL_OPERATOR);
840	break;
841	} else { // decimal or vectors allows dots
842	dotCount++;
843	if (numState == PERLNUM_DECIMAL) {
844	if (dotCount <= `1`) // number with one dot in it
845	break;
846	if (IsADigit(sc.chNext)) { // really a vector
847	numState = PERLNUM_VECTOR;
848	break;
849	}
850	// number then dot (go through)
851	} else if (numState == PERLNUM_HEX) {
852	if (dotCount <= `1` && IsADigit(sc.chNext, `16`)) {
853	break; // hex with one dot is a hex float
854	} else {
855	sc.SetState(SCE_PL_OPERATOR);
856	break;
857	}
858	// hex then dot (go through)
859	} else if (IsADigit(sc.chNext)) // vectors
860	break;
861	// vector then dot (go through)
862	}
863	} else if (sc.ch == `'_'`) {
864	// permissive underscoring for number and vector literals
865	break;
866	} else if (numState == PERLNUM_DECIMAL) {
867	if (sc.ch == `'E'` \|\| sc.ch == `'e'`) { // exponent, sign
868	numState = PERLNUM_FLOAT_EXP;
869	if (sc.chNext == `'+'` \|\| sc.chNext == `'-'`) {
870	sc.Forward();
871	}
872	break;
873	} else if (IsADigit(sc.ch))
874	break;
875	// number then word (go through)
876	} else if (numState == PERLNUM_HEX) {
877	if (sc.ch == `'P'` \|\| sc.ch == `'p'`) { // hex float exponent, sign
878	numState = PERLNUM_FLOAT_EXP;
879	if (sc.chNext == `'+'` \|\| sc.chNext == `'-'`) {
880	sc.Forward();
881	}
882	break;
883	} else if (IsADigit(sc.ch, `16`))
884	break;
885	// hex or hex float then word (go through)
886	} else if (numState == PERLNUM_VECTOR \|\| numState == PERLNUM_V_VECTOR) {
887	if (IsADigit(sc.ch)) // vector
888	break;
889	if (setWord.Contains(sc.ch) && dotCount == `0`) { // change to word
890	sc.ChangeState(SCE_PL_IDENTIFIER);
891	break;
892	}
893	// vector then word (go through)
894	} else if (IsADigit(sc.ch)) {
895	if (numState == PERLNUM_FLOAT_EXP) {
896	break;
897	} else if (numState == PERLNUM_OCTAL) {
898	if (sc.ch <= `'7'`) break;
899	} else if (numState == PERLNUM_BINARY) {
900	if (sc.ch <= `'1'`) break;
901	}
902	// mark invalid octal, binary numbers (go through)
903	numState = PERLNUM_BAD;
904	break;
905	}
906	// complete current number or vector
907	sc.ChangeState(actualNumStyle(numState));
908	sc.SetState(SCE_PL_DEFAULT);
909	break;
910	case SCE_PL_COMMENTLINE:
911	if (sc.atLineStart) {
912	sc.SetState(SCE_PL_DEFAULT);
913	}
914	break;
915	case SCE_PL_HERE_DELIM:
916	if (HereDoc.State == `0`) { // '<<' encountered
917	int delim_ch = sc.chNext;
918	Sci_Position ws_skip = `0`;
919	HereDoc.State = `1`; // pre-init HERE doc class
920	HereDoc.Quote = sc.chNext;
921	HereDoc.Quoted = false;
922	HereDoc.StripIndent = false;
923	HereDoc.DelimiterLength = `0`;
924	HereDoc.Delimiter[HereDoc.DelimiterLength] = `'\0'`;
925	if (delim_ch == `'~'`) { // was actually '<<~'
926	sc.Forward();
927	HereDoc.StripIndent = true;
928	HereDoc.Quote = delim_ch = sc.chNext;
929	}
930	if (IsASpaceOrTab(delim_ch)) {
931	// skip whitespace; legal only for quoted delimiters
932	Sci_PositionU i = sc.currentPos + `1`;
933	while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
934	i++;
935	delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
936	}
937	ws_skip = i - sc.currentPos - `1`;
938	}
939	if (delim_ch == `'\''` \|\| delim_ch == `'"'` \|\| delim_ch == '`') {
940	// a quoted here-doc delimiter; skip any whitespace
941	sc.Forward(ws_skip + `1`);
942	HereDoc.Quote = delim_ch;
943	HereDoc.Quoted = true;
944	} else if ((ws_skip == `0` && setNonHereDoc.Contains(sc.chNext))
945	\|\| ws_skip > `0`) {
946	// left shift << or <<= operator cases
947	// restore position if operator
948	sc.ChangeState(SCE_PL_OPERATOR);
949	sc.ForwardSetState(SCE_PL_DEFAULT);
950	backFlag = BACK_OPERATOR;
951	backPos = sc.currentPos;
952	HereDoc.State = `0`;
953	} else {
954	// specially handle initial '\' for identifier
955	if (ws_skip == `0` && HereDoc.Quote == `'\\'`)
956	sc.Forward();
957	// an unquoted here-doc delimiter, no special handling
958	// (cannot be prefixed by spaces/tabs), or
959	// symbols terminates; deprecated zero-length delimiter
960	}
961	} else if (HereDoc.State == `1`) { // collect the delimiter
962	backFlag = BACK_NONE;
963	if (HereDoc.Quoted) { // a quoted here-doc delimiter
964	if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
965	sc.ForwardSetState(SCE_PL_DEFAULT);
966	} else if (!sc.atLineEnd) {
967	if (sc.Match(`'\\'`, static_cast<char>(HereDoc.Quote))) { // escaped quote
968	sc.Forward();
969	}
970	if (sc.ch != `'\r'`) { // skip CR if CRLF
971	int i = `0`; // else append char, possibly an extended char
972	while (i < sc.width) {
973	HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
974	i++;
975	}
976	}
977	}
978	} else { // an unquoted here-doc delimiter, no extended charsets
979	if (setHereDocDelim.Contains(sc.ch)) {
980	HereDoc.Append(sc.ch);
981	} else {
982	sc.SetState(SCE_PL_DEFAULT);
983	}
984	}
985	if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - `1`) {
986	sc.SetState(SCE_PL_ERROR);
987	HereDoc.State = `0`;
988	}
989	}
990	break;
991	case SCE_PL_HERE_Q:
992	case SCE_PL_HERE_QQ:
993	case SCE_PL_HERE_QX:
994	// also implies HereDoc.State == 2
995	sc.Complete();
996	if (HereDoc.StripIndent) {
997	// skip whitespace
998	while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
999	sc.Forward();
1000	}
1001	if (HereDoc.DelimiterLength == `0` \|\| sc.Match(HereDoc.Delimiter)) {
1002	int c = sc.GetRelative(HereDoc.DelimiterLength);
1003	if (c == `'\r'` \|\| c == `'\n'`) { // peek first, do not consume match
1004	sc.ForwardBytes(HereDoc.DelimiterLength);
1005	sc.SetState(SCE_PL_DEFAULT);
1006	backFlag = BACK_NONE;
1007	HereDoc.State = `0`;
1008	if (!sc.atLineEnd)
1009	sc.Forward();
1010	break;
1011	}
1012	}
1013	if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
1014	while (!sc.atLineEnd)
1015	sc.Forward();
1016	break;
1017	}
1018	while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
1019	int c, sLen = `0`, endType = `0`;
1020	while ((c = sc.GetRelativeCharacter(sLen)) != `0`) {
1021	// scan to break string into segments
1022	if (c == `'\\'`) {
1023	endType = `1`; break;
1024	} else if (c == `'\r'` \|\| c == `'\n'`) {
1025	endType = `2`; break;
1026	}
1027	sLen++;
1028	}
1029	if (sLen > `0`) // process non-empty segments
1030	InterpolateSegment(sc, sLen);
1031	if (endType == `1`) {
1032	sc.Forward();
1033	// \ at end-of-line does not appear to have any effect, skip
1034	if (sc.ch != `'\r'` && sc.ch != `'\n'`)
1035	sc.Forward();
1036	} else if (endType == `2`) {
1037	if (!sc.atLineEnd)
1038	sc.Forward();
1039	}
1040	}
1041	break;
1042	case SCE_PL_POD:
1043	case SCE_PL_POD_VERB: {
1044	Sci_PositionU fw = sc.currentPos;
1045	Sci_Position ln = styler.GetLine(fw);
1046	if (sc.atLineStart && sc.Match("=cut")) { // end of POD
1047	sc.SetState(SCE_PL_POD);
1048	sc.Forward(`4`);
1049	sc.SetState(SCE_PL_DEFAULT);
1050	styler.SetLineState(ln, SCE_PL_POD);
1051	break;
1052	}
1053	int pod = podLineScan(styler, fw, endPos); // classify POD line
1054	styler.SetLineState(ln, pod);
1055	if (pod == SCE_PL_DEFAULT) {
1056	if (sc.state == SCE_PL_POD_VERB) {
1057	Sci_PositionU fw2 = fw;
1058	while (fw2 < (endPos - `1`) && pod == SCE_PL_DEFAULT) {
1059	fw = fw2++; // penultimate line (last blank line)
1060	pod = podLineScan(styler, fw2, endPos);
1061	styler.SetLineState(styler.GetLine(fw2), pod);
1062	}
1063	if (pod == SCE_PL_POD) { // truncate verbatim POD early
1064	sc.SetState(SCE_PL_POD);
1065	} else
1066	fw = fw2;
1067	}
1068	} else {
1069	if (pod == SCE_PL_POD_VERB // still part of current paragraph
1070	&& (styler.GetLineState(ln - `1`) == SCE_PL_POD)) {
1071	pod = SCE_PL_POD;
1072	styler.SetLineState(ln, pod);
1073	} else if (pod == SCE_PL_POD
1074	&& (styler.GetLineState(ln - `1`) == SCE_PL_POD_VERB)) {
1075	pod = SCE_PL_POD_VERB;
1076	styler.SetLineState(ln, pod);
1077	}
1078	sc.SetState(pod);
1079	}
1080	sc.ForwardBytes(fw - sc.currentPos); // commit style
1081	}
1082	break;
1083	case SCE_PL_REGEX:
1084	case SCE_PL_STRING_QR:
1085	if (Quote.Rep <= `0`) {
1086	if (!setModifiers.Contains(sc.ch))
1087	sc.SetState(SCE_PL_DEFAULT);
1088	} else if (!Quote.Up && !IsASpace(sc.ch)) {
1089	Quote.Open(sc.ch);
1090	} else {
1091	int c, sLen = `0`, endType = `0`;
1092	while ((c = sc.GetRelativeCharacter(sLen)) != `0`) {
1093	// scan to break string into segments
1094	if (IsASpace(c)) {
1095	break;
1096	} else if (c == `'\\'` && Quote.Up != `'\\'`) {
1097	endType = `1`; break;
1098	} else if (c == Quote.Down) {
1099	Quote.Count--;
1100	if (Quote.Count == `0`) {
1101	Quote.Rep--;
1102	break;
1103	}
1104	} else if (c == Quote.Up)
1105	Quote.Count++;
1106	sLen++;
1107	}
1108	if (sLen > `0`) { // process non-empty segments
1109	if (Quote.Up != `'\''`) {
1110	InterpolateSegment(sc, sLen, true);
1111	} else // non-interpolated path
1112	sc.Forward(sLen);
1113	}
1114	if (endType == `1`)
1115	sc.Forward();
1116	}
1117	break;
1118	case SCE_PL_REGSUBST:
1119	case SCE_PL_XLAT:
1120	if (Quote.Rep <= `0`) {
1121	if (!setModifiers.Contains(sc.ch))
1122	sc.SetState(SCE_PL_DEFAULT);
1123	} else if (!Quote.Up && !IsASpace(sc.ch)) {
1124	Quote.Open(sc.ch);
1125	} else {
1126	int c, sLen = `0`, endType = `0`;
1127	bool isPattern = (Quote.Rep == `2`);
1128	while ((c = sc.GetRelativeCharacter(sLen)) != `0`) {
1129	// scan to break string into segments
1130	if (c == `'\\'` && Quote.Up != `'\\'`) {
1131	endType = `2`; break;
1132	} else if (Quote.Count == `0` && Quote.Rep == `1`) {
1133	// We matched something like s(...) or tr{...}, Perl 5.10
1134	// appears to allow almost any character for use as the
1135	// next delimiters. Whitespace and comments are accepted in
1136	// between, but we'll limit to whitespace here.
1137	// For '#', if no whitespace in between, it's a delimiter.
1138	if (IsASpace(c)) {
1139	// Keep going
1140	} else if (c == `'#'` && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - `1`))) {
1141	endType = `3`;
1142	} else
1143	Quote.Open(c);
1144	break;
1145	} else if (c == Quote.Down) {
1146	Quote.Count--;
1147	if (Quote.Count == `0`) {
1148	Quote.Rep--;
1149	endType = `1`;
1150	}
1151	if (Quote.Up == Quote.Down)
1152	Quote.Count++;
1153	if (endType == `1`)
1154	break;
1155	} else if (c == Quote.Up) {
1156	Quote.Count++;
1157	} else if (IsASpace(c))
1158	break;
1159	sLen++;
1160	}
1161	if (sLen > `0`) { // process non-empty segments
1162	if (sc.state == SCE_PL_REGSUBST && Quote.Up != `'\''`) {
1163	InterpolateSegment(sc, sLen, isPattern);
1164	} else // non-interpolated path
1165	sc.Forward(sLen);
1166	}
1167	if (endType == `2`) {
1168	sc.Forward();
1169	} else if (endType == `3`)
1170	sc.SetState(SCE_PL_DEFAULT);
1171	}
1172	break;
1173	case SCE_PL_STRING_Q:
1174	case SCE_PL_STRING_QQ:
1175	case SCE_PL_STRING_QX:
1176	case SCE_PL_STRING_QW:
1177	case SCE_PL_STRING:
1178	case SCE_PL_CHARACTER:
1179	case SCE_PL_BACKTICKS:
1180	if (!Quote.Down && !IsASpace(sc.ch)) {
1181	Quote.Open(sc.ch);
1182	} else {
1183	int c, sLen = `0`, endType = `0`;
1184	while ((c = sc.GetRelativeCharacter(sLen)) != `0`) {
1185	// scan to break string into segments
1186	if (IsASpace(c)) {
1187	break;
1188	} else if (c == `'\\'` && Quote.Up != `'\\'`) {
1189	endType = `2`; break;
1190	} else if (c == Quote.Down) {
1191	Quote.Count--;
1192	if (Quote.Count == `0`) {
1193	endType = `3`; break;
1194	}
1195	} else if (c == Quote.Up)
1196	Quote.Count++;
1197	sLen++;
1198	}
1199	if (sLen > `0`) { // process non-empty segments
1200	switch (sc.state) {
1201	case SCE_PL_STRING:
1202	case SCE_PL_STRING_QQ:
1203	case SCE_PL_BACKTICKS:
1204	InterpolateSegment(sc, sLen);
1205	break;
1206	case SCE_PL_STRING_QX:
1207	if (Quote.Up != `'\''`) {
1208	InterpolateSegment(sc, sLen);
1209	break;
1210	}
1211	// (continued for ' delim)
1212	// Falls through.
1213	default: // non-interpolated path
1214	sc.Forward(sLen);
1215	}
1216	}
1217	if (endType == `2`) {
1218	sc.Forward();
1219	} else if (endType == `3`)
1220	sc.ForwardSetState(SCE_PL_DEFAULT);
1221	}
1222	break;
1223	case SCE_PL_SUB_PROTOTYPE: {
1224	int i = `0`;
1225	// forward scan; must all be valid proto characters
1226	while (setSubPrototype.Contains(sc.GetRelative(i)))
1227	i++;
1228	if (sc.GetRelative(i) == `')'`) { // valid sub prototype
1229	sc.ForwardBytes(i);
1230	sc.ForwardSetState(SCE_PL_DEFAULT);
1231	} else {
1232	// abandon prototype, restart from '('
1233	sc.ChangeState(SCE_PL_OPERATOR);
1234	sc.SetState(SCE_PL_DEFAULT);
1235	}
1236	}
1237	break;
1238	case SCE_PL_FORMAT: {
1239	sc.Complete();
1240	if (sc.Match(`'.'`)) {
1241	sc.Forward();
1242	if (sc.atLineEnd \|\| ((sc.ch == `'\r'` && sc.chNext == `'\n'`)))
1243	sc.SetState(SCE_PL_DEFAULT);
1244	}
1245	while (!sc.atLineEnd)
1246	sc.Forward();
1247	}
1248	break;
1249	case SCE_PL_ERROR:
1250	break;
1251	}
1252	// Needed for specific continuation styles (one follows the other)
1253	switch (sc.state) {
1254	// continued from SCE_PL_WORD
1255	case SCE_PL_FORMAT_IDENT:
1256	// occupies HereDoc state 3 to avoid clashing with HERE docs
1257	if (IsASpaceOrTab(sc.ch)) { // skip whitespace
1258	sc.ChangeState(SCE_PL_DEFAULT);
1259	while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1260	sc.Forward();
1261	sc.SetState(SCE_PL_FORMAT_IDENT);
1262	}
1263	if (setFormatStart.Contains(sc.ch)) { // identifier or '='
1264	if (sc.ch != `'='`) {
1265	do {
1266	sc.Forward();
1267	} while (setFormat.Contains(sc.ch));
1268	}
1269	while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1270	sc.Forward();
1271	if (sc.ch == `'='`) {
1272	sc.ForwardSetState(SCE_PL_DEFAULT);
1273	HereDoc.State = `3`;
1274	} else {
1275	// invalid identifier; inexact fallback, but hey
1276	sc.ChangeState(SCE_PL_IDENTIFIER);
1277	sc.SetState(SCE_PL_DEFAULT);
1278	}
1279	} else {
1280	sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
1281	}
1282	backFlag = BACK_NONE;
1283	break;
1284	}
1285
1286	// Must check end of HereDoc states here before default state is handled
1287	if (HereDoc.State == `1` && sc.atLineEnd) {
1288	// Begin of here-doc (the line after the here-doc delimiter):
1289	// Lexically, the here-doc starts from the next line after the >>, but the
1290	// first line of here-doc seem to follow the style of the last EOL sequence
1291	int st_new = SCE_PL_HERE_QQ;
1292	HereDoc.State = `2`;
1293	if (HereDoc.Quoted) {
1294	if (sc.state == SCE_PL_HERE_DELIM) {
1295	// Missing quote at end of string! We are stricter than perl.
1296	// Colour here-doc anyway while marking this bit as an error.
1297	sc.ChangeState(SCE_PL_ERROR);
1298	}
1299	switch (HereDoc.Quote) {
1300	case `'\''`:
1301	st_new = SCE_PL_HERE_Q;
1302	break;
1303	case `'"'` :
1304	st_new = SCE_PL_HERE_QQ;
1305	break;
1306	case '`' :
1307	st_new = SCE_PL_HERE_QX;
1308	break;
1309	}
1310	} else {
1311	if (HereDoc.Quote == `'\\'`)
1312	st_new = SCE_PL_HERE_Q;
1313	}
1314	sc.SetState(st_new);
1315	}
1316	if (HereDoc.State == `3` && sc.atLineEnd) {
1317	// Start of format body.
1318	HereDoc.State = `0`;
1319	sc.SetState(SCE_PL_FORMAT);
1320	}
1321
1322	// Determine if a new state should be entered.
1323	if (sc.state == SCE_PL_DEFAULT) {
1324	if (IsADigit(sc.ch) \|\|
1325	(IsADigit(sc.chNext) && (sc.ch == `'.'` \|\| sc.ch == `'v'`))) {
1326	sc.SetState(SCE_PL_NUMBER);
1327	backFlag = BACK_NONE;
1328	numState = PERLNUM_DECIMAL;
1329	dotCount = `0`;
1330	if (sc.ch == `'0'`) { // hex,bin,octal
1331	if (sc.chNext == `'x'` \|\| sc.chNext == `'X'`) {
1332	numState = PERLNUM_HEX;
1333	} else if (sc.chNext == `'b'` \|\| sc.chNext == `'B'`) {
1334	numState = PERLNUM_BINARY;
1335	} else if (IsADigit(sc.chNext)) {
1336	numState = PERLNUM_OCTAL;
1337	}
1338	if (numState != PERLNUM_DECIMAL) {
1339	sc.Forward();
1340	}
1341	} else if (sc.ch == `'v'`) { // vector
1342	numState = PERLNUM_V_VECTOR;
1343	}
1344	} else if (setWord.Contains(sc.ch)) {
1345	// if immediately prefixed by '::', always a bareword
1346	sc.SetState(SCE_PL_WORD);
1347	if (sc.chPrev == `':'` && sc.GetRelative(-`2`) == `':'`) {
1348	sc.ChangeState(SCE_PL_IDENTIFIER);
1349	}
1350	Sci_PositionU bk = sc.currentPos;
1351	Sci_PositionU fw = sc.currentPos + `1`;
1352	// first check for possible quote-like delimiter
1353	if (sc.ch == `'s'` && !setWord.Contains(sc.chNext)) {
1354	sc.ChangeState(SCE_PL_REGSUBST);
1355	Quote.New(`2`);
1356	} else if (sc.ch == `'m'` && !setWord.Contains(sc.chNext)) {
1357	sc.ChangeState(SCE_PL_REGEX);
1358	Quote.New();
1359	} else if (sc.ch == `'q'` && !setWord.Contains(sc.chNext)) {
1360	sc.ChangeState(SCE_PL_STRING_Q);
1361	Quote.New();
1362	} else if (sc.ch == `'y'` && !setWord.Contains(sc.chNext)) {
1363	sc.ChangeState(SCE_PL_XLAT);
1364	Quote.New(`2`);
1365	} else if (sc.Match(`'t'`, `'r'`) && !setWord.Contains(sc.GetRelative(`2`))) {
1366	sc.ChangeState(SCE_PL_XLAT);
1367	Quote.New(`2`);
1368	sc.Forward();
1369	fw++;
1370	} else if (sc.ch == `'q'` && setQDelim.Contains(sc.chNext)
1371	&& !setWord.Contains(sc.GetRelative(`2`))) {
1372	if (sc.chNext == `'q'`) sc.ChangeState(SCE_PL_STRING_QQ);
1373	else if (sc.chNext == `'x'`) sc.ChangeState(SCE_PL_STRING_QX);
1374	else if (sc.chNext == `'r'`) sc.ChangeState(SCE_PL_STRING_QR);
1375	else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
1376	Quote.New();
1377	sc.Forward();
1378	fw++;
1379	} else if (sc.ch == `'x'` && (sc.chNext == `'='` \|\| // repetition
1380	!setWord.Contains(sc.chNext) \|\|
1381	(setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
1382	sc.ChangeState(SCE_PL_OPERATOR);
1383	}
1384	// if potentially a keyword, scan forward and grab word, then check
1385	// if it's really one; if yes, disambiguation test is performed
1386	// otherwise it is always a bareword and we skip a lot of scanning
1387	if (sc.state == SCE_PL_WORD) {
1388	while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1389	fw++;
1390	if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1391	sc.ChangeState(SCE_PL_IDENTIFIER);
1392	}
1393	}
1394	// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1395	// for quote-like delimiters/keywords, attempt to disambiguate
1396	// to select for bareword, change state -> SCE_PL_IDENTIFIER
1397	if (sc.state != SCE_PL_IDENTIFIER && bk > `0`) {
1398	if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1399	sc.ChangeState(SCE_PL_IDENTIFIER);
1400	}
1401	backFlag = BACK_NONE;
1402	} else if (sc.ch == `'#'`) {
1403	sc.SetState(SCE_PL_COMMENTLINE);
1404	} else if (sc.ch == `'\"'`) {
1405	sc.SetState(SCE_PL_STRING);
1406	Quote.New();
1407	Quote.Open(sc.ch);
1408	backFlag = BACK_NONE;
1409	} else if (sc.ch == `'\''`) {
1410	if (sc.chPrev == `'&'` && setWordStart.Contains(sc.chNext)) {
1411	// Archaic call
1412	sc.SetState(SCE_PL_IDENTIFIER);
1413	} else {
1414	sc.SetState(SCE_PL_CHARACTER);
1415	Quote.New();
1416	Quote.Open(sc.ch);
1417	}
1418	backFlag = BACK_NONE;
1419	} else if (sc.ch == '`') {
1420	sc.SetState(SCE_PL_BACKTICKS);
1421	Quote.New();
1422	Quote.Open(sc.ch);
1423	backFlag = BACK_NONE;
1424	} else if (sc.ch == `'$'`) {
1425	sc.SetState(SCE_PL_SCALAR);
1426	if (sc.chNext == `'{'`) {
1427	sc.ForwardSetState(SCE_PL_OPERATOR);
1428	} else if (IsASpace(sc.chNext)) {
1429	sc.ForwardSetState(SCE_PL_DEFAULT);
1430	} else {
1431	sc.Forward();
1432	if (sc.Match('`', '`') \|\| sc.Match(`':'`, `':'`)) {
1433	sc.Forward();
1434	}
1435	}
1436	backFlag = BACK_NONE;
1437	} else if (sc.ch == `'@'`) {
1438	sc.SetState(SCE_PL_ARRAY);
1439	if (setArray.Contains(sc.chNext)) {
1440	// no special treatment
1441	} else if (sc.chNext == `':'` && sc.GetRelative(`2`) == `':'`) {
1442	sc.ForwardBytes(`2`);
1443	} else if (sc.chNext == `'{'` \|\| sc.chNext == `'['`) {
1444	sc.ForwardSetState(SCE_PL_OPERATOR);
1445	} else {
1446	sc.ChangeState(SCE_PL_OPERATOR);
1447	}
1448	backFlag = BACK_NONE;
1449	} else if (setPreferRE.Contains(sc.ch)) {
1450	// Explicit backward peeking to set a consistent preferRE for
1451	// any slash found, so no longer need to track preferRE state.
1452	// Find first previous significant lexed element and interpret.
1453	// A few symbols shares this code for disambiguation.
1454	bool preferRE = false;
1455	bool isHereDoc = sc.Match(`'<'`, `'<'`);
1456	bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
1457	Sci_PositionU bk = (sc.currentPos > `0`) ? sc.currentPos - `1`: `0`;
1458	sc.Complete();
1459	styler.Flush();
1460	if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1461	hereDocSpace = true;
1462	skipWhitespaceComment(styler, bk);
1463	if (bk == `0`) {
1464	// avoid backward scanning breakage
1465	preferRE = true;
1466	} else {
1467	int bkstyle = styler.StyleAt(bk);
1468	int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1469	switch (bkstyle) {
1470	case SCE_PL_OPERATOR:
1471	preferRE = true;
1472	if (bkch == `')'` \|\| bkch == `']'`) {
1473	preferRE = false;
1474	} else if (bkch == `'}'`) {
1475	// backtrack by counting balanced brace pairs
1476	// needed to test for variables like ${}, @{} etc.
1477	bkstyle = styleBeforeBracePair(styler, bk);
1478	if (bkstyle == SCE_PL_SCALAR
1479	\|\| bkstyle == SCE_PL_ARRAY
1480	\|\| bkstyle == SCE_PL_HASH
1481	\|\| bkstyle == SCE_PL_SYMBOLTABLE
1482	\|\| bkstyle == SCE_PL_OPERATOR) {
1483	preferRE = false;
1484	}
1485	} else if (bkch == `'+'` \|\| bkch == `'-'`) {
1486	if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - `1`))
1487	&& bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - `2`)))
1488	// exceptions for operators: unary suffixes ++, --
1489	preferRE = false;
1490	}
1491	break;
1492	case SCE_PL_IDENTIFIER:
1493	preferRE = true;
1494	bkstyle = styleCheckIdentifier(styler, bk);
1495	if ((bkstyle == `1`) \|\| (bkstyle == `2`)) {
1496	// inputsymbol or var with "->" or "::" before identifier
1497	preferRE = false;
1498	} else if (bkstyle == `3`) {
1499	// bare identifier, test cases follows:
1500	if (sc.ch == `'/'`) {
1501	// if '/', /PATTERN/ unless digit/space immediately after '/'
1502	// if '//', always expect defined-or operator to follow identifier
1503	if (IsASpace(sc.chNext) \|\| IsADigit(sc.chNext) \|\| sc.chNext == `'/'`)
1504	preferRE = false;
1505	} else if (sc.ch == `'*'` \|\| sc.ch == `'%'`) {
1506	if (IsASpace(sc.chNext) \|\| IsADigit(sc.chNext) \|\| sc.Match(`''`, `''`))
1507	preferRE = false;
1508	} else if (sc.ch == `'<'`) {
1509	if (IsASpace(sc.chNext) \|\| sc.chNext == `'='`)
1510	preferRE = false;
1511	}
1512	}
1513	break;
1514	case SCE_PL_SCALAR: // for $var<< case:
1515	if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', always* a HERE doc*
1516	preferRE = true;
1517	break;
1518	case SCE_PL_WORD:
1519	preferRE = true;
1520	// for HERE docs, always true
1521	if (sc.ch == `'/'`) {
1522	// adopt heuristics similar to vim-style rules:
1523	// keywords always forced as /PATTERN/: split, if, elsif, while
1524	// everything else /PATTERN/ unless digit/space immediately after '/'
1525	// for '//', defined-or favoured unless special keywords
1526	Sci_PositionU bkend = bk + `1`;
1527	while (bk > `0` && styler.StyleAt(bk - `1`) == SCE_PL_WORD) {
1528	bk--;
1529	}
1530	if (isPerlKeyword(bk, bkend, reWords, styler))
1531	break;
1532	if (IsASpace(sc.chNext) \|\| IsADigit(sc.chNext) \|\| sc.chNext == `'/'`)
1533	preferRE = false;
1534	} else if (sc.ch == `'*'` \|\| sc.ch == `'%'`) {
1535	if (IsASpace(sc.chNext) \|\| IsADigit(sc.chNext) \|\| sc.Match(`''`, `''`))
1536	preferRE = false;
1537	} else if (sc.ch == `'<'`) {
1538	if (IsASpace(sc.chNext) \|\| sc.chNext == `'='`)
1539	preferRE = false;
1540	}
1541	break;
1542
1543	// other styles uses the default, preferRE=false
1544	case SCE_PL_POD:
1545	case SCE_PL_HERE_Q:
1546	case SCE_PL_HERE_QQ:
1547	case SCE_PL_HERE_QX:
1548	preferRE = true;
1549	break;
1550	}
1551	}
1552	backFlag = BACK_NONE;
1553	if (isHereDoc) { // handle '<<', HERE doc
1554	if (sc.Match("<<>>")) { // double-diamond operator (5.22)
1555	sc.SetState(SCE_PL_OPERATOR);
1556	sc.Forward(`3`);
1557	} else if (preferRE) {
1558	sc.SetState(SCE_PL_HERE_DELIM);
1559	HereDoc.State = `0`;
1560	} else { // << operator
1561	sc.SetState(SCE_PL_OPERATOR);
1562	sc.Forward();
1563	}
1564	} else if (sc.ch == `''`) { // handle '', typeglob
1565	if (preferRE) {
1566	sc.SetState(SCE_PL_SYMBOLTABLE);
1567	if (sc.chNext == `':'` && sc.GetRelative(`2`) == `':'`) {
1568	sc.ForwardBytes(`2`);
1569	} else if (sc.chNext == `'{'`) {
1570	sc.ForwardSetState(SCE_PL_OPERATOR);
1571	} else {
1572	sc.Forward();
1573	}
1574	} else {
1575	sc.SetState(SCE_PL_OPERATOR);
1576	if (sc.chNext == `''`) // exponentiation*
1577	sc.Forward();
1578	}
1579	} else if (sc.ch == `'%'`) { // handle '%', hash
1580	if (preferRE) {
1581	sc.SetState(SCE_PL_HASH);
1582	if (setHash.Contains(sc.chNext)) {
1583	sc.Forward();
1584	} else if (sc.chNext == `':'` && sc.GetRelative(`2`) == `':'`) {
1585	sc.ForwardBytes(`2`);
1586	} else if (sc.chNext == `'{'`) {
1587	sc.ForwardSetState(SCE_PL_OPERATOR);
1588	} else {
1589	sc.ChangeState(SCE_PL_OPERATOR);
1590	}
1591	} else {
1592	sc.SetState(SCE_PL_OPERATOR);
1593	}
1594	} else if (sc.ch == `'<'`) { // handle '<', inputsymbol
1595	if (preferRE) {
1596	// forward scan
1597	int i = InputSymbolScan(sc);
1598	if (i > `0`) {
1599	sc.SetState(SCE_PL_IDENTIFIER);
1600	sc.Forward(i);
1601	} else {
1602	sc.SetState(SCE_PL_OPERATOR);
1603	}
1604	} else {
1605	sc.SetState(SCE_PL_OPERATOR);
1606	}
1607	} else { // handle '/', regexp
1608	if (preferRE) {
1609	sc.SetState(SCE_PL_REGEX);
1610	Quote.New();
1611	Quote.Open(sc.ch);
1612	} else { // / and // operators
1613	sc.SetState(SCE_PL_OPERATOR);
1614	if (sc.chNext == `'/'`) {
1615	sc.Forward();
1616	}
1617	}
1618	}
1619	} else if (sc.ch == `'='` // POD
1620	&& setPOD.Contains(sc.chNext)
1621	&& sc.atLineStart) {
1622	sc.SetState(SCE_PL_POD);
1623	backFlag = BACK_NONE;
1624	} else if (sc.ch == `'-'` && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1625	Sci_PositionU bk = sc.currentPos;
1626	Sci_PositionU fw = `2`;
1627	if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1628	!setWord.Contains(sc.GetRelative(`2`))) {
1629	sc.SetState(SCE_PL_WORD);
1630	} else {
1631	// nominally a minus and bareword; find extent of bareword
1632	while (setWord.Contains(sc.GetRelative(fw)))
1633	fw++;
1634	sc.SetState(SCE_PL_OPERATOR);
1635	}
1636	// force to bareword for hash key => or {variable literal} cases
1637	if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & `2`) {
1638	sc.ChangeState(SCE_PL_IDENTIFIER);
1639	}
1640	backFlag = BACK_NONE;
1641	} else if (sc.ch == `'('` && sc.currentPos > `0`) { // '(' or subroutine prototype
1642	sc.Complete();
1643	if (styleCheckSubPrototype(styler, sc.currentPos - `1`)) {
1644	sc.SetState(SCE_PL_SUB_PROTOTYPE);
1645	backFlag = BACK_NONE;
1646	} else {
1647	sc.SetState(SCE_PL_OPERATOR);
1648	}
1649	} else if (setPerlOperator.Contains(sc.ch)) { // operators
1650	sc.SetState(SCE_PL_OPERATOR);
1651	if (sc.Match(`'.'`, `'.'`)) { // .. and ...
1652	sc.Forward();
1653	if (sc.chNext == `'.'`) sc.Forward();
1654	}
1655	} else if (sc.ch == `4` \|\| sc.ch == `26`) { // ^D and ^Z ends valid perl source
1656	sc.SetState(SCE_PL_DATASECTION);
1657	} else {
1658	// keep colouring defaults
1659	sc.Complete();
1660	}
1661	}
1662	}
1663	sc.Complete();
1664	if (sc.state == SCE_PL_HERE_Q
1665	\|\| sc.state == SCE_PL_HERE_QQ
1666	\|\| sc.state == SCE_PL_HERE_QX
1667	\|\| sc.state == SCE_PL_FORMAT) {
1668	styler.ChangeLexerState(sc.currentPos, styler.Length());
1669	}
1670	sc.Complete();
1671	}
1672
// Bit layout for POD heading folds, as used by LexerPerl::Fold: the heading
// level is shifted left by PERL_HEADFOLD_SHIFT and merged into the fold level
// under PERL_HEADFOLD_MASK (bits 7-4).
#define PERL_HEADFOLD_SHIFT 4
#define PERL_HEADFOLD_MASK 0xF0
1675
1676	void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int / initStyle /, IDocument *pAccess) {
1677
1678	if (!options.fold)
1679	return;
1680
1681	LexAccessor styler(pAccess);
1682
1683	Sci_PositionU endPos = startPos + length;
1684	int visibleChars = `0`;
1685	Sci_Position lineCurrent = styler.GetLine(startPos);
1686
1687	// Backtrack to previous line in case need to fix its fold status
1688	if (startPos > `0`) {
1689	if (lineCurrent > `0`) {
1690	lineCurrent--;
1691	startPos = styler.LineStart(lineCurrent);
1692	}
1693	}
1694
1695	int levelPrev = SC_FOLDLEVELBASE;
1696	if (lineCurrent > `0`)
1697	levelPrev = styler.LevelAt(lineCurrent - `1`) >> `16`;
1698	int levelCurrent = levelPrev;
1699	char chNext = styler [startPos];
1700	char chPrev = styler.SafeGetCharAt(startPos - `1`);
1701	int styleNext = styler.StyleAt(startPos);
1702	// Used at end of line to determine if the line was a package definition
1703	bool isPackageLine = false;
1704	int podHeading = `0`;
1705	for (Sci_PositionU i = startPos; i < endPos; i++) {
1706	char ch = chNext;
1707	chNext = styler.SafeGetCharAt(i + `1`);
1708	int style = styleNext;
1709	styleNext = styler.StyleAt(i + `1`);
1710	int stylePrevCh = (i) ? styler.StyleAt(i - `1`):SCE_PL_DEFAULT;
1711	bool atEOL = (ch == `'\r'` && chNext != `'\n'`) \|\| (ch == `'\n'`);
1712	bool atLineStart = ((chPrev == `'\r'`) \|\| (chPrev == `'\n'`)) \|\| i == `0`;
1713	// Comment folding
1714	if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1715	if (!IsCommentLine(lineCurrent - `1`, styler)
1716	&& IsCommentLine(lineCurrent + `1`, styler))
1717	levelCurrent++;
1718	else if (IsCommentLine(lineCurrent - `1`, styler)
1719	&& !IsCommentLine(lineCurrent + `1`, styler))
1720	levelCurrent--;
1721	}
1722	// {} [] block folding
1723	if (style == SCE_PL_OPERATOR) {
1724	if (ch == `'{'`) {
1725	if (options.foldAtElse && levelCurrent < levelPrev)
1726	--levelPrev;
1727	levelCurrent++;
1728	} else if (ch == `'}'`) {
1729	levelCurrent--;
1730	}
1731	if (ch == `'['`) {
1732	if (options.foldAtElse && levelCurrent < levelPrev)
1733	--levelPrev;
1734	levelCurrent++;
1735	} else if (ch == `']'`) {
1736	levelCurrent--;
1737	}
1738	} else if (style == SCE_PL_STRING_QW) {
1739	// qw
1740	if (stylePrevCh != style)
1741	levelCurrent++;
1742	else if (styleNext != style)
1743	levelCurrent--;
1744	}
1745	// POD folding
1746	if (options.foldPOD && atLineStart) {
1747	if (style == SCE_PL_POD) {
1748	if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1749	levelCurrent++;
1750	else if (styler.Match(i, "=cut"))
1751	levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - `1`;
1752	else if (styler.Match(i, "=head"))
1753	podHeading = PodHeadingLevel(i, styler);
1754	} else if (style == SCE_PL_DATASECTION) {
1755	if (ch == `'='` && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1756	levelCurrent++;
1757	else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1758	levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - `1`;
1759	else if (styler.Match(i, "=head"))
1760	podHeading = PodHeadingLevel(i, styler);
1761	// if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1762	// reset needed as level test is vs. SC_FOLDLEVELBASE
1763	else if (stylePrevCh != SCE_PL_DATASECTION)
1764	levelCurrent = SC_FOLDLEVELBASE;
1765	}
1766	}
1767	// package folding
1768	if (options.foldPackage && atLineStart) {
1769	if (IsPackageLine(lineCurrent, styler)
1770	&& !IsPackageLine(lineCurrent + `1`, styler))
1771	isPackageLine = true;
1772	}
1773
1774	//heredoc folding
1775	switch (style) {
1776	case SCE_PL_HERE_QQ :
1777	case SCE_PL_HERE_Q :
1778	case SCE_PL_HERE_QX :
1779	switch (stylePrevCh) {
1780	case SCE_PL_HERE_QQ :
1781	case SCE_PL_HERE_Q :
1782	case SCE_PL_HERE_QX :
1783	//do nothing;
1784	break;
1785	default :
1786	levelCurrent++;
1787	break;
1788	}
1789	break;
1790	default:
1791	switch (stylePrevCh) {
1792	case SCE_PL_HERE_QQ :
1793	case SCE_PL_HERE_Q :
1794	case SCE_PL_HERE_QX :
1795	levelCurrent--;
1796	break;
1797	default :
1798	//do nothing;
1799	break;
1800	}
1801	break;
1802	}
1803
1804	//explicit folding
1805	if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == `'#'`) {
1806	if (chNext == `'{'`) {
1807	levelCurrent++;
1808	} else if (levelCurrent > SC_FOLDLEVELBASE && chNext == `'}'`) {
1809	levelCurrent--;
1810	}
1811	}
1812
1813	if (atEOL) {
1814	int lev = levelPrev;
1815	// POD headings occupy bits 7-4, leaving some breathing room for
1816	// non-standard practice -- POD sections stuck in blocks, etc.
1817	if (podHeading > `0`) {
1818	levelCurrent = (lev & ~PERL_HEADFOLD_MASK) \| (podHeading << PERL_HEADFOLD_SHIFT);
1819	lev = levelCurrent - `1`;
1820	lev \|= SC_FOLDLEVELHEADERFLAG;
1821	podHeading = `0`;
1822	}
1823	// Check if line was a package declaration
1824	// because packages need "special" treatment
1825	if (isPackageLine) {
1826	lev = SC_FOLDLEVELBASE \| SC_FOLDLEVELHEADERFLAG;
1827	levelCurrent = SC_FOLDLEVELBASE + `1`;
1828	isPackageLine = false;
1829	}
1830	lev \|= levelCurrent << `16`;
1831	if (visibleChars == `0` && options.foldCompact)
1832	lev \|= SC_FOLDLEVELWHITEFLAG;
1833	if ((levelCurrent > levelPrev) && (visibleChars > `0`))
1834	lev \|= SC_FOLDLEVELHEADERFLAG;
1835	if (lev != styler.LevelAt(lineCurrent)) {
1836	styler.SetLevel(lineCurrent, lev);
1837	}
1838	lineCurrent++;
1839	levelPrev = levelCurrent;
1840	visibleChars = `0`;
1841	}
1842	if (!isspacechar(ch))
1843	visibleChars++;
1844	chPrev = ch;
1845	}
1846	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1847	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1848	styler.SetLevel(lineCurrent, levelPrev \| flagsNext);
1849	}
1850
1851	LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);
1852

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexPerl.cxx