1// Scintilla source code edit control
2/** @file LexPerl.cxx
3 ** Lexer for Perl.
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8// The License.txt file describes the conditions under which this software may be distributed.
9
10#include <stdlib.h>
11#include <string.h>
12#include <stdio.h>
13#include <stdarg.h>
14#include <assert.h>
15#include <ctype.h>
16
17#include <string>
18#include <string_view>
19#include <map>
20#include <functional>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25
26#include "WordList.h"
27#include "LexAccessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31#include "OptionSet.h"
32#include "DefaultLexer.h"
33
34using namespace Scintilla;
35using namespace Lexilla;
36
37// Info for HERE document handling from perldata.pod (reformatted):
38// ----------------------------------------------------------------
39// A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
40// Following a << you specify a string to terminate the quoted material, and
41// all lines following the current line down to the terminating string are
42// the value of the item.
43// Prefixing the terminating string with a "~" specifies that you want to
44// use "Indented Here-docs" (see below).
45// * The terminating string may be either an identifier (a word), or some
46// quoted text.
47// * If quoted, the type of quotes you use determines the treatment of the
48// text, just as in regular quoting.
49// * An unquoted identifier works like double quotes.
50// * There must be no space between the << and the identifier.
51// (If you put a space it will be treated as a null identifier,
52// which is valid, and matches the first empty line.)
53// (This is deprecated, -w warns of this syntax)
54// * The terminating string must appear by itself (unquoted and
55// with no surrounding whitespace) on the terminating line.
56//
57// Indented Here-docs
58// ------------------
59// The here-doc modifier "~" allows you to indent your here-docs to
60// make the code more readable.
61// The delimiter is used to determine the exact whitespace to remove
62// from the beginning of each line. All lines must have at least the
63// same starting whitespace (except lines only containing a newline)
64// or perl will croak. Tabs and spaces can be mixed, but are matched
65// exactly. One tab will not be equal to 8 spaces!
66// Additional beginning whitespace (beyond what preceded the
67// delimiter) will be preserved.
68
69#define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
70
71#define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
72#define PERLNUM_OCTAL 2
73#define PERLNUM_FLOAT_EXP 3 // exponent part only
74#define PERLNUM_HEX 4 // may be a hex float
75#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
76#define PERLNUM_VECTOR 6
77#define PERLNUM_V_VECTOR 7
78#define PERLNUM_BAD 8
79
80#define BACK_NONE 0 // lookback state for bareword disambiguation:
81#define BACK_OPERATOR 1 // whitespace/comments are insignificant
82#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
83
84#define SUB_BEGIN 0 // states for subroutine prototype scan:
85#define SUB_HAS_PROTO 1 // only 'prototype' attribute allows prototypes
86#define SUB_HAS_ATTRIB 2 // other attributes can exist leftward
87#define SUB_HAS_MODULE 3 // sub name can have a ::identifier part
88#define SUB_HAS_SUB 4 // 'sub' keyword
89
90// all interpolated styles are different from their parent styles by a constant difference
91// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
92#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
93
94static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
95 // old-style keyword matcher; needed because GetCurrent() needs
96 // current segment to be committed, but we may abandon early...
97 char s[100];
98 Sci_PositionU i, len = end - start;
99 if (len > 30) { len = 30; }
100 for (i = 0; i < len; i++, start++) s[i] = styler[start];
101 s[i] = '\0';
102 return keywords.InList(s);
103}
104
105static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
106 int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
107 // identifiers are recognized by Perl as barewords under some
108 // conditions, the following attempts to do the disambiguation
109 // by looking backward and forward; result in 2 LSB
110 int result = 0;
111 bool moreback = false; // true if passed newline/comments
112 bool brace = false; // true if opening brace found
113 // if BACK_NONE, neither operator nor keyword, so skip test
114 if (backFlag == BACK_NONE)
115 return result;
116 // first look backwards past whitespace/comments to set EOL flag
117 // (some disambiguation patterns must be on a single line)
118 if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
119 moreback = true;
120 // look backwards at last significant lexed item for disambiguation
121 bk = backPos - 1;
122 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
123 if (ch == '{' && !moreback) {
124 // {bareword: possible variable spec
125 brace = true;
126 } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
127 // &bareword: subroutine call
128 || styler.Match(bk - 1, "->")
129 // ->bareword: part of variable spec
130 || styler.Match(bk - 1, "::")
131 // ::bareword: part of module spec
132 || styler.Match(bk - 2, "sub")) {
133 // sub bareword: subroutine declaration
134 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
135 result |= 1;
136 }
137 // next, scan forward after word past tab/spaces only;
138 // if ch isn't one of '[{(,' we can skip the test
139 if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
140 && fw < endPos) {
141 while (IsASpaceOrTab(ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)))
142 && fw < endPos) {
143 fw++;
144 }
145 if ((ch == '}' && brace)
146 // {bareword}: variable spec
147 || styler.Match(fw, "=>")) {
148 // [{(, bareword=>: hash literal
149 result |= 2;
150 }
151 }
152 return result;
153}
154
155static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
156 // when backtracking, we need to skip whitespace and comments
157 while (p > 0) {
158 const int style = styler.StyleAt(p);
159 if (style != SCE_PL_DEFAULT && style != SCE_PL_COMMENTLINE)
160 break;
161 p--;
162 }
163}
164
165static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
166 // scan backward past whitespace and comments to find a lexeme
167 skipWhitespaceComment(styler, bk);
168 if (bk == 0)
169 return 0;
170 int sz = 1;
171 style = styler.StyleAt(bk);
172 while (bk > 0) { // find extent of lexeme
173 if (styler.StyleAt(bk - 1) == style) {
174 bk--; sz++;
175 } else
176 break;
177 }
178 return sz;
179}
180
181static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
182 // backtrack to find open '{' corresponding to a '}', balanced
183 // return significant style to be tested for '/' disambiguation
184 int braceCount = 1;
185 if (bk == 0)
186 return SCE_PL_DEFAULT;
187 while (--bk > 0) {
188 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
189 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
190 if (bkch == ';') { // early out
191 break;
192 } else if (bkch == '}') {
193 braceCount++;
194 } else if (bkch == '{') {
195 if (--braceCount == 0) break;
196 }
197 }
198 }
199 if (bk > 0 && braceCount == 0) {
200 // balanced { found, bk > 0, skip more whitespace/comments
201 bk--;
202 skipWhitespaceComment(styler, bk);
203 return styler.StyleAt(bk);
204 }
205 return SCE_PL_DEFAULT;
206}
207
208static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
209 // backtrack to classify sub-styles of identifier under test
210 // return sub-style to be tested for '/' disambiguation
211 if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
212 return 1;
213 // backtrack to check for possible "->" or "::" before identifier
214 while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
215 bk--;
216 }
217 while (bk > 0) {
218 int bkstyle = styler.StyleAt(bk);
219 if (bkstyle == SCE_PL_DEFAULT
220 || bkstyle == SCE_PL_COMMENTLINE) {
221 // skip whitespace, comments
222 } else if (bkstyle == SCE_PL_OPERATOR) {
223 // test for "->" and "::"
224 if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
225 return 2;
226 } else
227 return 3; // bare identifier
228 bk--;
229 }
230 return 0;
231}
232
233static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
234 // forward scan the current line to classify line for POD style
235 int state = -1;
236 while (pos < endPos) {
237 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
238 if (ch == '\n' || ch == '\r') {
239 if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
240 break;
241 }
242 if (IsASpaceOrTab(ch)) { // whitespace, take note
243 if (state == -1)
244 state = SCE_PL_DEFAULT;
245 } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
246 state = SCE_PL_POD_VERB;
247 } else if (state != SCE_PL_POD_VERB) { // regular POD line
248 state = SCE_PL_POD;
249 }
250 pos++;
251 }
252 if (state == -1)
253 state = SCE_PL_DEFAULT;
254 return state;
255}
256
257static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
258 // backtrack to identify if we're starting a subroutine prototype
259 // we also need to ignore whitespace/comments, format is like:
260 // sub abc::pqr :const :prototype(...)
261 // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
262 // and a state machine generates legal subroutine syntax matches
263 styler.Flush();
264 int state = SUB_BEGIN;
265 do {
266 // find two lexemes, lexeme 2 follows lexeme 1
267 int style2 = SCE_PL_DEFAULT;
268 Sci_PositionU pos2 = bk;
269 int len2 = findPrevLexeme(styler, pos2, style2);
270 int style1 = SCE_PL_DEFAULT;
271 Sci_PositionU pos1 = pos2;
272 if (pos1 > 0) pos1--;
273 int len1 = findPrevLexeme(styler, pos1, style1);
274 if (len1 == 0 || len2 == 0) // lexeme pair must exist
275 break;
276
277 // match parts of syntax, if invalid subroutine syntax, break off
278 if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
279 styler.SafeGetCharAt(pos1) == ':') { // ':'
280 if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
281 if (len2 == 9 && styler.Match(pos2, "prototype")) { // ':' 'prototype'
282 if (state == SUB_BEGIN) {
283 state = SUB_HAS_PROTO;
284 } else
285 break;
286 } else { // ':' <attribute>
287 if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
288 state = SUB_HAS_ATTRIB;
289 } else
290 break;
291 }
292 } else
293 break;
294 } else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
295 styler.Match(pos1, "::")) { // '::'
296 if (style2 == SCE_PL_IDENTIFIER) { // '::' <identifier>
297 state = SUB_HAS_MODULE;
298 } else
299 break;
300 } else if (style1 == SCE_PL_WORD && len1 == 3 &&
301 styler.Match(pos1, "sub")) { // 'sub'
302 if (style2 == SCE_PL_IDENTIFIER) { // 'sub' <identifier>
303 state = SUB_HAS_SUB;
304 } else
305 break;
306 } else
307 break;
308 bk = pos1; // set position for finding next lexeme pair
309 if (bk > 0) bk--;
310 } while (state != SUB_HAS_SUB);
311 return (state == SUB_HAS_SUB);
312}
313
314static int actualNumStyle(int numberStyle) {
315 if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
316 return SCE_PL_STRING;
317 } else if (numberStyle == PERLNUM_BAD) {
318 return SCE_PL_ERROR;
319 }
320 return SCE_PL_NUMBER;
321}
322
// Return the closing delimiter paired with an opening bracket character
// ('(' '[' '{' '<'); any other character is its own closing delimiter.
static int opposite(int ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
330
331static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
332 Sci_Position pos = styler.LineStart(line);
333 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
334 for (Sci_Position i = pos; i < eol_pos; i++) {
335 char ch = styler[i];
336 int style = styler.StyleAt(i);
337 if (ch == '#' && style == SCE_PL_COMMENTLINE)
338 return true;
339 else if (!IsASpaceOrTab(ch))
340 return false;
341 }
342 return false;
343}
344
345static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
346 Sci_Position pos = styler.LineStart(line);
347 int style = styler.StyleAt(pos);
348 if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
349 return true;
350 }
351 return false;
352}
353
354static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
355 int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
356 if (lvl >= '1' && lvl <= '4') {
357 return lvl - '0';
358 }
359 return 0;
360}
361
362// An individual named option for use in an OptionSet
363
364// Options used for LexerPerl
// Plain holder for the option values; the defaults are set by the
// constructor below.
struct OptionsPerl {
    bool fold;
    bool foldComment;
    bool foldCompact;
    // Custom folding of POD and packages
    bool foldPOD;               // fold.perl.pod
    // Enable folding Pod blocks when using the Perl lexer.
    bool foldPackage;           // fold.perl.package
    // Enable folding packages when using the Perl lexer.

    bool foldCommentExplicit;   // fold.perl.comment.explicit

    bool foldAtElse;            // fold.perl.at.else

    // Initialise every option to its default value.
    OptionsPerl() :
        fold(false),
        foldComment(false),
        foldCompact(true),
        foldPOD(true),
        foldPackage(true),
        foldCommentExplicit(true),
        foldAtElse(false) {
    }
};
389
// Descriptions of the word lists used by the lexer; the array is
// terminated by a null pointer.
static const char *const perlWordListDesc[] = {
    "Keywords",
    nullptr
};
394
395struct OptionSetPerl : public OptionSet<OptionsPerl> {
396 OptionSetPerl() {
397 DefineProperty("fold", &OptionsPerl::fold);
398
399 DefineProperty("fold.comment", &OptionsPerl::foldComment);
400
401 DefineProperty("fold.compact", &OptionsPerl::foldCompact);
402
403 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
404 "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
405
406 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
407 "Set to 0 to disable folding packages when using the Perl lexer.");
408
409 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
410 "Set to 0 to disable explicit folding.");
411
412 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
413 "This option enables Perl folding on a \"} else {\" line of an if statement.");
414
415 DefineWordListSets(perlWordListDesc);
416 }
417};
418
419class LexerPerl : public DefaultLexer {
420 CharacterSet setWordStart;
421 CharacterSet setWord;
422 CharacterSet setSpecialVar;
423 CharacterSet setControlVar;
424 WordList keywords;
425 OptionsPerl options;
426 OptionSetPerl osPerl;
427public:
428 LexerPerl() :
429 DefaultLexer("perl", SCLEX_PERL),
430 setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
431 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
432 setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
433 setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
434 }
435 virtual ~LexerPerl() {
436 }
437 void SCI_METHOD Release() override {
438 delete this;
439 }
440 int SCI_METHOD Version() const override {
441 return lvRelease5;
442 }
443 const char *SCI_METHOD PropertyNames() override {
444 return osPerl.PropertyNames();
445 }
446 int SCI_METHOD PropertyType(const char *name) override {
447 return osPerl.PropertyType(name);
448 }
449 const char *SCI_METHOD DescribeProperty(const char *name) override {
450 return osPerl.DescribeProperty(name);
451 }
452 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
453 const char * SCI_METHOD PropertyGet(const char *key) override {
454 return osPerl.PropertyGet(key);
455 }
456 const char *SCI_METHOD DescribeWordListSets() override {
457 return osPerl.DescribeWordListSets();
458 }
459 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
460 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
461 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
462
463 void *SCI_METHOD PrivateCall(int, void *) override {
464 return 0;
465 }
466
467 static ILexer5 *LexerFactoryPerl() {
468 return new LexerPerl();
469 }
470 int InputSymbolScan(StyleContext &sc);
471 void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
472};
473
474Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
475 if (osPerl.PropertySet(&options, key, val)) {
476 return 0;
477 }
478 return -1;
479}
480
481Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
482 WordList *wordListN = 0;
483 switch (n) {
484 case 0:
485 wordListN = &keywords;
486 break;
487 }
488 Sci_Position firstModification = -1;
489 if (wordListN) {
490 WordList wlNew;
491 wlNew.Set(wl);
492 if (*wordListN != wlNew) {
493 wordListN->Set(wl);
494 firstModification = 0;
495 }
496 }
497 return firstModification;
498}
499
500int LexerPerl::InputSymbolScan(StyleContext &sc) {
501 // forward scan for matching > on same line; file handles
502 int c, sLen = 0;
503 while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
504 if (c == '\r' || c == '\n') {
505 return 0;
506 } else if (c == '>') {
507 if (sc.Match("<=>")) // '<=>' case
508 return 0;
509 return sLen;
510 }
511 }
512 return 0;
513}
514
515void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
516 // interpolate a segment (with no active backslashes or delimiters within)
517 // switch in or out of an interpolation style or continue current style
518 // commit variable patterns if found, trim segment, repeat until done
519 while (maxSeg > 0) {
520 bool isVar = false;
521 int sLen = 0;
522 if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
523 // $#[$]*word [$@][$]*word (where word or {word} is always present)
524 bool braces = false;
525 sLen = 1;
526 if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
527 sLen++;
528 }
529 while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) // >0 $ dereference within
530 sLen++;
531 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) { // { start for {word}
532 sLen++;
533 braces = true;
534 }
535 if (maxSeg > sLen) {
536 int c = sc.GetRelativeCharacter(sLen);
537 if (setWordStart.Contains(c)) { // word (various)
538 sLen++;
539 isVar = true;
540 while (maxSeg > sLen) {
541 if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
542 break;
543 sLen++;
544 }
545 } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit}
546 sLen++;
547 isVar = true;
548 }
549 }
550 if (braces) {
551 if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) { // } end for {word}
552 sLen++;
553 } else
554 isVar = false;
555 }
556 }
557 if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns
558 int c = sc.chNext;
559 if (sc.ch == '$') {
560 sLen = 1;
561 if (IsADigit(c)) { // $[0-9] and slurp trailing digits
562 sLen++;
563 isVar = true;
564 while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
565 sLen++;
566 } else if (setSpecialVar.Contains(c)) { // $ special variables
567 sLen++;
568 isVar = true;
569 } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional
570 sLen++;
571 isVar = true;
572 } else if (c == '^') { // $^A control-char style
573 sLen++;
574 if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
575 sLen++;
576 isVar = true;
577 }
578 }
579 } else if (sc.ch == '@') {
580 sLen = 1;
581 if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
582 sLen++;
583 isVar = true;
584 }
585 }
586 }
587 if (isVar) { // commit as interpolated variable or normal character
588 if (sc.state < SCE_PL_STRING_VAR)
589 sc.SetState(sc.state + INTERPOLATE_SHIFT);
590 sc.Forward(sLen);
591 maxSeg -= sLen;
592 } else {
593 if (sc.state >= SCE_PL_STRING_VAR)
594 sc.SetState(sc.state - INTERPOLATE_SHIFT);
595 sc.Forward();
596 maxSeg--;
597 }
598 }
599 if (sc.state >= SCE_PL_STRING_VAR)
600 sc.SetState(sc.state - INTERPOLATE_SHIFT);
601}
602
603void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
604 LexAccessor styler(pAccess);
605
606 // keywords that forces /PATTERN/ at all times; should track vim's behaviour
607 WordList reWords;
608 reWords.Set("elsif if split while");
609
610 // charset classes
611 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
612 // lexing of "%*</" operators is non-trivial; these are missing in the set below
613 CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
614 CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
615 CharacterSet setModifiers(CharacterSet::setAlpha);
616 CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
617 // setArray and setHash also accepts chars for special vars like $_,
618 // which are then truncated when the next char does not match setVar
619 CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
620 CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
621 CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
622 CharacterSet &setPOD = setModifiers;
623 CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
624 CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
625 CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
626 CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
627 // for format identifiers
628 CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
629 CharacterSet &setFormat = setHereDocDelim;
630
631 // Lexer for perl often has to backtrack to start of current style to determine
632 // which characters are being used as quotes, how deeply nested is the
633 // start position and what the termination string is for HERE documents.
634
635 class HereDocCls { // Class to manage HERE doc sequence
636 public:
637 int State;
638 // 0: '<<' encountered
639 // 1: collect the delimiter
640 // 2: here doc text (lines after the delimiter)
641 int Quote; // the char after '<<'
642 bool Quoted; // true if Quote in ('\'','"','`')
643 bool StripIndent; // true if '<<~' requested to strip leading whitespace
644 int DelimiterLength; // strlen(Delimiter)
645 char Delimiter[HERE_DELIM_MAX]; // the Delimiter
646 HereDocCls() {
647 State = 0;
648 Quote = 0;
649 Quoted = false;
650 StripIndent = false;
651 DelimiterLength = 0;
652 Delimiter[0] = '\0';
653 }
654 void Append(int ch) {
655 Delimiter[DelimiterLength++] = static_cast<char>(ch);
656 Delimiter[DelimiterLength] = '\0';
657 }
658 ~HereDocCls() {
659 }
660 };
661 HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
662
663 class QuoteCls { // Class to manage quote pairs
664 public:
665 int Rep;
666 int Count;
667 int Up, Down;
668 QuoteCls() {
669 New(1);
670 }
671 void New(int r = 1) {
672 Rep = r;
673 Count = 0;
674 Up = '\0';
675 Down = '\0';
676 }
677 void Open(int u) {
678 Count++;
679 Up = u;
680 Down = opposite(Up);
681 }
682 };
683 QuoteCls Quote;
684
685 // additional state for number lexing
686 int numState = PERLNUM_DECIMAL;
687 int dotCount = 0;
688
689 Sci_PositionU endPos = startPos + length;
690
691 // Backtrack to beginning of style if required...
692 // If in a long distance lexical state, backtrack to find quote characters.
693 // Includes strings (may be multi-line), numbers (additional state), format
694 // bodies, as well as POD sections.
695 if (initStyle == SCE_PL_HERE_Q
696 || initStyle == SCE_PL_HERE_QQ
697 || initStyle == SCE_PL_HERE_QX
698 || initStyle == SCE_PL_FORMAT
699 || initStyle == SCE_PL_HERE_QQ_VAR
700 || initStyle == SCE_PL_HERE_QX_VAR
701 ) {
702 // backtrack through multiple styles to reach the delimiter start
703 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
704 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
705 startPos--;
706 }
707 startPos = styler.LineStart(styler.GetLine(startPos));
708 initStyle = styler.StyleAt(startPos - 1);
709 }
710 if (initStyle == SCE_PL_STRING
711 || initStyle == SCE_PL_STRING_QQ
712 || initStyle == SCE_PL_BACKTICKS
713 || initStyle == SCE_PL_STRING_QX
714 || initStyle == SCE_PL_REGEX
715 || initStyle == SCE_PL_STRING_QR
716 || initStyle == SCE_PL_REGSUBST
717 || initStyle == SCE_PL_STRING_VAR
718 || initStyle == SCE_PL_STRING_QQ_VAR
719 || initStyle == SCE_PL_BACKTICKS_VAR
720 || initStyle == SCE_PL_STRING_QX_VAR
721 || initStyle == SCE_PL_REGEX_VAR
722 || initStyle == SCE_PL_STRING_QR_VAR
723 || initStyle == SCE_PL_REGSUBST_VAR
724 ) {
725 // for interpolation, must backtrack through a mix of two different styles
726 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
727 initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
728 while (startPos > 1) {
729 int st = styler.StyleAt(startPos - 1);
730 if ((st != initStyle) && (st != otherStyle))
731 break;
732 startPos--;
733 }
734 initStyle = SCE_PL_DEFAULT;
735 } else if (initStyle == SCE_PL_STRING_Q
736 || initStyle == SCE_PL_STRING_QW
737 || initStyle == SCE_PL_XLAT
738 || initStyle == SCE_PL_CHARACTER
739 || initStyle == SCE_PL_NUMBER
740 || initStyle == SCE_PL_IDENTIFIER
741 || initStyle == SCE_PL_ERROR
742 || initStyle == SCE_PL_SUB_PROTOTYPE
743 ) {
744 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
745 startPos--;
746 }
747 initStyle = SCE_PL_DEFAULT;
748 } else if (initStyle == SCE_PL_POD
749 || initStyle == SCE_PL_POD_VERB
750 ) {
751 // POD backtracking finds preceding blank lines and goes back past them
752 Sci_Position ln = styler.GetLine(startPos);
753 if (ln > 0) {
754 initStyle = styler.StyleAt(styler.LineStart(--ln));
755 if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
756 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
757 ln--;
758 }
759 startPos = styler.LineStart(++ln);
760 initStyle = styler.StyleAt(startPos - 1);
761 } else {
762 startPos = 0;
763 initStyle = SCE_PL_DEFAULT;
764 }
765 }
766
767 // backFlag, backPos are additional state to aid identifier corner cases.
768 // Look backwards past whitespace and comments in order to detect either
769 // operator or keyword. Later updated as we go along.
770 int backFlag = BACK_NONE;
771 Sci_PositionU backPos = startPos;
772 if (backPos > 0) {
773 backPos--;
774 skipWhitespaceComment(styler, backPos);
775 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
776 backFlag = BACK_OPERATOR;
777 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
778 backFlag = BACK_KEYWORD;
779 backPos++;
780 }
781
782 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
783
784 for (; sc.More(); sc.Forward()) {
785
786 // Determine if the current state should terminate.
787 switch (sc.state) {
788 case SCE_PL_OPERATOR:
789 sc.SetState(SCE_PL_DEFAULT);
790 backFlag = BACK_OPERATOR;
791 backPos = sc.currentPos;
792 break;
793 case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
794 if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
795 || sc.Match('.', '.')
796 || sc.chPrev == '>') { // end of inputsymbol
797 sc.SetState(SCE_PL_DEFAULT);
798 }
799 break;
800 case SCE_PL_WORD: // keyword, plus special cases
801 if (!setWord.Contains(sc.ch)) {
802 char s[100];
803 sc.GetCurrent(s, sizeof(s));
804 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
805 sc.ChangeState(SCE_PL_DATASECTION);
806 } else {
807 if ((strcmp(s, "format") == 0)) {
808 sc.SetState(SCE_PL_FORMAT_IDENT);
809 HereDoc.State = 0;
810 } else {
811 sc.SetState(SCE_PL_DEFAULT);
812 }
813 backFlag = BACK_KEYWORD;
814 backPos = sc.currentPos;
815 }
816 }
817 break;
818 case SCE_PL_SCALAR:
819 case SCE_PL_ARRAY:
820 case SCE_PL_HASH:
821 case SCE_PL_SYMBOLTABLE:
822 if (sc.Match(':', ':')) { // skip ::
823 sc.Forward();
824 } else if (!setVar.Contains(sc.ch)) {
825 if (sc.LengthCurrent() == 1) {
826 // Special variable: $(, $_ etc.
827 sc.Forward();
828 }
829 sc.SetState(SCE_PL_DEFAULT);
830 }
831 break;
832 case SCE_PL_NUMBER:
833 // if no early break, number style is terminated at "(go through)"
834 if (sc.ch == '.') {
835 if (sc.chNext == '.') {
836 // double dot is always an operator (go through)
837 } else if (numState <= PERLNUM_FLOAT_EXP) {
838 // non-decimal number or float exponent, consume next dot
839 sc.SetState(SCE_PL_OPERATOR);
840 break;
841 } else { // decimal or vectors allows dots
842 dotCount++;
843 if (numState == PERLNUM_DECIMAL) {
844 if (dotCount <= 1) // number with one dot in it
845 break;
846 if (IsADigit(sc.chNext)) { // really a vector
847 numState = PERLNUM_VECTOR;
848 break;
849 }
850 // number then dot (go through)
851 } else if (numState == PERLNUM_HEX) {
852 if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
853 break; // hex with one dot is a hex float
854 } else {
855 sc.SetState(SCE_PL_OPERATOR);
856 break;
857 }
858 // hex then dot (go through)
859 } else if (IsADigit(sc.chNext)) // vectors
860 break;
861 // vector then dot (go through)
862 }
863 } else if (sc.ch == '_') {
864 // permissive underscoring for number and vector literals
865 break;
866 } else if (numState == PERLNUM_DECIMAL) {
867 if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
868 numState = PERLNUM_FLOAT_EXP;
869 if (sc.chNext == '+' || sc.chNext == '-') {
870 sc.Forward();
871 }
872 break;
873 } else if (IsADigit(sc.ch))
874 break;
875 // number then word (go through)
876 } else if (numState == PERLNUM_HEX) {
877 if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign
878 numState = PERLNUM_FLOAT_EXP;
879 if (sc.chNext == '+' || sc.chNext == '-') {
880 sc.Forward();
881 }
882 break;
883 } else if (IsADigit(sc.ch, 16))
884 break;
885 // hex or hex float then word (go through)
886 } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
887 if (IsADigit(sc.ch)) // vector
888 break;
889 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
890 sc.ChangeState(SCE_PL_IDENTIFIER);
891 break;
892 }
893 // vector then word (go through)
894 } else if (IsADigit(sc.ch)) {
895 if (numState == PERLNUM_FLOAT_EXP) {
896 break;
897 } else if (numState == PERLNUM_OCTAL) {
898 if (sc.ch <= '7') break;
899 } else if (numState == PERLNUM_BINARY) {
900 if (sc.ch <= '1') break;
901 }
902 // mark invalid octal, binary numbers (go through)
903 numState = PERLNUM_BAD;
904 break;
905 }
906 // complete current number or vector
907 sc.ChangeState(actualNumStyle(numState));
908 sc.SetState(SCE_PL_DEFAULT);
909 break;
910 case SCE_PL_COMMENTLINE:
911 if (sc.atLineStart) {
912 sc.SetState(SCE_PL_DEFAULT);
913 }
914 break;
915 case SCE_PL_HERE_DELIM:
916 if (HereDoc.State == 0) { // '<<' encountered
917 int delim_ch = sc.chNext;
918 Sci_Position ws_skip = 0;
919 HereDoc.State = 1; // pre-init HERE doc class
920 HereDoc.Quote = sc.chNext;
921 HereDoc.Quoted = false;
922 HereDoc.StripIndent = false;
923 HereDoc.DelimiterLength = 0;
924 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
925 if (delim_ch == '~') { // was actually '<<~'
926 sc.Forward();
927 HereDoc.StripIndent = true;
928 HereDoc.Quote = delim_ch = sc.chNext;
929 }
930 if (IsASpaceOrTab(delim_ch)) {
931 // skip whitespace; legal only for quoted delimiters
932 Sci_PositionU i = sc.currentPos + 1;
933 while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
934 i++;
935 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
936 }
937 ws_skip = i - sc.currentPos - 1;
938 }
939 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
940 // a quoted here-doc delimiter; skip any whitespace
941 sc.Forward(ws_skip + 1);
942 HereDoc.Quote = delim_ch;
943 HereDoc.Quoted = true;
944 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
945 || ws_skip > 0) {
946 // left shift << or <<= operator cases
947 // restore position if operator
948 sc.ChangeState(SCE_PL_OPERATOR);
949 sc.ForwardSetState(SCE_PL_DEFAULT);
950 backFlag = BACK_OPERATOR;
951 backPos = sc.currentPos;
952 HereDoc.State = 0;
953 } else {
954 // specially handle initial '\' for identifier
955 if (ws_skip == 0 && HereDoc.Quote == '\\')
956 sc.Forward();
957 // an unquoted here-doc delimiter, no special handling
958 // (cannot be prefixed by spaces/tabs), or
959 // symbols terminates; deprecated zero-length delimiter
960 }
961 } else if (HereDoc.State == 1) { // collect the delimiter
962 backFlag = BACK_NONE;
963 if (HereDoc.Quoted) { // a quoted here-doc delimiter
964 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
965 sc.ForwardSetState(SCE_PL_DEFAULT);
966 } else if (!sc.atLineEnd) {
967 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
968 sc.Forward();
969 }
970 if (sc.ch != '\r') { // skip CR if CRLF
971 int i = 0; // else append char, possibly an extended char
972 while (i < sc.width) {
973 HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
974 i++;
975 }
976 }
977 }
978 } else { // an unquoted here-doc delimiter, no extended charsets
979 if (setHereDocDelim.Contains(sc.ch)) {
980 HereDoc.Append(sc.ch);
981 } else {
982 sc.SetState(SCE_PL_DEFAULT);
983 }
984 }
985 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
986 sc.SetState(SCE_PL_ERROR);
987 HereDoc.State = 0;
988 }
989 }
990 break;
991 case SCE_PL_HERE_Q:
992 case SCE_PL_HERE_QQ:
993 case SCE_PL_HERE_QX:
994 // also implies HereDoc.State == 2
995 sc.Complete();
996 if (HereDoc.StripIndent) {
997 // skip whitespace
998 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
999 sc.Forward();
1000 }
1001 if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
1002 int c = sc.GetRelative(HereDoc.DelimiterLength);
1003 if (c == '\r' || c == '\n') { // peek first, do not consume match
1004 sc.ForwardBytes(HereDoc.DelimiterLength);
1005 sc.SetState(SCE_PL_DEFAULT);
1006 backFlag = BACK_NONE;
1007 HereDoc.State = 0;
1008 if (!sc.atLineEnd)
1009 sc.Forward();
1010 break;
1011 }
1012 }
1013 if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
1014 while (!sc.atLineEnd)
1015 sc.Forward();
1016 break;
1017 }
1018 while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
1019 int c, sLen = 0, endType = 0;
1020 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1021 // scan to break string into segments
1022 if (c == '\\') {
1023 endType = 1; break;
1024 } else if (c == '\r' || c == '\n') {
1025 endType = 2; break;
1026 }
1027 sLen++;
1028 }
1029 if (sLen > 0) // process non-empty segments
1030 InterpolateSegment(sc, sLen);
1031 if (endType == 1) {
1032 sc.Forward();
1033 // \ at end-of-line does not appear to have any effect, skip
1034 if (sc.ch != '\r' && sc.ch != '\n')
1035 sc.Forward();
1036 } else if (endType == 2) {
1037 if (!sc.atLineEnd)
1038 sc.Forward();
1039 }
1040 }
1041 break;
1042 case SCE_PL_POD:
1043 case SCE_PL_POD_VERB: {
1044 Sci_PositionU fw = sc.currentPos;
1045 Sci_Position ln = styler.GetLine(fw);
1046 if (sc.atLineStart && sc.Match("=cut")) { // end of POD
1047 sc.SetState(SCE_PL_POD);
1048 sc.Forward(4);
1049 sc.SetState(SCE_PL_DEFAULT);
1050 styler.SetLineState(ln, SCE_PL_POD);
1051 break;
1052 }
1053 int pod = podLineScan(styler, fw, endPos); // classify POD line
1054 styler.SetLineState(ln, pod);
1055 if (pod == SCE_PL_DEFAULT) {
1056 if (sc.state == SCE_PL_POD_VERB) {
1057 Sci_PositionU fw2 = fw;
1058 while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
1059 fw = fw2++; // penultimate line (last blank line)
1060 pod = podLineScan(styler, fw2, endPos);
1061 styler.SetLineState(styler.GetLine(fw2), pod);
1062 }
1063 if (pod == SCE_PL_POD) { // truncate verbatim POD early
1064 sc.SetState(SCE_PL_POD);
1065 } else
1066 fw = fw2;
1067 }
1068 } else {
1069 if (pod == SCE_PL_POD_VERB // still part of current paragraph
1070 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
1071 pod = SCE_PL_POD;
1072 styler.SetLineState(ln, pod);
1073 } else if (pod == SCE_PL_POD
1074 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
1075 pod = SCE_PL_POD_VERB;
1076 styler.SetLineState(ln, pod);
1077 }
1078 sc.SetState(pod);
1079 }
1080 sc.ForwardBytes(fw - sc.currentPos); // commit style
1081 }
1082 break;
1083 case SCE_PL_REGEX:
1084 case SCE_PL_STRING_QR:
1085 if (Quote.Rep <= 0) {
1086 if (!setModifiers.Contains(sc.ch))
1087 sc.SetState(SCE_PL_DEFAULT);
1088 } else if (!Quote.Up && !IsASpace(sc.ch)) {
1089 Quote.Open(sc.ch);
1090 } else {
1091 int c, sLen = 0, endType = 0;
1092 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1093 // scan to break string into segments
1094 if (IsASpace(c)) {
1095 break;
1096 } else if (c == '\\' && Quote.Up != '\\') {
1097 endType = 1; break;
1098 } else if (c == Quote.Down) {
1099 Quote.Count--;
1100 if (Quote.Count == 0) {
1101 Quote.Rep--;
1102 break;
1103 }
1104 } else if (c == Quote.Up)
1105 Quote.Count++;
1106 sLen++;
1107 }
1108 if (sLen > 0) { // process non-empty segments
1109 if (Quote.Up != '\'') {
1110 InterpolateSegment(sc, sLen, true);
1111 } else // non-interpolated path
1112 sc.Forward(sLen);
1113 }
1114 if (endType == 1)
1115 sc.Forward();
1116 }
1117 break;
1118 case SCE_PL_REGSUBST:
1119 case SCE_PL_XLAT:
1120 if (Quote.Rep <= 0) {
1121 if (!setModifiers.Contains(sc.ch))
1122 sc.SetState(SCE_PL_DEFAULT);
1123 } else if (!Quote.Up && !IsASpace(sc.ch)) {
1124 Quote.Open(sc.ch);
1125 } else {
1126 int c, sLen = 0, endType = 0;
1127 bool isPattern = (Quote.Rep == 2);
1128 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1129 // scan to break string into segments
1130 if (c == '\\' && Quote.Up != '\\') {
1131 endType = 2; break;
1132 } else if (Quote.Count == 0 && Quote.Rep == 1) {
1133 // We matched something like s(...) or tr{...}, Perl 5.10
1134 // appears to allow almost any character for use as the
1135 // next delimiters. Whitespace and comments are accepted in
1136 // between, but we'll limit to whitespace here.
1137 // For '#', if no whitespace in between, it's a delimiter.
1138 if (IsASpace(c)) {
1139 // Keep going
1140 } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1141 endType = 3;
1142 } else
1143 Quote.Open(c);
1144 break;
1145 } else if (c == Quote.Down) {
1146 Quote.Count--;
1147 if (Quote.Count == 0) {
1148 Quote.Rep--;
1149 endType = 1;
1150 }
1151 if (Quote.Up == Quote.Down)
1152 Quote.Count++;
1153 if (endType == 1)
1154 break;
1155 } else if (c == Quote.Up) {
1156 Quote.Count++;
1157 } else if (IsASpace(c))
1158 break;
1159 sLen++;
1160 }
1161 if (sLen > 0) { // process non-empty segments
1162 if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1163 InterpolateSegment(sc, sLen, isPattern);
1164 } else // non-interpolated path
1165 sc.Forward(sLen);
1166 }
1167 if (endType == 2) {
1168 sc.Forward();
1169 } else if (endType == 3)
1170 sc.SetState(SCE_PL_DEFAULT);
1171 }
1172 break;
1173 case SCE_PL_STRING_Q:
1174 case SCE_PL_STRING_QQ:
1175 case SCE_PL_STRING_QX:
1176 case SCE_PL_STRING_QW:
1177 case SCE_PL_STRING:
1178 case SCE_PL_CHARACTER:
1179 case SCE_PL_BACKTICKS:
1180 if (!Quote.Down && !IsASpace(sc.ch)) {
1181 Quote.Open(sc.ch);
1182 } else {
1183 int c, sLen = 0, endType = 0;
1184 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1185 // scan to break string into segments
1186 if (IsASpace(c)) {
1187 break;
1188 } else if (c == '\\' && Quote.Up != '\\') {
1189 endType = 2; break;
1190 } else if (c == Quote.Down) {
1191 Quote.Count--;
1192 if (Quote.Count == 0) {
1193 endType = 3; break;
1194 }
1195 } else if (c == Quote.Up)
1196 Quote.Count++;
1197 sLen++;
1198 }
1199 if (sLen > 0) { // process non-empty segments
1200 switch (sc.state) {
1201 case SCE_PL_STRING:
1202 case SCE_PL_STRING_QQ:
1203 case SCE_PL_BACKTICKS:
1204 InterpolateSegment(sc, sLen);
1205 break;
1206 case SCE_PL_STRING_QX:
1207 if (Quote.Up != '\'') {
1208 InterpolateSegment(sc, sLen);
1209 break;
1210 }
1211 // (continued for ' delim)
1212 // Falls through.
1213 default: // non-interpolated path
1214 sc.Forward(sLen);
1215 }
1216 }
1217 if (endType == 2) {
1218 sc.Forward();
1219 } else if (endType == 3)
1220 sc.ForwardSetState(SCE_PL_DEFAULT);
1221 }
1222 break;
1223 case SCE_PL_SUB_PROTOTYPE: {
1224 int i = 0;
1225 // forward scan; must all be valid proto characters
1226 while (setSubPrototype.Contains(sc.GetRelative(i)))
1227 i++;
1228 if (sc.GetRelative(i) == ')') { // valid sub prototype
1229 sc.ForwardBytes(i);
1230 sc.ForwardSetState(SCE_PL_DEFAULT);
1231 } else {
1232 // abandon prototype, restart from '('
1233 sc.ChangeState(SCE_PL_OPERATOR);
1234 sc.SetState(SCE_PL_DEFAULT);
1235 }
1236 }
1237 break;
1238 case SCE_PL_FORMAT: {
1239 sc.Complete();
1240 if (sc.Match('.')) {
1241 sc.Forward();
1242 if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1243 sc.SetState(SCE_PL_DEFAULT);
1244 }
1245 while (!sc.atLineEnd)
1246 sc.Forward();
1247 }
1248 break;
1249 case SCE_PL_ERROR:
1250 break;
1251 }
1252 // Needed for specific continuation styles (one follows the other)
1253 switch (sc.state) {
1254 // continued from SCE_PL_WORD
1255 case SCE_PL_FORMAT_IDENT:
1256 // occupies HereDoc state 3 to avoid clashing with HERE docs
1257 if (IsASpaceOrTab(sc.ch)) { // skip whitespace
1258 sc.ChangeState(SCE_PL_DEFAULT);
1259 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1260 sc.Forward();
1261 sc.SetState(SCE_PL_FORMAT_IDENT);
1262 }
1263 if (setFormatStart.Contains(sc.ch)) { // identifier or '='
1264 if (sc.ch != '=') {
1265 do {
1266 sc.Forward();
1267 } while (setFormat.Contains(sc.ch));
1268 }
1269 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1270 sc.Forward();
1271 if (sc.ch == '=') {
1272 sc.ForwardSetState(SCE_PL_DEFAULT);
1273 HereDoc.State = 3;
1274 } else {
1275 // invalid identifier; inexact fallback, but hey
1276 sc.ChangeState(SCE_PL_IDENTIFIER);
1277 sc.SetState(SCE_PL_DEFAULT);
1278 }
1279 } else {
1280 sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
1281 }
1282 backFlag = BACK_NONE;
1283 break;
1284 }
1285
1286 // Must check end of HereDoc states here before default state is handled
1287 if (HereDoc.State == 1 && sc.atLineEnd) {
1288 // Begin of here-doc (the line after the here-doc delimiter):
1289 // Lexically, the here-doc starts from the next line after the >>, but the
1290 // first line of here-doc seem to follow the style of the last EOL sequence
1291 int st_new = SCE_PL_HERE_QQ;
1292 HereDoc.State = 2;
1293 if (HereDoc.Quoted) {
1294 if (sc.state == SCE_PL_HERE_DELIM) {
1295 // Missing quote at end of string! We are stricter than perl.
1296 // Colour here-doc anyway while marking this bit as an error.
1297 sc.ChangeState(SCE_PL_ERROR);
1298 }
1299 switch (HereDoc.Quote) {
1300 case '\'':
1301 st_new = SCE_PL_HERE_Q;
1302 break;
1303 case '"' :
1304 st_new = SCE_PL_HERE_QQ;
1305 break;
1306 case '`' :
1307 st_new = SCE_PL_HERE_QX;
1308 break;
1309 }
1310 } else {
1311 if (HereDoc.Quote == '\\')
1312 st_new = SCE_PL_HERE_Q;
1313 }
1314 sc.SetState(st_new);
1315 }
1316 if (HereDoc.State == 3 && sc.atLineEnd) {
1317 // Start of format body.
1318 HereDoc.State = 0;
1319 sc.SetState(SCE_PL_FORMAT);
1320 }
1321
1322 // Determine if a new state should be entered.
1323 if (sc.state == SCE_PL_DEFAULT) {
1324 if (IsADigit(sc.ch) ||
1325 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1326 sc.SetState(SCE_PL_NUMBER);
1327 backFlag = BACK_NONE;
1328 numState = PERLNUM_DECIMAL;
1329 dotCount = 0;
1330 if (sc.ch == '0') { // hex,bin,octal
1331 if (sc.chNext == 'x' || sc.chNext == 'X') {
1332 numState = PERLNUM_HEX;
1333 } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1334 numState = PERLNUM_BINARY;
1335 } else if (IsADigit(sc.chNext)) {
1336 numState = PERLNUM_OCTAL;
1337 }
1338 if (numState != PERLNUM_DECIMAL) {
1339 sc.Forward();
1340 }
1341 } else if (sc.ch == 'v') { // vector
1342 numState = PERLNUM_V_VECTOR;
1343 }
1344 } else if (setWord.Contains(sc.ch)) {
1345 // if immediately prefixed by '::', always a bareword
1346 sc.SetState(SCE_PL_WORD);
1347 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1348 sc.ChangeState(SCE_PL_IDENTIFIER);
1349 }
1350 Sci_PositionU bk = sc.currentPos;
1351 Sci_PositionU fw = sc.currentPos + 1;
1352 // first check for possible quote-like delimiter
1353 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1354 sc.ChangeState(SCE_PL_REGSUBST);
1355 Quote.New(2);
1356 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1357 sc.ChangeState(SCE_PL_REGEX);
1358 Quote.New();
1359 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1360 sc.ChangeState(SCE_PL_STRING_Q);
1361 Quote.New();
1362 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1363 sc.ChangeState(SCE_PL_XLAT);
1364 Quote.New(2);
1365 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1366 sc.ChangeState(SCE_PL_XLAT);
1367 Quote.New(2);
1368 sc.Forward();
1369 fw++;
1370 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1371 && !setWord.Contains(sc.GetRelative(2))) {
1372 if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1373 else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1374 else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1375 else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
1376 Quote.New();
1377 sc.Forward();
1378 fw++;
1379 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1380 !setWord.Contains(sc.chNext) ||
1381 (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
1382 sc.ChangeState(SCE_PL_OPERATOR);
1383 }
1384 // if potentially a keyword, scan forward and grab word, then check
1385 // if it's really one; if yes, disambiguation test is performed
1386 // otherwise it is always a bareword and we skip a lot of scanning
1387 if (sc.state == SCE_PL_WORD) {
1388 while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1389 fw++;
1390 if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1391 sc.ChangeState(SCE_PL_IDENTIFIER);
1392 }
1393 }
1394 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1395 // for quote-like delimiters/keywords, attempt to disambiguate
1396 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1397 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1398 if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1399 sc.ChangeState(SCE_PL_IDENTIFIER);
1400 }
1401 backFlag = BACK_NONE;
1402 } else if (sc.ch == '#') {
1403 sc.SetState(SCE_PL_COMMENTLINE);
1404 } else if (sc.ch == '\"') {
1405 sc.SetState(SCE_PL_STRING);
1406 Quote.New();
1407 Quote.Open(sc.ch);
1408 backFlag = BACK_NONE;
1409 } else if (sc.ch == '\'') {
1410 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1411 // Archaic call
1412 sc.SetState(SCE_PL_IDENTIFIER);
1413 } else {
1414 sc.SetState(SCE_PL_CHARACTER);
1415 Quote.New();
1416 Quote.Open(sc.ch);
1417 }
1418 backFlag = BACK_NONE;
1419 } else if (sc.ch == '`') {
1420 sc.SetState(SCE_PL_BACKTICKS);
1421 Quote.New();
1422 Quote.Open(sc.ch);
1423 backFlag = BACK_NONE;
1424 } else if (sc.ch == '$') {
1425 sc.SetState(SCE_PL_SCALAR);
1426 if (sc.chNext == '{') {
1427 sc.ForwardSetState(SCE_PL_OPERATOR);
1428 } else if (IsASpace(sc.chNext)) {
1429 sc.ForwardSetState(SCE_PL_DEFAULT);
1430 } else {
1431 sc.Forward();
1432 if (sc.Match('`', '`') || sc.Match(':', ':')) {
1433 sc.Forward();
1434 }
1435 }
1436 backFlag = BACK_NONE;
1437 } else if (sc.ch == '@') {
1438 sc.SetState(SCE_PL_ARRAY);
1439 if (setArray.Contains(sc.chNext)) {
1440 // no special treatment
1441 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1442 sc.ForwardBytes(2);
1443 } else if (sc.chNext == '{' || sc.chNext == '[') {
1444 sc.ForwardSetState(SCE_PL_OPERATOR);
1445 } else {
1446 sc.ChangeState(SCE_PL_OPERATOR);
1447 }
1448 backFlag = BACK_NONE;
1449 } else if (setPreferRE.Contains(sc.ch)) {
1450 // Explicit backward peeking to set a consistent preferRE for
1451 // any slash found, so no longer need to track preferRE state.
1452 // Find first previous significant lexed element and interpret.
1453 // A few symbols shares this code for disambiguation.
1454 bool preferRE = false;
1455 bool isHereDoc = sc.Match('<', '<');
1456 bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
1457 Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1458 sc.Complete();
1459 styler.Flush();
1460 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1461 hereDocSpace = true;
1462 skipWhitespaceComment(styler, bk);
1463 if (bk == 0) {
1464 // avoid backward scanning breakage
1465 preferRE = true;
1466 } else {
1467 int bkstyle = styler.StyleAt(bk);
1468 int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1469 switch (bkstyle) {
1470 case SCE_PL_OPERATOR:
1471 preferRE = true;
1472 if (bkch == ')' || bkch == ']') {
1473 preferRE = false;
1474 } else if (bkch == '}') {
1475 // backtrack by counting balanced brace pairs
1476 // needed to test for variables like ${}, @{} etc.
1477 bkstyle = styleBeforeBracePair(styler, bk);
1478 if (bkstyle == SCE_PL_SCALAR
1479 || bkstyle == SCE_PL_ARRAY
1480 || bkstyle == SCE_PL_HASH
1481 || bkstyle == SCE_PL_SYMBOLTABLE
1482 || bkstyle == SCE_PL_OPERATOR) {
1483 preferRE = false;
1484 }
1485 } else if (bkch == '+' || bkch == '-') {
1486 if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1487 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1488 // exceptions for operators: unary suffixes ++, --
1489 preferRE = false;
1490 }
1491 break;
1492 case SCE_PL_IDENTIFIER:
1493 preferRE = true;
1494 bkstyle = styleCheckIdentifier(styler, bk);
1495 if ((bkstyle == 1) || (bkstyle == 2)) {
1496 // inputsymbol or var with "->" or "::" before identifier
1497 preferRE = false;
1498 } else if (bkstyle == 3) {
1499 // bare identifier, test cases follows:
1500 if (sc.ch == '/') {
1501 // if '/', /PATTERN/ unless digit/space immediately after '/'
1502 // if '//', always expect defined-or operator to follow identifier
1503 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1504 preferRE = false;
1505 } else if (sc.ch == '*' || sc.ch == '%') {
1506 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1507 preferRE = false;
1508 } else if (sc.ch == '<') {
1509 if (IsASpace(sc.chNext) || sc.chNext == '=')
1510 preferRE = false;
1511 }
1512 }
1513 break;
1514 case SCE_PL_SCALAR: // for $var<< case:
1515 if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
1516 preferRE = true;
1517 break;
1518 case SCE_PL_WORD:
1519 preferRE = true;
1520 // for HERE docs, always true
1521 if (sc.ch == '/') {
1522 // adopt heuristics similar to vim-style rules:
1523 // keywords always forced as /PATTERN/: split, if, elsif, while
1524 // everything else /PATTERN/ unless digit/space immediately after '/'
1525 // for '//', defined-or favoured unless special keywords
1526 Sci_PositionU bkend = bk + 1;
1527 while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1528 bk--;
1529 }
1530 if (isPerlKeyword(bk, bkend, reWords, styler))
1531 break;
1532 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1533 preferRE = false;
1534 } else if (sc.ch == '*' || sc.ch == '%') {
1535 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1536 preferRE = false;
1537 } else if (sc.ch == '<') {
1538 if (IsASpace(sc.chNext) || sc.chNext == '=')
1539 preferRE = false;
1540 }
1541 break;
1542
1543 // other styles uses the default, preferRE=false
1544 case SCE_PL_POD:
1545 case SCE_PL_HERE_Q:
1546 case SCE_PL_HERE_QQ:
1547 case SCE_PL_HERE_QX:
1548 preferRE = true;
1549 break;
1550 }
1551 }
1552 backFlag = BACK_NONE;
1553 if (isHereDoc) { // handle '<<', HERE doc
1554 if (sc.Match("<<>>")) { // double-diamond operator (5.22)
1555 sc.SetState(SCE_PL_OPERATOR);
1556 sc.Forward(3);
1557 } else if (preferRE) {
1558 sc.SetState(SCE_PL_HERE_DELIM);
1559 HereDoc.State = 0;
1560 } else { // << operator
1561 sc.SetState(SCE_PL_OPERATOR);
1562 sc.Forward();
1563 }
1564 } else if (sc.ch == '*') { // handle '*', typeglob
1565 if (preferRE) {
1566 sc.SetState(SCE_PL_SYMBOLTABLE);
1567 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1568 sc.ForwardBytes(2);
1569 } else if (sc.chNext == '{') {
1570 sc.ForwardSetState(SCE_PL_OPERATOR);
1571 } else {
1572 sc.Forward();
1573 }
1574 } else {
1575 sc.SetState(SCE_PL_OPERATOR);
1576 if (sc.chNext == '*') // exponentiation
1577 sc.Forward();
1578 }
1579 } else if (sc.ch == '%') { // handle '%', hash
1580 if (preferRE) {
1581 sc.SetState(SCE_PL_HASH);
1582 if (setHash.Contains(sc.chNext)) {
1583 sc.Forward();
1584 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1585 sc.ForwardBytes(2);
1586 } else if (sc.chNext == '{') {
1587 sc.ForwardSetState(SCE_PL_OPERATOR);
1588 } else {
1589 sc.ChangeState(SCE_PL_OPERATOR);
1590 }
1591 } else {
1592 sc.SetState(SCE_PL_OPERATOR);
1593 }
1594 } else if (sc.ch == '<') { // handle '<', inputsymbol
1595 if (preferRE) {
1596 // forward scan
1597 int i = InputSymbolScan(sc);
1598 if (i > 0) {
1599 sc.SetState(SCE_PL_IDENTIFIER);
1600 sc.Forward(i);
1601 } else {
1602 sc.SetState(SCE_PL_OPERATOR);
1603 }
1604 } else {
1605 sc.SetState(SCE_PL_OPERATOR);
1606 }
1607 } else { // handle '/', regexp
1608 if (preferRE) {
1609 sc.SetState(SCE_PL_REGEX);
1610 Quote.New();
1611 Quote.Open(sc.ch);
1612 } else { // / and // operators
1613 sc.SetState(SCE_PL_OPERATOR);
1614 if (sc.chNext == '/') {
1615 sc.Forward();
1616 }
1617 }
1618 }
1619 } else if (sc.ch == '=' // POD
1620 && setPOD.Contains(sc.chNext)
1621 && sc.atLineStart) {
1622 sc.SetState(SCE_PL_POD);
1623 backFlag = BACK_NONE;
1624 } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1625 Sci_PositionU bk = sc.currentPos;
1626 Sci_PositionU fw = 2;
1627 if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1628 !setWord.Contains(sc.GetRelative(2))) {
1629 sc.SetState(SCE_PL_WORD);
1630 } else {
1631 // nominally a minus and bareword; find extent of bareword
1632 while (setWord.Contains(sc.GetRelative(fw)))
1633 fw++;
1634 sc.SetState(SCE_PL_OPERATOR);
1635 }
1636 // force to bareword for hash key => or {variable literal} cases
1637 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1638 sc.ChangeState(SCE_PL_IDENTIFIER);
1639 }
1640 backFlag = BACK_NONE;
1641 } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1642 sc.Complete();
1643 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1644 sc.SetState(SCE_PL_SUB_PROTOTYPE);
1645 backFlag = BACK_NONE;
1646 } else {
1647 sc.SetState(SCE_PL_OPERATOR);
1648 }
1649 } else if (setPerlOperator.Contains(sc.ch)) { // operators
1650 sc.SetState(SCE_PL_OPERATOR);
1651 if (sc.Match('.', '.')) { // .. and ...
1652 sc.Forward();
1653 if (sc.chNext == '.') sc.Forward();
1654 }
1655 } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
1656 sc.SetState(SCE_PL_DATASECTION);
1657 } else {
1658 // keep colouring defaults
1659 sc.Complete();
1660 }
1661 }
1662 }
1663 sc.Complete();
1664 if (sc.state == SCE_PL_HERE_Q
1665 || sc.state == SCE_PL_HERE_QQ
1666 || sc.state == SCE_PL_HERE_QX
1667 || sc.state == SCE_PL_FORMAT) {
1668 styler.ChangeLexerState(sc.currentPos, styler.Length());
1669 }
1670 sc.Complete();
1671}
1672
1673#define PERL_HEADFOLD_SHIFT 4
1674#define PERL_HEADFOLD_MASK 0xF0
1675
1676void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
1677
1678 if (!options.fold)
1679 return;
1680
1681 LexAccessor styler(pAccess);
1682
1683 Sci_PositionU endPos = startPos + length;
1684 int visibleChars = 0;
1685 Sci_Position lineCurrent = styler.GetLine(startPos);
1686
1687 // Backtrack to previous line in case need to fix its fold status
1688 if (startPos > 0) {
1689 if (lineCurrent > 0) {
1690 lineCurrent--;
1691 startPos = styler.LineStart(lineCurrent);
1692 }
1693 }
1694
1695 int levelPrev = SC_FOLDLEVELBASE;
1696 if (lineCurrent > 0)
1697 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1698 int levelCurrent = levelPrev;
1699 char chNext = styler[startPos];
1700 char chPrev = styler.SafeGetCharAt(startPos - 1);
1701 int styleNext = styler.StyleAt(startPos);
1702 // Used at end of line to determine if the line was a package definition
1703 bool isPackageLine = false;
1704 int podHeading = 0;
1705 for (Sci_PositionU i = startPos; i < endPos; i++) {
1706 char ch = chNext;
1707 chNext = styler.SafeGetCharAt(i + 1);
1708 int style = styleNext;
1709 styleNext = styler.StyleAt(i + 1);
1710 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1711 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1712 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1713 // Comment folding
1714 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1715 if (!IsCommentLine(lineCurrent - 1, styler)
1716 && IsCommentLine(lineCurrent + 1, styler))
1717 levelCurrent++;
1718 else if (IsCommentLine(lineCurrent - 1, styler)
1719 && !IsCommentLine(lineCurrent + 1, styler))
1720 levelCurrent--;
1721 }
1722 // {} [] block folding
1723 if (style == SCE_PL_OPERATOR) {
1724 if (ch == '{') {
1725 if (options.foldAtElse && levelCurrent < levelPrev)
1726 --levelPrev;
1727 levelCurrent++;
1728 } else if (ch == '}') {
1729 levelCurrent--;
1730 }
1731 if (ch == '[') {
1732 if (options.foldAtElse && levelCurrent < levelPrev)
1733 --levelPrev;
1734 levelCurrent++;
1735 } else if (ch == ']') {
1736 levelCurrent--;
1737 }
1738 } else if (style == SCE_PL_STRING_QW) {
1739 // qw
1740 if (stylePrevCh != style)
1741 levelCurrent++;
1742 else if (styleNext != style)
1743 levelCurrent--;
1744 }
1745 // POD folding
1746 if (options.foldPOD && atLineStart) {
1747 if (style == SCE_PL_POD) {
1748 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1749 levelCurrent++;
1750 else if (styler.Match(i, "=cut"))
1751 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1752 else if (styler.Match(i, "=head"))
1753 podHeading = PodHeadingLevel(i, styler);
1754 } else if (style == SCE_PL_DATASECTION) {
1755 if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1756 levelCurrent++;
1757 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1758 levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1759 else if (styler.Match(i, "=head"))
1760 podHeading = PodHeadingLevel(i, styler);
1761 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1762 // reset needed as level test is vs. SC_FOLDLEVELBASE
1763 else if (stylePrevCh != SCE_PL_DATASECTION)
1764 levelCurrent = SC_FOLDLEVELBASE;
1765 }
1766 }
1767 // package folding
1768 if (options.foldPackage && atLineStart) {
1769 if (IsPackageLine(lineCurrent, styler)
1770 && !IsPackageLine(lineCurrent + 1, styler))
1771 isPackageLine = true;
1772 }
1773
1774 //heredoc folding
1775 switch (style) {
1776 case SCE_PL_HERE_QQ :
1777 case SCE_PL_HERE_Q :
1778 case SCE_PL_HERE_QX :
1779 switch (stylePrevCh) {
1780 case SCE_PL_HERE_QQ :
1781 case SCE_PL_HERE_Q :
1782 case SCE_PL_HERE_QX :
1783 //do nothing;
1784 break;
1785 default :
1786 levelCurrent++;
1787 break;
1788 }
1789 break;
1790 default:
1791 switch (stylePrevCh) {
1792 case SCE_PL_HERE_QQ :
1793 case SCE_PL_HERE_Q :
1794 case SCE_PL_HERE_QX :
1795 levelCurrent--;
1796 break;
1797 default :
1798 //do nothing;
1799 break;
1800 }
1801 break;
1802 }
1803
1804 //explicit folding
1805 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1806 if (chNext == '{') {
1807 levelCurrent++;
1808 } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') {
1809 levelCurrent--;
1810 }
1811 }
1812
1813 if (atEOL) {
1814 int lev = levelPrev;
1815 // POD headings occupy bits 7-4, leaving some breathing room for
1816 // non-standard practice -- POD sections stuck in blocks, etc.
1817 if (podHeading > 0) {
1818 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1819 lev = levelCurrent - 1;
1820 lev |= SC_FOLDLEVELHEADERFLAG;
1821 podHeading = 0;
1822 }
1823 // Check if line was a package declaration
1824 // because packages need "special" treatment
1825 if (isPackageLine) {
1826 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1827 levelCurrent = SC_FOLDLEVELBASE + 1;
1828 isPackageLine = false;
1829 }
1830 lev |= levelCurrent << 16;
1831 if (visibleChars == 0 && options.foldCompact)
1832 lev |= SC_FOLDLEVELWHITEFLAG;
1833 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1834 lev |= SC_FOLDLEVELHEADERFLAG;
1835 if (lev != styler.LevelAt(lineCurrent)) {
1836 styler.SetLevel(lineCurrent, lev);
1837 }
1838 lineCurrent++;
1839 levelPrev = levelCurrent;
1840 visibleChars = 0;
1841 }
1842 if (!isspacechar(ch))
1843 visibleChars++;
1844 chPrev = ch;
1845 }
1846 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1847 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1848 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1849}
1850
// Module object for the Perl lexer: associates the SCLEX_PERL id and the
// name "perl" with the LexerPerl factory function and the keyword list
// descriptions in perlWordListDesc.
LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);
1852