1 | // Scintilla source code edit control |
2 | /** @file LexPerl.cxx |
3 | ** Lexer for Perl. |
4 | ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net> |
5 | **/ |
6 | // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org> |
7 | // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> |
8 | // The License.txt file describes the conditions under which this software may be distributed. |
9 | |
10 | #include <stdlib.h> |
11 | #include <string.h> |
12 | #include <stdio.h> |
13 | #include <stdarg.h> |
14 | #include <assert.h> |
15 | #include <ctype.h> |
16 | |
17 | #include <string> |
18 | #include <string_view> |
19 | #include <map> |
20 | #include <functional> |
21 | |
22 | #include "ILexer.h" |
23 | #include "Scintilla.h" |
24 | #include "SciLexer.h" |
25 | |
26 | #include "WordList.h" |
27 | #include "LexAccessor.h" |
28 | #include "StyleContext.h" |
29 | #include "CharacterSet.h" |
30 | #include "LexerModule.h" |
31 | #include "OptionSet.h" |
32 | #include "DefaultLexer.h" |
33 | |
34 | using namespace Scintilla; |
35 | using namespace Lexilla; |
36 | |
37 | // Info for HERE document handling from perldata.pod (reformatted): |
38 | // ---------------------------------------------------------------- |
39 | // A line-oriented form of quoting is based on the shell ``here-doc'' syntax. |
40 | // Following a << you specify a string to terminate the quoted material, and |
41 | // all lines following the current line down to the terminating string are |
42 | // the value of the item. |
43 | // Prefixing the terminating string with a "~" specifies that you want to |
44 | // use "Indented Here-docs" (see below). |
45 | // * The terminating string may be either an identifier (a word), or some |
46 | // quoted text. |
47 | // * If quoted, the type of quotes you use determines the treatment of the |
48 | // text, just as in regular quoting. |
49 | // * An unquoted identifier works like double quotes. |
50 | // * There must be no space between the << and the identifier. |
51 | // (If you put a space it will be treated as a null identifier, |
52 | // which is valid, and matches the first empty line.) |
53 | // (This is deprecated, -w warns of this syntax) |
54 | // * The terminating string must appear by itself (unquoted and |
55 | // with no surrounding whitespace) on the terminating line. |
56 | // |
57 | // Indented Here-docs |
58 | // ------------------ |
59 | // The here-doc modifier "~" allows you to indent your here-docs to |
60 | // make the code more readable. |
61 | // The delimiter is used to determine the exact whitespace to remove |
62 | // from the beginning of each line. All lines must have at least the |
63 | // same starting whitespace (except lines only containing a newline) |
64 | // or perl will croak. Tabs and spaces can be mixed, but are matched |
65 | // exactly. One tab will not be equal to 8 spaces! |
66 | // Additional beginning whitespace (beyond what preceded the |
67 | // delimiter) will be preserved. |
68 | |
// Lexer-wide integer constants. They are typed constants rather than
// preprocessor macros so that they obey normal C++ scoping and type checking;
// names and values are unchanged.

// Maximum length of a HERE doc delimiter.
constexpr int HERE_DELIM_MAX = 256;

// Number lexing states. The order is significant: states 1-3 cannot have a
// dot; states 1-5 are numbers while 6-7 are strings.
constexpr int PERLNUM_BINARY = 1;
constexpr int PERLNUM_OCTAL = 2;
constexpr int PERLNUM_FLOAT_EXP = 3;    // exponent part only
constexpr int PERLNUM_HEX = 4;          // may be a hex float
constexpr int PERLNUM_DECIMAL = 5;
constexpr int PERLNUM_VECTOR = 6;
constexpr int PERLNUM_V_VECTOR = 7;
constexpr int PERLNUM_BAD = 8;

// Lookback state for bareword disambiguation. Whitespace/comments are
// insignificant; operators/keywords are needed for disambiguation.
constexpr int BACK_NONE = 0;
constexpr int BACK_OPERATOR = 1;
constexpr int BACK_KEYWORD = 2;

// States for the subroutine prototype scan.
constexpr int SUB_BEGIN = 0;
constexpr int SUB_HAS_PROTO = 1;        // only 'prototype' attribute allows prototypes
constexpr int SUB_HAS_ATTRIB = 2;       // other attributes can exist leftward
constexpr int SUB_HAS_MODULE = 3;       // sub name can have a ::identifier part
constexpr int SUB_HAS_SUB = 4;          // 'sub' keyword
89 | |
// Every interpolated ("_VAR") style differs from its parent style by the same constant offset.
// We also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value.
92 | #define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING) |
93 | |
94 | static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) { |
95 | // old-style keyword matcher; needed because GetCurrent() needs |
96 | // current segment to be committed, but we may abandon early... |
97 | char s[100]; |
98 | Sci_PositionU i, len = end - start; |
99 | if (len > 30) { len = 30; } |
100 | for (i = 0; i < len; i++, start++) s[i] = styler[start]; |
101 | s[i] = '\0'; |
102 | return keywords.InList(s); |
103 | } |
104 | |
105 | static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw, |
106 | int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) { |
107 | // identifiers are recognized by Perl as barewords under some |
108 | // conditions, the following attempts to do the disambiguation |
109 | // by looking backward and forward; result in 2 LSB |
110 | int result = 0; |
111 | bool moreback = false; // true if passed newline/comments |
112 | bool brace = false; // true if opening brace found |
113 | // if BACK_NONE, neither operator nor keyword, so skip test |
114 | if (backFlag == BACK_NONE) |
115 | return result; |
116 | // first look backwards past whitespace/comments to set EOL flag |
117 | // (some disambiguation patterns must be on a single line) |
118 | if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk)))) |
119 | moreback = true; |
120 | // look backwards at last significant lexed item for disambiguation |
121 | bk = backPos - 1; |
122 | int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); |
123 | if (ch == '{' && !moreback) { |
124 | // {bareword: possible variable spec |
125 | brace = true; |
126 | } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&') |
127 | // &bareword: subroutine call |
128 | || styler.Match(bk - 1, "->" ) |
129 | // ->bareword: part of variable spec |
130 | || styler.Match(bk - 1, "::" ) |
131 | // ::bareword: part of module spec |
132 | || styler.Match(bk - 2, "sub" )) { |
133 | // sub bareword: subroutine declaration |
134 | // (implied BACK_KEYWORD, no keywords end in 'sub'!) |
135 | result |= 1; |
136 | } |
137 | // next, scan forward after word past tab/spaces only; |
138 | // if ch isn't one of '[{(,' we can skip the test |
139 | if ((ch == '{' || ch == '(' || ch == '['|| ch == ',') |
140 | && fw < endPos) { |
141 | while (IsASpaceOrTab(ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw))) |
142 | && fw < endPos) { |
143 | fw++; |
144 | } |
145 | if ((ch == '}' && brace) |
146 | // {bareword}: variable spec |
147 | || styler.Match(fw, "=>" )) { |
148 | // [{(, bareword=>: hash literal |
149 | result |= 2; |
150 | } |
151 | } |
152 | return result; |
153 | } |
154 | |
155 | static void (LexAccessor &styler, Sci_PositionU &p) { |
156 | // when backtracking, we need to skip whitespace and comments |
157 | while (p > 0) { |
158 | const int style = styler.StyleAt(p); |
159 | if (style != SCE_PL_DEFAULT && style != SCE_PL_COMMENTLINE) |
160 | break; |
161 | p--; |
162 | } |
163 | } |
164 | |
165 | static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) { |
166 | // scan backward past whitespace and comments to find a lexeme |
167 | skipWhitespaceComment(styler, bk); |
168 | if (bk == 0) |
169 | return 0; |
170 | int sz = 1; |
171 | style = styler.StyleAt(bk); |
172 | while (bk > 0) { // find extent of lexeme |
173 | if (styler.StyleAt(bk - 1) == style) { |
174 | bk--; sz++; |
175 | } else |
176 | break; |
177 | } |
178 | return sz; |
179 | } |
180 | |
181 | static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) { |
182 | // backtrack to find open '{' corresponding to a '}', balanced |
183 | // return significant style to be tested for '/' disambiguation |
184 | int braceCount = 1; |
185 | if (bk == 0) |
186 | return SCE_PL_DEFAULT; |
187 | while (--bk > 0) { |
188 | if (styler.StyleAt(bk) == SCE_PL_OPERATOR) { |
189 | int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); |
190 | if (bkch == ';') { // early out |
191 | break; |
192 | } else if (bkch == '}') { |
193 | braceCount++; |
194 | } else if (bkch == '{') { |
195 | if (--braceCount == 0) break; |
196 | } |
197 | } |
198 | } |
199 | if (bk > 0 && braceCount == 0) { |
200 | // balanced { found, bk > 0, skip more whitespace/comments |
201 | bk--; |
202 | skipWhitespaceComment(styler, bk); |
203 | return styler.StyleAt(bk); |
204 | } |
205 | return SCE_PL_DEFAULT; |
206 | } |
207 | |
208 | static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) { |
209 | // backtrack to classify sub-styles of identifier under test |
210 | // return sub-style to be tested for '/' disambiguation |
211 | if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo> |
212 | return 1; |
213 | // backtrack to check for possible "->" or "::" before identifier |
214 | while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { |
215 | bk--; |
216 | } |
217 | while (bk > 0) { |
218 | int bkstyle = styler.StyleAt(bk); |
219 | if (bkstyle == SCE_PL_DEFAULT |
220 | || bkstyle == SCE_PL_COMMENTLINE) { |
221 | // skip whitespace, comments |
222 | } else if (bkstyle == SCE_PL_OPERATOR) { |
223 | // test for "->" and "::" |
224 | if (styler.Match(bk - 1, "->" ) || styler.Match(bk - 1, "::" )) |
225 | return 2; |
226 | } else |
227 | return 3; // bare identifier |
228 | bk--; |
229 | } |
230 | return 0; |
231 | } |
232 | |
233 | static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) { |
234 | // forward scan the current line to classify line for POD style |
235 | int state = -1; |
236 | while (pos < endPos) { |
237 | int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); |
238 | if (ch == '\n' || ch == '\r') { |
239 | if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++; |
240 | break; |
241 | } |
242 | if (IsASpaceOrTab(ch)) { // whitespace, take note |
243 | if (state == -1) |
244 | state = SCE_PL_DEFAULT; |
245 | } else if (state == SCE_PL_DEFAULT) { // verbatim POD line |
246 | state = SCE_PL_POD_VERB; |
247 | } else if (state != SCE_PL_POD_VERB) { // regular POD line |
248 | state = SCE_PL_POD; |
249 | } |
250 | pos++; |
251 | } |
252 | if (state == -1) |
253 | state = SCE_PL_DEFAULT; |
254 | return state; |
255 | } |
256 | |
257 | static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) { |
258 | // backtrack to identify if we're starting a subroutine prototype |
259 | // we also need to ignore whitespace/comments, format is like: |
260 | // sub abc::pqr :const :prototype(...) |
261 | // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc. |
262 | // and a state machine generates legal subroutine syntax matches |
263 | styler.Flush(); |
264 | int state = SUB_BEGIN; |
265 | do { |
266 | // find two lexemes, lexeme 2 follows lexeme 1 |
267 | int style2 = SCE_PL_DEFAULT; |
268 | Sci_PositionU pos2 = bk; |
269 | int len2 = findPrevLexeme(styler, pos2, style2); |
270 | int style1 = SCE_PL_DEFAULT; |
271 | Sci_PositionU pos1 = pos2; |
272 | if (pos1 > 0) pos1--; |
273 | int len1 = findPrevLexeme(styler, pos1, style1); |
274 | if (len1 == 0 || len2 == 0) // lexeme pair must exist |
275 | break; |
276 | |
277 | // match parts of syntax, if invalid subroutine syntax, break off |
278 | if (style1 == SCE_PL_OPERATOR && len1 == 1 && |
279 | styler.SafeGetCharAt(pos1) == ':') { // ':' |
280 | if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) { |
281 | if (len2 == 9 && styler.Match(pos2, "prototype" )) { // ':' 'prototype' |
282 | if (state == SUB_BEGIN) { |
283 | state = SUB_HAS_PROTO; |
284 | } else |
285 | break; |
286 | } else { // ':' <attribute> |
287 | if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) { |
288 | state = SUB_HAS_ATTRIB; |
289 | } else |
290 | break; |
291 | } |
292 | } else |
293 | break; |
294 | } else if (style1 == SCE_PL_OPERATOR && len1 == 2 && |
295 | styler.Match(pos1, "::" )) { // '::' |
296 | if (style2 == SCE_PL_IDENTIFIER) { // '::' <identifier> |
297 | state = SUB_HAS_MODULE; |
298 | } else |
299 | break; |
300 | } else if (style1 == SCE_PL_WORD && len1 == 3 && |
301 | styler.Match(pos1, "sub" )) { // 'sub' |
302 | if (style2 == SCE_PL_IDENTIFIER) { // 'sub' <identifier> |
303 | state = SUB_HAS_SUB; |
304 | } else |
305 | break; |
306 | } else |
307 | break; |
308 | bk = pos1; // set position for finding next lexeme pair |
309 | if (bk > 0) bk--; |
310 | } while (state != SUB_HAS_SUB); |
311 | return (state == SUB_HAS_SUB); |
312 | } |
313 | |
314 | static int actualNumStyle(int numberStyle) { |
315 | if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { |
316 | return SCE_PL_STRING; |
317 | } else if (numberStyle == PERLNUM_BAD) { |
318 | return SCE_PL_ERROR; |
319 | } |
320 | return SCE_PL_NUMBER; |
321 | } |
322 | |
// Returns the closing delimiter that pairs with an opening bracket character
// ('(' '[' '{' '<'); any other character is its own closing delimiter.
static int opposite(int ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
330 | |
331 | static bool (Sci_Position line, LexAccessor &styler) { |
332 | Sci_Position pos = styler.LineStart(line); |
333 | Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
334 | for (Sci_Position i = pos; i < eol_pos; i++) { |
335 | char ch = styler[i]; |
336 | int style = styler.StyleAt(i); |
337 | if (ch == '#' && style == SCE_PL_COMMENTLINE) |
338 | return true; |
339 | else if (!IsASpaceOrTab(ch)) |
340 | return false; |
341 | } |
342 | return false; |
343 | } |
344 | |
345 | static bool IsPackageLine(Sci_Position line, LexAccessor &styler) { |
346 | Sci_Position pos = styler.LineStart(line); |
347 | int style = styler.StyleAt(pos); |
348 | if (style == SCE_PL_WORD && styler.Match(pos, "package" )) { |
349 | return true; |
350 | } |
351 | return false; |
352 | } |
353 | |
354 | static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) { |
355 | int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5)); |
356 | if (lvl >= '1' && lvl <= '4') { |
357 | return lvl - '0'; |
358 | } |
359 | return 0; |
360 | } |
361 | |
362 | // An individual named option for use in an OptionSet |
363 | |
364 | // Options used for LexerPerl |
// Option values for LexerPerl. Each member is bound to a property name by
// OptionSetPerl; the initializers below are the defaults.
struct OptionsPerl {
    bool fold = false;                  // fold
    bool foldComment = false;           // fold.comment
    bool foldCompact = true;            // fold.compact

    // Custom folding of POD and packages
    bool foldPOD = true;                // fold.perl.pod
    // Enable folding Pod blocks when using the Perl lexer.
    bool foldPackage = true;            // fold.perl.package
    // Enable folding packages when using the Perl lexer.

    bool foldCommentExplicit = true;    // fold.perl.comment.explicit

    bool foldAtElse = false;            // fold.perl.at.else
};
389 | |
// Null-terminated list of word list descriptions for the Perl lexer; there is
// a single word list, described as "Keywords".
static const char *const perlWordListDesc[] = {
    "Keywords",
    nullptr
};
394 | |
395 | struct OptionSetPerl : public OptionSet<OptionsPerl> { |
396 | OptionSetPerl() { |
397 | DefineProperty("fold" , &OptionsPerl::fold); |
398 | |
399 | DefineProperty("fold.comment" , &OptionsPerl::foldComment); |
400 | |
401 | DefineProperty("fold.compact" , &OptionsPerl::foldCompact); |
402 | |
403 | DefineProperty("fold.perl.pod" , &OptionsPerl::foldPOD, |
404 | "Set to 0 to disable folding Pod blocks when using the Perl lexer." ); |
405 | |
406 | DefineProperty("fold.perl.package" , &OptionsPerl::foldPackage, |
407 | "Set to 0 to disable folding packages when using the Perl lexer." ); |
408 | |
409 | DefineProperty("fold.perl.comment.explicit" , &OptionsPerl::foldCommentExplicit, |
410 | "Set to 0 to disable explicit folding." ); |
411 | |
412 | DefineProperty("fold.perl.at.else" , &OptionsPerl::foldAtElse, |
413 | "This option enables Perl folding on a \"} else {\" line of an if statement." ); |
414 | |
415 | DefineWordListSets(perlWordListDesc); |
416 | } |
417 | }; |
418 | |
419 | class LexerPerl : public DefaultLexer { |
420 | CharacterSet setWordStart; |
421 | CharacterSet setWord; |
422 | CharacterSet setSpecialVar; |
423 | CharacterSet setControlVar; |
424 | WordList keywords; |
425 | OptionsPerl options; |
426 | OptionSetPerl osPerl; |
427 | public: |
428 | LexerPerl() : |
429 | DefaultLexer("perl" , SCLEX_PERL), |
430 | setWordStart(CharacterSet::setAlpha, "_" , 0x80, true), |
431 | setWord(CharacterSet::setAlphaNum, "_" , 0x80, true), |
432 | setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]" ), |
433 | setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX" ) { |
434 | } |
435 | virtual ~LexerPerl() { |
436 | } |
437 | void SCI_METHOD Release() override { |
438 | delete this; |
439 | } |
440 | int SCI_METHOD Version() const override { |
441 | return lvRelease5; |
442 | } |
443 | const char *SCI_METHOD PropertyNames() override { |
444 | return osPerl.PropertyNames(); |
445 | } |
446 | int SCI_METHOD PropertyType(const char *name) override { |
447 | return osPerl.PropertyType(name); |
448 | } |
449 | const char *SCI_METHOD DescribeProperty(const char *name) override { |
450 | return osPerl.DescribeProperty(name); |
451 | } |
452 | Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
453 | const char * SCI_METHOD PropertyGet(const char *key) override { |
454 | return osPerl.PropertyGet(key); |
455 | } |
456 | const char *SCI_METHOD DescribeWordListSets() override { |
457 | return osPerl.DescribeWordListSets(); |
458 | } |
459 | Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
460 | void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
461 | void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
462 | |
463 | void *SCI_METHOD PrivateCall(int, void *) override { |
464 | return 0; |
465 | } |
466 | |
467 | static ILexer5 *LexerFactoryPerl() { |
468 | return new LexerPerl(); |
469 | } |
470 | int InputSymbolScan(StyleContext &sc); |
471 | void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false); |
472 | }; |
473 | |
474 | Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) { |
475 | if (osPerl.PropertySet(&options, key, val)) { |
476 | return 0; |
477 | } |
478 | return -1; |
479 | } |
480 | |
481 | Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) { |
482 | WordList *wordListN = 0; |
483 | switch (n) { |
484 | case 0: |
485 | wordListN = &keywords; |
486 | break; |
487 | } |
488 | Sci_Position firstModification = -1; |
489 | if (wordListN) { |
490 | WordList wlNew; |
491 | wlNew.Set(wl); |
492 | if (*wordListN != wlNew) { |
493 | wordListN->Set(wl); |
494 | firstModification = 0; |
495 | } |
496 | } |
497 | return firstModification; |
498 | } |
499 | |
500 | int LexerPerl::InputSymbolScan(StyleContext &sc) { |
501 | // forward scan for matching > on same line; file handles |
502 | int c, sLen = 0; |
503 | while ((c = sc.GetRelativeCharacter(++sLen)) != 0) { |
504 | if (c == '\r' || c == '\n') { |
505 | return 0; |
506 | } else if (c == '>') { |
507 | if (sc.Match("<=>" )) // '<=>' case |
508 | return 0; |
509 | return sLen; |
510 | } |
511 | } |
512 | return 0; |
513 | } |
514 | |
515 | void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) { |
516 | // interpolate a segment (with no active backslashes or delimiters within) |
517 | // switch in or out of an interpolation style or continue current style |
518 | // commit variable patterns if found, trim segment, repeat until done |
519 | while (maxSeg > 0) { |
520 | bool isVar = false; |
521 | int sLen = 0; |
522 | if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) { |
523 | // $#[$]*word [$@][$]*word (where word or {word} is always present) |
524 | bool braces = false; |
525 | sLen = 1; |
526 | if (sc.ch == '$' && sc.chNext == '#') { // starts with $# |
527 | sLen++; |
528 | } |
529 | while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) // >0 $ dereference within |
530 | sLen++; |
531 | if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) { // { start for {word} |
532 | sLen++; |
533 | braces = true; |
534 | } |
535 | if (maxSeg > sLen) { |
536 | int c = sc.GetRelativeCharacter(sLen); |
537 | if (setWordStart.Contains(c)) { // word (various) |
538 | sLen++; |
539 | isVar = true; |
540 | while (maxSeg > sLen) { |
541 | if (!setWord.Contains(sc.GetRelativeCharacter(sLen))) |
542 | break; |
543 | sLen++; |
544 | } |
545 | } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit} |
546 | sLen++; |
547 | isVar = true; |
548 | } |
549 | } |
550 | if (braces) { |
551 | if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) { // } end for {word} |
552 | sLen++; |
553 | } else |
554 | isVar = false; |
555 | } |
556 | } |
557 | if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns |
558 | int c = sc.chNext; |
559 | if (sc.ch == '$') { |
560 | sLen = 1; |
561 | if (IsADigit(c)) { // $[0-9] and slurp trailing digits |
562 | sLen++; |
563 | isVar = true; |
564 | while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen))) |
565 | sLen++; |
566 | } else if (setSpecialVar.Contains(c)) { // $ special variables |
567 | sLen++; |
568 | isVar = true; |
569 | } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional |
570 | sLen++; |
571 | isVar = true; |
572 | } else if (c == '^') { // $^A control-char style |
573 | sLen++; |
574 | if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) { |
575 | sLen++; |
576 | isVar = true; |
577 | } |
578 | } |
579 | } else if (sc.ch == '@') { |
580 | sLen = 1; |
581 | if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern |
582 | sLen++; |
583 | isVar = true; |
584 | } |
585 | } |
586 | } |
587 | if (isVar) { // commit as interpolated variable or normal character |
588 | if (sc.state < SCE_PL_STRING_VAR) |
589 | sc.SetState(sc.state + INTERPOLATE_SHIFT); |
590 | sc.Forward(sLen); |
591 | maxSeg -= sLen; |
592 | } else { |
593 | if (sc.state >= SCE_PL_STRING_VAR) |
594 | sc.SetState(sc.state - INTERPOLATE_SHIFT); |
595 | sc.Forward(); |
596 | maxSeg--; |
597 | } |
598 | } |
599 | if (sc.state >= SCE_PL_STRING_VAR) |
600 | sc.SetState(sc.state - INTERPOLATE_SHIFT); |
601 | } |
602 | |
603 | void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
604 | LexAccessor styler(pAccess); |
605 | |
606 | // keywords that forces /PATTERN/ at all times; should track vim's behaviour |
607 | WordList reWords; |
608 | reWords.Set("elsif if split while" ); |
609 | |
610 | // charset classes |
611 | CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC" ); |
612 | // lexing of "%*</" operators is non-trivial; these are missing in the set below |
613 | CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~" ); |
614 | CharacterSet setQDelim(CharacterSet::setNone, "qrwx" ); |
615 | CharacterSet setModifiers(CharacterSet::setAlpha); |
616 | CharacterSet setPreferRE(CharacterSet::setNone, "*/<%" ); |
617 | // setArray and setHash also accepts chars for special vars like $_, |
618 | // which are then truncated when the next char does not match setVar |
619 | CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'" , 0x80, true); |
620 | CharacterSet setArray(CharacterSet::setAlpha, "#$_+-" , 0x80, true); |
621 | CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-" , 0x80, true); |
622 | CharacterSet &setPOD = setModifiers; |
623 | CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@" ); |
624 | CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_" ); |
625 | CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t" ); |
626 | CharacterSet setRepetition(CharacterSet::setDigits, ")\"'" ); |
627 | // for format identifiers |
628 | CharacterSet setFormatStart(CharacterSet::setAlpha, "_=" ); |
629 | CharacterSet &setFormat = setHereDocDelim; |
630 | |
631 | // Lexer for perl often has to backtrack to start of current style to determine |
632 | // which characters are being used as quotes, how deeply nested is the |
633 | // start position and what the termination string is for HERE documents. |
634 | |
635 | class HereDocCls { // Class to manage HERE doc sequence |
636 | public: |
637 | int State; |
638 | // 0: '<<' encountered |
639 | // 1: collect the delimiter |
640 | // 2: here doc text (lines after the delimiter) |
641 | int Quote; // the char after '<<' |
642 | bool Quoted; // true if Quote in ('\'','"','`') |
643 | bool StripIndent; // true if '<<~' requested to strip leading whitespace |
644 | int DelimiterLength; // strlen(Delimiter) |
645 | char Delimiter[HERE_DELIM_MAX]; // the Delimiter |
646 | HereDocCls() { |
647 | State = 0; |
648 | Quote = 0; |
649 | Quoted = false; |
650 | StripIndent = false; |
651 | DelimiterLength = 0; |
652 | Delimiter[0] = '\0'; |
653 | } |
654 | void Append(int ch) { |
655 | Delimiter[DelimiterLength++] = static_cast<char>(ch); |
656 | Delimiter[DelimiterLength] = '\0'; |
657 | } |
658 | ~HereDocCls() { |
659 | } |
660 | }; |
661 | HereDocCls HereDoc; // TODO: FIFO for stacked here-docs |
662 | |
663 | class QuoteCls { // Class to manage quote pairs |
664 | public: |
665 | int Rep; |
666 | int Count; |
667 | int Up, Down; |
668 | QuoteCls() { |
669 | New(1); |
670 | } |
671 | void New(int r = 1) { |
672 | Rep = r; |
673 | Count = 0; |
674 | Up = '\0'; |
675 | Down = '\0'; |
676 | } |
677 | void Open(int u) { |
678 | Count++; |
679 | Up = u; |
680 | Down = opposite(Up); |
681 | } |
682 | }; |
683 | QuoteCls Quote; |
684 | |
685 | // additional state for number lexing |
686 | int numState = PERLNUM_DECIMAL; |
687 | int dotCount = 0; |
688 | |
689 | Sci_PositionU endPos = startPos + length; |
690 | |
691 | // Backtrack to beginning of style if required... |
692 | // If in a long distance lexical state, backtrack to find quote characters. |
693 | // Includes strings (may be multi-line), numbers (additional state), format |
694 | // bodies, as well as POD sections. |
695 | if (initStyle == SCE_PL_HERE_Q |
696 | || initStyle == SCE_PL_HERE_QQ |
697 | || initStyle == SCE_PL_HERE_QX |
698 | || initStyle == SCE_PL_FORMAT |
699 | || initStyle == SCE_PL_HERE_QQ_VAR |
700 | || initStyle == SCE_PL_HERE_QX_VAR |
701 | ) { |
702 | // backtrack through multiple styles to reach the delimiter start |
703 | int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM; |
704 | while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) { |
705 | startPos--; |
706 | } |
707 | startPos = styler.LineStart(styler.GetLine(startPos)); |
708 | initStyle = styler.StyleAt(startPos - 1); |
709 | } |
710 | if (initStyle == SCE_PL_STRING |
711 | || initStyle == SCE_PL_STRING_QQ |
712 | || initStyle == SCE_PL_BACKTICKS |
713 | || initStyle == SCE_PL_STRING_QX |
714 | || initStyle == SCE_PL_REGEX |
715 | || initStyle == SCE_PL_STRING_QR |
716 | || initStyle == SCE_PL_REGSUBST |
717 | || initStyle == SCE_PL_STRING_VAR |
718 | || initStyle == SCE_PL_STRING_QQ_VAR |
719 | || initStyle == SCE_PL_BACKTICKS_VAR |
720 | || initStyle == SCE_PL_STRING_QX_VAR |
721 | || initStyle == SCE_PL_REGEX_VAR |
722 | || initStyle == SCE_PL_STRING_QR_VAR |
723 | || initStyle == SCE_PL_REGSUBST_VAR |
724 | ) { |
725 | // for interpolation, must backtrack through a mix of two different styles |
726 | int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ? |
727 | initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT; |
728 | while (startPos > 1) { |
729 | int st = styler.StyleAt(startPos - 1); |
730 | if ((st != initStyle) && (st != otherStyle)) |
731 | break; |
732 | startPos--; |
733 | } |
734 | initStyle = SCE_PL_DEFAULT; |
735 | } else if (initStyle == SCE_PL_STRING_Q |
736 | || initStyle == SCE_PL_STRING_QW |
737 | || initStyle == SCE_PL_XLAT |
738 | || initStyle == SCE_PL_CHARACTER |
739 | || initStyle == SCE_PL_NUMBER |
740 | || initStyle == SCE_PL_IDENTIFIER |
741 | || initStyle == SCE_PL_ERROR |
742 | || initStyle == SCE_PL_SUB_PROTOTYPE |
743 | ) { |
744 | while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { |
745 | startPos--; |
746 | } |
747 | initStyle = SCE_PL_DEFAULT; |
748 | } else if (initStyle == SCE_PL_POD |
749 | || initStyle == SCE_PL_POD_VERB |
750 | ) { |
751 | // POD backtracking finds preceding blank lines and goes back past them |
752 | Sci_Position ln = styler.GetLine(startPos); |
753 | if (ln > 0) { |
754 | initStyle = styler.StyleAt(styler.LineStart(--ln)); |
755 | if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) { |
756 | while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT) |
757 | ln--; |
758 | } |
759 | startPos = styler.LineStart(++ln); |
760 | initStyle = styler.StyleAt(startPos - 1); |
761 | } else { |
762 | startPos = 0; |
763 | initStyle = SCE_PL_DEFAULT; |
764 | } |
765 | } |
766 | |
767 | // backFlag, backPos are additional state to aid identifier corner cases. |
768 | // Look backwards past whitespace and comments in order to detect either |
769 | // operator or keyword. Later updated as we go along. |
770 | int backFlag = BACK_NONE; |
771 | Sci_PositionU backPos = startPos; |
772 | if (backPos > 0) { |
773 | backPos--; |
774 | skipWhitespaceComment(styler, backPos); |
775 | if (styler.StyleAt(backPos) == SCE_PL_OPERATOR) |
776 | backFlag = BACK_OPERATOR; |
777 | else if (styler.StyleAt(backPos) == SCE_PL_WORD) |
778 | backFlag = BACK_KEYWORD; |
779 | backPos++; |
780 | } |
781 | |
782 | StyleContext sc(startPos, endPos - startPos, initStyle, styler); |
783 | |
784 | for (; sc.More(); sc.Forward()) { |
785 | |
786 | // Determine if the current state should terminate. |
787 | switch (sc.state) { |
788 | case SCE_PL_OPERATOR: |
789 | sc.SetState(SCE_PL_DEFAULT); |
790 | backFlag = BACK_OPERATOR; |
791 | backPos = sc.currentPos; |
792 | break; |
793 | case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol |
794 | if ((!setWord.Contains(sc.ch) && sc.ch != '\'') |
795 | || sc.Match('.', '.') |
796 | || sc.chPrev == '>') { // end of inputsymbol |
797 | sc.SetState(SCE_PL_DEFAULT); |
798 | } |
799 | break; |
800 | case SCE_PL_WORD: // keyword, plus special cases |
801 | if (!setWord.Contains(sc.ch)) { |
802 | char s[100]; |
803 | sc.GetCurrent(s, sizeof(s)); |
804 | if ((strcmp(s, "__DATA__" ) == 0) || (strcmp(s, "__END__" ) == 0)) { |
805 | sc.ChangeState(SCE_PL_DATASECTION); |
806 | } else { |
807 | if ((strcmp(s, "format" ) == 0)) { |
808 | sc.SetState(SCE_PL_FORMAT_IDENT); |
809 | HereDoc.State = 0; |
810 | } else { |
811 | sc.SetState(SCE_PL_DEFAULT); |
812 | } |
813 | backFlag = BACK_KEYWORD; |
814 | backPos = sc.currentPos; |
815 | } |
816 | } |
817 | break; |
818 | case SCE_PL_SCALAR: |
819 | case SCE_PL_ARRAY: |
820 | case SCE_PL_HASH: |
821 | case SCE_PL_SYMBOLTABLE: |
822 | if (sc.Match(':', ':')) { // skip :: |
823 | sc.Forward(); |
824 | } else if (!setVar.Contains(sc.ch)) { |
825 | if (sc.LengthCurrent() == 1) { |
826 | // Special variable: $(, $_ etc. |
827 | sc.Forward(); |
828 | } |
829 | sc.SetState(SCE_PL_DEFAULT); |
830 | } |
831 | break; |
832 | case SCE_PL_NUMBER: |
833 | // if no early break, number style is terminated at "(go through)" |
834 | if (sc.ch == '.') { |
835 | if (sc.chNext == '.') { |
836 | // double dot is always an operator (go through) |
837 | } else if (numState <= PERLNUM_FLOAT_EXP) { |
838 | // non-decimal number or float exponent, consume next dot |
839 | sc.SetState(SCE_PL_OPERATOR); |
840 | break; |
841 | } else { // decimal or vectors allows dots |
842 | dotCount++; |
843 | if (numState == PERLNUM_DECIMAL) { |
844 | if (dotCount <= 1) // number with one dot in it |
845 | break; |
846 | if (IsADigit(sc.chNext)) { // really a vector |
847 | numState = PERLNUM_VECTOR; |
848 | break; |
849 | } |
850 | // number then dot (go through) |
851 | } else if (numState == PERLNUM_HEX) { |
852 | if (dotCount <= 1 && IsADigit(sc.chNext, 16)) { |
853 | break; // hex with one dot is a hex float |
854 | } else { |
855 | sc.SetState(SCE_PL_OPERATOR); |
856 | break; |
857 | } |
858 | // hex then dot (go through) |
859 | } else if (IsADigit(sc.chNext)) // vectors |
860 | break; |
861 | // vector then dot (go through) |
862 | } |
863 | } else if (sc.ch == '_') { |
864 | // permissive underscoring for number and vector literals |
865 | break; |
866 | } else if (numState == PERLNUM_DECIMAL) { |
867 | if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign |
868 | numState = PERLNUM_FLOAT_EXP; |
869 | if (sc.chNext == '+' || sc.chNext == '-') { |
870 | sc.Forward(); |
871 | } |
872 | break; |
873 | } else if (IsADigit(sc.ch)) |
874 | break; |
875 | // number then word (go through) |
876 | } else if (numState == PERLNUM_HEX) { |
877 | if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign |
878 | numState = PERLNUM_FLOAT_EXP; |
879 | if (sc.chNext == '+' || sc.chNext == '-') { |
880 | sc.Forward(); |
881 | } |
882 | break; |
883 | } else if (IsADigit(sc.ch, 16)) |
884 | break; |
885 | // hex or hex float then word (go through) |
886 | } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { |
887 | if (IsADigit(sc.ch)) // vector |
888 | break; |
889 | if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word |
890 | sc.ChangeState(SCE_PL_IDENTIFIER); |
891 | break; |
892 | } |
893 | // vector then word (go through) |
894 | } else if (IsADigit(sc.ch)) { |
895 | if (numState == PERLNUM_FLOAT_EXP) { |
896 | break; |
897 | } else if (numState == PERLNUM_OCTAL) { |
898 | if (sc.ch <= '7') break; |
899 | } else if (numState == PERLNUM_BINARY) { |
900 | if (sc.ch <= '1') break; |
901 | } |
902 | // mark invalid octal, binary numbers (go through) |
903 | numState = PERLNUM_BAD; |
904 | break; |
905 | } |
906 | // complete current number or vector |
907 | sc.ChangeState(actualNumStyle(numState)); |
908 | sc.SetState(SCE_PL_DEFAULT); |
909 | break; |
910 | case SCE_PL_COMMENTLINE: |
911 | if (sc.atLineStart) { |
912 | sc.SetState(SCE_PL_DEFAULT); |
913 | } |
914 | break; |
915 | case SCE_PL_HERE_DELIM: |
916 | if (HereDoc.State == 0) { // '<<' encountered |
917 | int delim_ch = sc.chNext; |
918 | Sci_Position ws_skip = 0; |
919 | HereDoc.State = 1; // pre-init HERE doc class |
920 | HereDoc.Quote = sc.chNext; |
921 | HereDoc.Quoted = false; |
922 | HereDoc.StripIndent = false; |
923 | HereDoc.DelimiterLength = 0; |
924 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; |
925 | if (delim_ch == '~') { // was actually '<<~' |
926 | sc.Forward(); |
927 | HereDoc.StripIndent = true; |
928 | HereDoc.Quote = delim_ch = sc.chNext; |
929 | } |
930 | if (IsASpaceOrTab(delim_ch)) { |
931 | // skip whitespace; legal only for quoted delimiters |
932 | Sci_PositionU i = sc.currentPos + 1; |
933 | while ((i < endPos) && IsASpaceOrTab(delim_ch)) { |
934 | i++; |
935 | delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i)); |
936 | } |
937 | ws_skip = i - sc.currentPos - 1; |
938 | } |
939 | if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') { |
940 | // a quoted here-doc delimiter; skip any whitespace |
941 | sc.Forward(ws_skip + 1); |
942 | HereDoc.Quote = delim_ch; |
943 | HereDoc.Quoted = true; |
944 | } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext)) |
945 | || ws_skip > 0) { |
946 | // left shift << or <<= operator cases |
947 | // restore position if operator |
948 | sc.ChangeState(SCE_PL_OPERATOR); |
949 | sc.ForwardSetState(SCE_PL_DEFAULT); |
950 | backFlag = BACK_OPERATOR; |
951 | backPos = sc.currentPos; |
952 | HereDoc.State = 0; |
953 | } else { |
954 | // specially handle initial '\' for identifier |
955 | if (ws_skip == 0 && HereDoc.Quote == '\\') |
956 | sc.Forward(); |
957 | // an unquoted here-doc delimiter, no special handling |
958 | // (cannot be prefixed by spaces/tabs), or |
959 | // symbols terminates; deprecated zero-length delimiter |
960 | } |
961 | } else if (HereDoc.State == 1) { // collect the delimiter |
962 | backFlag = BACK_NONE; |
963 | if (HereDoc.Quoted) { // a quoted here-doc delimiter |
964 | if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter |
965 | sc.ForwardSetState(SCE_PL_DEFAULT); |
966 | } else if (!sc.atLineEnd) { |
967 | if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote |
968 | sc.Forward(); |
969 | } |
970 | if (sc.ch != '\r') { // skip CR if CRLF |
971 | int i = 0; // else append char, possibly an extended char |
972 | while (i < sc.width) { |
973 | HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i))); |
974 | i++; |
975 | } |
976 | } |
977 | } |
978 | } else { // an unquoted here-doc delimiter, no extended charsets |
979 | if (setHereDocDelim.Contains(sc.ch)) { |
980 | HereDoc.Append(sc.ch); |
981 | } else { |
982 | sc.SetState(SCE_PL_DEFAULT); |
983 | } |
984 | } |
985 | if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { |
986 | sc.SetState(SCE_PL_ERROR); |
987 | HereDoc.State = 0; |
988 | } |
989 | } |
990 | break; |
991 | case SCE_PL_HERE_Q: |
992 | case SCE_PL_HERE_QQ: |
993 | case SCE_PL_HERE_QX: |
994 | // also implies HereDoc.State == 2 |
995 | sc.Complete(); |
996 | if (HereDoc.StripIndent) { |
997 | // skip whitespace |
998 | while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) |
999 | sc.Forward(); |
1000 | } |
1001 | if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) { |
1002 | int c = sc.GetRelative(HereDoc.DelimiterLength); |
1003 | if (c == '\r' || c == '\n') { // peek first, do not consume match |
1004 | sc.ForwardBytes(HereDoc.DelimiterLength); |
1005 | sc.SetState(SCE_PL_DEFAULT); |
1006 | backFlag = BACK_NONE; |
1007 | HereDoc.State = 0; |
1008 | if (!sc.atLineEnd) |
1009 | sc.Forward(); |
1010 | break; |
1011 | } |
1012 | } |
1013 | if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated |
1014 | while (!sc.atLineEnd) |
1015 | sc.Forward(); |
1016 | break; |
1017 | } |
1018 | while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated |
1019 | int c, sLen = 0, endType = 0; |
1020 | while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
1021 | // scan to break string into segments |
1022 | if (c == '\\') { |
1023 | endType = 1; break; |
1024 | } else if (c == '\r' || c == '\n') { |
1025 | endType = 2; break; |
1026 | } |
1027 | sLen++; |
1028 | } |
1029 | if (sLen > 0) // process non-empty segments |
1030 | InterpolateSegment(sc, sLen); |
1031 | if (endType == 1) { |
1032 | sc.Forward(); |
1033 | // \ at end-of-line does not appear to have any effect, skip |
1034 | if (sc.ch != '\r' && sc.ch != '\n') |
1035 | sc.Forward(); |
1036 | } else if (endType == 2) { |
1037 | if (!sc.atLineEnd) |
1038 | sc.Forward(); |
1039 | } |
1040 | } |
1041 | break; |
1042 | case SCE_PL_POD: |
1043 | case SCE_PL_POD_VERB: { |
1044 | Sci_PositionU fw = sc.currentPos; |
1045 | Sci_Position ln = styler.GetLine(fw); |
1046 | if (sc.atLineStart && sc.Match("=cut" )) { // end of POD |
1047 | sc.SetState(SCE_PL_POD); |
1048 | sc.Forward(4); |
1049 | sc.SetState(SCE_PL_DEFAULT); |
1050 | styler.SetLineState(ln, SCE_PL_POD); |
1051 | break; |
1052 | } |
1053 | int pod = podLineScan(styler, fw, endPos); // classify POD line |
1054 | styler.SetLineState(ln, pod); |
1055 | if (pod == SCE_PL_DEFAULT) { |
1056 | if (sc.state == SCE_PL_POD_VERB) { |
1057 | Sci_PositionU fw2 = fw; |
1058 | while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) { |
1059 | fw = fw2++; // penultimate line (last blank line) |
1060 | pod = podLineScan(styler, fw2, endPos); |
1061 | styler.SetLineState(styler.GetLine(fw2), pod); |
1062 | } |
1063 | if (pod == SCE_PL_POD) { // truncate verbatim POD early |
1064 | sc.SetState(SCE_PL_POD); |
1065 | } else |
1066 | fw = fw2; |
1067 | } |
1068 | } else { |
1069 | if (pod == SCE_PL_POD_VERB // still part of current paragraph |
1070 | && (styler.GetLineState(ln - 1) == SCE_PL_POD)) { |
1071 | pod = SCE_PL_POD; |
1072 | styler.SetLineState(ln, pod); |
1073 | } else if (pod == SCE_PL_POD |
1074 | && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) { |
1075 | pod = SCE_PL_POD_VERB; |
1076 | styler.SetLineState(ln, pod); |
1077 | } |
1078 | sc.SetState(pod); |
1079 | } |
1080 | sc.ForwardBytes(fw - sc.currentPos); // commit style |
1081 | } |
1082 | break; |
1083 | case SCE_PL_REGEX: |
1084 | case SCE_PL_STRING_QR: |
1085 | if (Quote.Rep <= 0) { |
1086 | if (!setModifiers.Contains(sc.ch)) |
1087 | sc.SetState(SCE_PL_DEFAULT); |
1088 | } else if (!Quote.Up && !IsASpace(sc.ch)) { |
1089 | Quote.Open(sc.ch); |
1090 | } else { |
1091 | int c, sLen = 0, endType = 0; |
1092 | while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
1093 | // scan to break string into segments |
1094 | if (IsASpace(c)) { |
1095 | break; |
1096 | } else if (c == '\\' && Quote.Up != '\\') { |
1097 | endType = 1; break; |
1098 | } else if (c == Quote.Down) { |
1099 | Quote.Count--; |
1100 | if (Quote.Count == 0) { |
1101 | Quote.Rep--; |
1102 | break; |
1103 | } |
1104 | } else if (c == Quote.Up) |
1105 | Quote.Count++; |
1106 | sLen++; |
1107 | } |
1108 | if (sLen > 0) { // process non-empty segments |
1109 | if (Quote.Up != '\'') { |
1110 | InterpolateSegment(sc, sLen, true); |
1111 | } else // non-interpolated path |
1112 | sc.Forward(sLen); |
1113 | } |
1114 | if (endType == 1) |
1115 | sc.Forward(); |
1116 | } |
1117 | break; |
1118 | case SCE_PL_REGSUBST: |
1119 | case SCE_PL_XLAT: |
1120 | if (Quote.Rep <= 0) { |
1121 | if (!setModifiers.Contains(sc.ch)) |
1122 | sc.SetState(SCE_PL_DEFAULT); |
1123 | } else if (!Quote.Up && !IsASpace(sc.ch)) { |
1124 | Quote.Open(sc.ch); |
1125 | } else { |
1126 | int c, sLen = 0, endType = 0; |
1127 | bool isPattern = (Quote.Rep == 2); |
1128 | while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
1129 | // scan to break string into segments |
1130 | if (c == '\\' && Quote.Up != '\\') { |
1131 | endType = 2; break; |
1132 | } else if (Quote.Count == 0 && Quote.Rep == 1) { |
1133 | // We matched something like s(...) or tr{...}, Perl 5.10 |
1134 | // appears to allow almost any character for use as the |
1135 | // next delimiters. Whitespace and comments are accepted in |
1136 | // between, but we'll limit to whitespace here. |
1137 | // For '#', if no whitespace in between, it's a delimiter. |
1138 | if (IsASpace(c)) { |
1139 | // Keep going |
1140 | } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) { |
1141 | endType = 3; |
1142 | } else |
1143 | Quote.Open(c); |
1144 | break; |
1145 | } else if (c == Quote.Down) { |
1146 | Quote.Count--; |
1147 | if (Quote.Count == 0) { |
1148 | Quote.Rep--; |
1149 | endType = 1; |
1150 | } |
1151 | if (Quote.Up == Quote.Down) |
1152 | Quote.Count++; |
1153 | if (endType == 1) |
1154 | break; |
1155 | } else if (c == Quote.Up) { |
1156 | Quote.Count++; |
1157 | } else if (IsASpace(c)) |
1158 | break; |
1159 | sLen++; |
1160 | } |
1161 | if (sLen > 0) { // process non-empty segments |
1162 | if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') { |
1163 | InterpolateSegment(sc, sLen, isPattern); |
1164 | } else // non-interpolated path |
1165 | sc.Forward(sLen); |
1166 | } |
1167 | if (endType == 2) { |
1168 | sc.Forward(); |
1169 | } else if (endType == 3) |
1170 | sc.SetState(SCE_PL_DEFAULT); |
1171 | } |
1172 | break; |
1173 | case SCE_PL_STRING_Q: |
1174 | case SCE_PL_STRING_QQ: |
1175 | case SCE_PL_STRING_QX: |
1176 | case SCE_PL_STRING_QW: |
1177 | case SCE_PL_STRING: |
1178 | case SCE_PL_CHARACTER: |
1179 | case SCE_PL_BACKTICKS: |
1180 | if (!Quote.Down && !IsASpace(sc.ch)) { |
1181 | Quote.Open(sc.ch); |
1182 | } else { |
1183 | int c, sLen = 0, endType = 0; |
1184 | while ((c = sc.GetRelativeCharacter(sLen)) != 0) { |
1185 | // scan to break string into segments |
1186 | if (IsASpace(c)) { |
1187 | break; |
1188 | } else if (c == '\\' && Quote.Up != '\\') { |
1189 | endType = 2; break; |
1190 | } else if (c == Quote.Down) { |
1191 | Quote.Count--; |
1192 | if (Quote.Count == 0) { |
1193 | endType = 3; break; |
1194 | } |
1195 | } else if (c == Quote.Up) |
1196 | Quote.Count++; |
1197 | sLen++; |
1198 | } |
1199 | if (sLen > 0) { // process non-empty segments |
1200 | switch (sc.state) { |
1201 | case SCE_PL_STRING: |
1202 | case SCE_PL_STRING_QQ: |
1203 | case SCE_PL_BACKTICKS: |
1204 | InterpolateSegment(sc, sLen); |
1205 | break; |
1206 | case SCE_PL_STRING_QX: |
1207 | if (Quote.Up != '\'') { |
1208 | InterpolateSegment(sc, sLen); |
1209 | break; |
1210 | } |
1211 | // (continued for ' delim) |
1212 | // Falls through. |
1213 | default: // non-interpolated path |
1214 | sc.Forward(sLen); |
1215 | } |
1216 | } |
1217 | if (endType == 2) { |
1218 | sc.Forward(); |
1219 | } else if (endType == 3) |
1220 | sc.ForwardSetState(SCE_PL_DEFAULT); |
1221 | } |
1222 | break; |
1223 | case SCE_PL_SUB_PROTOTYPE: { |
1224 | int i = 0; |
1225 | // forward scan; must all be valid proto characters |
1226 | while (setSubPrototype.Contains(sc.GetRelative(i))) |
1227 | i++; |
1228 | if (sc.GetRelative(i) == ')') { // valid sub prototype |
1229 | sc.ForwardBytes(i); |
1230 | sc.ForwardSetState(SCE_PL_DEFAULT); |
1231 | } else { |
1232 | // abandon prototype, restart from '(' |
1233 | sc.ChangeState(SCE_PL_OPERATOR); |
1234 | sc.SetState(SCE_PL_DEFAULT); |
1235 | } |
1236 | } |
1237 | break; |
1238 | case SCE_PL_FORMAT: { |
1239 | sc.Complete(); |
1240 | if (sc.Match('.')) { |
1241 | sc.Forward(); |
1242 | if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) |
1243 | sc.SetState(SCE_PL_DEFAULT); |
1244 | } |
1245 | while (!sc.atLineEnd) |
1246 | sc.Forward(); |
1247 | } |
1248 | break; |
1249 | case SCE_PL_ERROR: |
1250 | break; |
1251 | } |
1252 | // Needed for specific continuation styles (one follows the other) |
1253 | switch (sc.state) { |
1254 | // continued from SCE_PL_WORD |
1255 | case SCE_PL_FORMAT_IDENT: |
1256 | // occupies HereDoc state 3 to avoid clashing with HERE docs |
1257 | if (IsASpaceOrTab(sc.ch)) { // skip whitespace |
1258 | sc.ChangeState(SCE_PL_DEFAULT); |
1259 | while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) |
1260 | sc.Forward(); |
1261 | sc.SetState(SCE_PL_FORMAT_IDENT); |
1262 | } |
1263 | if (setFormatStart.Contains(sc.ch)) { // identifier or '=' |
1264 | if (sc.ch != '=') { |
1265 | do { |
1266 | sc.Forward(); |
1267 | } while (setFormat.Contains(sc.ch)); |
1268 | } |
1269 | while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) |
1270 | sc.Forward(); |
1271 | if (sc.ch == '=') { |
1272 | sc.ForwardSetState(SCE_PL_DEFAULT); |
1273 | HereDoc.State = 3; |
1274 | } else { |
1275 | // invalid identifier; inexact fallback, but hey |
1276 | sc.ChangeState(SCE_PL_IDENTIFIER); |
1277 | sc.SetState(SCE_PL_DEFAULT); |
1278 | } |
1279 | } else { |
1280 | sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier |
1281 | } |
1282 | backFlag = BACK_NONE; |
1283 | break; |
1284 | } |
1285 | |
1286 | // Must check end of HereDoc states here before default state is handled |
1287 | if (HereDoc.State == 1 && sc.atLineEnd) { |
1288 | // Begin of here-doc (the line after the here-doc delimiter): |
1289 | // Lexically, the here-doc starts from the next line after the >>, but the |
1290 | // first line of here-doc seem to follow the style of the last EOL sequence |
1291 | int st_new = SCE_PL_HERE_QQ; |
1292 | HereDoc.State = 2; |
1293 | if (HereDoc.Quoted) { |
1294 | if (sc.state == SCE_PL_HERE_DELIM) { |
1295 | // Missing quote at end of string! We are stricter than perl. |
1296 | // Colour here-doc anyway while marking this bit as an error. |
1297 | sc.ChangeState(SCE_PL_ERROR); |
1298 | } |
1299 | switch (HereDoc.Quote) { |
1300 | case '\'': |
1301 | st_new = SCE_PL_HERE_Q; |
1302 | break; |
1303 | case '"' : |
1304 | st_new = SCE_PL_HERE_QQ; |
1305 | break; |
1306 | case '`' : |
1307 | st_new = SCE_PL_HERE_QX; |
1308 | break; |
1309 | } |
1310 | } else { |
1311 | if (HereDoc.Quote == '\\') |
1312 | st_new = SCE_PL_HERE_Q; |
1313 | } |
1314 | sc.SetState(st_new); |
1315 | } |
1316 | if (HereDoc.State == 3 && sc.atLineEnd) { |
1317 | // Start of format body. |
1318 | HereDoc.State = 0; |
1319 | sc.SetState(SCE_PL_FORMAT); |
1320 | } |
1321 | |
1322 | // Determine if a new state should be entered. |
1323 | if (sc.state == SCE_PL_DEFAULT) { |
1324 | if (IsADigit(sc.ch) || |
1325 | (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) { |
1326 | sc.SetState(SCE_PL_NUMBER); |
1327 | backFlag = BACK_NONE; |
1328 | numState = PERLNUM_DECIMAL; |
1329 | dotCount = 0; |
1330 | if (sc.ch == '0') { // hex,bin,octal |
1331 | if (sc.chNext == 'x' || sc.chNext == 'X') { |
1332 | numState = PERLNUM_HEX; |
1333 | } else if (sc.chNext == 'b' || sc.chNext == 'B') { |
1334 | numState = PERLNUM_BINARY; |
1335 | } else if (IsADigit(sc.chNext)) { |
1336 | numState = PERLNUM_OCTAL; |
1337 | } |
1338 | if (numState != PERLNUM_DECIMAL) { |
1339 | sc.Forward(); |
1340 | } |
1341 | } else if (sc.ch == 'v') { // vector |
1342 | numState = PERLNUM_V_VECTOR; |
1343 | } |
1344 | } else if (setWord.Contains(sc.ch)) { |
1345 | // if immediately prefixed by '::', always a bareword |
1346 | sc.SetState(SCE_PL_WORD); |
1347 | if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') { |
1348 | sc.ChangeState(SCE_PL_IDENTIFIER); |
1349 | } |
1350 | Sci_PositionU bk = sc.currentPos; |
1351 | Sci_PositionU fw = sc.currentPos + 1; |
1352 | // first check for possible quote-like delimiter |
1353 | if (sc.ch == 's' && !setWord.Contains(sc.chNext)) { |
1354 | sc.ChangeState(SCE_PL_REGSUBST); |
1355 | Quote.New(2); |
1356 | } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) { |
1357 | sc.ChangeState(SCE_PL_REGEX); |
1358 | Quote.New(); |
1359 | } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) { |
1360 | sc.ChangeState(SCE_PL_STRING_Q); |
1361 | Quote.New(); |
1362 | } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { |
1363 | sc.ChangeState(SCE_PL_XLAT); |
1364 | Quote.New(2); |
1365 | } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { |
1366 | sc.ChangeState(SCE_PL_XLAT); |
1367 | Quote.New(2); |
1368 | sc.Forward(); |
1369 | fw++; |
1370 | } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext) |
1371 | && !setWord.Contains(sc.GetRelative(2))) { |
1372 | if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ); |
1373 | else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX); |
1374 | else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR); |
1375 | else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w' |
1376 | Quote.New(); |
1377 | sc.Forward(); |
1378 | fw++; |
1379 | } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition |
1380 | !setWord.Contains(sc.chNext) || |
1381 | (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) { |
1382 | sc.ChangeState(SCE_PL_OPERATOR); |
1383 | } |
1384 | // if potentially a keyword, scan forward and grab word, then check |
1385 | // if it's really one; if yes, disambiguation test is performed |
1386 | // otherwise it is always a bareword and we skip a lot of scanning |
1387 | if (sc.state == SCE_PL_WORD) { |
1388 | while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw)))) |
1389 | fw++; |
1390 | if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) { |
1391 | sc.ChangeState(SCE_PL_IDENTIFIER); |
1392 | } |
1393 | } |
1394 | // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this |
1395 | // for quote-like delimiters/keywords, attempt to disambiguate |
1396 | // to select for bareword, change state -> SCE_PL_IDENTIFIER |
1397 | if (sc.state != SCE_PL_IDENTIFIER && bk > 0) { |
1398 | if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos)) |
1399 | sc.ChangeState(SCE_PL_IDENTIFIER); |
1400 | } |
1401 | backFlag = BACK_NONE; |
1402 | } else if (sc.ch == '#') { |
1403 | sc.SetState(SCE_PL_COMMENTLINE); |
1404 | } else if (sc.ch == '\"') { |
1405 | sc.SetState(SCE_PL_STRING); |
1406 | Quote.New(); |
1407 | Quote.Open(sc.ch); |
1408 | backFlag = BACK_NONE; |
1409 | } else if (sc.ch == '\'') { |
1410 | if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) { |
1411 | // Archaic call |
1412 | sc.SetState(SCE_PL_IDENTIFIER); |
1413 | } else { |
1414 | sc.SetState(SCE_PL_CHARACTER); |
1415 | Quote.New(); |
1416 | Quote.Open(sc.ch); |
1417 | } |
1418 | backFlag = BACK_NONE; |
1419 | } else if (sc.ch == '`') { |
1420 | sc.SetState(SCE_PL_BACKTICKS); |
1421 | Quote.New(); |
1422 | Quote.Open(sc.ch); |
1423 | backFlag = BACK_NONE; |
1424 | } else if (sc.ch == '$') { |
1425 | sc.SetState(SCE_PL_SCALAR); |
1426 | if (sc.chNext == '{') { |
1427 | sc.ForwardSetState(SCE_PL_OPERATOR); |
1428 | } else if (IsASpace(sc.chNext)) { |
1429 | sc.ForwardSetState(SCE_PL_DEFAULT); |
1430 | } else { |
1431 | sc.Forward(); |
1432 | if (sc.Match('`', '`') || sc.Match(':', ':')) { |
1433 | sc.Forward(); |
1434 | } |
1435 | } |
1436 | backFlag = BACK_NONE; |
1437 | } else if (sc.ch == '@') { |
1438 | sc.SetState(SCE_PL_ARRAY); |
1439 | if (setArray.Contains(sc.chNext)) { |
1440 | // no special treatment |
1441 | } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { |
1442 | sc.ForwardBytes(2); |
1443 | } else if (sc.chNext == '{' || sc.chNext == '[') { |
1444 | sc.ForwardSetState(SCE_PL_OPERATOR); |
1445 | } else { |
1446 | sc.ChangeState(SCE_PL_OPERATOR); |
1447 | } |
1448 | backFlag = BACK_NONE; |
1449 | } else if (setPreferRE.Contains(sc.ch)) { |
1450 | // Explicit backward peeking to set a consistent preferRE for |
1451 | // any slash found, so no longer need to track preferRE state. |
1452 | // Find first previous significant lexed element and interpret. |
1453 | // A few symbols shares this code for disambiguation. |
1454 | bool preferRE = false; |
1455 | bool isHereDoc = sc.Match('<', '<'); |
1456 | bool hereDocSpace = false; // for: SCALAR [whitespace] '<<' |
1457 | Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0; |
1458 | sc.Complete(); |
1459 | styler.Flush(); |
1460 | if (styler.StyleAt(bk) == SCE_PL_DEFAULT) |
1461 | hereDocSpace = true; |
1462 | skipWhitespaceComment(styler, bk); |
1463 | if (bk == 0) { |
1464 | // avoid backward scanning breakage |
1465 | preferRE = true; |
1466 | } else { |
1467 | int bkstyle = styler.StyleAt(bk); |
1468 | int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); |
1469 | switch (bkstyle) { |
1470 | case SCE_PL_OPERATOR: |
1471 | preferRE = true; |
1472 | if (bkch == ')' || bkch == ']') { |
1473 | preferRE = false; |
1474 | } else if (bkch == '}') { |
1475 | // backtrack by counting balanced brace pairs |
1476 | // needed to test for variables like ${}, @{} etc. |
1477 | bkstyle = styleBeforeBracePair(styler, bk); |
1478 | if (bkstyle == SCE_PL_SCALAR |
1479 | || bkstyle == SCE_PL_ARRAY |
1480 | || bkstyle == SCE_PL_HASH |
1481 | || bkstyle == SCE_PL_SYMBOLTABLE |
1482 | || bkstyle == SCE_PL_OPERATOR) { |
1483 | preferRE = false; |
1484 | } |
1485 | } else if (bkch == '+' || bkch == '-') { |
1486 | if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1)) |
1487 | && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2))) |
1488 | // exceptions for operators: unary suffixes ++, -- |
1489 | preferRE = false; |
1490 | } |
1491 | break; |
1492 | case SCE_PL_IDENTIFIER: |
1493 | preferRE = true; |
1494 | bkstyle = styleCheckIdentifier(styler, bk); |
1495 | if ((bkstyle == 1) || (bkstyle == 2)) { |
1496 | // inputsymbol or var with "->" or "::" before identifier |
1497 | preferRE = false; |
1498 | } else if (bkstyle == 3) { |
1499 | // bare identifier, test cases follows: |
1500 | if (sc.ch == '/') { |
1501 | // if '/', /PATTERN/ unless digit/space immediately after '/' |
1502 | // if '//', always expect defined-or operator to follow identifier |
1503 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') |
1504 | preferRE = false; |
1505 | } else if (sc.ch == '*' || sc.ch == '%') { |
1506 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) |
1507 | preferRE = false; |
1508 | } else if (sc.ch == '<') { |
1509 | if (IsASpace(sc.chNext) || sc.chNext == '=') |
1510 | preferRE = false; |
1511 | } |
1512 | } |
1513 | break; |
1514 | case SCE_PL_SCALAR: // for $var<< case: |
1515 | if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc |
1516 | preferRE = true; |
1517 | break; |
1518 | case SCE_PL_WORD: |
1519 | preferRE = true; |
1520 | // for HERE docs, always true |
1521 | if (sc.ch == '/') { |
1522 | // adopt heuristics similar to vim-style rules: |
1523 | // keywords always forced as /PATTERN/: split, if, elsif, while |
1524 | // everything else /PATTERN/ unless digit/space immediately after '/' |
1525 | // for '//', defined-or favoured unless special keywords |
1526 | Sci_PositionU bkend = bk + 1; |
1527 | while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) { |
1528 | bk--; |
1529 | } |
1530 | if (isPerlKeyword(bk, bkend, reWords, styler)) |
1531 | break; |
1532 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') |
1533 | preferRE = false; |
1534 | } else if (sc.ch == '*' || sc.ch == '%') { |
1535 | if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) |
1536 | preferRE = false; |
1537 | } else if (sc.ch == '<') { |
1538 | if (IsASpace(sc.chNext) || sc.chNext == '=') |
1539 | preferRE = false; |
1540 | } |
1541 | break; |
1542 | |
1543 | // other styles uses the default, preferRE=false |
1544 | case SCE_PL_POD: |
1545 | case SCE_PL_HERE_Q: |
1546 | case SCE_PL_HERE_QQ: |
1547 | case SCE_PL_HERE_QX: |
1548 | preferRE = true; |
1549 | break; |
1550 | } |
1551 | } |
1552 | backFlag = BACK_NONE; |
1553 | if (isHereDoc) { // handle '<<', HERE doc |
1554 | if (sc.Match("<<>>" )) { // double-diamond operator (5.22) |
1555 | sc.SetState(SCE_PL_OPERATOR); |
1556 | sc.Forward(3); |
1557 | } else if (preferRE) { |
1558 | sc.SetState(SCE_PL_HERE_DELIM); |
1559 | HereDoc.State = 0; |
1560 | } else { // << operator |
1561 | sc.SetState(SCE_PL_OPERATOR); |
1562 | sc.Forward(); |
1563 | } |
1564 | } else if (sc.ch == '*') { // handle '*', typeglob |
1565 | if (preferRE) { |
1566 | sc.SetState(SCE_PL_SYMBOLTABLE); |
1567 | if (sc.chNext == ':' && sc.GetRelative(2) == ':') { |
1568 | sc.ForwardBytes(2); |
1569 | } else if (sc.chNext == '{') { |
1570 | sc.ForwardSetState(SCE_PL_OPERATOR); |
1571 | } else { |
1572 | sc.Forward(); |
1573 | } |
1574 | } else { |
1575 | sc.SetState(SCE_PL_OPERATOR); |
1576 | if (sc.chNext == '*') // exponentiation |
1577 | sc.Forward(); |
1578 | } |
1579 | } else if (sc.ch == '%') { // handle '%', hash |
1580 | if (preferRE) { |
1581 | sc.SetState(SCE_PL_HASH); |
1582 | if (setHash.Contains(sc.chNext)) { |
1583 | sc.Forward(); |
1584 | } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { |
1585 | sc.ForwardBytes(2); |
1586 | } else if (sc.chNext == '{') { |
1587 | sc.ForwardSetState(SCE_PL_OPERATOR); |
1588 | } else { |
1589 | sc.ChangeState(SCE_PL_OPERATOR); |
1590 | } |
1591 | } else { |
1592 | sc.SetState(SCE_PL_OPERATOR); |
1593 | } |
1594 | } else if (sc.ch == '<') { // handle '<', inputsymbol |
1595 | if (preferRE) { |
1596 | // forward scan |
1597 | int i = InputSymbolScan(sc); |
1598 | if (i > 0) { |
1599 | sc.SetState(SCE_PL_IDENTIFIER); |
1600 | sc.Forward(i); |
1601 | } else { |
1602 | sc.SetState(SCE_PL_OPERATOR); |
1603 | } |
1604 | } else { |
1605 | sc.SetState(SCE_PL_OPERATOR); |
1606 | } |
1607 | } else { // handle '/', regexp |
1608 | if (preferRE) { |
1609 | sc.SetState(SCE_PL_REGEX); |
1610 | Quote.New(); |
1611 | Quote.Open(sc.ch); |
1612 | } else { // / and // operators |
1613 | sc.SetState(SCE_PL_OPERATOR); |
1614 | if (sc.chNext == '/') { |
1615 | sc.Forward(); |
1616 | } |
1617 | } |
1618 | } |
1619 | } else if (sc.ch == '=' // POD |
1620 | && setPOD.Contains(sc.chNext) |
1621 | && sc.atLineStart) { |
1622 | sc.SetState(SCE_PL_POD); |
1623 | backFlag = BACK_NONE; |
1624 | } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases |
1625 | Sci_PositionU bk = sc.currentPos; |
1626 | Sci_PositionU fw = 2; |
1627 | if (setSingleCharOp.Contains(sc.chNext) && // file test operators |
1628 | !setWord.Contains(sc.GetRelative(2))) { |
1629 | sc.SetState(SCE_PL_WORD); |
1630 | } else { |
1631 | // nominally a minus and bareword; find extent of bareword |
1632 | while (setWord.Contains(sc.GetRelative(fw))) |
1633 | fw++; |
1634 | sc.SetState(SCE_PL_OPERATOR); |
1635 | } |
1636 | // force to bareword for hash key => or {variable literal} cases |
1637 | if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) { |
1638 | sc.ChangeState(SCE_PL_IDENTIFIER); |
1639 | } |
1640 | backFlag = BACK_NONE; |
1641 | } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype |
1642 | sc.Complete(); |
1643 | if (styleCheckSubPrototype(styler, sc.currentPos - 1)) { |
1644 | sc.SetState(SCE_PL_SUB_PROTOTYPE); |
1645 | backFlag = BACK_NONE; |
1646 | } else { |
1647 | sc.SetState(SCE_PL_OPERATOR); |
1648 | } |
1649 | } else if (setPerlOperator.Contains(sc.ch)) { // operators |
1650 | sc.SetState(SCE_PL_OPERATOR); |
1651 | if (sc.Match('.', '.')) { // .. and ... |
1652 | sc.Forward(); |
1653 | if (sc.chNext == '.') sc.Forward(); |
1654 | } |
1655 | } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source |
1656 | sc.SetState(SCE_PL_DATASECTION); |
1657 | } else { |
1658 | // keep colouring defaults |
1659 | sc.Complete(); |
1660 | } |
1661 | } |
1662 | } |
1663 | sc.Complete(); |
1664 | if (sc.state == SCE_PL_HERE_Q |
1665 | || sc.state == SCE_PL_HERE_QQ |
1666 | || sc.state == SCE_PL_HERE_QX |
1667 | || sc.state == SCE_PL_FORMAT) { |
1668 | styler.ChangeLexerState(sc.currentPos, styler.Length()); |
1669 | } |
1670 | sc.Complete(); |
1671 | } |
1672 | |
// Bit-field constants applied to the fold level while folding POD sections.
// Fold() clears the PERL_HEADFOLD_MASK bits from levelCurrent (and then steps
// the level down by one) when a "=cut" line is found in POD or in the data
// section.
// NOTE(review): PERL_HEADFOLD_SHIFT is presumably the bit offset of that field
// (0xF0 == 0xF << 4), used to store the "=head" level returned by
// PodHeadingLevel(); the code that uses the shift is not visible here - confirm.
1673 | #define PERL_HEADFOLD_SHIFT 4 |
1674 | #define PERL_HEADFOLD_MASK 0xF0 |
1675 | |
1676 | void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) { |
1677 | |
1678 | if (!options.fold) |
1679 | return; |
1680 | |
1681 | LexAccessor styler(pAccess); |
1682 | |
1683 | Sci_PositionU endPos = startPos + length; |
1684 | int visibleChars = 0; |
1685 | Sci_Position lineCurrent = styler.GetLine(startPos); |
1686 | |
1687 | // Backtrack to previous line in case need to fix its fold status |
1688 | if (startPos > 0) { |
1689 | if (lineCurrent > 0) { |
1690 | lineCurrent--; |
1691 | startPos = styler.LineStart(lineCurrent); |
1692 | } |
1693 | } |
1694 | |
1695 | int levelPrev = SC_FOLDLEVELBASE; |
1696 | if (lineCurrent > 0) |
1697 | levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; |
1698 | int levelCurrent = levelPrev; |
1699 | char chNext = styler[startPos]; |
1700 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
1701 | int styleNext = styler.StyleAt(startPos); |
1702 | // Used at end of line to determine if the line was a package definition |
1703 | bool isPackageLine = false; |
1704 | int podHeading = 0; |
1705 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
1706 | char ch = chNext; |
1707 | chNext = styler.SafeGetCharAt(i + 1); |
1708 | int style = styleNext; |
1709 | styleNext = styler.StyleAt(i + 1); |
1710 | int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT; |
1711 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
1712 | bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0; |
1713 | // Comment folding |
1714 | if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) { |
1715 | if (!IsCommentLine(lineCurrent - 1, styler) |
1716 | && IsCommentLine(lineCurrent + 1, styler)) |
1717 | levelCurrent++; |
1718 | else if (IsCommentLine(lineCurrent - 1, styler) |
1719 | && !IsCommentLine(lineCurrent + 1, styler)) |
1720 | levelCurrent--; |
1721 | } |
1722 | // {} [] block folding |
1723 | if (style == SCE_PL_OPERATOR) { |
1724 | if (ch == '{') { |
1725 | if (options.foldAtElse && levelCurrent < levelPrev) |
1726 | --levelPrev; |
1727 | levelCurrent++; |
1728 | } else if (ch == '}') { |
1729 | levelCurrent--; |
1730 | } |
1731 | if (ch == '[') { |
1732 | if (options.foldAtElse && levelCurrent < levelPrev) |
1733 | --levelPrev; |
1734 | levelCurrent++; |
1735 | } else if (ch == ']') { |
1736 | levelCurrent--; |
1737 | } |
1738 | } else if (style == SCE_PL_STRING_QW) { |
1739 | // qw |
1740 | if (stylePrevCh != style) |
1741 | levelCurrent++; |
1742 | else if (styleNext != style) |
1743 | levelCurrent--; |
1744 | } |
1745 | // POD folding |
1746 | if (options.foldPOD && atLineStart) { |
1747 | if (style == SCE_PL_POD) { |
1748 | if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB) |
1749 | levelCurrent++; |
1750 | else if (styler.Match(i, "=cut" )) |
1751 | levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1; |
1752 | else if (styler.Match(i, "=head" )) |
1753 | podHeading = PodHeadingLevel(i, styler); |
1754 | } else if (style == SCE_PL_DATASECTION) { |
1755 | if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) |
1756 | levelCurrent++; |
1757 | else if (styler.Match(i, "=cut" ) && levelCurrent > SC_FOLDLEVELBASE) |
1758 | levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1; |
1759 | else if (styler.Match(i, "=head" )) |
1760 | podHeading = PodHeadingLevel(i, styler); |
1761 | // if package used or unclosed brace, level > SC_FOLDLEVELBASE! |
1762 | // reset needed as level test is vs. SC_FOLDLEVELBASE |
1763 | else if (stylePrevCh != SCE_PL_DATASECTION) |
1764 | levelCurrent = SC_FOLDLEVELBASE; |
1765 | } |
1766 | } |
1767 | // package folding |
1768 | if (options.foldPackage && atLineStart) { |
1769 | if (IsPackageLine(lineCurrent, styler) |
1770 | && !IsPackageLine(lineCurrent + 1, styler)) |
1771 | isPackageLine = true; |
1772 | } |
1773 | |
1774 | //heredoc folding |
1775 | switch (style) { |
1776 | case SCE_PL_HERE_QQ : |
1777 | case SCE_PL_HERE_Q : |
1778 | case SCE_PL_HERE_QX : |
1779 | switch (stylePrevCh) { |
1780 | case SCE_PL_HERE_QQ : |
1781 | case SCE_PL_HERE_Q : |
1782 | case SCE_PL_HERE_QX : |
1783 | //do nothing; |
1784 | break; |
1785 | default : |
1786 | levelCurrent++; |
1787 | break; |
1788 | } |
1789 | break; |
1790 | default: |
1791 | switch (stylePrevCh) { |
1792 | case SCE_PL_HERE_QQ : |
1793 | case SCE_PL_HERE_Q : |
1794 | case SCE_PL_HERE_QX : |
1795 | levelCurrent--; |
1796 | break; |
1797 | default : |
1798 | //do nothing; |
1799 | break; |
1800 | } |
1801 | break; |
1802 | } |
1803 | |
1804 | //explicit folding |
1805 | if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') { |
1806 | if (chNext == '{') { |
1807 | levelCurrent++; |
1808 | } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') { |
1809 | levelCurrent--; |
1810 | } |
1811 | } |
1812 | |
1813 | if (atEOL) { |
1814 | int lev = levelPrev; |
1815 | // POD headings occupy bits 7-4, leaving some breathing room for |
1816 | // non-standard practice -- POD sections stuck in blocks, etc. |
1817 | if (podHeading > 0) { |
1818 | levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT); |
1819 | lev = levelCurrent - 1; |
1820 | lev |= SC_FOLDLEVELHEADERFLAG; |
1821 | podHeading = 0; |
1822 | } |
1823 | // Check if line was a package declaration |
1824 | // because packages need "special" treatment |
1825 | if (isPackageLine) { |
1826 | lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG; |
1827 | levelCurrent = SC_FOLDLEVELBASE + 1; |
1828 | isPackageLine = false; |
1829 | } |
1830 | lev |= levelCurrent << 16; |
1831 | if (visibleChars == 0 && options.foldCompact) |
1832 | lev |= SC_FOLDLEVELWHITEFLAG; |
1833 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) |
1834 | lev |= SC_FOLDLEVELHEADERFLAG; |
1835 | if (lev != styler.LevelAt(lineCurrent)) { |
1836 | styler.SetLevel(lineCurrent, lev); |
1837 | } |
1838 | lineCurrent++; |
1839 | levelPrev = levelCurrent; |
1840 | visibleChars = 0; |
1841 | } |
1842 | if (!isspacechar(ch)) |
1843 | visibleChars++; |
1844 | chPrev = ch; |
1845 | } |
1846 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
1847 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
1848 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
1849 | } |
1850 | |
1851 | LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl" , perlWordListDesc); |
1852 | |