1// Scintilla source code edit control
2/** @file LexBash.cxx
3 ** Lexer for Bash.
4 **/
5// Copyright 2004-2012 by Neil Hodgson <neilh@scintilla.org>
6// Adapted from LexPerl by Kein-Hong Man 2004
7// The License.txt file describes the conditions under which this software may be distributed.
8
9#include <stdlib.h>
10#include <string.h>
11#include <stdio.h>
12#include <stdarg.h>
13#include <assert.h>
14
15#include <string>
16#include <string_view>
17#include <vector>
18#include <map>
19#include <functional>
20
21#include "ILexer.h"
22#include "Scintilla.h"
23#include "SciLexer.h"
24
25#include "StringCopy.h"
26#include "WordList.h"
27#include "LexAccessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31#include "OptionSet.h"
32#include "SubStyles.h"
33#include "DefaultLexer.h"
34
35using namespace Scintilla;
36using namespace Lexilla;
37
// Size in bytes of the buffer that stores a here-document delimiter,
// including the terminating NUL.
#define HERE_DELIM_MAX 256

// Define this if you want 'invalid octals' to be marked as errors.
// Usually this is not a good idea; permissive lexing is better.
#undef PEDANTIC_OCTAL

// Special values for the number-base tracker used while lexing numbers.
// An explicit base taken from "base#digits" is at most 64 (larger values are
// reported as BASH_BASE_ERROR), so these markers sit above that range and can
// share one variable with real bases.
#define BASH_BASE_ERROR 65
#define BASH_BASE_DECIMAL 66
#define BASH_BASE_HEX 67
#ifdef PEDANTIC_OCTAL
#define BASH_BASE_OCTAL 68
#define BASH_BASE_OCTAL_ERROR 69
#endif

// state constants for parts of a bash command segment
#define BASH_CMD_BODY 0
#define BASH_CMD_START 1
#define BASH_CMD_WORD 2
#define BASH_CMD_TEST 3
#define BASH_CMD_ARITH 4
#define BASH_CMD_DELIM 5

// state constants for nested delimiter pairs, used by
// SCE_SH_STRING and SCE_SH_BACKTICKS processing
#define BASH_DELIM_LITERAL 0
#define BASH_DELIM_STRING 1
#define BASH_DELIM_CSTRING 2
#define BASH_DELIM_LSTRING 3
#define BASH_DELIM_COMMAND 4
#define BASH_DELIM_BACKTICK 5

// Maximum number of nested delimiter levels that can be saved on the quote stack.
#define BASH_DELIM_STACK_MAX 7
70
71namespace {
72
73inline int translateBashDigit(int ch) {
74 if (ch >= '0' && ch <= '9') {
75 return ch - '0';
76 } else if (ch >= 'a' && ch <= 'z') {
77 return ch - 'a' + 10;
78 } else if (ch >= 'A' && ch <= 'Z') {
79 return ch - 'A' + 36;
80 } else if (ch == '@') {
81 return 62;
82 } else if (ch == '_') {
83 return 63;
84 }
85 return BASH_BASE_ERROR;
86}
87
88inline int getBashNumberBase(char *s) {
89 int i = 0;
90 int base = 0;
91 while (*s) {
92 base = base * 10 + (*s++ - '0');
93 i++;
94 }
95 if (base > 64 || i > 2) {
96 return BASH_BASE_ERROR;
97 }
98 return base;
99}
100
// Return the closing delimiter that pairs with an opening bracket character.
// Characters that are not one of ( [ { < are their own closing delimiter
// (e.g. quotes) and are returned unchanged.
int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
108
109int GlobScan(StyleContext &sc) {
110 // forward scan for zsh globs, disambiguate versus bash arrays
111 // complex expressions may still fail, e.g. unbalanced () '' "" etc
112 int c, sLen = 0;
113 int pCount = 0;
114 int hash = 0;
115 while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
116 if (IsASpace(c)) {
117 return 0;
118 } else if (c == '\'' || c == '\"') {
119 if (hash != 2) return 0;
120 } else if (c == '#' && hash == 0) {
121 hash = (sLen == 1) ? 2:1;
122 } else if (c == '(') {
123 pCount++;
124 } else if (c == ')') {
125 if (pCount == 0) {
126 if (hash) return sLen;
127 return 0;
128 }
129 pCount--;
130 }
131 }
132 return 0;
133}
134
135bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
136 Sci_Position pos = styler.LineStart(line);
137 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
138 for (Sci_Position i = pos; i < eol_pos; i++) {
139 char ch = styler[i];
140 if (ch == '#')
141 return true;
142 else if (ch != ' ' && ch != '\t')
143 return false;
144 }
145 return false;
146}
147
// Folding options of the Bash lexer, bound to the "fold", "fold.comment"
// and "fold.compact" properties.
struct OptionsBash {
	bool fold;          // master switch for folding
	bool foldComment;   // fold runs of consecutive comment lines
	bool foldCompact;   // flag blank lines as white space when folding

	// Defaults: folding off, comment folding off, compact folding on.
	OptionsBash() : fold(false), foldComment(false), foldCompact(true) {
	}
};
159
// Descriptions of the word lists accepted by the lexer; the array is
// terminated by a null pointer.
const char * const bashWordListDesc[] = {
	"Keywords",
	nullptr
};
164
165struct OptionSetBash : public OptionSet<OptionsBash> {
166 OptionSetBash() {
167 DefineProperty("fold", &OptionsBash::fold);
168
169 DefineProperty("fold.comment", &OptionsBash::foldComment);
170
171 DefineProperty("fold.compact", &OptionsBash::foldCompact);
172
173 DefineWordListSets(bashWordListDesc);
174 }
175};
176
177const char styleSubable[] = { SCE_SH_IDENTIFIER, SCE_SH_SCALAR, 0 };
178
179LexicalClass lexicalClasses[] = {
180 // Lexer Bash SCLEX_BASH SCE_SH_:
181 0, "SCE_SH_DEFAULT", "default", "White space",
182 1, "SCE_SH_ERROR", "error", "Error",
183 2, "SCE_SH_COMMENTLINE", "comment line", "Line comment: #",
184 3, "SCE_SH_NUMBER", "literal numeric", "Number",
185 4, "SCE_SH_WORD", "keyword", "Keyword",
186 5, "SCE_SH_STRING", "literal string", "String",
187 6, "SCE_SH_CHARACTER", "literal string", "Single quoted string",
188 7, "SCE_SH_OPERATOR", "operator", "Operators",
189 8, "SCE_SH_IDENTIFIER", "identifier", "Identifiers",
190 9, "SCE_SH_SCALAR", "identifier", "Scalar variable",
191 10, "SCE_SH_PARAM", "identifier", "Parameter",
192 11, "SCE_SH_BACKTICKS", "literal string", "Backtick quoted command",
193 12, "SCE_SH_HERE_DELIM", "operator", "Heredoc delimiter",
194 13, "SCE_SH_HERE_Q", "literal string", "Heredoc quoted string",
195};
196
197}
198
199class LexerBash : public DefaultLexer {
200 WordList keywords;
201 OptionsBash options;
202 OptionSetBash osBash;
203 enum { ssIdentifier, ssScalar };
204 SubStyles subStyles;
205public:
206 LexerBash() :
207 DefaultLexer("bash", SCLEX_BASH, lexicalClasses, ELEMENTS(lexicalClasses)),
208 subStyles(styleSubable, 0x80, 0x40, 0) {
209 }
210 virtual ~LexerBash() {
211 }
212 void SCI_METHOD Release() override {
213 delete this;
214 }
215 int SCI_METHOD Version() const override {
216 return lvRelease5;
217 }
218 const char * SCI_METHOD PropertyNames() override {
219 return osBash.PropertyNames();
220 }
221 int SCI_METHOD PropertyType(const char* name) override {
222 return osBash.PropertyType(name);
223 }
224 const char * SCI_METHOD DescribeProperty(const char *name) override {
225 return osBash.DescribeProperty(name);
226 }
227 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
228 const char * SCI_METHOD PropertyGet(const char* key) override {
229 return osBash.PropertyGet(key);
230 }
231 const char * SCI_METHOD DescribeWordListSets() override {
232 return osBash.DescribeWordListSets();
233 }
234 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
235 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
236 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
237
238 void * SCI_METHOD PrivateCall(int, void *) override {
239 return 0;
240 }
241
242 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
243 return subStyles.Allocate(styleBase, numberStyles);
244 }
245 int SCI_METHOD SubStylesStart(int styleBase) override {
246 return subStyles.Start(styleBase);
247 }
248 int SCI_METHOD SubStylesLength(int styleBase) override {
249 return subStyles.Length(styleBase);
250 }
251 int SCI_METHOD StyleFromSubStyle(int subStyle) override {
252 const int styleBase = subStyles.BaseStyle(subStyle);
253 return styleBase;
254 }
255 int SCI_METHOD PrimaryStyleFromStyle(int style) override {
256 return style;
257 }
258 void SCI_METHOD FreeSubStyles() override {
259 subStyles.Free();
260 }
261 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
262 subStyles.SetIdentifiers(style, identifiers);
263 }
264 int SCI_METHOD DistanceToSecondaryStyles() override {
265 return 0;
266 }
267 const char *SCI_METHOD GetSubStyleBases() override {
268 return styleSubable;
269 }
270
271 static ILexer5 *LexerFactoryBash() {
272 return new LexerBash();
273 }
274};
275
276Sci_Position SCI_METHOD LexerBash::PropertySet(const char *key, const char *val) {
277 if (osBash.PropertySet(&options, key, val)) {
278 return 0;
279 }
280 return -1;
281}
282
283Sci_Position SCI_METHOD LexerBash::WordListSet(int n, const char *wl) {
284 WordList *wordListN = 0;
285 switch (n) {
286 case 0:
287 wordListN = &keywords;
288 break;
289 }
290 Sci_Position firstModification = -1;
291 if (wordListN) {
292 WordList wlNew;
293 wlNew.Set(wl);
294 if (*wordListN != wlNew) {
295 wordListN->Set(wl);
296 firstModification = 0;
297 }
298 }
299 return firstModification;
300}
301
// Assign styles to the Bash source in the requested range.
//
// startPos/length give the range to style, but styling always restarts at the
// beginning of an earlier line whose stored line state is BASH_CMD_START (or
// at line 0), so the incoming initStyle is ignored and replaced by
// SCE_SH_DEFAULT. While lexing, a per-line state is stored with
// SetLineState so that later calls can find such restart points.
// pAccess is the document being styled.
void SCI_METHOD LexerBash::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	// Fixed word lists: command delimiters, construct keywords, and construct
	// keywords that are followed later by 'in' or 'do'.
	WordList cmdDelimiter, bashStruct, bashStruct_in;
	cmdDelimiter.Set("| || |& & && ; ;; ( ) { }");
	bashStruct.Set("if elif fi while until else then do done esac eval");
	bashStruct_in.Set("for case select");

	CharacterSet setWordStart(CharacterSet::setAlpha, "_");
	// note that [+-] are often parts of identifiers in shell scripts
	CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
	CharacterSet setMetaCharacter(CharacterSet::setNone, "|&;()<> \t\r\n");
	setMetaCharacter.Add(0);
	CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@");
	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
	CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
	CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!%*,./:?@[]^`{}~");
	CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!%*,./:=?@[]^`{}~");
	CharacterSet setLeftShift(CharacterSet::setDigits, "$");

	class HereDocCls { // Class to manage HERE document elements
	public:
		int State; // 0: '<<' encountered
		// 1: collect the delimiter
		// 2: here doc text (lines after the delimiter)
		int Quote; // the char after '<<'
		bool Quoted; // true if the delimiter is quoted with ' or "
		bool Indent; // indented delimiter (for <<-)
		int DelimiterLength; // strlen(Delimiter)
		char Delimiter[HERE_DELIM_MAX]; // the Delimiter
		HereDocCls() {
			State = 0;
			Quote = 0;
			Quoted = false;
			Indent = 0;
			DelimiterLength = 0;
			Delimiter[0] = '\0';
		}
		// Append one character and keep the buffer NUL-terminated.
		// There is no bounds check here; the caller limits DelimiterLength
		// after each append.
		void Append(int ch) {
			Delimiter[DelimiterLength++] = static_cast<char>(ch);
			Delimiter[DelimiterLength] = '\0';
		}
		~HereDocCls() {
		}
	};
	HereDocCls HereDoc;

	class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
	public:
		int Count; // nesting count of the current delimiter pair
		int Up, Down; // opening and closing delimiter characters
		QuoteCls() {
			Count = 0;
			Up = '\0';
			Down = '\0';
		}
		// Enter one more level of the delimiter pair opened by u.
		void Open(int u) {
			Count++;
			Up = u;
			Down = opposite(Up);
		}
		// Begin a fresh delimiter pair opened by u (Count becomes 1).
		void Start(int u) {
			Count = 0;
			Open(u);
		}
	};
	QuoteCls Quote;

	class QuoteStackCls { // Class to manage quote pairs that nest
	public:
		int Count; // nesting count of the current delimiter pair
		int Up, Down; // opening and closing delimiter characters
		int Style; // one of the BASH_DELIM_* constants
		int Depth; // levels pushed
		int CountStack[BASH_DELIM_STACK_MAX];
		int UpStack [BASH_DELIM_STACK_MAX];
		int StyleStack[BASH_DELIM_STACK_MAX];
		QuoteStackCls() {
			Count = 0;
			Up = '\0';
			Down = '\0';
			Style = 0;
			Depth = 0;
		}
		// Begin a new outermost delimiter pair; Depth is left unchanged.
		void Start(int u, int s) {
			Count = 1;
			Up = u;
			Down = opposite(Up);
			Style = s;
		}
		// Save the current pair and switch to a nested one.
		// Silently ignored when the stack is already full.
		void Push(int u, int s) {
			if (Depth >= BASH_DELIM_STACK_MAX)
				return;
			CountStack[Depth] = Count;
			UpStack [Depth] = Up;
			StyleStack[Depth] = Style;
			Depth++;
			Count = 1;
			Up = u;
			Down = opposite(Up);
			Style = s;
		}
		// Restore the enclosing pair; does nothing when the stack is empty.
		void Pop(void) {
			if (Depth <= 0)
				return;
			Depth--;
			Count = CountStack[Depth];
			Up = UpStack [Depth];
			Style = StyleStack[Depth];
			Down = opposite(Up);
		}
		~QuoteStackCls() {
		}
	};
	QuoteStackCls QuoteStack;

	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_SH_IDENTIFIER);
	const WordClassifier &classifierScalars = subStyles.Classifier(SCE_SH_SCALAR);

	int numBase = 0; // a real base from "base#" or one of the BASH_BASE_* markers
	int digit;
	Sci_PositionU endPos = startPos + length;
	int cmdState = BASH_CMD_START;
	int testExprType = 0; // 0: 'test' keyword, 1: [[ expression, 2: [ expression
	LexAccessor styler(pAccess);

	// Always backtracks to the start of a line that is not a continuation
	// of the previous line (i.e. start of a bash command segment)
	Sci_Position ln = styler.GetLine(startPos);
	if (ln > 0 && startPos == static_cast<Sci_PositionU>(styler.LineStart(ln)))
		ln--;
	for (;;) {
		startPos = styler.LineStart(ln);
		if (ln == 0 || styler.GetLineState(ln) == BASH_CMD_START)
			break;
		ln--;
	}
	initStyle = SCE_SH_DEFAULT;

	StyleContext sc(startPos, endPos - startPos, initStyle, styler);

	for (; sc.More(); sc.Forward()) {

		// handle line continuation, updates per-line stored state
		if (sc.atLineStart) {
			ln = styler.GetLine(sc.currentPos);
			if (sc.state == SCE_SH_STRING
				|| sc.state == SCE_SH_BACKTICKS
				|| sc.state == SCE_SH_CHARACTER
				|| sc.state == SCE_SH_HERE_Q
				|| sc.state == SCE_SH_COMMENTLINE
				|| sc.state == SCE_SH_PARAM) {
				// force backtrack while retaining cmdState
				styler.SetLineState(ln, BASH_CMD_BODY);
			} else {
				if (ln > 0) {
					if ((sc.GetRelative(-3) == '\\' && sc.GetRelative(-2) == '\r' && sc.chPrev == '\n')
						|| sc.GetRelative(-2) == '\\') { // handle '\' line continuation
						// retain last line's state
					} else
						cmdState = BASH_CMD_START;
				}
				styler.SetLineState(ln, cmdState);
			}
		}

		// controls change of cmdState at the end of a non-whitespace element
		// states BODY|TEST|ARITH persist until the end of a command segment
		// state WORD persist, but ends with 'in' or 'do' construct keywords
		int cmdStateNew = BASH_CMD_BODY;
		if (cmdState == BASH_CMD_TEST || cmdState == BASH_CMD_ARITH || cmdState == BASH_CMD_WORD)
			cmdStateNew = cmdState;
		int stylePrev = sc.state;

		// Determine if the current state should terminate.
		switch (sc.state) {
			case SCE_SH_OPERATOR:
				sc.SetState(SCE_SH_DEFAULT);
				if (cmdState == BASH_CMD_DELIM) // if command delimiter, start new command
					cmdStateNew = BASH_CMD_START;
				else if (sc.chPrev == '\\') // propagate command state if line continued
					cmdStateNew = cmdState;
				break;
			case SCE_SH_WORD:
				// "." never used in Bash variable names but used in file names
				if (!setWord.Contains(sc.ch)) {
					char s[500];
					char s2[10];
					sc.GetCurrent(s, sizeof(s));
					// style to fall back to when the word turns out not to be a keyword
					int identifierStyle = SCE_SH_IDENTIFIER;
					int subStyle = classifierIdentifiers.ValueFor(s);
					if (subStyle >= 0) {
						identifierStyle = subStyle;
					}
					// allow keywords ending in a whitespace or command delimiter
					s2[0] = static_cast<char>(sc.ch);
					s2[1] = '\0';
					bool keywordEnds = IsASpace(sc.ch) || cmdDelimiter.InList(s2);
					// 'in' or 'do' may be construct keywords
					if (cmdState == BASH_CMD_WORD) {
						if (strcmp(s, "in") == 0 && keywordEnds)
							cmdStateNew = BASH_CMD_BODY;
						else if (strcmp(s, "do") == 0 && keywordEnds)
							cmdStateNew = BASH_CMD_START;
						else
							sc.ChangeState(identifierStyle);
						sc.SetState(SCE_SH_DEFAULT);
						break;
					}
					// a 'test' keyword starts a test expression
					if (strcmp(s, "test") == 0) {
						if (cmdState == BASH_CMD_START && keywordEnds) {
							cmdStateNew = BASH_CMD_TEST;
							testExprType = 0;
						} else
							sc.ChangeState(identifierStyle);
					}
					// detect bash construct keywords
					else if (bashStruct.InList(s)) {
						if (cmdState == BASH_CMD_START && keywordEnds)
							cmdStateNew = BASH_CMD_START;
						else
							sc.ChangeState(identifierStyle);
					}
					// 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
					else if (bashStruct_in.InList(s)) {
						if (cmdState == BASH_CMD_START && keywordEnds)
							cmdStateNew = BASH_CMD_WORD;
						else
							sc.ChangeState(identifierStyle);
					}
					// disambiguate option items and file test operators
					else if (s[0] == '-') {
						if (cmdState != BASH_CMD_TEST)
							sc.ChangeState(identifierStyle);
					}
					// disambiguate keywords and identifiers
					else if (cmdState != BASH_CMD_START
						|| !(keywords.InList(s) && keywordEnds)) {
						sc.ChangeState(identifierStyle);
					}
					sc.SetState(SCE_SH_DEFAULT);
				}
				break;
			case SCE_SH_IDENTIFIER:
				if (sc.chPrev == '\\' || !setWord.Contains(sc.ch) ||
					(cmdState == BASH_CMD_ARITH && !setWordStart.Contains(sc.ch))) {
					char s[500];
					sc.GetCurrent(s, sizeof(s));
					int subStyle = classifierIdentifiers.ValueFor(s);
					if (subStyle >= 0) {
						sc.ChangeState(subStyle);
					}
					if (sc.chPrev == '\\') { // for escaped chars
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else {
						sc.SetState(SCE_SH_DEFAULT);
					}
				}
				break;
			case SCE_SH_NUMBER:
				// Each 'break' below means the current character still belongs to the number.
				digit = translateBashDigit(sc.ch);
				if (numBase == BASH_BASE_DECIMAL) {
					if (sc.ch == '#') {
						// the digits seen so far are the base of a base#number literal
						char s[10];
						sc.GetCurrent(s, sizeof(s));
						numBase = getBashNumberBase(s);
						if (numBase != BASH_BASE_ERROR)
							break;
					} else if (IsADigit(sc.ch))
						break;
				} else if (numBase == BASH_BASE_HEX) {
					if (IsADigit(sc.ch, 16))
						break;
#ifdef PEDANTIC_OCTAL
				} else if (numBase == BASH_BASE_OCTAL ||
					numBase == BASH_BASE_OCTAL_ERROR) {
					if (digit <= 7)
						break;
					if (digit <= 9) {
						numBase = BASH_BASE_OCTAL_ERROR;
						break;
					}
#endif
				} else if (numBase == BASH_BASE_ERROR) {
					if (digit <= 9)
						break;
				} else { // DD#DDDD number style handling
					if (digit != BASH_BASE_ERROR) {
						if (numBase <= 36) {
							// case-insensitive if base<=36
							if (digit >= 36) digit -= 26;
						}
						if (digit < numBase)
							break;
						if (digit <= 9) {
							numBase = BASH_BASE_ERROR;
							break;
						}
					}
				}
				// fallthrough when number is at an end or error
				if (numBase == BASH_BASE_ERROR
#ifdef PEDANTIC_OCTAL
					|| numBase == BASH_BASE_OCTAL_ERROR
#endif
				) {
					sc.ChangeState(SCE_SH_ERROR);
				}
				sc.SetState(SCE_SH_DEFAULT);
				break;
			case SCE_SH_COMMENTLINE:
				// a comment ends at the line end unless the previous character is a backslash
				if (sc.atLineEnd && sc.chPrev != '\\') {
					sc.SetState(SCE_SH_DEFAULT);
				}
				break;
			case SCE_SH_HERE_DELIM:
				// From Bash info:
				// ---------------
				// Specifier format is: <<[-]WORD
				// Optional '-' is for removal of leading tabs from here-doc.
				// Whitespace acceptable after <<[-] operator
				//
				if (HereDoc.State == 0) { // '<<' encountered
					HereDoc.Quote = sc.chNext;
					HereDoc.Quoted = false;
					HereDoc.DelimiterLength = 0;
					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
					if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
						sc.Forward();
						HereDoc.Quoted = true;
						HereDoc.State = 1;
					} else if (setHereDoc.Contains(sc.chNext) ||
						(sc.chNext == '=' && cmdState != BASH_CMD_ARITH)) {
						// an unquoted here-doc delimiter, no special handling
						HereDoc.State = 1;
					} else if (sc.chNext == '<') { // HERE string <<<
						sc.Forward();
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else if (IsASpace(sc.chNext)) {
						// eat whitespace
					} else if (setLeftShift.Contains(sc.chNext) ||
						(sc.chNext == '=' && cmdState == BASH_CMD_ARITH)) {
						// left shift <<$var or <<= cases
						sc.ChangeState(SCE_SH_OPERATOR);
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else {
						// symbols terminates; deprecated zero-length delimiter
						HereDoc.State = 1;
					}
				} else if (HereDoc.State == 1) { // collect the delimiter
					// * if single quoted, there's no escape
					// * if double quoted, there are \\ and \" escapes
					if ((HereDoc.Quote == '\'' && sc.ch != HereDoc.Quote) ||
						(HereDoc.Quoted && sc.ch != HereDoc.Quote && sc.ch != '\\') ||
						(HereDoc.Quote != '\'' && sc.chPrev == '\\') ||
						(setHereDoc2.Contains(sc.ch))) {
						HereDoc.Append(sc.ch);
					} else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else if (sc.ch == '\\') {
						if (HereDoc.Quoted && sc.chNext != HereDoc.Quote && sc.chNext != '\\') {
							// in quoted prefixes only \ and the quote eat the escape
							HereDoc.Append(sc.ch);
						} else {
							// skip escape prefix
						}
					} else if (!HereDoc.Quoted) {
						sc.SetState(SCE_SH_DEFAULT);
					}
					// Delimiter buffer is full: give up on this here-doc and mark an error.
					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
						sc.SetState(SCE_SH_ERROR);
						HereDoc.State = 0;
					}
				}
				break;
			case SCE_SH_HERE_Q:
				// HereDoc.State == 2
				// Each here-doc line is consumed whole and compared with the delimiter.
				if (sc.atLineStart) {
					sc.SetState(SCE_SH_HERE_Q);
					int prefixws = 0;
					while (sc.ch == '\t' && !sc.atLineEnd) { // tabulation prefix
						sc.Forward();
						prefixws++;
					}
					// restart the segment after the tabs so they are excluded from the comparison
					if (prefixws > 0)
						sc.SetState(SCE_SH_HERE_Q);
					while (!sc.atLineEnd) {
						sc.Forward();
					}
					char s[HERE_DELIM_MAX];
					sc.GetCurrent(s, sizeof(s));
					if (sc.LengthCurrent() == 0) { // '' or "" delimiters
						if ((prefixws == 0 || HereDoc.Indent) &&
							HereDoc.Quoted && HereDoc.DelimiterLength == 0)
							sc.SetState(SCE_SH_DEFAULT);
						break;
					}
					// drop a trailing carriage return before comparing
					if (s[strlen(s) - 1] == '\r')
						s[strlen(s) - 1] = '\0';
					if (strcmp(HereDoc.Delimiter, s) == 0) {
						if ((prefixws == 0) || // indentation rule
							(prefixws > 0 && HereDoc.Indent)) {
							sc.SetState(SCE_SH_DEFAULT);
							break;
						}
					}
				}
				break;
			case SCE_SH_SCALAR: // variable names
				if (!setParam.Contains(sc.ch)) {
					char s[500];
					sc.GetCurrent(s, sizeof(s));
					int subStyle = classifierScalars.ValueFor(&s[1]); // skip the $
					if (subStyle >= 0) {
						sc.ChangeState(subStyle);
					}
					if (sc.LengthCurrent() == 1) {
						// Special variable: $(, $_ etc.
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else {
						sc.SetState(SCE_SH_DEFAULT);
					}
				}
				break;
			case SCE_SH_STRING: // delimited styles, can nest
			case SCE_SH_BACKTICKS:
				if (sc.ch == '\\' && QuoteStack.Up != '\\') {
					// skip the escaped character, except inside literal '...' text
					if (QuoteStack.Style != BASH_DELIM_LITERAL)
						sc.Forward();
				} else if (sc.ch == QuoteStack.Down) {
					QuoteStack.Count--;
					if (QuoteStack.Count == 0) {
						if (QuoteStack.Depth > 0) {
							QuoteStack.Pop();
						} else
							sc.ForwardSetState(SCE_SH_DEFAULT);
					}
				} else if (sc.ch == QuoteStack.Up) {
					QuoteStack.Count++;
				} else {
					if (QuoteStack.Style == BASH_DELIM_STRING ||
						QuoteStack.Style == BASH_DELIM_LSTRING
					) { // do nesting for "string", $"locale-string"
						if (sc.ch == '`') {
							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
						} else if (sc.ch == '$' && sc.chNext == '(') {
							sc.Forward();
							QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
						}
					} else if (QuoteStack.Style == BASH_DELIM_COMMAND ||
						QuoteStack.Style == BASH_DELIM_BACKTICK
					) { // do nesting for $(command), `command`
						if (sc.ch == '\'') {
							QuoteStack.Push(sc.ch, BASH_DELIM_LITERAL);
						} else if (sc.ch == '\"') {
							QuoteStack.Push(sc.ch, BASH_DELIM_STRING);
						} else if (sc.ch == '`') {
							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
						} else if (sc.ch == '$') {
							if (sc.chNext == '\'') {
								sc.Forward();
								QuoteStack.Push(sc.ch, BASH_DELIM_CSTRING);
							} else if (sc.chNext == '\"') {
								sc.Forward();
								QuoteStack.Push(sc.ch, BASH_DELIM_LSTRING);
							} else if (sc.chNext == '(') {
								sc.Forward();
								QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
							}
						}
					}
				}
				break;
			case SCE_SH_PARAM: // ${parameter}
				if (sc.ch == '\\' && Quote.Up != '\\') {
					sc.Forward();
				} else if (sc.ch == Quote.Down) {
					Quote.Count--;
					if (Quote.Count == 0) {
						sc.ForwardSetState(SCE_SH_DEFAULT);
					}
				} else if (sc.ch == Quote.Up) {
					Quote.Count++;
				}
				break;
			case SCE_SH_CHARACTER: // singly-quoted strings
				if (sc.ch == Quote.Down) {
					Quote.Count--;
					if (Quote.Count == 0) {
						sc.ForwardSetState(SCE_SH_DEFAULT);
					}
				}
				break;
		}

		// Must check end of HereDoc state 1 before default state is handled
		if (HereDoc.State == 1 && sc.atLineEnd) {
			// Begin of here-doc (the line after the here-doc delimiter):
			// Lexically, the here-doc starts from the next line after the <<, but the
			// first line of here-doc seem to follow the style of the last EOL sequence
			HereDoc.State = 2;
			if (HereDoc.Quoted) {
				if (sc.state == SCE_SH_HERE_DELIM) {
					// Missing quote at end of string! Syntax error in bash 4.3
					// Mark this bit as an error, do not colour any here-doc
					sc.ChangeState(SCE_SH_ERROR);
					sc.SetState(SCE_SH_DEFAULT);
				} else {
					// the quoted delimiter (' or ") was closed properly
					sc.SetState(SCE_SH_HERE_Q);
				}
			} else if (HereDoc.DelimiterLength == 0) {
				// no delimiter, illegal (but '' and "" are legal)
				sc.ChangeState(SCE_SH_ERROR);
				sc.SetState(SCE_SH_DEFAULT);
			} else {
				sc.SetState(SCE_SH_HERE_Q);
			}
		}

		// update cmdState about the current command segment
		if (stylePrev != SCE_SH_DEFAULT && sc.state == SCE_SH_DEFAULT) {
			cmdState = cmdStateNew;
		}
		// Determine if a new state should be entered.
		if (sc.state == SCE_SH_DEFAULT) {
			if (sc.ch == '\\') {
				// Bash can escape any non-newline as a literal
				sc.SetState(SCE_SH_IDENTIFIER);
				if (sc.chNext == '\r' || sc.chNext == '\n')
					sc.SetState(SCE_SH_OPERATOR);
			} else if (IsADigit(sc.ch)) {
				sc.SetState(SCE_SH_NUMBER);
				numBase = BASH_BASE_DECIMAL;
				if (sc.ch == '0') { // hex,octal
					if (sc.chNext == 'x' || sc.chNext == 'X') {
						numBase = BASH_BASE_HEX;
						sc.Forward();
					} else if (IsADigit(sc.chNext)) {
#ifdef PEDANTIC_OCTAL
						numBase = BASH_BASE_OCTAL;
#else
						// permissive: a leading-zero number accepts the same digits as hex
						numBase = BASH_BASE_HEX;
#endif
					}
				}
			} else if (setWordStart.Contains(sc.ch)) {
				sc.SetState(SCE_SH_WORD);
			} else if (sc.ch == '#') {
				// '#' starts a comment only at the start of a word
				if (stylePrev != SCE_SH_WORD && stylePrev != SCE_SH_IDENTIFIER &&
					(sc.currentPos == 0 || setMetaCharacter.Contains(sc.chPrev))) {
					sc.SetState(SCE_SH_COMMENTLINE);
				} else {
					sc.SetState(SCE_SH_WORD);
				}
				// handle some zsh features within arithmetic expressions only
				if (cmdState == BASH_CMD_ARITH) {
					if (sc.chPrev == '[') { // [#8] [##8] output digit setting
						sc.SetState(SCE_SH_WORD);
						if (sc.chNext == '#') {
							sc.Forward();
						}
					} else if (sc.Match("##^") && IsUpperCase(sc.GetRelative(3))) { // ##^A
						sc.SetState(SCE_SH_IDENTIFIER);
						sc.Forward(3);
					} else if (sc.chNext == '#' && !IsASpace(sc.GetRelative(2))) { // ##a
						sc.SetState(SCE_SH_IDENTIFIER);
						sc.Forward(2);
					} else if (setWordStart.Contains(sc.chNext)) { // #name
						sc.SetState(SCE_SH_IDENTIFIER);
					}
				}
			} else if (sc.ch == '\"') {
				sc.SetState(SCE_SH_STRING);
				QuoteStack.Start(sc.ch, BASH_DELIM_STRING);
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_SH_CHARACTER);
				Quote.Start(sc.ch);
			} else if (sc.ch == '`') {
				sc.SetState(SCE_SH_BACKTICKS);
				QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
			} else if (sc.ch == '$') {
				if (sc.Match("$((")) {
					sc.SetState(SCE_SH_OPERATOR); // handle '((' later
					continue;
				}
				sc.SetState(SCE_SH_SCALAR);
				sc.Forward();
				if (sc.ch == '{') {
					sc.ChangeState(SCE_SH_PARAM);
					Quote.Start(sc.ch);
				} else if (sc.ch == '\'') {
					sc.ChangeState(SCE_SH_STRING);
					QuoteStack.Start(sc.ch, BASH_DELIM_CSTRING);
				} else if (sc.ch == '"') {
					sc.ChangeState(SCE_SH_STRING);
					QuoteStack.Start(sc.ch, BASH_DELIM_LSTRING);
				} else if (sc.ch == '(') {
					sc.ChangeState(SCE_SH_BACKTICKS);
					QuoteStack.Start(sc.ch, BASH_DELIM_COMMAND);
				} else if (sc.ch == '`') { // $` seen in a configure script, valid?
					sc.ChangeState(SCE_SH_BACKTICKS);
					QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
				} else {
					continue; // scalar has no delimiter pair
				}
			} else if (sc.Match('<', '<')) {
				sc.SetState(SCE_SH_HERE_DELIM);
				HereDoc.State = 0;
				if (sc.GetRelative(2) == '-') { // <<- indent case
					HereDoc.Indent = true;
					sc.Forward();
				} else {
					HereDoc.Indent = false;
				}
			} else if (sc.ch == '-' && // one-char file test operators
				setSingleCharOp.Contains(sc.chNext) &&
				!setWord.Contains(sc.GetRelative(2)) &&
				IsASpace(sc.chPrev)) {
				sc.SetState(SCE_SH_WORD);
				sc.Forward();
			} else if (setBashOperator.Contains(sc.ch)) {
				char s[10];
				bool isCmdDelim = false;
				sc.SetState(SCE_SH_OPERATOR);
				// globs have no whitespace, do not appear in arithmetic expressions
				if (cmdState != BASH_CMD_ARITH && sc.ch == '(' && sc.chNext != '(') {
					int i = GlobScan(sc);
					if (i > 1) {
						sc.SetState(SCE_SH_IDENTIFIER);
						sc.Forward(i);
						continue;
					}
				}
				// handle opening delimiters for test/arithmetic expressions - ((,[[,[
				if (cmdState == BASH_CMD_START
					|| cmdState == BASH_CMD_BODY) {
					if (sc.Match('(', '(')) {
						cmdState = BASH_CMD_ARITH;
						sc.Forward();
					} else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) {
						cmdState = BASH_CMD_TEST;
						testExprType = 1;
						sc.Forward();
					} else if (sc.ch == '[' && IsASpace(sc.chNext)) {
						cmdState = BASH_CMD_TEST;
						testExprType = 2;
					}
				}
				// special state -- for ((x;y;z)) in ... looping
				if (cmdState == BASH_CMD_WORD && sc.Match('(', '(')) {
					cmdState = BASH_CMD_ARITH;
					sc.Forward();
					continue;
				}
				// handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
				if (cmdState == BASH_CMD_START
					|| cmdState == BASH_CMD_BODY
					|| cmdState == BASH_CMD_WORD
					|| (cmdState == BASH_CMD_TEST && testExprType == 0)) {
					s[0] = static_cast<char>(sc.ch);
					// try a two-character delimiter first, then fall back to one character
					if (setBashOperator.Contains(sc.chNext)) {
						s[1] = static_cast<char>(sc.chNext);
						s[2] = '\0';
						isCmdDelim = cmdDelimiter.InList(s);
						if (isCmdDelim)
							sc.Forward();
					}
					if (!isCmdDelim) {
						s[1] = '\0';
						isCmdDelim = cmdDelimiter.InList(s);
					}
					if (isCmdDelim) {
						cmdState = BASH_CMD_DELIM;
						continue;
					}
				}
				// handle closing delimiters for test/arithmetic expressions - )),]],]
				if (cmdState == BASH_CMD_ARITH && sc.Match(')', ')')) {
					cmdState = BASH_CMD_BODY;
					sc.Forward();
				} else if (cmdState == BASH_CMD_TEST && IsASpace(sc.chPrev)) {
					if (sc.Match(']', ']') && testExprType == 1) {
						sc.Forward();
						cmdState = BASH_CMD_BODY;
					} else if (sc.ch == ']' && testExprType == 2) {
						cmdState = BASH_CMD_BODY;
					}
				}
			}
		}// sc.state
	}
	sc.Complete();
	// The range ended inside a here-doc: report a lexer state change from here
	// to the end of the document.
	if (sc.state == SCE_SH_HERE_Q) {
		styler.ChangeLexerState(sc.currentPos, styler.Length());
	}
	sc.Complete();
}
999
1000void SCI_METHOD LexerBash::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) {
1001 if(!options.fold)
1002 return;
1003
1004 LexAccessor styler(pAccess);
1005
1006 Sci_PositionU endPos = startPos + length;
1007 int visibleChars = 0;
1008 int skipHereCh = 0;
1009 Sci_Position lineCurrent = styler.GetLine(startPos);
1010 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1011 int levelCurrent = levelPrev;
1012 char chNext = styler[startPos];
1013 int styleNext = styler.StyleAt(startPos);
1014 char word[8] = { '\0' }; // we're not interested in long words anyway
1015 unsigned int wordlen = 0;
1016 for (Sci_PositionU i = startPos; i < endPos; i++) {
1017 char ch = chNext;
1018 chNext = styler.SafeGetCharAt(i + 1);
1019 int style = styleNext;
1020 styleNext = styler.StyleAt(i + 1);
1021 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1022 // Comment folding
1023 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1024 {
1025 if (!IsCommentLine(lineCurrent - 1, styler)
1026 && IsCommentLine(lineCurrent + 1, styler))
1027 levelCurrent++;
1028 else if (IsCommentLine(lineCurrent - 1, styler)
1029 && !IsCommentLine(lineCurrent + 1, styler))
1030 levelCurrent--;
1031 }
1032 if (style == SCE_SH_WORD) {
1033 if ((wordlen + 1) < sizeof(word))
1034 word[wordlen++] = ch;
1035 if (styleNext != style) {
1036 word[wordlen] = '\0';
1037 wordlen = 0;
1038 if (strcmp(word, "if") == 0 || strcmp(word, "case") == 0 || strcmp(word, "do") == 0) {
1039 levelCurrent++;
1040 } else if (strcmp(word, "fi") == 0 || strcmp(word, "esac") == 0 || strcmp(word, "done") == 0) {
1041 levelCurrent--;
1042 }
1043 }
1044 }
1045 if (style == SCE_SH_OPERATOR) {
1046 if (ch == '{') {
1047 levelCurrent++;
1048 } else if (ch == '}') {
1049 levelCurrent--;
1050 }
1051 }
1052 // Here Document folding
1053 if (style == SCE_SH_HERE_DELIM) {
1054 if (ch == '<' && chNext == '<') {
1055 if (styler.SafeGetCharAt(i + 2) == '<') {
1056 skipHereCh = 1;
1057 } else {
1058 if (skipHereCh == 0) {
1059 levelCurrent++;
1060 } else {
1061 skipHereCh = 0;
1062 }
1063 }
1064 }
1065 } else if (style == SCE_SH_HERE_Q && styler.StyleAt(i+1) == SCE_SH_DEFAULT) {
1066 levelCurrent--;
1067 }
1068 if (atEOL) {
1069 int lev = levelPrev;
1070 if (visibleChars == 0 && options.foldCompact)
1071 lev |= SC_FOLDLEVELWHITEFLAG;
1072 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1073 lev |= SC_FOLDLEVELHEADERFLAG;
1074 if (lev != styler.LevelAt(lineCurrent)) {
1075 styler.SetLevel(lineCurrent, lev);
1076 }
1077 lineCurrent++;
1078 levelPrev = levelCurrent;
1079 visibleChars = 0;
1080 }
1081 if (!isspacechar(ch))
1082 visibleChars++;
1083 }
1084 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1085 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1086 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1087}
1088
1089LexerModule lmBash(SCLEX_BASH, LexerBash::LexerFactoryBash, "bash", bashWordListDesc);
1090