// Scintilla source code edit control
2 | /** @file LexTCMD.cxx |
3 | ** Lexer for Take Command / TCC batch scripts (.bat, .btm, .cmd). |
4 | **/ |
5 | // Written by Rex Conn (rconn [at] jpsoft [dot] com) |
6 | // based on the CMD lexer |
7 | // The License.txt file describes the conditions under which this software may be distributed. |
8 | |
9 | #include <stdlib.h> |
10 | #include <string.h> |
11 | #include <stdio.h> |
12 | #include <stdarg.h> |
13 | #include <assert.h> |
14 | #include <ctype.h> |
15 | |
16 | #include <string> |
17 | #include <string_view> |
18 | |
19 | #include "ILexer.h" |
20 | #include "Scintilla.h" |
21 | #include "SciLexer.h" |
22 | |
23 | #include "WordList.h" |
24 | #include "LexAccessor.h" |
25 | #include "Accessor.h" |
26 | #include "StyleContext.h" |
27 | #include "CharacterSet.h" |
28 | #include "LexerModule.h" |
29 | |
30 | using namespace Lexilla; |
31 | |
32 | |
33 | static bool IsAlphabetic(int ch) { |
34 | return IsASCII(ch) && isalpha(ch); |
35 | } |
36 | |
37 | static inline bool AtEOL(Accessor &styler, Sci_PositionU i) { |
38 | return (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')); |
39 | } |
40 | |
41 | // Tests for BATCH Operators |
// Returns true when ch is one of the characters this lexer treats as a
// batch operator:  = + > < | & ! ? * ( )
static bool IsBOperator(char ch) {
	switch (ch) {
	case '=':
	case '+':
	case '>':
	case '<':
	case '|':
	case '&':
	case '!':
	case '?':
	case '*':
	case '(':
	case ')':
		return true;
	default:
		return false;
	}
}
45 | |
46 | // Tests for BATCH Separators |
// Returns true when ch is one of the characters this lexer treats as a
// batch separator:  \ . ; space tab [ ] " ' /
static bool IsBSeparator(char ch) {
	switch (ch) {
	case '\\':
	case '.':
	case ';':
	case ' ':
	case '\t':
	case '[':
	case ']':
	case '\"':
	case '\'':
	case '/':
		return true;
	default:
		return false;
	}
}
50 | |
51 | // Find length of CMD FOR variable with modifier (%~...) or return 0 |
// Find length of CMD FOR variable with modifier (%~... or %%~...) or return 0.
//
// wordBuffer must be NUL-terminated. The returned length covers the
// "%~" / "%%~" prefix plus every following modifier letter
// (A D F N P S T X Z, case-insensitive); scanning stops at the first
// character that is not a modifier or at the end of the string.
// Returns 0 when wordBuffer does not start with "%~" or "%%~".
static unsigned int GetBatchVarLen( const char *wordBuffer )
{
	unsigned int nLength;

	if ( wordBuffer[0] != '%' )
		return 0;

	if ( wordBuffer[1] == '~' )
		nLength = 2;		// skip "%~"
	else if (( wordBuffer[1] == '%' ) && ( wordBuffer[2] == '~' ))
		nLength = 3;		// skip "%%~" (previously left at 1, so the scan stopped on the second '%')
	else
		return 0;

	for ( ; wordBuffer[nLength]; nLength++ ) {

		// <ctype.h> functions require a value representable as unsigned char
		switch ( toupper((unsigned char)wordBuffer[nLength]) ) {
		case 'A':
			// file attributes
		case 'D':
			// drive letter only
		case 'F':
			// fully qualified path name
		case 'N':
			// filename only
		case 'P':
			// path only
		case 'S':
			// short name
		case 'T':
			// date / time of file
		case 'X':
			// file extension only
		case 'Z':
			// file size
			break;
		default:
			return nLength;
		}
	}

	return nLength;
}
94 | |
95 | |
96 | static void ColouriseTCMDLine( char *lineBuffer, Sci_PositionU lengthLine, Sci_PositionU startLine, Sci_PositionU endPos, WordList *keywordlists[], Accessor &styler) |
97 | { |
98 | Sci_PositionU offset = 0; // Line Buffer Offset |
99 | char wordBuffer[260]; // Word Buffer - large to catch long paths |
100 | Sci_PositionU wbl; // Word Buffer Length |
101 | Sci_PositionU wbo; // Word Buffer Offset - also Special Keyword Buffer Length |
102 | WordList &keywords = *keywordlists[0]; // Internal Commands |
103 | // WordList &keywords2 = *keywordlists[1]; // Aliases (optional) |
104 | bool isDelayedExpansion = 1; // !var! |
105 | |
106 | bool continueProcessing = true; // Used to toggle Regular Keyword Checking |
107 | // Special Keywords are those that allow certain characters without whitespace after the command |
108 | // Examples are: cd. cd\ echo: echo. path= |
109 | bool inString = false; // Used for processing while "" |
110 | // Special Keyword Buffer used to determine if the first n characters is a Keyword |
111 | char sKeywordBuffer[260] = "" ; // Special Keyword Buffer |
112 | bool sKeywordFound; // Exit Special Keyword for-loop if found |
113 | |
114 | // Skip leading whitespace |
115 | while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) { |
116 | offset++; |
117 | } |
118 | // Colorize Default Text |
119 | styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT); |
120 | |
121 | if ( offset >= lengthLine ) |
122 | return; |
123 | |
124 | // Check for Fake Label (Comment) or Real Label - return if found |
125 | if (lineBuffer[offset] == ':') { |
126 | if (lineBuffer[offset + 1] == ':') { |
127 | // Colorize Fake Label (Comment) - :: is the same as REM |
128 | styler.ColourTo(endPos, SCE_TCMD_COMMENT); |
129 | } else { |
130 | // Colorize Real Label |
131 | styler.ColourTo(endPos, SCE_TCMD_LABEL); |
132 | } |
133 | return; |
134 | |
135 | // Check for Comment - return if found |
136 | } else if (( CompareNCaseInsensitive(lineBuffer+offset, "rem" , 3) == 0 ) && (( lineBuffer[offset+3] == 0 ) || ( isspace(lineBuffer[offset+3] )))) { |
137 | styler.ColourTo(endPos, SCE_TCMD_COMMENT); |
138 | return; |
139 | |
140 | // Check for Drive Change (Drive Change is internal command) - return if found |
141 | } else if ((IsAlphabetic(lineBuffer[offset])) && |
142 | (lineBuffer[offset + 1] == ':') && |
143 | ((isspacechar(lineBuffer[offset + 2])) || |
144 | (((lineBuffer[offset + 2] == '\\')) && |
145 | (isspacechar(lineBuffer[offset + 3]))))) { |
146 | // Colorize Regular Keyword |
147 | styler.ColourTo(endPos, SCE_TCMD_WORD); |
148 | return; |
149 | } |
150 | |
151 | // Check for Hide Command (@ECHO OFF/ON) |
152 | if (lineBuffer[offset] == '@') { |
153 | styler.ColourTo(startLine + offset, SCE_TCMD_HIDE); |
154 | offset++; |
155 | } |
156 | // Skip whitespace |
157 | while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) { |
158 | offset++; |
159 | } |
160 | |
161 | // Read remainder of line word-at-a-time or remainder-of-word-at-a-time |
162 | while (offset < lengthLine) { |
163 | if (offset > startLine) { |
164 | // Colorize Default Text |
165 | styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT); |
166 | } |
167 | // Copy word from Line Buffer into Word Buffer |
168 | wbl = 0; |
169 | for (; offset < lengthLine && ( wbl < 260 ) && !isspacechar(lineBuffer[offset]); wbl++, offset++) { |
170 | wordBuffer[wbl] = static_cast<char>(tolower(lineBuffer[offset])); |
171 | } |
172 | wordBuffer[wbl] = '\0'; |
173 | wbo = 0; |
174 | |
175 | // Check for Separator |
176 | if (IsBSeparator(wordBuffer[0])) { |
177 | |
178 | // Reset Offset to re-process remainder of word |
179 | offset -= (wbl - 1); |
180 | // Colorize Default Text |
181 | styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT); |
182 | |
183 | if (wordBuffer[0] == '"') |
184 | inString = !inString; |
185 | |
186 | // Check for Regular expression |
187 | } else if (( wordBuffer[0] == ':' ) && ( wordBuffer[1] == ':' ) && (continueProcessing)) { |
188 | |
189 | // Colorize Regular exoressuin |
190 | styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT); |
191 | // No need to Reset Offset |
192 | |
193 | // Check for Labels in text (... :label) |
194 | } else if (wordBuffer[0] == ':' && isspacechar(lineBuffer[offset - wbl - 1])) { |
195 | // Colorize Default Text |
196 | styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT); |
197 | // Colorize Label |
198 | styler.ColourTo(startLine + offset - 1, SCE_TCMD_CLABEL); |
199 | // No need to Reset Offset |
200 | // Check for delayed expansion Variable (!x...!) |
201 | } else if (isDelayedExpansion && wordBuffer[0] == '!') { |
202 | // Colorize Default Text |
203 | styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT); |
204 | wbo++; |
205 | // Search to end of word for second ! |
206 | while ((wbo < wbl) && (wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) { |
207 | wbo++; |
208 | } |
209 | if (wordBuffer[wbo] == '!') { |
210 | wbo++; |
211 | // Colorize Environment Variable |
212 | styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_EXPANSION); |
213 | } else { |
214 | wbo = 1; |
215 | // Colorize Symbol |
216 | styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_DEFAULT); |
217 | } |
218 | |
219 | // Reset Offset to re-process remainder of word |
220 | offset -= (wbl - wbo); |
221 | |
222 | // Check for Regular Keyword in list |
223 | } else if ((keywords.InList(wordBuffer)) && (!inString) && (continueProcessing)) { |
224 | |
225 | // ECHO, PATH, and PROMPT require no further Regular Keyword Checking |
226 | if ((CompareCaseInsensitive(wordBuffer, "echo" ) == 0) || |
227 | (CompareCaseInsensitive(sKeywordBuffer, "echos" ) == 0) || |
228 | (CompareCaseInsensitive(sKeywordBuffer, "echoerr" ) == 0) || |
229 | (CompareCaseInsensitive(sKeywordBuffer, "echoserr" ) == 0) || |
230 | (CompareCaseInsensitive(wordBuffer, "path" ) == 0) || |
231 | (CompareCaseInsensitive(wordBuffer, "prompt" ) == 0)) { |
232 | continueProcessing = false; |
233 | } |
234 | |
235 | // Colorize Regular keyword |
236 | styler.ColourTo(startLine + offset - 1, SCE_TCMD_WORD); |
237 | // No need to Reset Offset |
238 | |
239 | } else if ((wordBuffer[0] != '%') && (wordBuffer[0] != '!') && (!IsBOperator(wordBuffer[0])) && (!inString) && (continueProcessing)) { |
240 | |
241 | // a few commands accept "illegal" syntax -- cd\, echo., etc. |
242 | sscanf( wordBuffer, "%[^.<>|&=\\/]" , sKeywordBuffer ); |
243 | sKeywordFound = false; |
244 | |
245 | if ((CompareCaseInsensitive(sKeywordBuffer, "echo" ) == 0) || |
246 | (CompareCaseInsensitive(sKeywordBuffer, "echos" ) == 0) || |
247 | (CompareCaseInsensitive(sKeywordBuffer, "echoerr" ) == 0) || |
248 | (CompareCaseInsensitive(sKeywordBuffer, "echoserr" ) == 0) || |
249 | (CompareCaseInsensitive(sKeywordBuffer, "cd" ) == 0) || |
250 | (CompareCaseInsensitive(sKeywordBuffer, "path" ) == 0) || |
251 | (CompareCaseInsensitive(sKeywordBuffer, "prompt" ) == 0)) { |
252 | |
253 | // no further Regular Keyword Checking |
254 | continueProcessing = false; |
255 | sKeywordFound = true; |
256 | wbo = (Sci_PositionU)strlen( sKeywordBuffer ); |
257 | |
258 | // Colorize Special Keyword as Regular Keyword |
259 | styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_WORD); |
260 | // Reset Offset to re-process remainder of word |
261 | offset -= (wbl - wbo); |
262 | } |
263 | |
264 | // Check for Default Text |
265 | if (!sKeywordFound) { |
266 | wbo = 0; |
267 | // Read up to %, Operator or Separator |
268 | while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!isDelayedExpansion || wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) { |
269 | wbo++; |
270 | } |
271 | // Colorize Default Text |
272 | styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_DEFAULT); |
273 | // Reset Offset to re-process remainder of word |
274 | offset -= (wbl - wbo); |
275 | } |
276 | |
277 | // Check for Argument (%n), Environment Variable (%x...%) or Local Variable (%%a) |
278 | } else if (wordBuffer[0] == '%') { |
279 | unsigned int varlen; |
280 | unsigned int n = 1; |
281 | // Colorize Default Text |
282 | styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT); |
283 | wbo++; |
284 | |
285 | // check for %[nn] syntax |
286 | if ( wordBuffer[1] == '[' ) { |
287 | n++; |
288 | while ((n < wbl) && (wordBuffer[n] != ']')) { |
289 | n++; |
290 | } |
291 | if ( wordBuffer[n] == ']' ) |
292 | n++; |
293 | goto ColorizeArg; |
294 | } |
295 | |
296 | // Search to end of word for second % or to the first terminator (can be a long path) |
297 | while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) { |
298 | wbo++; |
299 | } |
300 | |
301 | // Check for Argument (%n) or (%*) |
302 | if (((isdigit(wordBuffer[1])) || (wordBuffer[1] == '*')) && (wordBuffer[wbo] != '%')) { |
303 | while (( wordBuffer[n] ) && ( strchr( "%0123456789*#$" , wordBuffer[n] ) != NULL )) |
304 | n++; |
305 | ColorizeArg: |
306 | // Colorize Argument |
307 | styler.ColourTo(startLine + offset - 1 - (wbl - n), SCE_TCMD_IDENTIFIER); |
308 | // Reset Offset to re-process remainder of word |
309 | offset -= (wbl - n); |
310 | |
311 | // Check for Variable with modifiers (%~...) |
312 | } else if ((varlen = GetBatchVarLen(wordBuffer)) != 0) { |
313 | |
314 | // Colorize Variable |
315 | styler.ColourTo(startLine + offset - 1 - (wbl - varlen), SCE_TCMD_IDENTIFIER); |
316 | // Reset Offset to re-process remainder of word |
317 | offset -= (wbl - varlen); |
318 | |
319 | // Check for Environment Variable (%x...%) |
320 | } else if (( wordBuffer[1] ) && ( wordBuffer[1] != '%')) { |
321 | if ( wordBuffer[wbo] == '%' ) |
322 | wbo++; |
323 | |
324 | // Colorize Environment Variable |
325 | styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_ENVIRONMENT); |
326 | // Reset Offset to re-process remainder of word |
327 | offset -= (wbl - wbo); |
328 | |
329 | // Check for Local Variable (%%a) |
330 | } else if ( (wbl > 2) && (wordBuffer[1] == '%') && (wordBuffer[2] != '%') && (!IsBOperator(wordBuffer[2])) && (!IsBSeparator(wordBuffer[2]))) { |
331 | |
332 | n = 2; |
333 | while (( wordBuffer[n] ) && (!IsBOperator(wordBuffer[n])) && (!IsBSeparator(wordBuffer[n]))) |
334 | n++; |
335 | |
336 | // Colorize Local Variable |
337 | styler.ColourTo(startLine + offset - 1 - (wbl - n), SCE_TCMD_IDENTIFIER); |
338 | // Reset Offset to re-process remainder of word |
339 | offset -= (wbl - n); |
340 | |
341 | // Check for %% |
342 | } else if ((wbl > 1) && (wordBuffer[1] == '%')) { |
343 | |
344 | // Colorize Symbols |
345 | styler.ColourTo(startLine + offset - 1 - (wbl - 2), SCE_TCMD_DEFAULT); |
346 | // Reset Offset to re-process remainder of word |
347 | offset -= (wbl - 2); |
348 | } else { |
349 | |
350 | // Colorize Symbol |
351 | styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_DEFAULT); |
352 | // Reset Offset to re-process remainder of word |
353 | offset -= (wbl - 1); |
354 | } |
355 | |
356 | // Check for Operator |
357 | } else if (IsBOperator(wordBuffer[0])) { |
358 | // Colorize Default Text |
359 | styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT); |
360 | |
361 | // Check for Pipe, compound, or conditional Operator |
362 | if ((wordBuffer[0] == '|') || (wordBuffer[0] == '&')) { |
363 | |
364 | // Colorize Pipe Operator |
365 | styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_OPERATOR); |
366 | // Reset Offset to re-process remainder of word |
367 | offset -= (wbl - 1); |
368 | continueProcessing = true; |
369 | |
370 | // Check for Other Operator |
371 | } else { |
372 | // Check for > Operator |
373 | if ((wordBuffer[0] == '>') || (wordBuffer[0] == '<')) { |
374 | // Turn Keyword and External Command / Program checking back on |
375 | continueProcessing = true; |
376 | } |
377 | // Colorize Other Operator |
378 | if (!inString || !(wordBuffer[0] == '(' || wordBuffer[0] == ')')) |
379 | styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_OPERATOR); |
380 | // Reset Offset to re-process remainder of word |
381 | offset -= (wbl - 1); |
382 | } |
383 | |
384 | // Check for Default Text |
385 | } else { |
386 | // Read up to %, Operator or Separator |
387 | while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!isDelayedExpansion || wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) { |
388 | wbo++; |
389 | } |
390 | // Colorize Default Text |
391 | styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_DEFAULT); |
392 | // Reset Offset to re-process remainder of word |
393 | offset -= (wbl - wbo); |
394 | } |
395 | |
396 | // Skip whitespace - nothing happens if Offset was Reset |
397 | while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) { |
398 | offset++; |
399 | } |
400 | } |
401 | // Colorize Default Text for remainder of line - currently not lexed |
402 | styler.ColourTo(endPos, SCE_TCMD_DEFAULT); |
403 | } |
404 | |
405 | static void ColouriseTCMDDoc( Sci_PositionU startPos, Sci_Position length, int /*initStyle*/, WordList *keywordlists[], Accessor &styler ) |
406 | { |
407 | char lineBuffer[16384]; |
408 | |
409 | styler.StartAt(startPos); |
410 | styler.StartSegment(startPos); |
411 | Sci_PositionU linePos = 0; |
412 | Sci_PositionU startLine = startPos; |
413 | for (Sci_PositionU i = startPos; i < startPos + length; i++) { |
414 | lineBuffer[linePos++] = styler[i]; |
415 | if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) { |
416 | // End of line (or of line buffer) met, colourise it |
417 | lineBuffer[linePos] = '\0'; |
418 | ColouriseTCMDLine(lineBuffer, linePos, startLine, i, keywordlists, styler); |
419 | linePos = 0; |
420 | startLine = i + 1; |
421 | } |
422 | } |
423 | if (linePos > 0) { // Last line does not have ending characters |
424 | lineBuffer[linePos] = '\0'; |
425 | ColouriseTCMDLine(lineBuffer, linePos, startLine, startPos + length - 1, keywordlists, styler); |
426 | } |
427 | } |
428 | |
429 | // Convert string to upper case |
430 | static void StrUpr(char *s) { |
431 | while (*s) { |
432 | *s = MakeUpperCase(*s); |
433 | s++; |
434 | } |
435 | } |
436 | |
437 | // Folding support (for DO, IFF, SWITCH, TEXT, and command groups) |
438 | static void FoldTCMDDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) |
439 | { |
440 | Sci_Position line = styler.GetLine(startPos); |
441 | int level = styler.LevelAt(line); |
442 | int levelIndent = 0; |
443 | Sci_PositionU endPos = startPos + length; |
444 | char s[16] = "" ; |
445 | |
446 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
447 | |
448 | // Scan for ( and ) |
449 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
450 | |
451 | int c = styler.SafeGetCharAt(i, '\n'); |
452 | int style = styler.StyleAt(i); |
453 | bool bLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0; |
454 | |
455 | if (style == SCE_TCMD_OPERATOR) { |
456 | // CheckFoldPoint |
457 | if (c == '(') { |
458 | levelIndent += 1; |
459 | } else if (c == ')') { |
460 | levelIndent -= 1; |
461 | } |
462 | } |
463 | |
464 | if (( bLineStart ) && ( style == SCE_TCMD_WORD )) { |
465 | for (Sci_PositionU j = 0; j < 10; j++) { |
466 | if (!iswordchar(styler[i + j])) { |
467 | break; |
468 | } |
469 | s[j] = styler[i + j]; |
470 | s[j + 1] = '\0'; |
471 | } |
472 | |
473 | StrUpr( s ); |
474 | if ((strcmp(s, "DO" ) == 0) || (strcmp(s, "IFF" ) == 0) || (strcmp(s, "SWITCH" ) == 0) || (strcmp(s, "TEXT" ) == 0)) { |
475 | levelIndent++; |
476 | } else if ((strcmp(s, "ENDDO" ) == 0) || (strcmp(s, "ENDIFF" ) == 0) || (strcmp(s, "ENDSWITCH" ) == 0) || (strcmp(s, "ENDTEXT" ) == 0)) { |
477 | levelIndent--; |
478 | } |
479 | } |
480 | |
481 | if (c == '\n') { // line end |
482 | if (levelIndent > 0) { |
483 | level |= SC_FOLDLEVELHEADERFLAG; |
484 | } |
485 | if (level != styler.LevelAt(line)) |
486 | styler.SetLevel(line, level); |
487 | level += levelIndent; |
488 | if ((level & SC_FOLDLEVELNUMBERMASK) < SC_FOLDLEVELBASE) |
489 | level = SC_FOLDLEVELBASE; |
490 | line++; |
491 | // reset state |
492 | levelIndent = 0; |
493 | level &= ~SC_FOLDLEVELHEADERFLAG; |
494 | level &= ~SC_FOLDLEVELWHITEFLAG; |
495 | } |
496 | |
497 | chPrev = c; |
498 | } |
499 | } |
500 | |
// Descriptions of the keyword lists for this lexer, in index order and
// terminated by a null entry. ColouriseTCMDLine() reads only list 0
// ("Internal Commands"); its use of list 1 ("Aliases") is commented out.
static const char *const tcmdWordListDesc[] = {
	"Internal Commands" ,
	"Aliases" ,
	0
};

// Lexer module definition tying SCLEX_TCMD and the name "tcmd" to the
// colouriser, the folder and the word list descriptions above.
LexerModule lmTCMD(SCLEX_TCMD, ColouriseTCMDDoc, "tcmd" , FoldTCMDDoc, tcmdWordListDesc);
508 | |