// Scintilla source code edit control
/** @file LexCoffeeScript.cxx
 ** Lexer for CoffeeScript.
 **/
// Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
// Based on the Scintilla C++ Lexer
// Written by Eric Promislow <ericp@activestate.com> in 2011 for the Komodo IDE
// The License.txt file describes the conditions under which this software may be distributed.
9 | |
10 | #include <stdlib.h> |
11 | #include <string.h> |
12 | #include <stdio.h> |
13 | #include <stdarg.h> |
14 | #include <assert.h> |
15 | #include <ctype.h> |
16 | |
17 | #include <string> |
18 | #include <string_view> |
19 | |
20 | #include <algorithm> |
21 | |
22 | #include "ILexer.h" |
23 | #include "Scintilla.h" |
24 | #include "SciLexer.h" |
25 | |
26 | #include "WordList.h" |
27 | #include "LexAccessor.h" |
28 | #include "Accessor.h" |
29 | #include "StyleContext.h" |
30 | #include "CharacterSet.h" |
31 | #include "LexerModule.h" |
32 | |
33 | using namespace Lexilla; |
34 | |
35 | static bool IsSpaceEquiv(int state) { |
36 | return (state == SCE_COFFEESCRIPT_DEFAULT |
37 | || state == SCE_COFFEESCRIPT_COMMENTLINE |
38 | || state == SCE_COFFEESCRIPT_COMMENTBLOCK |
39 | || state == SCE_COFFEESCRIPT_VERBOSE_REGEX |
40 | || state == SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT |
41 | || state == SCE_COFFEESCRIPT_WORD |
42 | || state == SCE_COFFEESCRIPT_REGEX); |
43 | } |
44 | |
// Store the current lexer state and brace count prior to starting a new
// `#{}` interpolation level.
// `state` and `brace_counts` are saved at index `inner_string_count` of the
// two caller-owned arrays, then `brace_counts` is reset to 0 and
// `inner_string_count` is incremented.
// No bounds check is made here: the caller must make sure that
// `inner_string_count` is a valid index into both arrays before calling.
// Based on LexRuby.cxx.
static void enterInnerExpression(int *p_inner_string_types,
                                 int *p_inner_expn_brace_counts,
                                 int& inner_string_count,
                                 int state,
                                 int& brace_counts
                                ) {
	p_inner_string_types[inner_string_count] = state;
	p_inner_expn_brace_counts[inner_string_count] = brace_counts;
	brace_counts = 0;
	++inner_string_count;
}
59 | |
// Restore the lexer state and brace count for the previous `#{}` interpolation
// level upon returning to it.
// `inner_string_count` is decremented and `brace_counts` is reloaded from the
// saved value at that index.
// Note the previous lexer state is the return value and needs to be restored
// manually by the StyleContext.
// Precondition: at least one level is stored (`inner_string_count > 0`);
// otherwise the arrays would be read at index -1.
// Based on LexRuby.cxx.
static int exitInnerExpression(int *p_inner_string_types,
                               int *p_inner_expn_brace_counts,
                               int& inner_string_count,
                               int& brace_counts
                              ) {
	assert(inner_string_count > 0);
	--inner_string_count;
	brace_counts = p_inner_expn_brace_counts[inner_string_count];
	return p_inner_string_types[inner_string_count];
}
74 | |
75 | // Preconditions: sc.currentPos points to a character after '+' or '-'. |
76 | // The test for pos reaching 0 should be redundant, |
77 | // and is in only for safety measures. |
78 | // Limitation: this code will give the incorrect answer for code like |
79 | // a = b+++/ptn/... |
80 | // Putting a space between the '++' post-inc operator and the '+' binary op |
81 | // fixes this, and is highly recommended for readability anyway. |
82 | static bool FollowsPostfixOperator(StyleContext &sc, Accessor &styler) { |
83 | Sci_Position pos = (Sci_Position) sc.currentPos; |
84 | while (--pos > 0) { |
85 | char ch = styler[pos]; |
86 | if (ch == '+' || ch == '-') { |
87 | return styler[pos - 1] == ch; |
88 | } |
89 | } |
90 | return false; |
91 | } |
92 | |
93 | static bool followsKeyword(StyleContext &sc, Accessor &styler) { |
94 | Sci_Position pos = (Sci_Position) sc.currentPos; |
95 | Sci_Position currentLine = styler.GetLine(pos); |
96 | Sci_Position lineStartPos = styler.LineStart(currentLine); |
97 | while (--pos > lineStartPos) { |
98 | char ch = styler.SafeGetCharAt(pos); |
99 | if (ch != ' ' && ch != '\t') { |
100 | break; |
101 | } |
102 | } |
103 | styler.Flush(); |
104 | return styler.StyleAt(pos) == SCE_COFFEESCRIPT_WORD; |
105 | } |
106 | |
107 | static void ColouriseCoffeeScriptDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], |
108 | Accessor &styler) { |
109 | |
110 | WordList &keywords = *keywordlists[0]; |
111 | WordList &keywords2 = *keywordlists[1]; |
112 | WordList &keywords4 = *keywordlists[3]; |
113 | |
114 | CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-" ); |
115 | CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-" ); |
116 | |
117 | CharacterSet setWordStart(CharacterSet::setAlpha, "_$@" , 0x80, true); |
118 | CharacterSet setWord(CharacterSet::setAlphaNum, "._$" , 0x80, true); |
119 | |
120 | int chPrevNonWhite = ' '; |
121 | int visibleChars = 0; |
122 | |
123 | // String/Regex interpolation variables, based on LexRuby.cxx. |
124 | // In most cases a value of 2 should be ample for the code the user is |
125 | // likely to enter. For example, |
126 | // "Filling the #{container} with #{liquid}..." |
127 | // from the CoffeeScript homepage nests to a level of 2 |
128 | // If the user actually hits a 6th occurrence of '#{' in a double-quoted |
129 | // string (including regexes), it will stay as a string. The problem with |
130 | // this is that quotes might flip, a 7th '#{' will look like a comment, |
131 | // and code-folding might be wrong. |
132 | #define INNER_STRINGS_MAX_COUNT 5 |
133 | // These vars track our instances of "...#{,,,'..#{,,,}...',,,}..." |
134 | int inner_string_types[INNER_STRINGS_MAX_COUNT]; |
135 | // Track # braces when we push a new #{ thing |
136 | int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT]; |
137 | int inner_string_count = 0; |
138 | int brace_counts = 0; // Number of #{ ... } things within an expression |
139 | for (int i = 0; i < INNER_STRINGS_MAX_COUNT; i++) { |
140 | inner_string_types[i] = 0; |
141 | inner_expn_brace_counts[i] = 0; |
142 | } |
143 | |
144 | // look back to set chPrevNonWhite properly for better regex colouring |
145 | Sci_Position endPos = startPos + length; |
146 | if (startPos > 0 && IsSpaceEquiv(initStyle)) { |
147 | Sci_PositionU back = startPos; |
148 | styler.Flush(); |
149 | while (back > 0 && IsSpaceEquiv(styler.StyleAt(--back))) |
150 | ; |
151 | if (styler.StyleAt(back) == SCE_COFFEESCRIPT_OPERATOR) { |
152 | chPrevNonWhite = styler.SafeGetCharAt(back); |
153 | } |
154 | if (startPos != back) { |
155 | initStyle = styler.StyleAt(back); |
156 | if (IsSpaceEquiv(initStyle)) { |
157 | initStyle = SCE_COFFEESCRIPT_DEFAULT; |
158 | } |
159 | } |
160 | startPos = back; |
161 | } |
162 | |
163 | StyleContext sc(startPos, endPos - startPos, initStyle, styler); |
164 | |
165 | for (; sc.More();) { |
166 | |
167 | if (sc.atLineStart) { |
168 | // Reset states to beginning of colourise so no surprises |
169 | // if different sets of lines lexed. |
170 | visibleChars = 0; |
171 | } |
172 | |
173 | // Determine if the current state should terminate. |
174 | switch (sc.state) { |
175 | case SCE_COFFEESCRIPT_OPERATOR: |
176 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
177 | break; |
178 | case SCE_COFFEESCRIPT_NUMBER: |
179 | // We accept almost anything because of hex. and number suffixes |
180 | if (!setWord.Contains(sc.ch) || sc.Match('.', '.')) { |
181 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
182 | } |
183 | break; |
184 | case SCE_COFFEESCRIPT_IDENTIFIER: |
185 | if (!setWord.Contains(sc.ch) || (sc.ch == '.') || (sc.ch == '$')) { |
186 | char s[1000]; |
187 | sc.GetCurrent(s, sizeof(s)); |
188 | if (keywords.InList(s)) { |
189 | sc.ChangeState(SCE_COFFEESCRIPT_WORD); |
190 | } else if (keywords2.InList(s)) { |
191 | sc.ChangeState(SCE_COFFEESCRIPT_WORD2); |
192 | } else if (keywords4.InList(s)) { |
193 | sc.ChangeState(SCE_COFFEESCRIPT_GLOBALCLASS); |
194 | } else if (sc.LengthCurrent() > 0 && s[0] == '@') { |
195 | sc.ChangeState(SCE_COFFEESCRIPT_INSTANCEPROPERTY); |
196 | } |
197 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
198 | } |
199 | break; |
200 | case SCE_COFFEESCRIPT_WORD: |
201 | case SCE_COFFEESCRIPT_WORD2: |
202 | case SCE_COFFEESCRIPT_GLOBALCLASS: |
203 | case SCE_COFFEESCRIPT_INSTANCEPROPERTY: |
204 | if (!setWord.Contains(sc.ch)) { |
205 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
206 | } |
207 | break; |
208 | case SCE_COFFEESCRIPT_COMMENTLINE: |
209 | if (sc.atLineStart) { |
210 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
211 | } |
212 | break; |
213 | case SCE_COFFEESCRIPT_STRING: |
214 | if (sc.ch == '\\') { |
215 | if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { |
216 | sc.Forward(); |
217 | } |
218 | } else if (sc.ch == '\"') { |
219 | sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT); |
220 | } else if (sc.ch == '#' && sc.chNext == '{' && inner_string_count < INNER_STRINGS_MAX_COUNT) { |
221 | // process interpolated code #{ ... } |
222 | enterInnerExpression(inner_string_types, |
223 | inner_expn_brace_counts, |
224 | inner_string_count, |
225 | sc.state, |
226 | brace_counts); |
227 | sc.SetState(SCE_COFFEESCRIPT_OPERATOR); |
228 | sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT); |
229 | } |
230 | break; |
231 | case SCE_COFFEESCRIPT_CHARACTER: |
232 | if (sc.ch == '\\') { |
233 | if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { |
234 | sc.Forward(); |
235 | } |
236 | } else if (sc.ch == '\'') { |
237 | sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT); |
238 | } |
239 | break; |
240 | case SCE_COFFEESCRIPT_REGEX: |
241 | if (sc.atLineStart) { |
242 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
243 | } else if (sc.ch == '/') { |
244 | sc.Forward(); |
245 | while ((sc.ch < 0x80) && islower(sc.ch)) |
246 | sc.Forward(); // gobble regex flags |
247 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
248 | } else if (sc.ch == '\\') { |
249 | // Gobble up the quoted character |
250 | if (sc.chNext == '\\' || sc.chNext == '/') { |
251 | sc.Forward(); |
252 | } |
253 | } |
254 | break; |
255 | case SCE_COFFEESCRIPT_STRINGEOL: |
256 | if (sc.atLineStart) { |
257 | sc.SetState(SCE_COFFEESCRIPT_DEFAULT); |
258 | } |
259 | break; |
260 | case SCE_COFFEESCRIPT_COMMENTBLOCK: |
261 | if (sc.Match("###" )) { |
262 | sc.Forward(); |
263 | sc.Forward(); |
264 | sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT); |
265 | } else if (sc.ch == '\\') { |
266 | sc.Forward(); |
267 | } |
268 | break; |
269 | case SCE_COFFEESCRIPT_VERBOSE_REGEX: |
270 | if (sc.Match("///" )) { |
271 | sc.Forward(); |
272 | sc.Forward(); |
273 | sc.ForwardSetState(SCE_COFFEESCRIPT_DEFAULT); |
274 | } else if (sc.Match('#')) { |
275 | sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT); |
276 | } else if (sc.ch == '\\') { |
277 | sc.Forward(); |
278 | } |
279 | break; |
280 | case SCE_COFFEESCRIPT_VERBOSE_REGEX_COMMENT: |
281 | if (sc.atLineStart) { |
282 | sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX); |
283 | } |
284 | break; |
285 | } |
286 | |
287 | // Determine if a new state should be entered. |
288 | if (sc.state == SCE_COFFEESCRIPT_DEFAULT) { |
289 | if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) { |
290 | sc.SetState(SCE_COFFEESCRIPT_NUMBER); |
291 | } else if (setWordStart.Contains(sc.ch)) { |
292 | sc.SetState(SCE_COFFEESCRIPT_IDENTIFIER); |
293 | } else if (sc.Match("///" )) { |
294 | sc.SetState(SCE_COFFEESCRIPT_VERBOSE_REGEX); |
295 | sc.Forward(); |
296 | sc.Forward(); |
297 | } else if (sc.ch == '/' |
298 | && (setOKBeforeRE.Contains(chPrevNonWhite) |
299 | || followsKeyword(sc, styler)) |
300 | && (!setCouldBePostOp.Contains(chPrevNonWhite) |
301 | || !FollowsPostfixOperator(sc, styler))) { |
302 | sc.SetState(SCE_COFFEESCRIPT_REGEX); // JavaScript's RegEx |
303 | } else if (sc.ch == '\"') { |
304 | sc.SetState(SCE_COFFEESCRIPT_STRING); |
305 | } else if (sc.ch == '\'') { |
306 | sc.SetState(SCE_COFFEESCRIPT_CHARACTER); |
307 | } else if (sc.ch == '#') { |
308 | if (sc.Match("###" )) { |
309 | sc.SetState(SCE_COFFEESCRIPT_COMMENTBLOCK); |
310 | sc.Forward(); |
311 | sc.Forward(); |
312 | } else { |
313 | sc.SetState(SCE_COFFEESCRIPT_COMMENTLINE); |
314 | } |
315 | } else if (isoperator(static_cast<char>(sc.ch))) { |
316 | sc.SetState(SCE_COFFEESCRIPT_OPERATOR); |
317 | // Handle '..' and '...' operators correctly. |
318 | if (sc.ch == '.') { |
319 | for (int i = 0; i < 2 && sc.chNext == '.'; i++, sc.Forward()) ; |
320 | } else if (sc.ch == '{') { |
321 | ++brace_counts; |
322 | } else if (sc.ch == '}' && --brace_counts <= 0 && inner_string_count > 0) { |
323 | // Return to previous state before #{ ... } |
324 | sc.ForwardSetState(exitInnerExpression(inner_string_types, |
325 | inner_expn_brace_counts, |
326 | inner_string_count, |
327 | brace_counts)); |
328 | continue; // skip sc.Forward() at loop end |
329 | } |
330 | } |
331 | } |
332 | |
333 | if (!IsASpace(sc.ch) && !IsSpaceEquiv(sc.state)) { |
334 | chPrevNonWhite = sc.ch; |
335 | visibleChars++; |
336 | } |
337 | sc.Forward(); |
338 | } |
339 | sc.Complete(); |
340 | } |
341 | |
342 | static bool (Sci_Position line, Accessor &styler) { |
343 | Sci_Position pos = styler.LineStart(line); |
344 | Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
345 | for (Sci_Position i = pos; i < eol_pos; i++) { |
346 | char ch = styler[i]; |
347 | if (ch == '#') |
348 | return true; |
349 | else if (ch != ' ' && ch != '\t') |
350 | return false; |
351 | } |
352 | return false; |
353 | } |
354 | |
355 | static void FoldCoffeeScriptDoc(Sci_PositionU startPos, Sci_Position length, int, |
356 | WordList *[], Accessor &styler) { |
357 | // A simplified version of FoldPyDoc |
358 | const Sci_Position maxPos = startPos + length; |
359 | const Sci_Position maxLines = styler.GetLine(maxPos - 1); // Requested last line |
360 | const Sci_Position docLines = styler.GetLine(styler.Length() - 1); // Available last line |
361 | |
362 | // property fold.coffeescript.comment |
363 | const bool = styler.GetPropertyInt("fold.coffeescript.comment" ) != 0; |
364 | |
365 | const bool foldCompact = styler.GetPropertyInt("fold.compact" ) != 0; |
366 | |
367 | // Backtrack to previous non-blank line so we can determine indent level |
368 | // for any white space lines |
369 | // and so we can fix any preceding fold level (which is why we go back |
370 | // at least one line in all cases) |
371 | int spaceFlags = 0; |
372 | Sci_Position lineCurrent = styler.GetLine(startPos); |
373 | int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL); |
374 | while (lineCurrent > 0) { |
375 | lineCurrent--; |
376 | indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL); |
377 | if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) |
378 | && !IsCommentLine(lineCurrent, styler)) |
379 | break; |
380 | } |
381 | int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK; |
382 | |
383 | // Set up initial loop state |
384 | int = 0; |
385 | if (lineCurrent >= 1) |
386 | prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler); |
387 | |
388 | // Process all characters to end of requested range |
389 | // or comment that hangs over the end of the range. Cap processing in all cases |
390 | // to end of document (in case of comment at end). |
391 | while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevComment)) { |
392 | |
393 | // Gather info |
394 | int lev = indentCurrent; |
395 | Sci_Position lineNext = lineCurrent + 1; |
396 | int indentNext = indentCurrent; |
397 | if (lineNext <= docLines) { |
398 | // Information about next line is only available if not at end of document |
399 | indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL); |
400 | } |
401 | const int = foldComment && IsCommentLine(lineCurrent, styler); |
402 | const int = (comment && !prevComment && (lineNext <= docLines) && |
403 | IsCommentLine(lineNext, styler) && (lev > SC_FOLDLEVELBASE)); |
404 | const int = (comment && prevComment); |
405 | if (!comment) |
406 | indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK; |
407 | if (indentNext & SC_FOLDLEVELWHITEFLAG) |
408 | indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel; |
409 | |
410 | if (comment_start) { |
411 | // Place fold point at start of a block of comments |
412 | lev |= SC_FOLDLEVELHEADERFLAG; |
413 | } else if (comment_continue) { |
414 | // Add level to rest of lines in the block |
415 | lev = lev + 1; |
416 | } |
417 | |
418 | // Skip past any blank lines for next indent level info; we skip also |
419 | // comments (all comments, not just those starting in column 0) |
420 | // which effectively folds them into surrounding code rather |
421 | // than screwing up folding. |
422 | |
423 | while ((lineNext < docLines) && |
424 | ((indentNext & SC_FOLDLEVELWHITEFLAG) || |
425 | (lineNext <= docLines && IsCommentLine(lineNext, styler)))) { |
426 | |
427 | lineNext++; |
428 | indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL); |
429 | } |
430 | |
431 | const int = indentNext & SC_FOLDLEVELNUMBERMASK; |
432 | const int = std::max(indentCurrentLevel,levelAfterComments); |
433 | |
434 | // Now set all the indent levels on the lines we skipped |
435 | // Do this from end to start. Once we encounter one line |
436 | // which is indented more than the line after the end of |
437 | // the comment-block, use the level of the block before |
438 | |
439 | Sci_Position skipLine = lineNext; |
440 | int skipLevel = levelAfterComments; |
441 | |
442 | while (--skipLine > lineCurrent) { |
443 | int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL); |
444 | |
445 | if (foldCompact) { |
446 | if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments) |
447 | skipLevel = levelBeforeComments; |
448 | |
449 | int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG; |
450 | |
451 | styler.SetLevel(skipLine, skipLevel | whiteFlag); |
452 | } else { |
453 | if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments && |
454 | !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) && |
455 | !IsCommentLine(skipLine, styler)) |
456 | skipLevel = levelBeforeComments; |
457 | |
458 | styler.SetLevel(skipLine, skipLevel); |
459 | } |
460 | } |
461 | |
462 | // Set fold header on non-comment line |
463 | if (!comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) { |
464 | if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK)) |
465 | lev |= SC_FOLDLEVELHEADERFLAG; |
466 | } |
467 | |
468 | // Keep track of block comment state of previous line |
469 | prevComment = comment_start || comment_continue; |
470 | |
471 | // Set fold level for this line and move to next line |
472 | styler.SetLevel(lineCurrent, lev); |
473 | indentCurrent = indentNext; |
474 | lineCurrent = lineNext; |
475 | } |
476 | } |
477 | |
// Descriptions of the word lists, in index order, ending with a null entry.
// The colouriser reads lists 0, 1 and 3; list 2 is a placeholder.
static const char *const csWordLists[] = {
	"Keywords",
	"Secondary keywords",
	"Unused",
	"Global classes",
	nullptr,
};
485 | |
486 | LexerModule lmCoffeeScript(SCLEX_COFFEESCRIPT, ColouriseCoffeeScriptDoc, "coffeescript" , FoldCoffeeScriptDoc, csWordLists); |
487 | |