1// Scintilla source code edit control
2/** @file LexRuby.cxx
3 ** Lexer for Ruby.
4 **/
5// Copyright 2001- by Clemens Wyss <wys@helbling.ch>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#include <stdlib.h>
9#include <string.h>
10#include <stdio.h>
11#include <stdarg.h>
12#include <assert.h>
13#include <ctype.h>
14
15#include <string>
16#include <string_view>
17
18#include "ILexer.h"
19#include "Scintilla.h"
20#include "SciLexer.h"
21
22#include "WordList.h"
23#include "LexAccessor.h"
24#include "Accessor.h"
25#include "StyleContext.h"
26#include "CharacterSet.h"
27#include "LexerModule.h"
28
29using namespace Lexilla;
30
31//XXX Identical to Perl, put in common area
// Returns true when ch is one of the two end-of-line characters,
// carriage return or line feed.
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
35
// True when the value, viewed as unsigned, lies in the 7-bit ASCII range.
// A negative (signed) char converts to a large unsigned value and so fails.
#define isSafeASCII(ch) ((unsigned int)(ch) < 128u)
// Exact complement of isSafeASCII; redundant, but it reads better at call sites
#define isHighBitChar(ch) ((unsigned int)(ch) >= 128u)

// ASCII letter or underscore. isalpha() is only reached for ASCII values.
static inline bool isSafeAlpha(char ch) {
    if (ch == '_') {
        return true;
    }
    return isSafeASCII(ch) && isalpha(ch);
}

// ASCII letter, ASCII digit or underscore. isalnum() is only reached for
// ASCII values.
static inline bool isSafeAlnum(char ch) {
    if (ch == '_') {
        return true;
    }
    return isSafeASCII(ch) && isalnum(ch);
}

// As isSafeAlnum, but any char outside the ASCII range is accepted too.
static inline bool isSafeAlnumOrHigh(char ch) {
    if (isHighBitChar(ch) || ch == '_') {
        return true;
    }
    // Only ASCII values get this far
    return isalnum(ch) != 0;
}

// ASCII decimal digit.
static inline bool isSafeDigit(char ch) {
    if (!isSafeASCII(ch)) {
        return false;
    }
    return isdigit(ch) != 0;
}

// Characters that may appear inside a word: alphanumerics, underscore and
// anything outside the ASCII range.
static inline bool isSafeWordcharOrHigh(char ch) {
    // Scintilla's KeyWords.h counts '.' as a word-char; here it is
    // deliberately left out so that things that can take methods are
    // kept separate from the methods themselves.
    return isSafeAlnumOrHigh(ch);
}
62
// Returns true for a horizontal blank (space or tab). End-of-line
// characters are not counted as whitespace here.
static inline bool iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
66
// Size of the scratch buffer a word is copied into for classification
// (see ClassifyWordRb); longer words are truncated to fit.
#define MAX_KEYWORD_LENGTH 200

// Keeps the low six bits of a style value.
#define STYLE_MASK 63
// The argument is parenthesised so that an expression such as (a | b)
// is masked as a whole rather than having only its last operand masked.
#define actual_style(style) ((style) & STYLE_MASK)
71
72static bool followsDot(Sci_PositionU pos, Accessor &styler) {
73 styler.Flush();
74 for (; pos >= 1; --pos) {
75 int style = actual_style(styler.StyleAt(pos));
76 char ch;
77 switch (style) {
78 case SCE_RB_DEFAULT:
79 ch = styler[pos];
80 if (ch == ' ' || ch == '\t') {
81 //continue
82 } else {
83 return false;
84 }
85 break;
86
87 case SCE_RB_OPERATOR:
88 return styler[pos] == '.';
89
90 default:
91 return false;
92 }
93 }
94 return false;
95}
96
97// Forward declarations
98static bool keywordIsAmbiguous(const char *prevWord);
99static bool keywordDoStartsLoop(Sci_Position pos,
100 Accessor &styler);
101static bool keywordIsModifier(const char *word,
102 Sci_Position pos,
103 Accessor &styler);
104
105static int ClassifyWordRb(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler, char *prevWord) {
106 char s[MAX_KEYWORD_LENGTH];
107 Sci_PositionU i, j;
108 Sci_PositionU lim = end - start + 1; // num chars to copy
109 if (lim >= MAX_KEYWORD_LENGTH) {
110 lim = MAX_KEYWORD_LENGTH - 1;
111 }
112 for (i = start, j = 0; j < lim; i++, j++) {
113 s[j] = styler[i];
114 }
115 s[j] = '\0';
116 int chAttr;
117 if (0 == strcmp(prevWord, "class"))
118 chAttr = SCE_RB_CLASSNAME;
119 else if (0 == strcmp(prevWord, "module"))
120 chAttr = SCE_RB_MODULE_NAME;
121 else if (0 == strcmp(prevWord, "def"))
122 chAttr = SCE_RB_DEFNAME;
123 else if (keywords.InList(s) && ((start == 0) || !followsDot(start - 1, styler))) {
124 if (keywordIsAmbiguous(s)
125 && keywordIsModifier(s, start, styler)) {
126
127 // Demoted keywords are colored as keywords,
128 // but do not affect changes in indentation.
129 //
130 // Consider the word 'if':
131 // 1. <<if test ...>> : normal
132 // 2. <<stmt if test>> : demoted
133 // 3. <<lhs = if ...>> : normal: start a new indent level
134 // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
135
136 chAttr = SCE_RB_WORD_DEMOTED;
137 } else {
138 chAttr = SCE_RB_WORD;
139 }
140 } else
141 chAttr = SCE_RB_IDENTIFIER;
142 styler.ColourTo(end, chAttr);
143 if (chAttr == SCE_RB_WORD) {
144 strcpy(prevWord, s);
145 } else {
146 prevWord[0] = 0;
147 }
148 return chAttr;
149}
150
151
152//XXX Identical to Perl, put in common area
153static bool isMatch(Accessor &styler, Sci_Position lengthDoc, Sci_Position pos, const char *val) {
154 if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
155 return false;
156 }
157 while (*val) {
158 if (*val != styler[pos++]) {
159 return false;
160 }
161 val++;
162 }
163 return true;
164}
165
166// Do Ruby better -- find the end of the line, work back,
167// and then check for leading white space
168
169// Precondition: the here-doc target can be indented
170static bool lookingAtHereDocDelim(Accessor &styler,
171 Sci_Position pos,
172 Sci_Position lengthDoc,
173 const char *HereDocDelim)
174{
175 if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
176 return false;
177 }
178 while (--pos > 0) {
179 char ch = styler[pos];
180 if (isEOLChar(ch)) {
181 return true;
182 } else if (ch != ' ' && ch != '\t') {
183 return false;
184 }
185 }
186 return false;
187}
188
189//XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner; every other character
// is its own closer and is returned unchanged.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
201
202// Null transitions when we see we've reached the end
203// and need to relex the curr char.
204
205static void redo_char(Sci_Position &i, char &ch, char &chNext, char &chNext2,
206 int &state) {
207 i--;
208 chNext2 = chNext;
209 chNext = ch;
210 state = SCE_RB_DEFAULT;
211}
212
213static void advance_char(Sci_Position &i, char &ch, char &chNext, char &chNext2) {
214 i++;
215 ch = chNext;
216 chNext = chNext2;
217}
218
219// precondition: startPos points to one after the EOL char
220static bool currLineContainsHereDelims(Sci_Position &startPos,
221 Accessor &styler) {
222 if (startPos <= 1)
223 return false;
224
225 Sci_Position pos;
226 for (pos = startPos - 1; pos > 0; pos--) {
227 char ch = styler.SafeGetCharAt(pos);
228 if (isEOLChar(ch)) {
229 // Leave the pointers where they are -- there are no
230 // here doc delims on the current line, even if
231 // the EOL isn't default style
232
233 return false;
234 } else {
235 styler.Flush();
236 if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
237 break;
238 }
239 }
240 }
241 if (pos == 0) {
242 return false;
243 }
244 // Update the pointers so we don't have to re-analyze the string
245 startPos = pos;
246 return true;
247}
248
249// This class is used by the enter and exit methods, so it needs
250// to be hoisted out of the function.
251
252class QuoteCls {
253public:
254 int Count;
255 char Up;
256 char Down;
257 QuoteCls() noexcept {
258 New();
259 }
260 void New() noexcept {
261 Count = 0;
262 Up = '\0';
263 Down = '\0';
264 }
265 void Open(char u) {
266 Count++;
267 Up = u;
268 Down = opposite(Up);
269 }
270};
271
272
273static void enterInnerExpression(int *p_inner_string_types,
274 int *p_inner_expn_brace_counts,
275 QuoteCls *p_inner_quotes,
276 int &inner_string_count,
277 int &state,
278 int &brace_counts,
279 QuoteCls curr_quote
280 ) {
281 p_inner_string_types[inner_string_count] = state;
282 state = SCE_RB_DEFAULT;
283 p_inner_expn_brace_counts[inner_string_count] = brace_counts;
284 brace_counts = 0;
285 p_inner_quotes[inner_string_count] = curr_quote;
286 ++inner_string_count;
287}
288
289static void exitInnerExpression(int *p_inner_string_types,
290 int *p_inner_expn_brace_counts,
291 QuoteCls *p_inner_quotes,
292 int &inner_string_count,
293 int &state,
294 int &brace_counts,
295 QuoteCls &curr_quote
296 ) {
297 --inner_string_count;
298 state = p_inner_string_types[inner_string_count];
299 brace_counts = p_inner_expn_brace_counts[inner_string_count];
300 curr_quote = p_inner_quotes[inner_string_count];
301}
302
303static bool isEmptyLine(Sci_Position pos,
304 Accessor &styler) {
305 int spaceFlags = 0;
306 Sci_Position lineCurrent = styler.GetLine(pos);
307 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
308 return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
309}
310
// Returns true when keyword is one of the words in the table below.
// NOTE(review): going by the name, these are presumably the keywords
// after which a regular expression may start -- confirm against callers.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const candidates[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t candidateCount = sizeof(candidates) / sizeof(candidates[0]);
    for (size_t k = 0; k < candidateCount; k++) {
        if (strcmp(keyword, candidates[k]) == 0) {
            return true;
        }
    }
    return false;
}
331
332// Look at chars up to but not including endPos
333// Don't look at styles in case we're looking forward
334
335static Sci_Position skipWhitespace(Sci_Position startPos,
336 Sci_Position endPos,
337 Accessor &styler) {
338 for (Sci_Position i = startPos; i < endPos; i++) {
339 if (!iswhitespace(styler[i])) {
340 return i;
341 }
342 }
343 return endPos;
344}
345
346// This routine looks for false positives like
347// undef foo, <<
348// There aren't too many.
349//
350// iPrev points to the start of <<
351
352static bool sureThisIsHeredoc(Sci_Position iPrev,
353 Accessor &styler,
354 char *prevWord) {
355
356 // Not so fast, since Ruby's so dynamic. Check the context
357 // to make sure we're OK.
358 int prevStyle;
359 Sci_Position lineStart = styler.GetLine(iPrev);
360 Sci_Position lineStartPosn = styler.LineStart(lineStart);
361 styler.Flush();
362
363 // Find the first word after some whitespace
364 Sci_Position firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler);
365 if (firstWordPosn >= iPrev) {
366 // Have something like {^ <<}
367 //XXX Look at the first previous non-comment non-white line
368 // to establish the context. Not too likely though.
369 return true;
370 } else {
371 switch (prevStyle = styler.StyleAt(firstWordPosn)) {
372 case SCE_RB_WORD:
373 case SCE_RB_WORD_DEMOTED:
374 case SCE_RB_IDENTIFIER:
375 break;
376 default:
377 return true;
378 }
379 }
380 Sci_Position firstWordEndPosn = firstWordPosn;
381 char *dst = prevWord;
382 for (;;) {
383 if (firstWordEndPosn >= iPrev ||
384 styler.StyleAt(firstWordEndPosn) != prevStyle) {
385 *dst = 0;
386 break;
387 }
388 *dst++ = styler[firstWordEndPosn];
389 firstWordEndPosn += 1;
390 }
391 //XXX Write a style-aware thing to regex scintilla buffer objects
392 if (!strcmp(prevWord, "undef")
393 || !strcmp(prevWord, "def")
394 || !strcmp(prevWord, "alias")) {
395 // These keywords are what we were looking for
396 return false;
397 }
398 return true;
399}
400
401// Routine that saves us from allocating a buffer for the here-doc target
402// targetEndPos points one past the end of the current target
403static bool haveTargetMatch(Sci_Position currPos,
404 Sci_Position lengthDoc,
405 Sci_Position targetStartPos,
406 Sci_Position targetEndPos,
407 Accessor &styler) {
408 if (lengthDoc - currPos < targetEndPos - targetStartPos) {
409 return false;
410 }
411 Sci_Position i, j;
412 for (i = targetStartPos, j = currPos;
413 i < targetEndPos && j < lengthDoc;
414 i++, j++) {
415 if (styler[i] != styler[j]) {
416 return false;
417 }
418 }
419 return true;
420}
421
422// Finds the start position of the expression containing @p pos
423// @p min_pos should be a known expression start, e.g. the start of the line
424static Sci_Position findExpressionStart(Sci_Position pos,
425 Sci_Position min_pos,
426 Accessor &styler) {
427 int depth = 0;
428 for (; pos > min_pos; pos -= 1) {
429 int style = styler.StyleAt(pos - 1);
430 if (style == SCE_RB_OPERATOR) {
431 int ch = styler[pos - 1];
432 if (ch == '}' || ch == ')' || ch == ']') {
433 depth += 1;
434 } else if (ch == '{' || ch == '(' || ch == '[') {
435 if (depth == 0) {
436 break;
437 } else {
438 depth -= 1;
439 }
440 } else if (ch == ';' && depth == 0) {
441 break;
442 }
443 }
444 }
445 return pos;
446}
447
448// We need a check because the form
449// [identifier] <<[target]
450// is ambiguous. The Ruby lexer/parser resolves it by
451// looking to see if [identifier] names a variable or a
// function. If it's a function, it's the start of a here-doc.
453// If it's a var, it's an operator. This lexer doesn't
454// maintain a symbol table, so it looks ahead to see what's
455// going on, in cases where we have
456// ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
457//
458// If there's no occurrence of [target] on a line, assume we don't.
459
460// return true == yes, we have no heredocs
461
462static bool sureThisIsNotHeredoc(Sci_Position lt2StartPos,
463 Accessor &styler) {
464 int prevStyle;
465 // Use full document, not just part we're styling
466 Sci_Position lengthDoc = styler.Length();
467 Sci_Position lineStart = styler.GetLine(lt2StartPos);
468 Sci_Position lineStartPosn = styler.LineStart(lineStart);
469 styler.Flush();
470 const bool definitely_not_a_here_doc = true;
471 const bool looks_like_a_here_doc = false;
472
473 // find the expression start rather than the line start
474 Sci_Position exprStartPosn = findExpressionStart(lt2StartPos, lineStartPosn, styler);
475
476 // Find the first word after some whitespace
477 Sci_Position firstWordPosn = skipWhitespace(exprStartPosn, lt2StartPos, styler);
478 if (firstWordPosn >= lt2StartPos) {
479 return definitely_not_a_here_doc;
480 }
481 prevStyle = styler.StyleAt(firstWordPosn);
482 // If we have '<<' following a keyword, it's not a heredoc
483 if (prevStyle != SCE_RB_IDENTIFIER
484 && prevStyle != SCE_RB_SYMBOL
485 && prevStyle != SCE_RB_INSTANCE_VAR
486 && prevStyle != SCE_RB_CLASS_VAR) {
487 return definitely_not_a_here_doc;
488 }
489 int newStyle = prevStyle;
490 // Some compilers incorrectly warn about uninit newStyle
491 for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
492 // Inner loop looks at the name
493 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
494 newStyle = styler.StyleAt(firstWordPosn);
495 if (newStyle != prevStyle) {
496 break;
497 }
498 }
499 // Do we have '::' or '.'?
500 if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
501 char ch = styler[firstWordPosn];
502 if (ch == '.') {
503 // yes
504 } else if (ch == ':') {
505 if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
506 return definitely_not_a_here_doc;
507 } else if (styler[firstWordPosn] != ':') {
508 return definitely_not_a_here_doc;
509 }
510 } else {
511 break;
512 }
513 } else {
514 break;
515 }
516 // on second and next passes, only identifiers may appear since
517 // class and instance variable are private
518 prevStyle = SCE_RB_IDENTIFIER;
519 }
520 // Skip next batch of white-space
521 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
522 // possible symbol for an implicit hash argument
523 if (firstWordPosn < lt2StartPos && styler.StyleAt(firstWordPosn) == SCE_RB_SYMBOL) {
524 for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
525 if (styler.StyleAt(firstWordPosn) != SCE_RB_SYMBOL) {
526 break;
527 }
528 }
529 // Skip next batch of white-space
530 firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
531 }
532 if (firstWordPosn != lt2StartPos) {
533 // Have [[^ws[identifier]ws[*something_else*]ws<<
534 return definitely_not_a_here_doc;
535 }
536 // OK, now 'j' will point to the current spot moving ahead
537 Sci_Position j = firstWordPosn + 1;
538 if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
539 // This shouldn't happen
540 return definitely_not_a_here_doc;
541 }
542 Sci_Position nextLineStartPosn = styler.LineStart(lineStart + 1);
543 if (nextLineStartPosn >= lengthDoc) {
544 return definitely_not_a_here_doc;
545 }
546 j = skipWhitespace(j + 1, nextLineStartPosn, styler);
547 if (j >= lengthDoc) {
548 return definitely_not_a_here_doc;
549 }
550 bool allow_indent;
551 Sci_Position target_start, target_end;
552 // From this point on no more styling, since we're looking ahead
553 if (styler[j] == '-' || styler[j] == '~') {
554 allow_indent = true;
555 j++;
556 } else {
557 allow_indent = false;
558 }
559
560 // Allow for quoted targets.
561 char target_quote = 0;
562 switch (styler[j]) {
563 case '\'':
564 case '"':
565 case '`':
566 target_quote = styler[j];
567 j += 1;
568 }
569
570 if (isSafeAlnum(styler[j])) {
571 // Init target_end because some compilers think it won't
572 // be initialized by the time it's used
573 target_start = target_end = j;
574 j++;
575 } else {
576 return definitely_not_a_here_doc;
577 }
578 for (; j < lengthDoc; j++) {
579 if (!isSafeAlnum(styler[j])) {
580 if (target_quote && styler[j] != target_quote) {
581 // unquoted end
582 return definitely_not_a_here_doc;
583 }
584
585 // And for now make sure that it's a newline
586 // don't handle arbitrary expressions yet
587
588 target_end = j;
589 if (target_quote) {
590 // Now we can move to the character after the string delimiter.
591 j += 1;
592 }
593 j = skipWhitespace(j, lengthDoc, styler);
594 if (j >= lengthDoc) {
595 return definitely_not_a_here_doc;
596 } else {
597 char ch = styler[j];
598 if (ch == '#' || isEOLChar(ch)) {
599 // This is OK, so break and continue;
600 break;
601 } else {
602 return definitely_not_a_here_doc;
603 }
604 }
605 }
606 }
607
608 // Just look at the start of each line
609 Sci_Position last_line = styler.GetLine(lengthDoc - 1);
610 // But don't go too far
611 if (last_line > lineStart + 50) {
612 last_line = lineStart + 50;
613 }
614 for (Sci_Position line_num = lineStart + 1; line_num <= last_line; line_num++) {
615 if (allow_indent) {
616 j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
617 } else {
618 j = styler.LineStart(line_num);
619 }
620 // target_end is one past the end
621 if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
622 // We got it
623 return looks_like_a_here_doc;
624 }
625 }
626 return definitely_not_a_here_doc;
627}
628
629//todo: if we aren't looking at a stdio character,
630// move to the start of the first line that is not in a
631// multi-line construct
632
633static void synchronizeDocStart(Sci_PositionU &startPos,
634 Sci_Position &length,
635 int &initStyle,
636 Accessor &styler,
637 bool skipWhiteSpace=false) {
638
639 styler.Flush();
640 int style = actual_style(styler.StyleAt(startPos));
641 switch (style) {
642 case SCE_RB_STDIN:
643 case SCE_RB_STDOUT:
644 case SCE_RB_STDERR:
645 // Don't do anything else with these.
646 return;
647 }
648
649 Sci_Position pos = startPos;
650 // Quick way to characterize each line
651 Sci_Position lineStart;
652 for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
653 // Now look at the style before the previous line's EOL
654 pos = styler.LineStart(lineStart) - 1;
655 if (pos <= 10) {
656 lineStart = 0;
657 break;
658 }
659 char ch = styler.SafeGetCharAt(pos);
660 char chPrev = styler.SafeGetCharAt(pos - 1);
661 if (ch == '\n' && chPrev == '\r') {
662 pos--;
663 }
664 if (styler.SafeGetCharAt(pos - 1) == '\\') {
665 // Continuation line -- keep going
666 } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
667 // Part of multi-line construct -- keep going
668 } else if (currLineContainsHereDelims(pos, styler)) {
669 // Keep going, with pos and length now pointing
670 // at the end of the here-doc delimiter
671 } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
672 // Keep going
673 } else {
674 break;
675 }
676 }
677 pos = styler.LineStart(lineStart);
678 length += (startPos - pos);
679 startPos = pos;
680 initStyle = SCE_RB_DEFAULT;
681}
682
683static void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
684 WordList *keywordlists[], Accessor &styler) {
685
686 // Lexer for Ruby often has to backtrack to start of current style to determine
687 // which characters are being used as quotes, how deeply nested is the
688 // start position and what the termination string is for here documents
689
690 WordList &keywords = *keywordlists[0];
691
692 class HereDocCls {
693 public:
694 int State = 0;
695 // States
696 // 0: '<<' encountered
697 // 1: collect the delimiter
698 // 1b: text between the end of the delimiter and the EOL
699 // 2: here doc text (lines after the delimiter)
700 char Quote = 0; // the char after '<<'
701 bool Quoted = false; // true if Quote in ('\'','"','`')
702 int DelimiterLength = 0; // strlen(Delimiter)
703 char Delimiter[256] {}; // the Delimiter, limit of 256: from Perl
704 bool CanBeIndented = false;
705 };
706 HereDocCls HereDoc;
707
708 QuoteCls Quote;
709
710 int numDots = 0; // For numbers --
711 // Don't start lexing in the middle of a num
712
713 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
714 false);
715
716 bool preferRE = true;
717 int state = initStyle;
718 Sci_Position lengthDoc = startPos + length;
719
720 char prevWord[MAX_KEYWORD_LENGTH + 1] = ""; // 1 byte for zero
721 if (length == 0)
722 return;
723
724 char chPrev = styler.SafeGetCharAt(startPos - 1);
725 char chNext = styler.SafeGetCharAt(startPos);
726 bool is_real_number = true; // Differentiate between constants and ?-sequences.
727 styler.StartAt(startPos);
728 styler.StartSegment(startPos);
729
730 static int q_states[] = {SCE_RB_STRING_Q,
731 SCE_RB_STRING_QQ,
732 SCE_RB_STRING_QR,
733 SCE_RB_STRING_QW,
734 SCE_RB_STRING_QW,
735 SCE_RB_STRING_QX
736 };
737 static const char *q_chars = "qQrwWx";
738
739 // In most cases a value of 2 should be ample for the code in the
740 // Ruby library, and the code the user is likely to enter.
741 // For example,
742 // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
743 // if options[:verbose]
744 // from fileutils.rb nests to a level of 2
745 // If the user actually hits a 6th occurrence of '#{' in a double-quoted
746 // string (including regex'es, %Q, %<sym>, %w, and other strings
747 // that interpolate), it will stay as a string. The problem with this
748 // is that quotes might flip, a 7th '#{' will look like a comment,
749 // and code-folding might be wrong.
750
751 // If anyone runs into this problem, I recommend raising this
752 // value slightly higher to replacing the fixed array with a linked
753 // list. Keep in mind this code will be called every time the lexer
754 // is invoked.
755
756#define INNER_STRINGS_MAX_COUNT 5
757 // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
758 int inner_string_types[INNER_STRINGS_MAX_COUNT] {};
759 // Track # braces when we push a new #{ thing
760 int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT] {};
761 QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
762 int inner_string_count = 0;
763 int brace_counts = 0; // Number of #{ ... } things within an expression
764
765 Sci_Position i;
766 for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) {
767 inner_string_types[i] = 0;
768 inner_expn_brace_counts[i] = 0;
769 }
770 for (i = startPos; i < lengthDoc; i++) {
771 char ch = chNext;
772 chNext = styler.SafeGetCharAt(i + 1);
773 char chNext2 = styler.SafeGetCharAt(i + 2);
774
775 if (styler.IsLeadByte(ch)) {
776 chNext = chNext2;
777 chPrev = ' ';
778 i += 1;
779 continue;
780 }
781
782 // skip on DOS/Windows
783 //No, don't, because some things will get tagged on,
784 // so we won't recognize keywords, for example
785#if 0
786 if (ch == '\r' && chNext == '\n') {
787 continue;
788 }
789#endif
790
791 if (HereDoc.State == 1 && isEOLChar(ch)) {
792 // Begin of here-doc (the line after the here-doc delimiter):
793 HereDoc.State = 2;
794 styler.ColourTo(i-1, state);
795 // Don't check for a missing quote, just jump into
796 // the here-doc state
797 state = SCE_RB_HERE_Q;
798 }
799
800 // Regular transitions
801 if (state == SCE_RB_DEFAULT) {
802 if (isSafeDigit(ch)) {
803 styler.ColourTo(i - 1, state);
804 state = SCE_RB_NUMBER;
805 is_real_number = true;
806 numDots = 0;
807 } else if (isHighBitChar(ch) || iswordstart(ch)) {
808 styler.ColourTo(i - 1, state);
809 state = SCE_RB_WORD;
810 } else if (ch == '#') {
811 styler.ColourTo(i - 1, state);
812 state = SCE_RB_COMMENTLINE;
813 } else if (ch == '=') {
814 // =begin indicates the start of a comment (doc) block
815 if ((i == 0 || isEOLChar(chPrev))
816 && chNext == 'b'
817 && styler.SafeGetCharAt(i + 2) == 'e'
818 && styler.SafeGetCharAt(i + 3) == 'g'
819 && styler.SafeGetCharAt(i + 4) == 'i'
820 && styler.SafeGetCharAt(i + 5) == 'n'
821 && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
822 styler.ColourTo(i - 1, state);
823 state = SCE_RB_POD;
824 } else {
825 styler.ColourTo(i - 1, state);
826 styler.ColourTo(i, SCE_RB_OPERATOR);
827 preferRE = true;
828 }
829 } else if (ch == '"') {
830 styler.ColourTo(i - 1, state);
831 state = SCE_RB_STRING;
832 Quote.New();
833 Quote.Open(ch);
834 } else if (ch == '\'') {
835 styler.ColourTo(i - 1, state);
836 state = SCE_RB_CHARACTER;
837 Quote.New();
838 Quote.Open(ch);
839 } else if (ch == '`') {
840 styler.ColourTo(i - 1, state);
841 state = SCE_RB_BACKTICKS;
842 Quote.New();
843 Quote.Open(ch);
844 } else if (ch == '@') {
845 // Instance or class var
846 styler.ColourTo(i - 1, state);
847 if (chNext == '@') {
848 state = SCE_RB_CLASS_VAR;
849 advance_char(i, ch, chNext, chNext2); // pass by ref
850 } else {
851 state = SCE_RB_INSTANCE_VAR;
852 }
853 } else if (ch == '$') {
854 // Check for a builtin global
855 styler.ColourTo(i - 1, state);
856 // Recognize it bit by bit
857 state = SCE_RB_GLOBAL;
858 } else if (ch == '/' && preferRE) {
859 // Ambigous operator
860 styler.ColourTo(i - 1, state);
861 state = SCE_RB_REGEX;
862 Quote.New();
863 Quote.Open(ch);
864 } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
865
866 // Recognise the '<<' symbol - either a here document or a binary op
867 styler.ColourTo(i - 1, state);
868 i++;
869 chNext = chNext2;
870 styler.ColourTo(i, SCE_RB_OPERATOR);
871
872 if (!(strchr("\"\'`_-~", chNext2) || isSafeAlpha(chNext2))) {
873 // It's definitely not a here-doc,
874 // based on Ruby's lexer/parser in the
875 // heredoc_identifier routine.
876 // Nothing else to do.
877 } else if (preferRE) {
878 if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
879 state = SCE_RB_HERE_DELIM;
880 HereDoc.State = 0;
881 }
882 // else leave it in default state
883 } else {
884 if (sureThisIsNotHeredoc(i - 1, styler)) {
885 // leave state as default
886 // We don't have all the heuristics Perl has for indications
887 // of a here-doc, because '<<' is overloadable and used
888 // for so many other classes.
889 } else {
890 state = SCE_RB_HERE_DELIM;
891 HereDoc.State = 0;
892 }
893 }
894 preferRE = (state != SCE_RB_HERE_DELIM);
895 } else if (ch == ':') {
896 styler.ColourTo(i - 1, state);
897 if (chNext == ':') {
898 // Mark "::" as an operator, not symbol start
899 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
900 advance_char(i, ch, chNext, chNext2); // pass by ref
901 state = SCE_RB_DEFAULT;
902 preferRE = false;
903 } else if (isSafeWordcharOrHigh(chNext)) {
904 state = SCE_RB_SYMBOL;
905 } else if ((chNext == '@' || chNext == '$') &&
906 isSafeWordcharOrHigh(chNext2)) {
907 // instance and global variable followed by an identifier
908 advance_char(i, ch, chNext, chNext2);
909 state = SCE_RB_SYMBOL;
910 } else if (((chNext == '@' && chNext2 == '@') ||
911 (chNext == '$' && chNext2 == '-')) &&
912 isSafeWordcharOrHigh(styler.SafeGetCharAt(i+3))) {
913 // class variables and special global variable "$-IDENTCHAR"
914 state = SCE_RB_SYMBOL;
915 // $-IDENTCHAR doesn't continue past the IDENTCHAR
916 if (chNext == '$') {
917 styler.ColourTo(i+3, SCE_RB_SYMBOL);
918 state = SCE_RB_DEFAULT;
919 }
920 i += 3;
921 ch = styler.SafeGetCharAt(i);
922 chNext = styler.SafeGetCharAt(i+1);
923 } else if (chNext == '$' && strchr("_~*$?!@/\\;,.=:<>\"&`'+", chNext2)) {
924 // single-character special global variables
925 i += 2;
926 ch = chNext2;
927 chNext = styler.SafeGetCharAt(i+1);
928 styler.ColourTo(i, SCE_RB_SYMBOL);
929 state = SCE_RB_DEFAULT;
930 } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
931 // Do the operator analysis in-line, looking ahead
932 // Based on the table in pickaxe 2nd ed., page 339
933 bool doColoring = true;
934 switch (chNext) {
935 case '[':
936 if (chNext2 == ']') {
937 char ch_tmp = styler.SafeGetCharAt(i + 3);
938 if (ch_tmp == '=') {
939 i += 3;
940 ch = ch_tmp;
941 chNext = styler.SafeGetCharAt(i + 1);
942 } else {
943 i += 2;
944 ch = chNext2;
945 chNext = ch_tmp;
946 }
947 } else {
948 doColoring = false;
949 }
950 break;
951
952 case '*':
953 if (chNext2 == '*') {
954 i += 2;
955 ch = chNext2;
956 chNext = styler.SafeGetCharAt(i + 1);
957 } else {
958 advance_char(i, ch, chNext, chNext2);
959 }
960 break;
961
962 case '!':
963 if (chNext2 == '=' || chNext2 == '~') {
964 i += 2;
965 ch = chNext2;
966 chNext = styler.SafeGetCharAt(i + 1);
967 } else {
968 advance_char(i, ch, chNext, chNext2);
969 }
970 break;
971
972 case '<':
973 if (chNext2 == '<') {
974 i += 2;
975 ch = chNext2;
976 chNext = styler.SafeGetCharAt(i + 1);
977 } else if (chNext2 == '=') {
978 char ch_tmp = styler.SafeGetCharAt(i + 3);
979 if (ch_tmp == '>') { // <=> operator
980 i += 3;
981 ch = ch_tmp;
982 chNext = styler.SafeGetCharAt(i + 1);
983 } else {
984 i += 2;
985 ch = chNext2;
986 chNext = ch_tmp;
987 }
988 } else {
989 advance_char(i, ch, chNext, chNext2);
990 }
991 break;
992
993 default:
994 // Simple one-character operators
995 advance_char(i, ch, chNext, chNext2);
996 break;
997 }
998 if (doColoring) {
999 styler.ColourTo(i, SCE_RB_SYMBOL);
1000 state = SCE_RB_DEFAULT;
1001 }
1002 } else if (!preferRE) {
1003 // Don't color symbol strings (yet)
1004 // Just color the ":" and color rest as string
1005 styler.ColourTo(i, SCE_RB_SYMBOL);
1006 state = SCE_RB_DEFAULT;
1007 } else {
1008 styler.ColourTo(i, SCE_RB_OPERATOR);
1009 state = SCE_RB_DEFAULT;
1010 preferRE = true;
1011 }
1012 } else if (ch == '%') {
1013 styler.ColourTo(i - 1, state);
1014 bool have_string = false;
1015 if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) {
1016 Quote.New();
1017 const char *hit = strchr(q_chars, chNext);
1018 if (hit != NULL) {
1019 state = q_states[hit - q_chars];
1020 Quote.Open(chNext2);
1021 i += 2;
1022 ch = chNext2;
1023 chNext = styler.SafeGetCharAt(i + 1);
1024 have_string = true;
1025 }
1026 } else if (preferRE && !isSafeWordcharOrHigh(chNext)) {
1027 // Ruby doesn't allow high bit chars here,
1028 // but the editor host might
1029 Quote.New();
1030 state = SCE_RB_STRING_QQ;
1031 Quote.Open(chNext);
1032 advance_char(i, ch, chNext, chNext2); // pass by ref
1033 have_string = true;
1034 } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) {
1035 // Ruby doesn't allow high bit chars here,
1036 // but the editor host might
1037 Quote.New();
1038 state = SCE_RB_STRING_QQ;
1039 Quote.Open(chNext);
1040 advance_char(i, ch, chNext, chNext2); // pass by ref
1041 have_string = true;
1042 }
1043 if (!have_string) {
1044 styler.ColourTo(i, SCE_RB_OPERATOR);
1045 // stay in default
1046 preferRE = true;
1047 }
1048 } else if (ch == '?') {
1049 styler.ColourTo(i - 1, state);
1050 if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') {
1051 styler.ColourTo(i, SCE_RB_OPERATOR);
1052 } else {
1053 // It's the start of a character code escape sequence
1054 // Color it as a number.
1055 state = SCE_RB_NUMBER;
1056 is_real_number = false;
1057 }
1058 } else if (isoperator(ch) || ch == '.') {
1059 styler.ColourTo(i - 1, state);
1060 styler.ColourTo(i, SCE_RB_OPERATOR);
1061 // If we're ending an expression or block,
1062 // assume it ends an object, and the ambivalent
1063 // constructs are binary operators
1064 //
1065 // So if we don't have one of these chars,
1066 // we aren't ending an object exp'n, and ops
1067 // like : << / are unary operators.
1068
1069 if (ch == '{') {
1070 ++brace_counts;
1071 preferRE = true;
1072 } else if (ch == '}' && --brace_counts < 0
1073 && inner_string_count > 0) {
1074 styler.ColourTo(i, SCE_RB_OPERATOR);
1075 exitInnerExpression(inner_string_types,
1076 inner_expn_brace_counts,
1077 inner_quotes,
1078 inner_string_count,
1079 state, brace_counts, Quote);
1080 } else {
1081 preferRE = (strchr(")}].", ch) == NULL);
1082 }
1083 // Stay in default state
1084 } else if (isEOLChar(ch)) {
1085 // Make sure it's a true line-end, with no backslash
1086 if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
1087 && chPrev != '\\') {
1088 // Assume we've hit the end of the statement.
1089 preferRE = true;
1090 }
1091 }
1092 } else if (state == SCE_RB_WORD) {
1093 if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
1094 // Words include x? in all contexts,
1095 // and <letters>= after either 'def' or a dot
1096 // Move along until a complete word is on our left
1097
1098 // Default accessor treats '.' as word-chars,
1099 // but we don't for now.
1100
1101 if (ch == '='
1102 && isSafeWordcharOrHigh(chPrev)
1103 && (chNext == '('
1104 || strchr(" \t\n\r", chNext) != NULL)
1105 && (!strcmp(prevWord, "def")
1106 || followsDot(styler.GetStartSegment(), styler))) {
1107 // <name>= is a name only when being def'd -- Get it the next time
1108 // This means that <name>=<name> is always lexed as
1109 // <name>, (op, =), <name>
1110 } else if (ch == ':'
1111 && isSafeWordcharOrHigh(chPrev)
1112 && strchr(" \t\n\r", chNext) != NULL) {
1113 state = SCE_RB_SYMBOL;
1114 } else if ((ch == '?' || ch == '!')
1115 && isSafeWordcharOrHigh(chPrev)
1116 && !isSafeWordcharOrHigh(chNext)) {
1117 // <name>? is a name -- Get it the next time
1118 // But <name>?<name> is always lexed as
1119 // <name>, (op, ?), <name>
1120 // Same with <name>! to indicate a method that
1121 // modifies its target
1122 } else if (isEOLChar(ch)
1123 && isMatch(styler, lengthDoc, i - 7, "__END__")) {
1124 styler.ColourTo(i, SCE_RB_DATASECTION);
1125 state = SCE_RB_DATASECTION;
1126 // No need to handle this state -- we'll just move to the end
1127 preferRE = false;
1128 } else {
1129 Sci_Position wordStartPos = styler.GetStartSegment();
1130 int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
1131 switch (word_style) {
1132 case SCE_RB_WORD:
1133 preferRE = RE_CanFollowKeyword(prevWord);
1134 break;
1135
1136 case SCE_RB_WORD_DEMOTED:
1137 preferRE = true;
1138 break;
1139
1140 case SCE_RB_IDENTIFIER:
1141 if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
1142 preferRE = true;
1143 } else if (isEOLChar(ch)) {
1144 preferRE = true;
1145 } else {
1146 preferRE = false;
1147 }
1148 break;
1149 default:
1150 preferRE = false;
1151 }
1152 if (ch == '.') {
1153 // We might be redefining an operator-method
1154 preferRE = false;
1155 }
1156 // And if it's the first
1157 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1158 }
1159 }
1160 } else if (state == SCE_RB_NUMBER) {
1161 if (!is_real_number) {
1162 if (ch != '\\') {
1163 styler.ColourTo(i, state);
1164 state = SCE_RB_DEFAULT;
1165 preferRE = false;
1166 } else if (strchr("\\ntrfvaebs", chNext)) {
1167 // Terminal escape sequence -- handle it next time
1168 // Nothing more to do this time through the loop
1169 } else if (chNext == 'C' || chNext == 'M') {
1170 if (chNext2 != '-') {
1171 // \C or \M ends the sequence -- handle it next time
1172 } else {
1173 // Move from abc?\C-x
1174 // ^
1175 // to
1176 // ^
1177 i += 2;
1178 ch = chNext2;
1179 chNext = styler.SafeGetCharAt(i + 1);
1180 }
1181 } else if (chNext == 'c') {
1182 // Stay here, \c is a combining sequence
1183 advance_char(i, ch, chNext, chNext2); // pass by ref
1184 } else {
1185 // ?\x, including ?\\ is final.
1186 styler.ColourTo(i + 1, state);
1187 state = SCE_RB_DEFAULT;
1188 preferRE = false;
1189 advance_char(i, ch, chNext, chNext2);
1190 }
1191 } else if (isSafeAlnumOrHigh(ch) || ch == '_') {
1192 // Keep going
1193 } else if (ch == '.' && chNext == '.') {
1194 ++numDots;
1195 styler.ColourTo(i - 1, state);
1196 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1197 } else if (ch == '.' && ++numDots == 1) {
1198 // Keep going
1199 } else {
1200 styler.ColourTo(i - 1, state);
1201 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1202 preferRE = false;
1203 }
1204 } else if (state == SCE_RB_COMMENTLINE) {
1205 if (isEOLChar(ch)) {
1206 styler.ColourTo(i - 1, state);
1207 state = SCE_RB_DEFAULT;
1208 // Use whatever setting we had going into the comment
1209 }
1210 } else if (state == SCE_RB_HERE_DELIM) {
1211 // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
1212 // Slightly different: if we find an immediate '-',
1213 // the target can appear indented.
1214
1215 if (HereDoc.State == 0) { // '<<' encountered
1216 HereDoc.State = 1;
1217 HereDoc.DelimiterLength = 0;
1218 if (ch == '-' || ch == '~') {
1219 HereDoc.CanBeIndented = true;
1220 advance_char(i, ch, chNext, chNext2); // pass by ref
1221 } else {
1222 HereDoc.CanBeIndented = false;
1223 }
1224 if (isEOLChar(ch)) {
1225 // Bail out of doing a here doc if there's no target
1226 state = SCE_RB_DEFAULT;
1227 preferRE = false;
1228 } else {
1229 HereDoc.Quote = ch;
1230
1231 if (ch == '\'' || ch == '"' || ch == '`') {
1232 HereDoc.Quoted = true;
1233 HereDoc.Delimiter[0] = '\0';
1234 } else {
1235 HereDoc.Quoted = false;
1236 HereDoc.Delimiter[0] = ch;
1237 HereDoc.Delimiter[1] = '\0';
1238 HereDoc.DelimiterLength = 1;
1239 }
1240 }
1241 } else if (HereDoc.State == 1) { // collect the delimiter
1242 if (isEOLChar(ch)) {
1243 // End the quote now, and go back for more
1244 styler.ColourTo(i - 1, state);
1245 state = SCE_RB_DEFAULT;
1246 i--;
1247 chNext = ch;
1248 preferRE = false;
1249 } else if (HereDoc.Quoted) {
1250 if (ch == HereDoc.Quote) { // closing quote => end of delimiter
1251 styler.ColourTo(i, state);
1252 state = SCE_RB_DEFAULT;
1253 preferRE = false;
1254 } else {
1255 if (ch == '\\' && !isEOLChar(chNext)) {
1256 advance_char(i, ch, chNext, chNext2);
1257 }
1258 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1259 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1260 }
1261 } else { // an unquoted here-doc delimiter
1262 if (isSafeAlnumOrHigh(ch) || ch == '_') {
1263 HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
1264 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
1265 } else {
1266 styler.ColourTo(i - 1, state);
1267 redo_char(i, ch, chNext, chNext2, state);
1268 preferRE = false;
1269 }
1270 }
1271 if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
1272 styler.ColourTo(i - 1, state);
1273 state = SCE_RB_ERROR;
1274 preferRE = false;
1275 }
1276 }
1277 } else if (state == SCE_RB_HERE_Q) {
1278 // Not needed: HereDoc.State == 2
1279 // Indentable here docs: look backwards
1280 // Non-indentable: look forwards, like in Perl
1281 //
1282 // Why: so we can quickly resolve things like <<-" abc"
1283
1284 if (!HereDoc.CanBeIndented) {
1285 if (isEOLChar(chPrev)
1286 && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
1287 styler.ColourTo(i - 1, state);
1288 i += static_cast<Sci_Position>(HereDoc.DelimiterLength) - 1;
1289 chNext = styler.SafeGetCharAt(i + 1);
1290 if (isEOLChar(chNext)) {
1291 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1292 state = SCE_RB_DEFAULT;
1293 HereDoc.State = 0;
1294 preferRE = false;
1295 }
1296 // Otherwise we skipped through the here doc faster.
1297 }
1298 } else if (isEOLChar(chNext)
1299 && lookingAtHereDocDelim(styler,
1300 i - HereDoc.DelimiterLength + 1,
1301 lengthDoc,
1302 HereDoc.Delimiter)) {
1303 styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
1304 styler.ColourTo(i, SCE_RB_HERE_DELIM);
1305 state = SCE_RB_DEFAULT;
1306 preferRE = false;
1307 HereDoc.State = 0;
1308 }
1309 } else if (state == SCE_RB_CLASS_VAR
1310 || state == SCE_RB_INSTANCE_VAR
1311 || state == SCE_RB_SYMBOL) {
1312 if (state == SCE_RB_SYMBOL &&
1313 // FIDs suffices '?' and '!'
1314 (((ch == '!' || ch == '?') && chNext != '=') ||
1315 // identifier suffix '='
1316 (ch == '=' && (chNext != '~' && chNext != '>' &&
1317 (chNext != '=' || chNext2 == '>'))))) {
1318 styler.ColourTo(i, state);
1319 state = SCE_RB_DEFAULT;
1320 preferRE = false;
1321 } else if (!isSafeWordcharOrHigh(ch)) {
1322 styler.ColourTo(i - 1, state);
1323 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1324 preferRE = false;
1325 }
1326 } else if (state == SCE_RB_GLOBAL) {
1327 if (!isSafeWordcharOrHigh(ch)) {
1328 // handle special globals here as well
1329 if (chPrev == '$') {
1330 if (ch == '-') {
1331 // Include the next char, like $-a
1332 advance_char(i, ch, chNext, chNext2);
1333 }
1334 styler.ColourTo(i, state);
1335 state = SCE_RB_DEFAULT;
1336 } else {
1337 styler.ColourTo(i - 1, state);
1338 redo_char(i, ch, chNext, chNext2, state); // pass by ref
1339 }
1340 preferRE = false;
1341 }
1342 } else if (state == SCE_RB_POD) {
1343 // PODs end with ^=end\s, -- any whitespace can follow =end
1344 if (strchr(" \t\n\r", ch) != NULL
1345 && i > 5
1346 && isEOLChar(styler[i - 5])
1347 && isMatch(styler, lengthDoc, i - 4, "=end")) {
1348 styler.ColourTo(i - 1, state);
1349 state = SCE_RB_DEFAULT;
1350 preferRE = false;
1351 }
1352 } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
1353 if (ch == '\\' && Quote.Up != '\\') {
1354 // Skip one
1355 advance_char(i, ch, chNext, chNext2);
1356 } else if (ch == Quote.Down) {
1357 Quote.Count--;
1358 if (Quote.Count == 0) {
1359 // Include the options
1360 while (isSafeAlpha(chNext)) {
1361 i++;
1362 ch = chNext;
1363 chNext = styler.SafeGetCharAt(i + 1);
1364 }
1365 styler.ColourTo(i, state);
1366 state = SCE_RB_DEFAULT;
1367 preferRE = false;
1368 }
1369 } else if (ch == Quote.Up) {
1370 // Only if close quoter != open quoter
1371 Quote.Count++;
1372
1373 } else if (ch == '#') {
1374 if (chNext == '{'
1375 && inner_string_count < INNER_STRINGS_MAX_COUNT) {
1376 // process #{ ... }
1377 styler.ColourTo(i - 1, state);
1378 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1379 enterInnerExpression(inner_string_types,
1380 inner_expn_brace_counts,
1381 inner_quotes,
1382 inner_string_count,
1383 state,
1384 brace_counts,
1385 Quote);
1386 preferRE = true;
1387 // Skip one
1388 advance_char(i, ch, chNext, chNext2);
1389 } else {
1390 //todo: distinguish comments from pound chars
1391 // for now, handle as comment
1392 styler.ColourTo(i - 1, state);
1393 bool inEscape = false;
1394 while (++i < lengthDoc) {
1395 ch = styler.SafeGetCharAt(i);
1396 if (ch == '\\') {
1397 inEscape = true;
1398 } else if (isEOLChar(ch)) {
1399 // Comment inside a regex
1400 styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
1401 break;
1402 } else if (inEscape) {
1403 inEscape = false; // don't look at char
1404 } else if (ch == Quote.Down) {
1405 // Have the regular handler deal with this
1406 // to get trailing modifiers.
1407 i--;
1408 ch = styler[i];
1409 break;
1410 }
1411 }
1412 chNext = styler.SafeGetCharAt(i + 1);
1413 }
1414 }
1415 // Quotes of all kinds...
1416 } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
1417 state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
1418 state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
1419 state == SCE_RB_BACKTICKS) {
1420 if (!Quote.Down && !isspacechar(ch)) {
1421 Quote.Open(ch);
1422 } else if (ch == '\\' && Quote.Up != '\\') {
1423 //Riddle me this: Is it safe to skip *every* escaped char?
1424 advance_char(i, ch, chNext, chNext2);
1425 } else if (ch == Quote.Down) {
1426 Quote.Count--;
1427 if (Quote.Count == 0) {
1428 styler.ColourTo(i, state);
1429 state = SCE_RB_DEFAULT;
1430 preferRE = false;
1431 }
1432 } else if (ch == Quote.Up) {
1433 Quote.Count++;
1434 } else if (ch == '#' && chNext == '{'
1435 && inner_string_count < INNER_STRINGS_MAX_COUNT
1436 && state != SCE_RB_CHARACTER
1437 && state != SCE_RB_STRING_Q) {
1438 // process #{ ... }
1439 styler.ColourTo(i - 1, state);
1440 styler.ColourTo(i + 1, SCE_RB_OPERATOR);
1441 enterInnerExpression(inner_string_types,
1442 inner_expn_brace_counts,
1443 inner_quotes,
1444 inner_string_count,
1445 state,
1446 brace_counts,
1447 Quote);
1448 preferRE = true;
1449 // Skip one
1450 advance_char(i, ch, chNext, chNext2);
1451 }
1452 }
1453
1454 if (state == SCE_RB_ERROR) {
1455 break;
1456 }
1457 chPrev = ch;
1458 }
1459 if (state == SCE_RB_WORD) {
1460 // We've ended on a word, possibly at EOF, and need to
1461 // classify it.
1462 (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
1463 } else {
1464 styler.ColourTo(lengthDoc - 1, state);
1465 }
1466}
1467
1468// Helper functions for folding, disambiguation keywords
1469// Assert that there are no high-bit chars
1470
1471static void getPrevWord(Sci_Position pos,
1472 char *prevWord,
1473 Accessor &styler,
1474 int word_state)
1475{
1476 Sci_Position i;
1477 styler.Flush();
1478 for (i = pos - 1; i > 0; i--) {
1479 if (actual_style(styler.StyleAt(i)) != word_state) {
1480 i++;
1481 break;
1482 }
1483 }
1484 if (i < pos - MAX_KEYWORD_LENGTH) // overflow
1485 i = pos - MAX_KEYWORD_LENGTH;
1486 char *dst = prevWord;
1487 for (; i <= pos; i++) {
1488 *dst++ = styler[i];
1489 }
1490 *dst = 0;
1491}
1492
// Report whether `prevWord` is exactly one of the keywords listed below:
// "if", "do", "while", "unless", "until" or "for".
//
// The comparison is case-sensitive and matches whole words only.
// Returns true on a match, false otherwise.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most to least commonly used, so typical lookups exit early.
    static const char *const candidates[] = {
        "if", "do", "while", "unless", "until", "for"
    };
    const size_t candidateCount = sizeof(candidates) / sizeof(candidates[0]);

    for (size_t k = 0; k < candidateCount; ++k) {
        if (strcmp(prevWord, candidates[k]) == 0) {
            return true;
        }
    }
    return false;
}
1508
1509// Demote keywords in the following conditions:
1510// if, while, unless, until modify a statement
1511// do after a while or until, as a noise word (like then after if)
1512
1513static bool keywordIsModifier(const char *word,
1514 Sci_Position pos,
1515 Accessor &styler)
1516{
1517 if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
1518 return keywordDoStartsLoop(pos, styler);
1519 }
1520 char ch, chPrev, chPrev2;
1521 int style = SCE_RB_DEFAULT;
1522 Sci_Position lineStart = styler.GetLine(pos);
1523 Sci_Position lineStartPosn = styler.LineStart(lineStart);
1524 // We want to step backwards until we don't care about the current
1525 // position. But first move lineStartPosn back behind any
1526 // continuations immediately above word.
1527 while (lineStartPosn > 0) {
1528 ch = styler[lineStartPosn-1];
1529 if (ch == '\n' || ch == '\r') {
1530 chPrev = styler.SafeGetCharAt(lineStartPosn-2);
1531 chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
1532 lineStart = styler.GetLine(lineStartPosn-1);
1533 // If we find a continuation line, include it in our analysis.
1534 if (chPrev == '\\') {
1535 lineStartPosn = styler.LineStart(lineStart);
1536 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1537 lineStartPosn = styler.LineStart(lineStart);
1538 } else {
1539 break;
1540 }
1541 } else {
1542 break;
1543 }
1544 }
1545
1546 styler.Flush();
1547 while (--pos >= lineStartPosn) {
1548 style = actual_style(styler.StyleAt(pos));
1549 if (style == SCE_RB_DEFAULT) {
1550 if (iswhitespace(ch = styler[pos])) {
1551 //continue
1552 } else if (ch == '\r' || ch == '\n') {
1553 // Scintilla's LineStart() and GetLine() routines aren't
1554 // platform-independent, so if we have text prepared with
1555 // a different system we can't rely on it.
1556
1557 // Also, lineStartPosn may have been moved to more than one
1558 // line above word's line while pushing past continuations.
1559 chPrev = styler.SafeGetCharAt(pos - 1);
1560 chPrev2 = styler.SafeGetCharAt(pos - 2);
1561 if (chPrev == '\\') {
1562 pos-=1; // gloss over the "\\"
1563 //continue
1564 } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
1565 pos-=2; // gloss over the "\\\r"
1566 //continue
1567 } else {
1568 return false;
1569 }
1570 }
1571 } else {
1572 break;
1573 }
1574 }
1575 if (pos < lineStartPosn) {
1576 return false;
1577 }
1578 // First things where the action is unambiguous
1579 switch (style) {
1580 case SCE_RB_DEFAULT:
1581 case SCE_RB_COMMENTLINE:
1582 case SCE_RB_POD:
1583 case SCE_RB_CLASSNAME:
1584 case SCE_RB_DEFNAME:
1585 case SCE_RB_MODULE_NAME:
1586 return false;
1587 case SCE_RB_OPERATOR:
1588 break;
1589 case SCE_RB_WORD:
1590 // Watch out for uses of 'else if'
1591 //XXX: Make a list of other keywords where 'if' isn't a modifier
1592 // and can appear legitimately
1593 // Formulate this to avoid warnings from most compilers
1594 if (strcmp(word, "if") == 0) {
1595 char prevWord[MAX_KEYWORD_LENGTH + 1];
1596 getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
1597 return strcmp(prevWord, "else") != 0;
1598 }
1599 return true;
1600 default:
1601 return true;
1602 }
1603 // Assume that if the keyword follows an operator,
1604 // usually it's a block assignment, like
1605 // a << if x then y else z
1606
1607 ch = styler[pos];
1608 switch (ch) {
1609 case ')':
1610 case ']':
1611 case '}':
1612 return true;
1613 default:
1614 return false;
1615 }
1616}
1617
1618#define WHILE_BACKWARDS "elihw"
1619#define UNTIL_BACKWARDS "litnu"
1620#define FOR_BACKWARDS "rof"
1621
1622// Nothing fancy -- look to see if we follow a while/until somewhere
1623// on the current line
1624
1625static bool keywordDoStartsLoop(Sci_Position pos,
1626 Accessor &styler)
1627{
1628 char ch;
1629 Sci_Position lineStart = styler.GetLine(pos);
1630 Sci_Position lineStartPosn = styler.LineStart(lineStart);
1631 styler.Flush();
1632 while (--pos >= lineStartPosn) {
1633 const int style = actual_style(styler.StyleAt(pos));
1634 if (style == SCE_RB_DEFAULT) {
1635 if ((ch = styler[pos]) == '\r' || ch == '\n') {
1636 // Scintilla's LineStart() and GetLine() routines aren't
1637 // platform-independent, so if we have text prepared with
1638 // a different system we can't rely on it.
1639 return false;
1640 }
1641 } else if (style == SCE_RB_WORD) {
1642 // Check for while or until, but write the word in backwards
1643 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1644 char *dst = prevWord;
1645 int wordLen = 0;
1646 Sci_Position start_word;
1647 for (start_word = pos;
1648 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
1649 start_word--) {
1650 if (++wordLen < MAX_KEYWORD_LENGTH) {
1651 *dst++ = styler[start_word];
1652 }
1653 }
1654 *dst = 0;
1655 // Did we see our keyword?
1656 if (!strcmp(prevWord, WHILE_BACKWARDS)
1657 || !strcmp(prevWord, UNTIL_BACKWARDS)
1658 || !strcmp(prevWord, FOR_BACKWARDS)) {
1659 return true;
1660 }
1661 // We can move pos to the beginning of the keyword, and then
1662 // accept another decrement, as we can never have two contiguous
1663 // keywords:
1664 // word1 word2
1665 // ^
1666 // <- move to start_word
1667 // ^
1668 // <- loop decrement
1669 // ^ # pointing to end of word1 is fine
1670 pos = start_word;
1671 }
1672 }
1673 return false;
1674}
1675
1676static bool IsCommentLine(Sci_Position line, Accessor &styler) {
1677 Sci_Position pos = styler.LineStart(line);
1678 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
1679 for (Sci_Position i = pos; i < eol_pos; i++) {
1680 char ch = styler[i];
1681 if (ch == '#')
1682 return true;
1683 else if (ch != ' ' && ch != '\t')
1684 return false;
1685 }
1686 return false;
1687}
1688
1689/*
1690 * Folding Ruby
1691 *
1692 * The language is quite complex to analyze without a full parse.
1693 * For example, this line shouldn't affect fold level:
1694 *
1695 * print "hello" if feeling_friendly?
1696 *
1697 * Neither should this:
1698 *
1699 * print "hello" \
1700 * if feeling_friendly?
1701 *
1702 *
1703 * But this should:
1704 *
1705 * if feeling_friendly? #++
1706 * print "hello" \
1707 * print "goodbye"
1708 * end #--
1709 *
1710 * So we cheat, by actually looking at the existing indentation
1711 * levels for each line, and just echoing it back. Like Python.
1712 * Then if we get better at it, we'll take braces into consideration,
1713 * which always affect folding levels.
1714
1715 * How the keywords should work:
1716 * No effect:
1717 * __FILE__ __LINE__ BEGIN END alias and
1718 * defined? false in nil not or self super then
1719 * true undef
1720
1721 * Always increment:
1722 * begin class def do for module when {
1723 *
1724 * Always decrement:
1725 * end }
1726 *
1727 * Increment if these start a statement
1728 * if unless until while -- do nothing if they're modifiers
1729
1730 * These end a block if there's no modifier, but don't bother
1731 * break next redo retry return yield
1732 *
1733 * These temporarily de-indent, but re-indent
1734 * case else elsif ensure rescue
1735 *
 * This means that the folder reflects indentation rather
 * than setting it. The language service updates indentation
 * when users type return and when they finish entering de-denters.
1739 *
1740 * Later offer to fold POD, here-docs, strings, and blocks of comments
1741 */
1742
1743static void FoldRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
1744 WordList *[], Accessor &styler) {
1745 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1746 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1747
1748 synchronizeDocStart(startPos, length, initStyle, styler, // ref args
1749 false);
1750 Sci_PositionU endPos = startPos + length;
1751 int visibleChars = 0;
1752 Sci_Position lineCurrent = styler.GetLine(startPos);
1753 int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
1754 & SC_FOLDLEVELNUMBERMASK
1755 & ~SC_FOLDLEVELBASE);
1756 int levelCurrent = levelPrev;
1757 char chNext = styler[startPos];
1758 int styleNext = styler.StyleAt(startPos);
1759 int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
1760 bool buffer_ends_with_eol = false;
1761 for (Sci_PositionU i = startPos; i < endPos; i++) {
1762 char ch = chNext;
1763 chNext = styler.SafeGetCharAt(i + 1);
1764 int style = styleNext;
1765 styleNext = styler.StyleAt(i + 1);
1766 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1767
1768 /*Mutiline comment patch*/
1769 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1770 if (!IsCommentLine(lineCurrent - 1, styler)
1771 && IsCommentLine(lineCurrent + 1, styler))
1772 levelCurrent++;
1773 else if (IsCommentLine(lineCurrent - 1, styler)
1774 && !IsCommentLine(lineCurrent + 1, styler))
1775 levelCurrent--;
1776 }
1777
1778 if (style == SCE_RB_COMMENTLINE) {
1779 if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
1780 if (chNext == '{') {
1781 levelCurrent++;
1782 } else if (chNext == '}' && levelCurrent > 0) {
1783 levelCurrent--;
1784 }
1785 }
1786 } else if (style == SCE_RB_OPERATOR) {
1787 if (strchr("[{(", ch)) {
1788 levelCurrent++;
1789 } else if (strchr(")}]", ch)) {
1790 // Don't decrement below 0
1791 if (levelCurrent > 0)
1792 levelCurrent--;
1793 }
1794 } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
1795 // Look at the keyword on the left and decide what to do
1796 char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
1797 prevWord[0] = 0;
1798 getPrevWord(i, prevWord, styler, SCE_RB_WORD);
1799 if (!strcmp(prevWord, "end")) {
1800 // Don't decrement below 0
1801 if (levelCurrent > 0)
1802 levelCurrent--;
1803 } else if (!strcmp(prevWord, "if")
1804 || !strcmp(prevWord, "def")
1805 || !strcmp(prevWord, "class")
1806 || !strcmp(prevWord, "module")
1807 || !strcmp(prevWord, "begin")
1808 || !strcmp(prevWord, "case")
1809 || !strcmp(prevWord, "do")
1810 || !strcmp(prevWord, "while")
1811 || !strcmp(prevWord, "unless")
1812 || !strcmp(prevWord, "until")
1813 || !strcmp(prevWord, "for")
1814 ) {
1815 levelCurrent++;
1816 }
1817 } else if (style == SCE_RB_HERE_DELIM) {
1818 if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') {
1819 levelCurrent++;
1820 } else if (styleNext == SCE_RB_DEFAULT) {
1821 levelCurrent--;
1822 }
1823 }
1824 if (atEOL) {
1825 int lev = levelPrev;
1826 if (visibleChars == 0 && foldCompact)
1827 lev |= SC_FOLDLEVELWHITEFLAG;
1828 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1829 lev |= SC_FOLDLEVELHEADERFLAG;
1830 styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
1831 lineCurrent++;
1832 levelPrev = levelCurrent;
1833 visibleChars = 0;
1834 buffer_ends_with_eol = true;
1835 } else if (!isspacechar(ch)) {
1836 visibleChars++;
1837 buffer_ends_with_eol = false;
1838 }
1839 stylePrev = style;
1840 }
1841 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1842 if (!buffer_ends_with_eol) {
1843 int new_lev = levelCurrent;
1844 if (visibleChars == 0 && foldCompact)
1845 new_lev |= SC_FOLDLEVELWHITEFLAG;
1846 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1847 new_lev |= SC_FOLDLEVELHEADERFLAG;
1848 levelCurrent = new_lev;
1849 }
1850 styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
1851}
1852
// Descriptions of the keyword lists accepted by the Ruby lexer, ending
// with a null pointer.  There is a single list.
static const char *const rubyWordListDesc[] = {
    "Keywords",
    NULL
};
1857
1858LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);
1859