| 1 | // Scintilla source code edit control |
| 2 | /** @file LexRuby.cxx |
| 3 | ** Lexer for Ruby. |
| 4 | **/ |
| 5 | // Copyright 2001- by Clemens Wyss <wys@helbling.ch> |
| 6 | // The License.txt file describes the conditions under which this software may be distributed. |
| 7 | |
| 8 | #include <stdlib.h> |
| 9 | #include <string.h> |
| 10 | #include <stdio.h> |
| 11 | #include <stdarg.h> |
| 12 | #include <assert.h> |
| 13 | #include <ctype.h> |
| 14 | |
| 15 | #include <string> |
| 16 | #include <string_view> |
| 17 | |
| 18 | #include "ILexer.h" |
| 19 | #include "Scintilla.h" |
| 20 | #include "SciLexer.h" |
| 21 | |
| 22 | #include "WordList.h" |
| 23 | #include "LexAccessor.h" |
| 24 | #include "Accessor.h" |
| 25 | #include "StyleContext.h" |
| 26 | #include "CharacterSet.h" |
| 27 | #include "LexerModule.h" |
| 28 | |
| 29 | using namespace Lexilla; |
| 30 | |
//XXX Identical to Perl, put in common area
// Reports whether ch is one of the two end-of-line characters, CR or LF.
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
| 35 | |
// True when ch, converted to unsigned int, lies in the 7-bit range 0..127.
// A negative (signed) char converts to a large unsigned value and is rejected.
#define isSafeASCII(ch) (((unsigned int)(ch)) < 128U)
// Exact complement of isSafeASCII; kept under its own name so call sites read better.
#define isHighBitChar(ch) (((unsigned int)(ch)) >= 128U)
| 39 | |
| 40 | static inline bool isSafeAlpha(char ch) { |
| 41 | return (isSafeASCII(ch) && isalpha(ch)) || ch == '_'; |
| 42 | } |
| 43 | |
| 44 | static inline bool isSafeAlnum(char ch) { |
| 45 | return (isSafeASCII(ch) && isalnum(ch)) || ch == '_'; |
| 46 | } |
| 47 | |
| 48 | static inline bool isSafeAlnumOrHigh(char ch) { |
| 49 | return isHighBitChar(ch) || isalnum(ch) || ch == '_'; |
| 50 | } |
| 51 | |
| 52 | static inline bool isSafeDigit(char ch) { |
| 53 | return isSafeASCII(ch) && isdigit(ch); |
| 54 | } |
| 55 | |
| 56 | static inline bool isSafeWordcharOrHigh(char ch) { |
| 57 | // Error: scintilla's KeyWords.h includes '.' as a word-char |
| 58 | // we want to separate things that can take methods from the |
| 59 | // methods. |
| 60 | return isHighBitChar(ch) || isalnum(ch) || ch == '_'; |
| 61 | } |
| 62 | |
// Reports whether ch is horizontal whitespace: a space or a tab.
// End-of-line characters are not counted.
static inline bool iswhitespace(char ch) {
    if (ch == ' ') {
        return true;
    }
    return ch == '\t';
}
| 66 | |
// Size of the scratch buffers used in this file to hold a single word.
#define MAX_KEYWORD_LENGTH 200

// Mask selecting the low six bits of a style byte.
#define STYLE_MASK 63
// Strips everything above STYLE_MASK from a style value. The argument is
// parenthesized so that an expression argument such as (a | b) is masked
// as a whole rather than being split by operator precedence.
#define actual_style(style) ((style) & STYLE_MASK)
| 71 | |
| 72 | static bool followsDot(Sci_PositionU pos, Accessor &styler) { |
| 73 | styler.Flush(); |
| 74 | for (; pos >= 1; --pos) { |
| 75 | int style = actual_style(styler.StyleAt(pos)); |
| 76 | char ch; |
| 77 | switch (style) { |
| 78 | case SCE_RB_DEFAULT: |
| 79 | ch = styler[pos]; |
| 80 | if (ch == ' ' || ch == '\t') { |
| 81 | //continue |
| 82 | } else { |
| 83 | return false; |
| 84 | } |
| 85 | break; |
| 86 | |
| 87 | case SCE_RB_OPERATOR: |
| 88 | return styler[pos] == '.'; |
| 89 | |
| 90 | default: |
| 91 | return false; |
| 92 | } |
| 93 | } |
| 94 | return false; |
| 95 | } |
| 96 | |
| 97 | // Forward declarations |
| 98 | static bool keywordIsAmbiguous(const char *prevWord); |
| 99 | static bool keywordDoStartsLoop(Sci_Position pos, |
| 100 | Accessor &styler); |
| 101 | static bool keywordIsModifier(const char *word, |
| 102 | Sci_Position pos, |
| 103 | Accessor &styler); |
| 104 | |
| 105 | static int ClassifyWordRb(Sci_PositionU start, Sci_PositionU end, WordList &keywords, Accessor &styler, char *prevWord) { |
| 106 | char s[MAX_KEYWORD_LENGTH]; |
| 107 | Sci_PositionU i, j; |
| 108 | Sci_PositionU lim = end - start + 1; // num chars to copy |
| 109 | if (lim >= MAX_KEYWORD_LENGTH) { |
| 110 | lim = MAX_KEYWORD_LENGTH - 1; |
| 111 | } |
| 112 | for (i = start, j = 0; j < lim; i++, j++) { |
| 113 | s[j] = styler[i]; |
| 114 | } |
| 115 | s[j] = '\0'; |
| 116 | int chAttr; |
| 117 | if (0 == strcmp(prevWord, "class" )) |
| 118 | chAttr = SCE_RB_CLASSNAME; |
| 119 | else if (0 == strcmp(prevWord, "module" )) |
| 120 | chAttr = SCE_RB_MODULE_NAME; |
| 121 | else if (0 == strcmp(prevWord, "def" )) |
| 122 | chAttr = SCE_RB_DEFNAME; |
| 123 | else if (keywords.InList(s) && ((start == 0) || !followsDot(start - 1, styler))) { |
| 124 | if (keywordIsAmbiguous(s) |
| 125 | && keywordIsModifier(s, start, styler)) { |
| 126 | |
| 127 | // Demoted keywords are colored as keywords, |
| 128 | // but do not affect changes in indentation. |
| 129 | // |
| 130 | // Consider the word 'if': |
| 131 | // 1. <<if test ...>> : normal |
| 132 | // 2. <<stmt if test>> : demoted |
| 133 | // 3. <<lhs = if ...>> : normal: start a new indent level |
| 134 | // 4. <<obj.if = 10>> : color as identifer, since it follows '.' |
| 135 | |
| 136 | chAttr = SCE_RB_WORD_DEMOTED; |
| 137 | } else { |
| 138 | chAttr = SCE_RB_WORD; |
| 139 | } |
| 140 | } else |
| 141 | chAttr = SCE_RB_IDENTIFIER; |
| 142 | styler.ColourTo(end, chAttr); |
| 143 | if (chAttr == SCE_RB_WORD) { |
| 144 | strcpy(prevWord, s); |
| 145 | } else { |
| 146 | prevWord[0] = 0; |
| 147 | } |
| 148 | return chAttr; |
| 149 | } |
| 150 | |
| 151 | |
| 152 | //XXX Identical to Perl, put in common area |
| 153 | static bool isMatch(Accessor &styler, Sci_Position lengthDoc, Sci_Position pos, const char *val) { |
| 154 | if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { |
| 155 | return false; |
| 156 | } |
| 157 | while (*val) { |
| 158 | if (*val != styler[pos++]) { |
| 159 | return false; |
| 160 | } |
| 161 | val++; |
| 162 | } |
| 163 | return true; |
| 164 | } |
| 165 | |
| 166 | // Do Ruby better -- find the end of the line, work back, |
| 167 | // and then check for leading white space |
| 168 | |
| 169 | // Precondition: the here-doc target can be indented |
| 170 | static bool lookingAtHereDocDelim(Accessor &styler, |
| 171 | Sci_Position pos, |
| 172 | Sci_Position lengthDoc, |
| 173 | const char *HereDocDelim) |
| 174 | { |
| 175 | if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) { |
| 176 | return false; |
| 177 | } |
| 178 | while (--pos > 0) { |
| 179 | char ch = styler[pos]; |
| 180 | if (isEOLChar(ch)) { |
| 181 | return true; |
| 182 | } else if (ch != ' ' && ch != '\t') { |
| 183 | return false; |
| 184 | } |
| 185 | } |
| 186 | return false; |
| 187 | } |
| 188 | |
//XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner: ( ) [ ] { } < >.
// Any other character is its own closer and is returned unchanged.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
| 201 | |
| 202 | // Null transitions when we see we've reached the end |
| 203 | // and need to relex the curr char. |
| 204 | |
| 205 | static void redo_char(Sci_Position &i, char &ch, char &chNext, char &chNext2, |
| 206 | int &state) { |
| 207 | i--; |
| 208 | chNext2 = chNext; |
| 209 | chNext = ch; |
| 210 | state = SCE_RB_DEFAULT; |
| 211 | } |
| 212 | |
| 213 | static void advance_char(Sci_Position &i, char &ch, char &chNext, char &chNext2) { |
| 214 | i++; |
| 215 | ch = chNext; |
| 216 | chNext = chNext2; |
| 217 | } |
| 218 | |
| 219 | // precondition: startPos points to one after the EOL char |
| 220 | static bool currLineContainsHereDelims(Sci_Position &startPos, |
| 221 | Accessor &styler) { |
| 222 | if (startPos <= 1) |
| 223 | return false; |
| 224 | |
| 225 | Sci_Position pos; |
| 226 | for (pos = startPos - 1; pos > 0; pos--) { |
| 227 | char ch = styler.SafeGetCharAt(pos); |
| 228 | if (isEOLChar(ch)) { |
| 229 | // Leave the pointers where they are -- there are no |
| 230 | // here doc delims on the current line, even if |
| 231 | // the EOL isn't default style |
| 232 | |
| 233 | return false; |
| 234 | } else { |
| 235 | styler.Flush(); |
| 236 | if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) { |
| 237 | break; |
| 238 | } |
| 239 | } |
| 240 | } |
| 241 | if (pos == 0) { |
| 242 | return false; |
| 243 | } |
| 244 | // Update the pointers so we don't have to re-analyze the string |
| 245 | startPos = pos; |
| 246 | return true; |
| 247 | } |
| 248 | |
| 249 | // This class is used by the enter and exit methods, so it needs |
| 250 | // to be hoisted out of the function. |
| 251 | |
| 252 | class QuoteCls { |
| 253 | public: |
| 254 | int Count; |
| 255 | char Up; |
| 256 | char Down; |
| 257 | QuoteCls() noexcept { |
| 258 | New(); |
| 259 | } |
| 260 | void New() noexcept { |
| 261 | Count = 0; |
| 262 | Up = '\0'; |
| 263 | Down = '\0'; |
| 264 | } |
| 265 | void Open(char u) { |
| 266 | Count++; |
| 267 | Up = u; |
| 268 | Down = opposite(Up); |
| 269 | } |
| 270 | }; |
| 271 | |
| 272 | |
| 273 | static void enterInnerExpression(int *p_inner_string_types, |
| 274 | int *p_inner_expn_brace_counts, |
| 275 | QuoteCls *p_inner_quotes, |
| 276 | int &inner_string_count, |
| 277 | int &state, |
| 278 | int &brace_counts, |
| 279 | QuoteCls curr_quote |
| 280 | ) { |
| 281 | p_inner_string_types[inner_string_count] = state; |
| 282 | state = SCE_RB_DEFAULT; |
| 283 | p_inner_expn_brace_counts[inner_string_count] = brace_counts; |
| 284 | brace_counts = 0; |
| 285 | p_inner_quotes[inner_string_count] = curr_quote; |
| 286 | ++inner_string_count; |
| 287 | } |
| 288 | |
| 289 | static void exitInnerExpression(int *p_inner_string_types, |
| 290 | int *p_inner_expn_brace_counts, |
| 291 | QuoteCls *p_inner_quotes, |
| 292 | int &inner_string_count, |
| 293 | int &state, |
| 294 | int &brace_counts, |
| 295 | QuoteCls &curr_quote |
| 296 | ) { |
| 297 | --inner_string_count; |
| 298 | state = p_inner_string_types[inner_string_count]; |
| 299 | brace_counts = p_inner_expn_brace_counts[inner_string_count]; |
| 300 | curr_quote = p_inner_quotes[inner_string_count]; |
| 301 | } |
| 302 | |
| 303 | static bool isEmptyLine(Sci_Position pos, |
| 304 | Accessor &styler) { |
| 305 | int spaceFlags = 0; |
| 306 | Sci_Position lineCurrent = styler.GetLine(pos); |
| 307 | int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL); |
| 308 | return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0; |
| 309 | } |
| 310 | |
// Reports whether keyword is one of a fixed set of Ruby keywords.
// Judging by the name, these are the keywords after which a '/' may start
// a regular expression -- confirm against the callers.
// The comparison is exact and case-sensitive.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const reLeadIns[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(reLeadIns) / sizeof(reLeadIns[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(keyword, reLeadIns[k]) == 0) {
            return true;
        }
    }
    return false;
}
| 331 | |
| 332 | // Look at chars up to but not including endPos |
| 333 | // Don't look at styles in case we're looking forward |
| 334 | |
| 335 | static Sci_Position skipWhitespace(Sci_Position startPos, |
| 336 | Sci_Position endPos, |
| 337 | Accessor &styler) { |
| 338 | for (Sci_Position i = startPos; i < endPos; i++) { |
| 339 | if (!iswhitespace(styler[i])) { |
| 340 | return i; |
| 341 | } |
| 342 | } |
| 343 | return endPos; |
| 344 | } |
| 345 | |
| 346 | // This routine looks for false positives like |
| 347 | // undef foo, << |
| 348 | // There aren't too many. |
| 349 | // |
| 350 | // iPrev points to the start of << |
| 351 | |
| 352 | static bool sureThisIsHeredoc(Sci_Position iPrev, |
| 353 | Accessor &styler, |
| 354 | char *prevWord) { |
| 355 | |
| 356 | // Not so fast, since Ruby's so dynamic. Check the context |
| 357 | // to make sure we're OK. |
| 358 | int prevStyle; |
| 359 | Sci_Position lineStart = styler.GetLine(iPrev); |
| 360 | Sci_Position lineStartPosn = styler.LineStart(lineStart); |
| 361 | styler.Flush(); |
| 362 | |
| 363 | // Find the first word after some whitespace |
| 364 | Sci_Position firstWordPosn = skipWhitespace(lineStartPosn, iPrev, styler); |
| 365 | if (firstWordPosn >= iPrev) { |
| 366 | // Have something like {^ <<} |
| 367 | //XXX Look at the first previous non-comment non-white line |
| 368 | // to establish the context. Not too likely though. |
| 369 | return true; |
| 370 | } else { |
| 371 | switch (prevStyle = styler.StyleAt(firstWordPosn)) { |
| 372 | case SCE_RB_WORD: |
| 373 | case SCE_RB_WORD_DEMOTED: |
| 374 | case SCE_RB_IDENTIFIER: |
| 375 | break; |
| 376 | default: |
| 377 | return true; |
| 378 | } |
| 379 | } |
| 380 | Sci_Position firstWordEndPosn = firstWordPosn; |
| 381 | char *dst = prevWord; |
| 382 | for (;;) { |
| 383 | if (firstWordEndPosn >= iPrev || |
| 384 | styler.StyleAt(firstWordEndPosn) != prevStyle) { |
| 385 | *dst = 0; |
| 386 | break; |
| 387 | } |
| 388 | *dst++ = styler[firstWordEndPosn]; |
| 389 | firstWordEndPosn += 1; |
| 390 | } |
| 391 | //XXX Write a style-aware thing to regex scintilla buffer objects |
| 392 | if (!strcmp(prevWord, "undef" ) |
| 393 | || !strcmp(prevWord, "def" ) |
| 394 | || !strcmp(prevWord, "alias" )) { |
| 395 | // These keywords are what we were looking for |
| 396 | return false; |
| 397 | } |
| 398 | return true; |
| 399 | } |
| 400 | |
| 401 | // Routine that saves us from allocating a buffer for the here-doc target |
| 402 | // targetEndPos points one past the end of the current target |
| 403 | static bool haveTargetMatch(Sci_Position currPos, |
| 404 | Sci_Position lengthDoc, |
| 405 | Sci_Position targetStartPos, |
| 406 | Sci_Position targetEndPos, |
| 407 | Accessor &styler) { |
| 408 | if (lengthDoc - currPos < targetEndPos - targetStartPos) { |
| 409 | return false; |
| 410 | } |
| 411 | Sci_Position i, j; |
| 412 | for (i = targetStartPos, j = currPos; |
| 413 | i < targetEndPos && j < lengthDoc; |
| 414 | i++, j++) { |
| 415 | if (styler[i] != styler[j]) { |
| 416 | return false; |
| 417 | } |
| 418 | } |
| 419 | return true; |
| 420 | } |
| 421 | |
| 422 | // Finds the start position of the expression containing @p pos |
| 423 | // @p min_pos should be a known expression start, e.g. the start of the line |
| 424 | static Sci_Position findExpressionStart(Sci_Position pos, |
| 425 | Sci_Position min_pos, |
| 426 | Accessor &styler) { |
| 427 | int depth = 0; |
| 428 | for (; pos > min_pos; pos -= 1) { |
| 429 | int style = styler.StyleAt(pos - 1); |
| 430 | if (style == SCE_RB_OPERATOR) { |
| 431 | int ch = styler[pos - 1]; |
| 432 | if (ch == '}' || ch == ')' || ch == ']') { |
| 433 | depth += 1; |
| 434 | } else if (ch == '{' || ch == '(' || ch == '[') { |
| 435 | if (depth == 0) { |
| 436 | break; |
| 437 | } else { |
| 438 | depth -= 1; |
| 439 | } |
| 440 | } else if (ch == ';' && depth == 0) { |
| 441 | break; |
| 442 | } |
| 443 | } |
| 444 | } |
| 445 | return pos; |
| 446 | } |
| 447 | |
// We need a check because the form
//      [identifier] <<[target]
// is ambiguous.  The Ruby lexer/parser resolves it by
// looking to see if [identifier] names a variable or a
// function.  If it's a function, '<<' is the start of a here-doc.
// If it's a variable, '<<' is an operator.  This lexer doesn't
// maintain a symbol table, so it looks ahead to see what's
// going on, in cases where we have
// ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
//
// If there's no occurrence of [target] at the start of one of the
// following lines, assume it isn't a here-doc.
//
// lt2StartPos is the position of the first '<' of the '<<'.
//
// Returns true  == yes, we are sure this is NOT a here-doc
//         false == it looks like a here-doc
static bool sureThisIsNotHeredoc(Sci_Position lt2StartPos,
                                 Accessor &styler) {
    int prevStyle;
    // Use full document, not just part we're styling
    Sci_Position lengthDoc = styler.Length();
    Sci_Position lineStart = styler.GetLine(lt2StartPos);
    Sci_Position lineStartPosn = styler.LineStart(lineStart);
    styler.Flush();
    // Named return values, so each exit below states what it concluded
    const bool definitely_not_a_here_doc = true;
    const bool looks_like_a_here_doc = false;

    // find the expression start rather than the line start
    Sci_Position exprStartPosn = findExpressionStart(lt2StartPos, lineStartPosn, styler);

    // Find the first word after some whitespace
    Sci_Position firstWordPosn = skipWhitespace(exprStartPosn, lt2StartPos, styler);
    if (firstWordPosn >= lt2StartPos) {
        // Nothing but blanks between the expression start and '<<'
        return definitely_not_a_here_doc;
    }
    prevStyle = styler.StyleAt(firstWordPosn);
    // If we have '<<' following a keyword, it's not a heredoc
    // (only identifier, symbol, instance-var and class-var styles go on)
    if (prevStyle != SCE_RB_IDENTIFIER
            && prevStyle != SCE_RB_SYMBOL
            && prevStyle != SCE_RB_INSTANCE_VAR
            && prevStyle != SCE_RB_CLASS_VAR) {
        return definitely_not_a_here_doc;
    }
    int newStyle = prevStyle;
    // Some compilers incorrectly warn about uninit newStyle
    // Outer loop: one pass per name in a chain joined by '.' or '::'
    for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
        // Inner loop looks at the name
        for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
            newStyle = styler.StyleAt(firstWordPosn);
            if (newStyle != prevStyle) {
                break;
            }
        }
        // Do we have '::' or '.'?
        if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
            char ch = styler[firstWordPosn];
            if (ch == '.') {
                // yes
            } else if (ch == ':') {
                // A single ':' must be followed by a second operator-styled ':'
                if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
                    return definitely_not_a_here_doc;
                } else if (styler[firstWordPosn] != ':') {
                    return definitely_not_a_here_doc;
                }
            } else {
                break;
            }
        } else {
            break;
        }
        // on second and next passes, only identifiers may appear since
        // class and instance variable are private
        prevStyle = SCE_RB_IDENTIFIER;
    }
    // Skip next batch of white-space
    firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
    // possible symbol for an implicit hash argument
    if (firstWordPosn < lt2StartPos && styler.StyleAt(firstWordPosn) == SCE_RB_SYMBOL) {
        for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
            if (styler.StyleAt(firstWordPosn) != SCE_RB_SYMBOL) {
                break;
            }
        }
        // Skip next batch of white-space
        firstWordPosn = skipWhitespace(firstWordPosn, lt2StartPos, styler);
    }
    // After the name chain (and optional symbol) we must be exactly at '<<'
    if (firstWordPosn != lt2StartPos) {
        // Have [[^ws[identifier]ws[*something_else*]ws<<
        return definitely_not_a_here_doc;
    }
    // OK, now 'j' will point to the current spot moving ahead
    // (it starts on the second '<' of the '<<')
    Sci_Position j = firstWordPosn + 1;
    if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
        // This shouldn't happen
        return definitely_not_a_here_doc;
    }
    // A here-doc needs at least one following line to hold its terminator
    Sci_Position nextLineStartPosn = styler.LineStart(lineStart + 1);
    if (nextLineStartPosn >= lengthDoc) {
        return definitely_not_a_here_doc;
    }
    j = skipWhitespace(j + 1, nextLineStartPosn, styler);
    if (j >= lengthDoc) {
        return definitely_not_a_here_doc;
    }
    bool allow_indent;
    Sci_Position target_start, target_end;
    // From this point on no more styling, since we're looking ahead
    // '<<-' and '<<~' mean the terminator line may start with blanks
    if (styler[j] == '-' || styler[j] == '~') {
        allow_indent = true;
        j++;
    } else {
        allow_indent = false;
    }

    // Allow for quoted targets.
    char target_quote = 0;
    switch (styler[j]) {
    case '\'':
    case '"':
    case '`':
        target_quote = styler[j];
        j += 1;
    }

    // The target must begin with an ASCII alphanumeric or '_'
    if (isSafeAlnum(styler[j])) {
        // Init target_end because some compilers think it won't
        // be initialized by the time it's used
        target_start = target_end = j;
        j++;
    } else {
        return definitely_not_a_here_doc;
    }
    // Scan to the end of the target; the first non-word character
    // either ends the scan (break) or rejects the here-doc (return)
    for (; j < lengthDoc; j++) {
        if (!isSafeAlnum(styler[j])) {
            if (target_quote && styler[j] != target_quote) {
                // unquoted end
                return definitely_not_a_here_doc;
            }

            // And for now make sure that it's a newline
            // don't handle arbitrary expressions yet

            target_end = j;
            if (target_quote) {
                // Now we can move to the character after the string delimiter.
                j += 1;
            }
            j = skipWhitespace(j, lengthDoc, styler);
            if (j >= lengthDoc) {
                return definitely_not_a_here_doc;
            } else {
                // Only a comment or the end of the line may follow the target
                char ch = styler[j];
                if (ch == '#' || isEOLChar(ch)) {
                    // This is OK, so break and continue;
                    break;
                } else {
                    return definitely_not_a_here_doc;
                }
            }
        }
    }

    // Just look at the start of each line
    Sci_Position last_line = styler.GetLine(lengthDoc - 1);
    // But don't go too far
    // (search at most the 50 lines that follow the '<<' line)
    if (last_line > lineStart + 50) {
        last_line = lineStart + 50;
    }
    for (Sci_Position line_num = lineStart + 1; line_num <= last_line; line_num++) {
        if (allow_indent) {
            j = skipWhitespace(styler.LineStart(line_num), lengthDoc, styler);
        } else {
            j = styler.LineStart(line_num);
        }
        // target_end is one past the end
        // (haveTargetMatch only checks that the line begins with the target)
        if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
            // We got it
            return looks_like_a_here_doc;
        }
    }
    return definitely_not_a_here_doc;
}
| 628 | |
| 629 | //todo: if we aren't looking at a stdio character, |
| 630 | // move to the start of the first line that is not in a |
| 631 | // multi-line construct |
| 632 | |
| 633 | static void synchronizeDocStart(Sci_PositionU &startPos, |
| 634 | Sci_Position &length, |
| 635 | int &initStyle, |
| 636 | Accessor &styler, |
| 637 | bool skipWhiteSpace=false) { |
| 638 | |
| 639 | styler.Flush(); |
| 640 | int style = actual_style(styler.StyleAt(startPos)); |
| 641 | switch (style) { |
| 642 | case SCE_RB_STDIN: |
| 643 | case SCE_RB_STDOUT: |
| 644 | case SCE_RB_STDERR: |
| 645 | // Don't do anything else with these. |
| 646 | return; |
| 647 | } |
| 648 | |
| 649 | Sci_Position pos = startPos; |
| 650 | // Quick way to characterize each line |
| 651 | Sci_Position lineStart; |
| 652 | for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) { |
| 653 | // Now look at the style before the previous line's EOL |
| 654 | pos = styler.LineStart(lineStart) - 1; |
| 655 | if (pos <= 10) { |
| 656 | lineStart = 0; |
| 657 | break; |
| 658 | } |
| 659 | char ch = styler.SafeGetCharAt(pos); |
| 660 | char chPrev = styler.SafeGetCharAt(pos - 1); |
| 661 | if (ch == '\n' && chPrev == '\r') { |
| 662 | pos--; |
| 663 | } |
| 664 | if (styler.SafeGetCharAt(pos - 1) == '\\') { |
| 665 | // Continuation line -- keep going |
| 666 | } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) { |
| 667 | // Part of multi-line construct -- keep going |
| 668 | } else if (currLineContainsHereDelims(pos, styler)) { |
| 669 | // Keep going, with pos and length now pointing |
| 670 | // at the end of the here-doc delimiter |
| 671 | } else if (skipWhiteSpace && isEmptyLine(pos, styler)) { |
| 672 | // Keep going |
| 673 | } else { |
| 674 | break; |
| 675 | } |
| 676 | } |
| 677 | pos = styler.LineStart(lineStart); |
| 678 | length += (startPos - pos); |
| 679 | startPos = pos; |
| 680 | initStyle = SCE_RB_DEFAULT; |
| 681 | } |
| 682 | |
| 683 | static void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, |
| 684 | WordList *keywordlists[], Accessor &styler) { |
| 685 | |
| 686 | // Lexer for Ruby often has to backtrack to start of current style to determine |
| 687 | // which characters are being used as quotes, how deeply nested is the |
| 688 | // start position and what the termination string is for here documents |
| 689 | |
| 690 | WordList &keywords = *keywordlists[0]; |
| 691 | |
| 692 | class HereDocCls { |
| 693 | public: |
| 694 | int State = 0; |
| 695 | // States |
| 696 | // 0: '<<' encountered |
| 697 | // 1: collect the delimiter |
| 698 | // 1b: text between the end of the delimiter and the EOL |
| 699 | // 2: here doc text (lines after the delimiter) |
| 700 | char Quote = 0; // the char after '<<' |
| 701 | bool Quoted = false; // true if Quote in ('\'','"','`') |
| 702 | int DelimiterLength = 0; // strlen(Delimiter) |
| 703 | char Delimiter[256] {}; // the Delimiter, limit of 256: from Perl |
| 704 | bool CanBeIndented = false; |
| 705 | }; |
| 706 | HereDocCls HereDoc; |
| 707 | |
| 708 | QuoteCls Quote; |
| 709 | |
| 710 | int numDots = 0; // For numbers -- |
| 711 | // Don't start lexing in the middle of a num |
| 712 | |
| 713 | synchronizeDocStart(startPos, length, initStyle, styler, // ref args |
| 714 | false); |
| 715 | |
| 716 | bool preferRE = true; |
| 717 | int state = initStyle; |
| 718 | Sci_Position lengthDoc = startPos + length; |
| 719 | |
| 720 | char prevWord[MAX_KEYWORD_LENGTH + 1] = "" ; // 1 byte for zero |
| 721 | if (length == 0) |
| 722 | return; |
| 723 | |
| 724 | char chPrev = styler.SafeGetCharAt(startPos - 1); |
| 725 | char chNext = styler.SafeGetCharAt(startPos); |
| 726 | bool is_real_number = true; // Differentiate between constants and ?-sequences. |
| 727 | styler.StartAt(startPos); |
| 728 | styler.StartSegment(startPos); |
| 729 | |
| 730 | static int q_states[] = {SCE_RB_STRING_Q, |
| 731 | SCE_RB_STRING_QQ, |
| 732 | SCE_RB_STRING_QR, |
| 733 | SCE_RB_STRING_QW, |
| 734 | SCE_RB_STRING_QW, |
| 735 | SCE_RB_STRING_QX |
| 736 | }; |
| 737 | static const char *q_chars = "qQrwWx" ; |
| 738 | |
| 739 | // In most cases a value of 2 should be ample for the code in the |
| 740 | // Ruby library, and the code the user is likely to enter. |
| 741 | // For example, |
| 742 | // fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}" |
| 743 | // if options[:verbose] |
| 744 | // from fileutils.rb nests to a level of 2 |
| 745 | // If the user actually hits a 6th occurrence of '#{' in a double-quoted |
| 746 | // string (including regex'es, %Q, %<sym>, %w, and other strings |
| 747 | // that interpolate), it will stay as a string. The problem with this |
| 748 | // is that quotes might flip, a 7th '#{' will look like a comment, |
| 749 | // and code-folding might be wrong. |
| 750 | |
| 751 | // If anyone runs into this problem, I recommend raising this |
| 752 | // value slightly higher to replacing the fixed array with a linked |
| 753 | // list. Keep in mind this code will be called every time the lexer |
| 754 | // is invoked. |
| 755 | |
| 756 | #define INNER_STRINGS_MAX_COUNT 5 |
| 757 | // These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..." |
| 758 | int inner_string_types[INNER_STRINGS_MAX_COUNT] {}; |
| 759 | // Track # braces when we push a new #{ thing |
| 760 | int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT] {}; |
| 761 | QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT]; |
| 762 | int inner_string_count = 0; |
| 763 | int brace_counts = 0; // Number of #{ ... } things within an expression |
| 764 | |
| 765 | Sci_Position i; |
| 766 | for (i = 0; i < INNER_STRINGS_MAX_COUNT; i++) { |
| 767 | inner_string_types[i] = 0; |
| 768 | inner_expn_brace_counts[i] = 0; |
| 769 | } |
| 770 | for (i = startPos; i < lengthDoc; i++) { |
| 771 | char ch = chNext; |
| 772 | chNext = styler.SafeGetCharAt(i + 1); |
| 773 | char chNext2 = styler.SafeGetCharAt(i + 2); |
| 774 | |
| 775 | if (styler.IsLeadByte(ch)) { |
| 776 | chNext = chNext2; |
| 777 | chPrev = ' '; |
| 778 | i += 1; |
| 779 | continue; |
| 780 | } |
| 781 | |
| 782 | // skip on DOS/Windows |
| 783 | //No, don't, because some things will get tagged on, |
| 784 | // so we won't recognize keywords, for example |
| 785 | #if 0 |
| 786 | if (ch == '\r' && chNext == '\n') { |
| 787 | continue; |
| 788 | } |
| 789 | #endif |
| 790 | |
| 791 | if (HereDoc.State == 1 && isEOLChar(ch)) { |
| 792 | // Begin of here-doc (the line after the here-doc delimiter): |
| 793 | HereDoc.State = 2; |
| 794 | styler.ColourTo(i-1, state); |
| 795 | // Don't check for a missing quote, just jump into |
| 796 | // the here-doc state |
| 797 | state = SCE_RB_HERE_Q; |
| 798 | } |
| 799 | |
| 800 | // Regular transitions |
| 801 | if (state == SCE_RB_DEFAULT) { |
| 802 | if (isSafeDigit(ch)) { |
| 803 | styler.ColourTo(i - 1, state); |
| 804 | state = SCE_RB_NUMBER; |
| 805 | is_real_number = true; |
| 806 | numDots = 0; |
| 807 | } else if (isHighBitChar(ch) || iswordstart(ch)) { |
| 808 | styler.ColourTo(i - 1, state); |
| 809 | state = SCE_RB_WORD; |
| 810 | } else if (ch == '#') { |
| 811 | styler.ColourTo(i - 1, state); |
| 812 | state = SCE_RB_COMMENTLINE; |
| 813 | } else if (ch == '=') { |
| 814 | // =begin indicates the start of a comment (doc) block |
| 815 | if ((i == 0 || isEOLChar(chPrev)) |
| 816 | && chNext == 'b' |
| 817 | && styler.SafeGetCharAt(i + 2) == 'e' |
| 818 | && styler.SafeGetCharAt(i + 3) == 'g' |
| 819 | && styler.SafeGetCharAt(i + 4) == 'i' |
| 820 | && styler.SafeGetCharAt(i + 5) == 'n' |
| 821 | && !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) { |
| 822 | styler.ColourTo(i - 1, state); |
| 823 | state = SCE_RB_POD; |
| 824 | } else { |
| 825 | styler.ColourTo(i - 1, state); |
| 826 | styler.ColourTo(i, SCE_RB_OPERATOR); |
| 827 | preferRE = true; |
| 828 | } |
| 829 | } else if (ch == '"') { |
| 830 | styler.ColourTo(i - 1, state); |
| 831 | state = SCE_RB_STRING; |
| 832 | Quote.New(); |
| 833 | Quote.Open(ch); |
| 834 | } else if (ch == '\'') { |
| 835 | styler.ColourTo(i - 1, state); |
| 836 | state = SCE_RB_CHARACTER; |
| 837 | Quote.New(); |
| 838 | Quote.Open(ch); |
| 839 | } else if (ch == '`') { |
| 840 | styler.ColourTo(i - 1, state); |
| 841 | state = SCE_RB_BACKTICKS; |
| 842 | Quote.New(); |
| 843 | Quote.Open(ch); |
| 844 | } else if (ch == '@') { |
| 845 | // Instance or class var |
| 846 | styler.ColourTo(i - 1, state); |
| 847 | if (chNext == '@') { |
| 848 | state = SCE_RB_CLASS_VAR; |
| 849 | advance_char(i, ch, chNext, chNext2); // pass by ref |
| 850 | } else { |
| 851 | state = SCE_RB_INSTANCE_VAR; |
| 852 | } |
| 853 | } else if (ch == '$') { |
| 854 | // Check for a builtin global |
| 855 | styler.ColourTo(i - 1, state); |
| 856 | // Recognize it bit by bit |
| 857 | state = SCE_RB_GLOBAL; |
| 858 | } else if (ch == '/' && preferRE) { |
| 859 | // Ambigous operator |
| 860 | styler.ColourTo(i - 1, state); |
| 861 | state = SCE_RB_REGEX; |
| 862 | Quote.New(); |
| 863 | Quote.Open(ch); |
| 864 | } else if (ch == '<' && chNext == '<' && chNext2 != '=') { |
| 865 | |
| 866 | // Recognise the '<<' symbol - either a here document or a binary op |
| 867 | styler.ColourTo(i - 1, state); |
| 868 | i++; |
| 869 | chNext = chNext2; |
| 870 | styler.ColourTo(i, SCE_RB_OPERATOR); |
| 871 | |
| 872 | if (!(strchr("\"\'`_-~" , chNext2) || isSafeAlpha(chNext2))) { |
| 873 | // It's definitely not a here-doc, |
| 874 | // based on Ruby's lexer/parser in the |
| 875 | // heredoc_identifier routine. |
| 876 | // Nothing else to do. |
| 877 | } else if (preferRE) { |
| 878 | if (sureThisIsHeredoc(i - 1, styler, prevWord)) { |
| 879 | state = SCE_RB_HERE_DELIM; |
| 880 | HereDoc.State = 0; |
| 881 | } |
| 882 | // else leave it in default state |
| 883 | } else { |
| 884 | if (sureThisIsNotHeredoc(i - 1, styler)) { |
| 885 | // leave state as default |
| 886 | // We don't have all the heuristics Perl has for indications |
| 887 | // of a here-doc, because '<<' is overloadable and used |
| 888 | // for so many other classes. |
| 889 | } else { |
| 890 | state = SCE_RB_HERE_DELIM; |
| 891 | HereDoc.State = 0; |
| 892 | } |
| 893 | } |
| 894 | preferRE = (state != SCE_RB_HERE_DELIM); |
| 895 | } else if (ch == ':') { |
| 896 | styler.ColourTo(i - 1, state); |
| 897 | if (chNext == ':') { |
| 898 | // Mark "::" as an operator, not symbol start |
| 899 | styler.ColourTo(i + 1, SCE_RB_OPERATOR); |
| 900 | advance_char(i, ch, chNext, chNext2); // pass by ref |
| 901 | state = SCE_RB_DEFAULT; |
| 902 | preferRE = false; |
| 903 | } else if (isSafeWordcharOrHigh(chNext)) { |
| 904 | state = SCE_RB_SYMBOL; |
| 905 | } else if ((chNext == '@' || chNext == '$') && |
| 906 | isSafeWordcharOrHigh(chNext2)) { |
| 907 | // instance and global variable followed by an identifier |
| 908 | advance_char(i, ch, chNext, chNext2); |
| 909 | state = SCE_RB_SYMBOL; |
| 910 | } else if (((chNext == '@' && chNext2 == '@') || |
| 911 | (chNext == '$' && chNext2 == '-')) && |
| 912 | isSafeWordcharOrHigh(styler.SafeGetCharAt(i+3))) { |
| 913 | // class variables and special global variable "$-IDENTCHAR" |
| 914 | state = SCE_RB_SYMBOL; |
| 915 | // $-IDENTCHAR doesn't continue past the IDENTCHAR |
| 916 | if (chNext == '$') { |
| 917 | styler.ColourTo(i+3, SCE_RB_SYMBOL); |
| 918 | state = SCE_RB_DEFAULT; |
| 919 | } |
| 920 | i += 3; |
| 921 | ch = styler.SafeGetCharAt(i); |
| 922 | chNext = styler.SafeGetCharAt(i+1); |
| 923 | } else if (chNext == '$' && strchr("_~*$?!@/\\;,.=:<>\"&`'+" , chNext2)) { |
| 924 | // single-character special global variables |
| 925 | i += 2; |
| 926 | ch = chNext2; |
| 927 | chNext = styler.SafeGetCharAt(i+1); |
| 928 | styler.ColourTo(i, SCE_RB_SYMBOL); |
| 929 | state = SCE_RB_DEFAULT; |
| 930 | } else if (strchr("[*!~+-*/%=<>&^|" , chNext)) { |
| 931 | // Do the operator analysis in-line, looking ahead |
| 932 | // Based on the table in pickaxe 2nd ed., page 339 |
| 933 | bool doColoring = true; |
| 934 | switch (chNext) { |
| 935 | case '[': |
| 936 | if (chNext2 == ']') { |
| 937 | char ch_tmp = styler.SafeGetCharAt(i + 3); |
| 938 | if (ch_tmp == '=') { |
| 939 | i += 3; |
| 940 | ch = ch_tmp; |
| 941 | chNext = styler.SafeGetCharAt(i + 1); |
| 942 | } else { |
| 943 | i += 2; |
| 944 | ch = chNext2; |
| 945 | chNext = ch_tmp; |
| 946 | } |
| 947 | } else { |
| 948 | doColoring = false; |
| 949 | } |
| 950 | break; |
| 951 | |
| 952 | case '*': |
| 953 | if (chNext2 == '*') { |
| 954 | i += 2; |
| 955 | ch = chNext2; |
| 956 | chNext = styler.SafeGetCharAt(i + 1); |
| 957 | } else { |
| 958 | advance_char(i, ch, chNext, chNext2); |
| 959 | } |
| 960 | break; |
| 961 | |
| 962 | case '!': |
| 963 | if (chNext2 == '=' || chNext2 == '~') { |
| 964 | i += 2; |
| 965 | ch = chNext2; |
| 966 | chNext = styler.SafeGetCharAt(i + 1); |
| 967 | } else { |
| 968 | advance_char(i, ch, chNext, chNext2); |
| 969 | } |
| 970 | break; |
| 971 | |
| 972 | case '<': |
| 973 | if (chNext2 == '<') { |
| 974 | i += 2; |
| 975 | ch = chNext2; |
| 976 | chNext = styler.SafeGetCharAt(i + 1); |
| 977 | } else if (chNext2 == '=') { |
| 978 | char ch_tmp = styler.SafeGetCharAt(i + 3); |
| 979 | if (ch_tmp == '>') { // <=> operator |
| 980 | i += 3; |
| 981 | ch = ch_tmp; |
| 982 | chNext = styler.SafeGetCharAt(i + 1); |
| 983 | } else { |
| 984 | i += 2; |
| 985 | ch = chNext2; |
| 986 | chNext = ch_tmp; |
| 987 | } |
| 988 | } else { |
| 989 | advance_char(i, ch, chNext, chNext2); |
| 990 | } |
| 991 | break; |
| 992 | |
| 993 | default: |
| 994 | // Simple one-character operators |
| 995 | advance_char(i, ch, chNext, chNext2); |
| 996 | break; |
| 997 | } |
| 998 | if (doColoring) { |
| 999 | styler.ColourTo(i, SCE_RB_SYMBOL); |
| 1000 | state = SCE_RB_DEFAULT; |
| 1001 | } |
| 1002 | } else if (!preferRE) { |
| 1003 | // Don't color symbol strings (yet) |
| 1004 | // Just color the ":" and color rest as string |
| 1005 | styler.ColourTo(i, SCE_RB_SYMBOL); |
| 1006 | state = SCE_RB_DEFAULT; |
| 1007 | } else { |
| 1008 | styler.ColourTo(i, SCE_RB_OPERATOR); |
| 1009 | state = SCE_RB_DEFAULT; |
| 1010 | preferRE = true; |
| 1011 | } |
| 1012 | } else if (ch == '%') { |
| 1013 | styler.ColourTo(i - 1, state); |
| 1014 | bool have_string = false; |
| 1015 | if (strchr(q_chars, chNext) && !isSafeWordcharOrHigh(chNext2)) { |
| 1016 | Quote.New(); |
| 1017 | const char *hit = strchr(q_chars, chNext); |
| 1018 | if (hit != NULL) { |
| 1019 | state = q_states[hit - q_chars]; |
| 1020 | Quote.Open(chNext2); |
| 1021 | i += 2; |
| 1022 | ch = chNext2; |
| 1023 | chNext = styler.SafeGetCharAt(i + 1); |
| 1024 | have_string = true; |
| 1025 | } |
| 1026 | } else if (preferRE && !isSafeWordcharOrHigh(chNext)) { |
| 1027 | // Ruby doesn't allow high bit chars here, |
| 1028 | // but the editor host might |
| 1029 | Quote.New(); |
| 1030 | state = SCE_RB_STRING_QQ; |
| 1031 | Quote.Open(chNext); |
| 1032 | advance_char(i, ch, chNext, chNext2); // pass by ref |
| 1033 | have_string = true; |
| 1034 | } else if (!isSafeWordcharOrHigh(chNext) && !iswhitespace(chNext) && !isEOLChar(chNext)) { |
| 1035 | // Ruby doesn't allow high bit chars here, |
| 1036 | // but the editor host might |
| 1037 | Quote.New(); |
| 1038 | state = SCE_RB_STRING_QQ; |
| 1039 | Quote.Open(chNext); |
| 1040 | advance_char(i, ch, chNext, chNext2); // pass by ref |
| 1041 | have_string = true; |
| 1042 | } |
| 1043 | if (!have_string) { |
| 1044 | styler.ColourTo(i, SCE_RB_OPERATOR); |
| 1045 | // stay in default |
| 1046 | preferRE = true; |
| 1047 | } |
| 1048 | } else if (ch == '?') { |
| 1049 | styler.ColourTo(i - 1, state); |
| 1050 | if (iswhitespace(chNext) || chNext == '\n' || chNext == '\r') { |
| 1051 | styler.ColourTo(i, SCE_RB_OPERATOR); |
| 1052 | } else { |
| 1053 | // It's the start of a character code escape sequence |
| 1054 | // Color it as a number. |
| 1055 | state = SCE_RB_NUMBER; |
| 1056 | is_real_number = false; |
| 1057 | } |
| 1058 | } else if (isoperator(ch) || ch == '.') { |
| 1059 | styler.ColourTo(i - 1, state); |
| 1060 | styler.ColourTo(i, SCE_RB_OPERATOR); |
| 1061 | // If we're ending an expression or block, |
| 1062 | // assume it ends an object, and the ambivalent |
| 1063 | // constructs are binary operators |
| 1064 | // |
| 1065 | // So if we don't have one of these chars, |
| 1066 | // we aren't ending an object exp'n, and ops |
| 1067 | // like : << / are unary operators. |
| 1068 | |
| 1069 | if (ch == '{') { |
| 1070 | ++brace_counts; |
| 1071 | preferRE = true; |
| 1072 | } else if (ch == '}' && --brace_counts < 0 |
| 1073 | && inner_string_count > 0) { |
| 1074 | styler.ColourTo(i, SCE_RB_OPERATOR); |
| 1075 | exitInnerExpression(inner_string_types, |
| 1076 | inner_expn_brace_counts, |
| 1077 | inner_quotes, |
| 1078 | inner_string_count, |
| 1079 | state, brace_counts, Quote); |
| 1080 | } else { |
| 1081 | preferRE = (strchr(")}]." , ch) == NULL); |
| 1082 | } |
| 1083 | // Stay in default state |
| 1084 | } else if (isEOLChar(ch)) { |
| 1085 | // Make sure it's a true line-end, with no backslash |
| 1086 | if ((ch == '\r' || (ch == '\n' && chPrev != '\r')) |
| 1087 | && chPrev != '\\') { |
| 1088 | // Assume we've hit the end of the statement. |
| 1089 | preferRE = true; |
| 1090 | } |
| 1091 | } |
| 1092 | } else if (state == SCE_RB_WORD) { |
| 1093 | if (ch == '.' || !isSafeWordcharOrHigh(ch)) { |
| 1094 | // Words include x? in all contexts, |
| 1095 | // and <letters>= after either 'def' or a dot |
| 1096 | // Move along until a complete word is on our left |
| 1097 | |
| 1098 | // Default accessor treats '.' as word-chars, |
| 1099 | // but we don't for now. |
| 1100 | |
| 1101 | if (ch == '=' |
| 1102 | && isSafeWordcharOrHigh(chPrev) |
| 1103 | && (chNext == '(' |
| 1104 | || strchr(" \t\n\r" , chNext) != NULL) |
| 1105 | && (!strcmp(prevWord, "def" ) |
| 1106 | || followsDot(styler.GetStartSegment(), styler))) { |
| 1107 | // <name>= is a name only when being def'd -- Get it the next time |
| 1108 | // This means that <name>=<name> is always lexed as |
| 1109 | // <name>, (op, =), <name> |
| 1110 | } else if (ch == ':' |
| 1111 | && isSafeWordcharOrHigh(chPrev) |
| 1112 | && strchr(" \t\n\r" , chNext) != NULL) { |
| 1113 | state = SCE_RB_SYMBOL; |
| 1114 | } else if ((ch == '?' || ch == '!') |
| 1115 | && isSafeWordcharOrHigh(chPrev) |
| 1116 | && !isSafeWordcharOrHigh(chNext)) { |
| 1117 | // <name>? is a name -- Get it the next time |
| 1118 | // But <name>?<name> is always lexed as |
| 1119 | // <name>, (op, ?), <name> |
| 1120 | // Same with <name>! to indicate a method that |
| 1121 | // modifies its target |
| 1122 | } else if (isEOLChar(ch) |
| 1123 | && isMatch(styler, lengthDoc, i - 7, "__END__" )) { |
| 1124 | styler.ColourTo(i, SCE_RB_DATASECTION); |
| 1125 | state = SCE_RB_DATASECTION; |
| 1126 | // No need to handle this state -- we'll just move to the end |
| 1127 | preferRE = false; |
| 1128 | } else { |
| 1129 | Sci_Position wordStartPos = styler.GetStartSegment(); |
| 1130 | int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord); |
| 1131 | switch (word_style) { |
| 1132 | case SCE_RB_WORD: |
| 1133 | preferRE = RE_CanFollowKeyword(prevWord); |
| 1134 | break; |
| 1135 | |
| 1136 | case SCE_RB_WORD_DEMOTED: |
| 1137 | preferRE = true; |
| 1138 | break; |
| 1139 | |
| 1140 | case SCE_RB_IDENTIFIER: |
| 1141 | if (isMatch(styler, lengthDoc, wordStartPos, "print" )) { |
| 1142 | preferRE = true; |
| 1143 | } else if (isEOLChar(ch)) { |
| 1144 | preferRE = true; |
| 1145 | } else { |
| 1146 | preferRE = false; |
| 1147 | } |
| 1148 | break; |
| 1149 | default: |
| 1150 | preferRE = false; |
| 1151 | } |
| 1152 | if (ch == '.') { |
| 1153 | // We might be redefining an operator-method |
| 1154 | preferRE = false; |
| 1155 | } |
| 1156 | // And if it's the first |
| 1157 | redo_char(i, ch, chNext, chNext2, state); // pass by ref |
| 1158 | } |
| 1159 | } |
| 1160 | } else if (state == SCE_RB_NUMBER) { |
| 1161 | if (!is_real_number) { |
| 1162 | if (ch != '\\') { |
| 1163 | styler.ColourTo(i, state); |
| 1164 | state = SCE_RB_DEFAULT; |
| 1165 | preferRE = false; |
| 1166 | } else if (strchr("\\ntrfvaebs" , chNext)) { |
| 1167 | // Terminal escape sequence -- handle it next time |
| 1168 | // Nothing more to do this time through the loop |
| 1169 | } else if (chNext == 'C' || chNext == 'M') { |
| 1170 | if (chNext2 != '-') { |
| 1171 | // \C or \M ends the sequence -- handle it next time |
| 1172 | } else { |
| 1173 | // Move from abc?\C-x |
| 1174 | // ^ |
| 1175 | // to |
| 1176 | // ^ |
| 1177 | i += 2; |
| 1178 | ch = chNext2; |
| 1179 | chNext = styler.SafeGetCharAt(i + 1); |
| 1180 | } |
| 1181 | } else if (chNext == 'c') { |
| 1182 | // Stay here, \c is a combining sequence |
| 1183 | advance_char(i, ch, chNext, chNext2); // pass by ref |
| 1184 | } else { |
| 1185 | // ?\x, including ?\\ is final. |
| 1186 | styler.ColourTo(i + 1, state); |
| 1187 | state = SCE_RB_DEFAULT; |
| 1188 | preferRE = false; |
| 1189 | advance_char(i, ch, chNext, chNext2); |
| 1190 | } |
| 1191 | } else if (isSafeAlnumOrHigh(ch) || ch == '_') { |
| 1192 | // Keep going |
| 1193 | } else if (ch == '.' && chNext == '.') { |
| 1194 | ++numDots; |
| 1195 | styler.ColourTo(i - 1, state); |
| 1196 | redo_char(i, ch, chNext, chNext2, state); // pass by ref |
| 1197 | } else if (ch == '.' && ++numDots == 1) { |
| 1198 | // Keep going |
| 1199 | } else { |
| 1200 | styler.ColourTo(i - 1, state); |
| 1201 | redo_char(i, ch, chNext, chNext2, state); // pass by ref |
| 1202 | preferRE = false; |
| 1203 | } |
| 1204 | } else if (state == SCE_RB_COMMENTLINE) { |
| 1205 | if (isEOLChar(ch)) { |
| 1206 | styler.ColourTo(i - 1, state); |
| 1207 | state = SCE_RB_DEFAULT; |
| 1208 | // Use whatever setting we had going into the comment |
| 1209 | } |
| 1210 | } else if (state == SCE_RB_HERE_DELIM) { |
| 1211 | // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx |
| 1212 | // Slightly different: if we find an immediate '-', |
| 1213 | // the target can appear indented. |
| 1214 | |
| 1215 | if (HereDoc.State == 0) { // '<<' encountered |
| 1216 | HereDoc.State = 1; |
| 1217 | HereDoc.DelimiterLength = 0; |
| 1218 | if (ch == '-' || ch == '~') { |
| 1219 | HereDoc.CanBeIndented = true; |
| 1220 | advance_char(i, ch, chNext, chNext2); // pass by ref |
| 1221 | } else { |
| 1222 | HereDoc.CanBeIndented = false; |
| 1223 | } |
| 1224 | if (isEOLChar(ch)) { |
| 1225 | // Bail out of doing a here doc if there's no target |
| 1226 | state = SCE_RB_DEFAULT; |
| 1227 | preferRE = false; |
| 1228 | } else { |
| 1229 | HereDoc.Quote = ch; |
| 1230 | |
| 1231 | if (ch == '\'' || ch == '"' || ch == '`') { |
| 1232 | HereDoc.Quoted = true; |
| 1233 | HereDoc.Delimiter[0] = '\0'; |
| 1234 | } else { |
| 1235 | HereDoc.Quoted = false; |
| 1236 | HereDoc.Delimiter[0] = ch; |
| 1237 | HereDoc.Delimiter[1] = '\0'; |
| 1238 | HereDoc.DelimiterLength = 1; |
| 1239 | } |
| 1240 | } |
| 1241 | } else if (HereDoc.State == 1) { // collect the delimiter |
| 1242 | if (isEOLChar(ch)) { |
| 1243 | // End the quote now, and go back for more |
| 1244 | styler.ColourTo(i - 1, state); |
| 1245 | state = SCE_RB_DEFAULT; |
| 1246 | i--; |
| 1247 | chNext = ch; |
| 1248 | preferRE = false; |
| 1249 | } else if (HereDoc.Quoted) { |
| 1250 | if (ch == HereDoc.Quote) { // closing quote => end of delimiter |
| 1251 | styler.ColourTo(i, state); |
| 1252 | state = SCE_RB_DEFAULT; |
| 1253 | preferRE = false; |
| 1254 | } else { |
| 1255 | if (ch == '\\' && !isEOLChar(chNext)) { |
| 1256 | advance_char(i, ch, chNext, chNext2); |
| 1257 | } |
| 1258 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; |
| 1259 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; |
| 1260 | } |
| 1261 | } else { // an unquoted here-doc delimiter |
| 1262 | if (isSafeAlnumOrHigh(ch) || ch == '_') { |
| 1263 | HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; |
| 1264 | HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; |
| 1265 | } else { |
| 1266 | styler.ColourTo(i - 1, state); |
| 1267 | redo_char(i, ch, chNext, chNext2, state); |
| 1268 | preferRE = false; |
| 1269 | } |
| 1270 | } |
| 1271 | if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) { |
| 1272 | styler.ColourTo(i - 1, state); |
| 1273 | state = SCE_RB_ERROR; |
| 1274 | preferRE = false; |
| 1275 | } |
| 1276 | } |
| 1277 | } else if (state == SCE_RB_HERE_Q) { |
| 1278 | // Not needed: HereDoc.State == 2 |
| 1279 | // Indentable here docs: look backwards |
| 1280 | // Non-indentable: look forwards, like in Perl |
| 1281 | // |
| 1282 | // Why: so we can quickly resolve things like <<-" abc" |
| 1283 | |
| 1284 | if (!HereDoc.CanBeIndented) { |
| 1285 | if (isEOLChar(chPrev) |
| 1286 | && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { |
| 1287 | styler.ColourTo(i - 1, state); |
| 1288 | i += static_cast<Sci_Position>(HereDoc.DelimiterLength) - 1; |
| 1289 | chNext = styler.SafeGetCharAt(i + 1); |
| 1290 | if (isEOLChar(chNext)) { |
| 1291 | styler.ColourTo(i, SCE_RB_HERE_DELIM); |
| 1292 | state = SCE_RB_DEFAULT; |
| 1293 | HereDoc.State = 0; |
| 1294 | preferRE = false; |
| 1295 | } |
| 1296 | // Otherwise we skipped through the here doc faster. |
| 1297 | } |
| 1298 | } else if (isEOLChar(chNext) |
| 1299 | && lookingAtHereDocDelim(styler, |
| 1300 | i - HereDoc.DelimiterLength + 1, |
| 1301 | lengthDoc, |
| 1302 | HereDoc.Delimiter)) { |
| 1303 | styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state); |
| 1304 | styler.ColourTo(i, SCE_RB_HERE_DELIM); |
| 1305 | state = SCE_RB_DEFAULT; |
| 1306 | preferRE = false; |
| 1307 | HereDoc.State = 0; |
| 1308 | } |
| 1309 | } else if (state == SCE_RB_CLASS_VAR |
| 1310 | || state == SCE_RB_INSTANCE_VAR |
| 1311 | || state == SCE_RB_SYMBOL) { |
| 1312 | if (state == SCE_RB_SYMBOL && |
| 1313 | // FIDs suffices '?' and '!' |
| 1314 | (((ch == '!' || ch == '?') && chNext != '=') || |
| 1315 | // identifier suffix '=' |
| 1316 | (ch == '=' && (chNext != '~' && chNext != '>' && |
| 1317 | (chNext != '=' || chNext2 == '>'))))) { |
| 1318 | styler.ColourTo(i, state); |
| 1319 | state = SCE_RB_DEFAULT; |
| 1320 | preferRE = false; |
| 1321 | } else if (!isSafeWordcharOrHigh(ch)) { |
| 1322 | styler.ColourTo(i - 1, state); |
| 1323 | redo_char(i, ch, chNext, chNext2, state); // pass by ref |
| 1324 | preferRE = false; |
| 1325 | } |
| 1326 | } else if (state == SCE_RB_GLOBAL) { |
| 1327 | if (!isSafeWordcharOrHigh(ch)) { |
| 1328 | // handle special globals here as well |
| 1329 | if (chPrev == '$') { |
| 1330 | if (ch == '-') { |
| 1331 | // Include the next char, like $-a |
| 1332 | advance_char(i, ch, chNext, chNext2); |
| 1333 | } |
| 1334 | styler.ColourTo(i, state); |
| 1335 | state = SCE_RB_DEFAULT; |
| 1336 | } else { |
| 1337 | styler.ColourTo(i - 1, state); |
| 1338 | redo_char(i, ch, chNext, chNext2, state); // pass by ref |
| 1339 | } |
| 1340 | preferRE = false; |
| 1341 | } |
| 1342 | } else if (state == SCE_RB_POD) { |
| 1343 | // PODs end with ^=end\s, -- any whitespace can follow =end |
| 1344 | if (strchr(" \t\n\r" , ch) != NULL |
| 1345 | && i > 5 |
| 1346 | && isEOLChar(styler[i - 5]) |
| 1347 | && isMatch(styler, lengthDoc, i - 4, "=end" )) { |
| 1348 | styler.ColourTo(i - 1, state); |
| 1349 | state = SCE_RB_DEFAULT; |
| 1350 | preferRE = false; |
| 1351 | } |
| 1352 | } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) { |
| 1353 | if (ch == '\\' && Quote.Up != '\\') { |
| 1354 | // Skip one |
| 1355 | advance_char(i, ch, chNext, chNext2); |
| 1356 | } else if (ch == Quote.Down) { |
| 1357 | Quote.Count--; |
| 1358 | if (Quote.Count == 0) { |
| 1359 | // Include the options |
| 1360 | while (isSafeAlpha(chNext)) { |
| 1361 | i++; |
| 1362 | ch = chNext; |
| 1363 | chNext = styler.SafeGetCharAt(i + 1); |
| 1364 | } |
| 1365 | styler.ColourTo(i, state); |
| 1366 | state = SCE_RB_DEFAULT; |
| 1367 | preferRE = false; |
| 1368 | } |
| 1369 | } else if (ch == Quote.Up) { |
| 1370 | // Only if close quoter != open quoter |
| 1371 | Quote.Count++; |
| 1372 | |
| 1373 | } else if (ch == '#') { |
| 1374 | if (chNext == '{' |
| 1375 | && inner_string_count < INNER_STRINGS_MAX_COUNT) { |
| 1376 | // process #{ ... } |
| 1377 | styler.ColourTo(i - 1, state); |
| 1378 | styler.ColourTo(i + 1, SCE_RB_OPERATOR); |
| 1379 | enterInnerExpression(inner_string_types, |
| 1380 | inner_expn_brace_counts, |
| 1381 | inner_quotes, |
| 1382 | inner_string_count, |
| 1383 | state, |
| 1384 | brace_counts, |
| 1385 | Quote); |
| 1386 | preferRE = true; |
| 1387 | // Skip one |
| 1388 | advance_char(i, ch, chNext, chNext2); |
| 1389 | } else { |
| 1390 | //todo: distinguish comments from pound chars |
| 1391 | // for now, handle as comment |
| 1392 | styler.ColourTo(i - 1, state); |
| 1393 | bool inEscape = false; |
| 1394 | while (++i < lengthDoc) { |
| 1395 | ch = styler.SafeGetCharAt(i); |
| 1396 | if (ch == '\\') { |
| 1397 | inEscape = true; |
| 1398 | } else if (isEOLChar(ch)) { |
| 1399 | // Comment inside a regex |
| 1400 | styler.ColourTo(i - 1, SCE_RB_COMMENTLINE); |
| 1401 | break; |
| 1402 | } else if (inEscape) { |
| 1403 | inEscape = false; // don't look at char |
| 1404 | } else if (ch == Quote.Down) { |
| 1405 | // Have the regular handler deal with this |
| 1406 | // to get trailing modifiers. |
| 1407 | i--; |
| 1408 | ch = styler[i]; |
| 1409 | break; |
| 1410 | } |
| 1411 | } |
| 1412 | chNext = styler.SafeGetCharAt(i + 1); |
| 1413 | } |
| 1414 | } |
| 1415 | // Quotes of all kinds... |
| 1416 | } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ || |
| 1417 | state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW || |
| 1418 | state == SCE_RB_STRING || state == SCE_RB_CHARACTER || |
| 1419 | state == SCE_RB_BACKTICKS) { |
| 1420 | if (!Quote.Down && !isspacechar(ch)) { |
| 1421 | Quote.Open(ch); |
| 1422 | } else if (ch == '\\' && Quote.Up != '\\') { |
| 1423 | //Riddle me this: Is it safe to skip *every* escaped char? |
| 1424 | advance_char(i, ch, chNext, chNext2); |
| 1425 | } else if (ch == Quote.Down) { |
| 1426 | Quote.Count--; |
| 1427 | if (Quote.Count == 0) { |
| 1428 | styler.ColourTo(i, state); |
| 1429 | state = SCE_RB_DEFAULT; |
| 1430 | preferRE = false; |
| 1431 | } |
| 1432 | } else if (ch == Quote.Up) { |
| 1433 | Quote.Count++; |
| 1434 | } else if (ch == '#' && chNext == '{' |
| 1435 | && inner_string_count < INNER_STRINGS_MAX_COUNT |
| 1436 | && state != SCE_RB_CHARACTER |
| 1437 | && state != SCE_RB_STRING_Q) { |
| 1438 | // process #{ ... } |
| 1439 | styler.ColourTo(i - 1, state); |
| 1440 | styler.ColourTo(i + 1, SCE_RB_OPERATOR); |
| 1441 | enterInnerExpression(inner_string_types, |
| 1442 | inner_expn_brace_counts, |
| 1443 | inner_quotes, |
| 1444 | inner_string_count, |
| 1445 | state, |
| 1446 | brace_counts, |
| 1447 | Quote); |
| 1448 | preferRE = true; |
| 1449 | // Skip one |
| 1450 | advance_char(i, ch, chNext, chNext2); |
| 1451 | } |
| 1452 | } |
| 1453 | |
| 1454 | if (state == SCE_RB_ERROR) { |
| 1455 | break; |
| 1456 | } |
| 1457 | chPrev = ch; |
| 1458 | } |
| 1459 | if (state == SCE_RB_WORD) { |
| 1460 | // We've ended on a word, possibly at EOF, and need to |
| 1461 | // classify it. |
| 1462 | (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord); |
| 1463 | } else { |
| 1464 | styler.ColourTo(lengthDoc - 1, state); |
| 1465 | } |
| 1466 | } |
| 1467 | |
// Helper functions for folding and for disambiguating keywords
| 1469 | // Assert that there are no high-bit chars |
| 1470 | |
| 1471 | static void getPrevWord(Sci_Position pos, |
| 1472 | char *prevWord, |
| 1473 | Accessor &styler, |
| 1474 | int word_state) |
| 1475 | { |
| 1476 | Sci_Position i; |
| 1477 | styler.Flush(); |
| 1478 | for (i = pos - 1; i > 0; i--) { |
| 1479 | if (actual_style(styler.StyleAt(i)) != word_state) { |
| 1480 | i++; |
| 1481 | break; |
| 1482 | } |
| 1483 | } |
| 1484 | if (i < pos - MAX_KEYWORD_LENGTH) // overflow |
| 1485 | i = pos - MAX_KEYWORD_LENGTH; |
| 1486 | char *dst = prevWord; |
| 1487 | for (; i <= pos; i++) { |
| 1488 | *dst++ = styler[i]; |
| 1489 | } |
| 1490 | *dst = 0; |
| 1491 | } |
| 1492 | |
// Return true when prevWord is exactly one of the keywords
// if / do / while / unless / until / for, and false for anything else.
// NOTE(review): presumably these are the keywords that may need demoting
// (modifier or noise-word use) -- confirm against the caller.
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    // Lots of ways to do a loop in Ruby besides 'while/until'.
    static const char *const candidates[] = {
        "if", "do", "while", "unless", "until", "for"
    };
    const size_t count = sizeof(candidates) / sizeof(candidates[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(prevWord, candidates[k]) == 0) {
            return true;
        }
    }
    return false;
}
| 1508 | |
// Demote keywords in the following conditions:
// if, while, unless, until modify a statement
// do after a while or until, as a noise word (like then after if)
//
// Decide whether the keyword `word` is used as a modifier (or, for "do",
// as a loop noise word) rather than as the opener of a new construct.
//
//   word   - the keyword text being classified
//   pos    - position from which the backward scan starts; the scan looks
//            only at characters before pos (presumably the first character
//            of word -- confirm against the caller)
//   styler - document accessor used to read characters and styles
//
// Returns true when the text to the left of pos on the same logical line
// (backslash continuations included) indicates the keyword follows an
// expression; false when nothing significant precedes it.
static bool keywordIsModifier(const char *word,
                              Sci_Position pos,
                              Accessor &styler)
{
    // "do" is decided by a separate rule: does a loop keyword precede it?
    if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
        return keywordDoStartsLoop(pos, styler);
    }
    char ch, chPrev, chPrev2;
    int style = SCE_RB_DEFAULT;
    Sci_Position lineStart = styler.GetLine(pos);
    Sci_Position lineStartPosn = styler.LineStart(lineStart);
    // We want to step backwards until we don't care about the current
    // position. But first move lineStartPosn back behind any
    // continuations immediately above word.
    while (lineStartPosn > 0) {
        ch = styler[lineStartPosn-1];
        if (ch == '\n' || ch == '\r') {
            chPrev = styler.SafeGetCharAt(lineStartPosn-2);
            chPrev2 = styler.SafeGetCharAt(lineStartPosn-3);
            lineStart = styler.GetLine(lineStartPosn-1);
            // If we find a continuation line, include it in our analysis.
            if (chPrev == '\\') {
                lineStartPosn = styler.LineStart(lineStart);
            } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
                // Same, for a backslash followed by a CR LF line end
                lineStartPosn = styler.LineStart(lineStart);
            } else {
                break;
            }
        } else {
            break;
        }
    }

    styler.Flush();
    // Scan left from pos, skipping everything in the default style, and
    // stop at the first character carrying any other style. On exit
    // `style` holds that character's style and `pos` its position.
    while (--pos >= lineStartPosn) {
        style = actual_style(styler.StyleAt(pos));
        if (style == SCE_RB_DEFAULT) {
            if (iswhitespace(ch = styler[pos])) {
                //continue
            } else if (ch == '\r' || ch == '\n') {
                // Scintilla's LineStart() and GetLine() routines aren't
                // platform-independent, so if we have text prepared with
                // a different system we can't rely on it.

                // Also, lineStartPosn may have been moved to more than one
                // line above word's line while pushing past continuations.
                chPrev = styler.SafeGetCharAt(pos - 1);
                chPrev2 = styler.SafeGetCharAt(pos - 2);
                if (chPrev == '\\') {
                    pos-=1;  // gloss over the "\\"
                    //continue
                } else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
                    pos-=2;  // gloss over the "\\\r"
                    //continue
                } else {
                    // A real line end with no continuation: nothing
                    // precedes the keyword in this statement.
                    return false;
                }
            }
            // Any other default-styled character is simply skipped.
        } else {
            break;
        }
    }
    // Ran off the start of the (logical) line without finding anything
    // styled: the keyword is the first thing on the line.
    if (pos < lineStartPosn) {
        return false;
    }
    // First things where the action is unambiguous
    switch (style) {
    case SCE_RB_DEFAULT:
    case SCE_RB_COMMENTLINE:
    case SCE_RB_POD:
    case SCE_RB_CLASSNAME:
    case SCE_RB_DEFNAME:
    case SCE_RB_MODULE_NAME:
        return false;
    case SCE_RB_OPERATOR:
        // Needs a look at the operator character itself, below
        break;
    case SCE_RB_WORD:
        // Watch out for uses of 'else if'
        //XXX: Make a list of other keywords where 'if' isn't a modifier
        //      and can appear legitimately
        // Formulate this to avoid warnings from most compilers
        if (strcmp(word, "if") == 0) {
            char prevWord[MAX_KEYWORD_LENGTH + 1];
            getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
            // 'if' after 'else' is not a modifier; after any other
            // keyword it is treated as one.
            return strcmp(prevWord, "else") != 0;
        }
        return true;
    default:
        // Any other style (identifier, number, string, ...) is taken to
        // be the end of an expression that the keyword modifies.
        return true;
    }
    // Assume that if the keyword follows an operator,
    // usually it's a block assignment, like
    // a << if x then y else z

    // Only a closing bracket is treated as ending an expression, which
    // makes the keyword a modifier; after any other operator it is not.
    ch = styler[pos];
    switch (ch) {
    case ')':
    case ']':
    case '}':
        return true;
    default:
        return false;
    }
}
| 1617 | |
| 1618 | #define WHILE_BACKWARDS "elihw" |
| 1619 | #define UNTIL_BACKWARDS "litnu" |
| 1620 | #define FOR_BACKWARDS "rof" |
| 1621 | |
| 1622 | // Nothing fancy -- look to see if we follow a while/until somewhere |
| 1623 | // on the current line |
| 1624 | |
| 1625 | static bool keywordDoStartsLoop(Sci_Position pos, |
| 1626 | Accessor &styler) |
| 1627 | { |
| 1628 | char ch; |
| 1629 | Sci_Position lineStart = styler.GetLine(pos); |
| 1630 | Sci_Position lineStartPosn = styler.LineStart(lineStart); |
| 1631 | styler.Flush(); |
| 1632 | while (--pos >= lineStartPosn) { |
| 1633 | const int style = actual_style(styler.StyleAt(pos)); |
| 1634 | if (style == SCE_RB_DEFAULT) { |
| 1635 | if ((ch = styler[pos]) == '\r' || ch == '\n') { |
| 1636 | // Scintilla's LineStart() and GetLine() routines aren't |
| 1637 | // platform-independent, so if we have text prepared with |
| 1638 | // a different system we can't rely on it. |
| 1639 | return false; |
| 1640 | } |
| 1641 | } else if (style == SCE_RB_WORD) { |
| 1642 | // Check for while or until, but write the word in backwards |
| 1643 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero |
| 1644 | char *dst = prevWord; |
| 1645 | int wordLen = 0; |
| 1646 | Sci_Position start_word; |
| 1647 | for (start_word = pos; |
| 1648 | start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD; |
| 1649 | start_word--) { |
| 1650 | if (++wordLen < MAX_KEYWORD_LENGTH) { |
| 1651 | *dst++ = styler[start_word]; |
| 1652 | } |
| 1653 | } |
| 1654 | *dst = 0; |
| 1655 | // Did we see our keyword? |
| 1656 | if (!strcmp(prevWord, WHILE_BACKWARDS) |
| 1657 | || !strcmp(prevWord, UNTIL_BACKWARDS) |
| 1658 | || !strcmp(prevWord, FOR_BACKWARDS)) { |
| 1659 | return true; |
| 1660 | } |
| 1661 | // We can move pos to the beginning of the keyword, and then |
| 1662 | // accept another decrement, as we can never have two contiguous |
| 1663 | // keywords: |
| 1664 | // word1 word2 |
| 1665 | // ^ |
| 1666 | // <- move to start_word |
| 1667 | // ^ |
| 1668 | // <- loop decrement |
| 1669 | // ^ # pointing to end of word1 is fine |
| 1670 | pos = start_word; |
| 1671 | } |
| 1672 | } |
| 1673 | return false; |
| 1674 | } |
| 1675 | |
| 1676 | static bool (Sci_Position line, Accessor &styler) { |
| 1677 | Sci_Position pos = styler.LineStart(line); |
| 1678 | Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
| 1679 | for (Sci_Position i = pos; i < eol_pos; i++) { |
| 1680 | char ch = styler[i]; |
| 1681 | if (ch == '#') |
| 1682 | return true; |
| 1683 | else if (ch != ' ' && ch != '\t') |
| 1684 | return false; |
| 1685 | } |
| 1686 | return false; |
| 1687 | } |
| 1688 | |
| 1689 | /* |
| 1690 | * Folding Ruby |
| 1691 | * |
| 1692 | * The language is quite complex to analyze without a full parse. |
| 1693 | * For example, this line shouldn't affect fold level: |
| 1694 | * |
| 1695 | * print "hello" if feeling_friendly? |
| 1696 | * |
| 1697 | * Neither should this: |
| 1698 | * |
| 1699 | * print "hello" \ |
| 1700 | * if feeling_friendly? |
| 1701 | * |
| 1702 | * |
| 1703 | * But this should: |
| 1704 | * |
| 1705 | * if feeling_friendly? #++ |
| 1706 | * print "hello" \ |
| 1707 | * print "goodbye" |
| 1708 | * end #-- |
| 1709 | * |
| 1710 | * So we cheat, by actually looking at the existing indentation |
| 1711 | * levels for each line, and just echoing it back. Like Python. |
| 1712 | * Then if we get better at it, we'll take braces into consideration, |
| 1713 | * which always affect folding levels. |
| 1714 | |
| 1715 | * How the keywords should work: |
| 1716 | * No effect: |
| 1717 | * __FILE__ __LINE__ BEGIN END alias and |
| 1718 | * defined? false in nil not or self super then |
| 1719 | * true undef |
| 1720 | |
| 1721 | * Always increment: |
| 1722 | * begin class def do for module when { |
| 1723 | * |
| 1724 | * Always decrement: |
| 1725 | * end } |
| 1726 | * |
| 1727 | * Increment if these start a statement |
| 1728 | * if unless until while -- do nothing if they're modifiers |
| 1729 | |
| 1730 | * These end a block if there's no modifier, but don't bother |
| 1731 | * break next redo retry return yield |
| 1732 | * |
| 1733 | * These temporarily de-indent, but re-indent |
| 1734 | * case else elsif ensure rescue |
| 1735 | * |
 * This means that the folder reflects indentation rather
 * than setting it. The language service updates indentation
 * when users type return and finish entering de-denters.
| 1739 | * |
| 1740 | * Later offer to fold POD, here-docs, strings, and blocks of comments |
| 1741 | */ |
| 1742 | |
| 1743 | static void FoldRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, |
| 1744 | WordList *[], Accessor &styler) { |
| 1745 | const bool foldCompact = styler.GetPropertyInt("fold.compact" , 1) != 0; |
| 1746 | bool = styler.GetPropertyInt("fold.comment" ) != 0; |
| 1747 | |
| 1748 | synchronizeDocStart(startPos, length, initStyle, styler, // ref args |
| 1749 | false); |
| 1750 | Sci_PositionU endPos = startPos + length; |
| 1751 | int visibleChars = 0; |
| 1752 | Sci_Position lineCurrent = styler.GetLine(startPos); |
| 1753 | int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent) |
| 1754 | & SC_FOLDLEVELNUMBERMASK |
| 1755 | & ~SC_FOLDLEVELBASE); |
| 1756 | int levelCurrent = levelPrev; |
| 1757 | char chNext = styler[startPos]; |
| 1758 | int styleNext = styler.StyleAt(startPos); |
| 1759 | int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1); |
| 1760 | bool buffer_ends_with_eol = false; |
| 1761 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
| 1762 | char ch = chNext; |
| 1763 | chNext = styler.SafeGetCharAt(i + 1); |
| 1764 | int style = styleNext; |
| 1765 | styleNext = styler.StyleAt(i + 1); |
| 1766 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
| 1767 | |
| 1768 | /*Mutiline comment patch*/ |
| 1769 | if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) { |
| 1770 | if (!IsCommentLine(lineCurrent - 1, styler) |
| 1771 | && IsCommentLine(lineCurrent + 1, styler)) |
| 1772 | levelCurrent++; |
| 1773 | else if (IsCommentLine(lineCurrent - 1, styler) |
| 1774 | && !IsCommentLine(lineCurrent + 1, styler)) |
| 1775 | levelCurrent--; |
| 1776 | } |
| 1777 | |
| 1778 | if (style == SCE_RB_COMMENTLINE) { |
| 1779 | if (foldComment && stylePrev != SCE_RB_COMMENTLINE) { |
| 1780 | if (chNext == '{') { |
| 1781 | levelCurrent++; |
| 1782 | } else if (chNext == '}' && levelCurrent > 0) { |
| 1783 | levelCurrent--; |
| 1784 | } |
| 1785 | } |
| 1786 | } else if (style == SCE_RB_OPERATOR) { |
| 1787 | if (strchr("[{(" , ch)) { |
| 1788 | levelCurrent++; |
| 1789 | } else if (strchr(")}]" , ch)) { |
| 1790 | // Don't decrement below 0 |
| 1791 | if (levelCurrent > 0) |
| 1792 | levelCurrent--; |
| 1793 | } |
| 1794 | } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) { |
| 1795 | // Look at the keyword on the left and decide what to do |
| 1796 | char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero |
| 1797 | prevWord[0] = 0; |
| 1798 | getPrevWord(i, prevWord, styler, SCE_RB_WORD); |
| 1799 | if (!strcmp(prevWord, "end" )) { |
| 1800 | // Don't decrement below 0 |
| 1801 | if (levelCurrent > 0) |
| 1802 | levelCurrent--; |
| 1803 | } else if (!strcmp(prevWord, "if" ) |
| 1804 | || !strcmp(prevWord, "def" ) |
| 1805 | || !strcmp(prevWord, "class" ) |
| 1806 | || !strcmp(prevWord, "module" ) |
| 1807 | || !strcmp(prevWord, "begin" ) |
| 1808 | || !strcmp(prevWord, "case" ) |
| 1809 | || !strcmp(prevWord, "do" ) |
| 1810 | || !strcmp(prevWord, "while" ) |
| 1811 | || !strcmp(prevWord, "unless" ) |
| 1812 | || !strcmp(prevWord, "until" ) |
| 1813 | || !strcmp(prevWord, "for" ) |
| 1814 | ) { |
| 1815 | levelCurrent++; |
| 1816 | } |
| 1817 | } else if (style == SCE_RB_HERE_DELIM) { |
| 1818 | if (styler.SafeGetCharAt(i-2) == '<' && styler.SafeGetCharAt(i-1) == '<') { |
| 1819 | levelCurrent++; |
| 1820 | } else if (styleNext == SCE_RB_DEFAULT) { |
| 1821 | levelCurrent--; |
| 1822 | } |
| 1823 | } |
| 1824 | if (atEOL) { |
| 1825 | int lev = levelPrev; |
| 1826 | if (visibleChars == 0 && foldCompact) |
| 1827 | lev |= SC_FOLDLEVELWHITEFLAG; |
| 1828 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) |
| 1829 | lev |= SC_FOLDLEVELHEADERFLAG; |
| 1830 | styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE); |
| 1831 | lineCurrent++; |
| 1832 | levelPrev = levelCurrent; |
| 1833 | visibleChars = 0; |
| 1834 | buffer_ends_with_eol = true; |
| 1835 | } else if (!isspacechar(ch)) { |
| 1836 | visibleChars++; |
| 1837 | buffer_ends_with_eol = false; |
| 1838 | } |
| 1839 | stylePrev = style; |
| 1840 | } |
| 1841 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
| 1842 | if (!buffer_ends_with_eol) { |
| 1843 | int new_lev = levelCurrent; |
| 1844 | if (visibleChars == 0 && foldCompact) |
| 1845 | new_lev |= SC_FOLDLEVELWHITEFLAG; |
| 1846 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) |
| 1847 | new_lev |= SC_FOLDLEVELHEADERFLAG; |
| 1848 | levelCurrent = new_lev; |
| 1849 | } |
| 1850 | styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE); |
| 1851 | } |
| 1852 | |
// Names of the word lists used by the Ruby lexer; the trailing 0 terminates
// the array.
static const char *const rubyWordListDesc[] = { "Keywords", 0 };
| 1857 | |
| 1858 | LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby" , FoldRbDoc, rubyWordListDesc); |
| 1859 | |