1/** @file LexRust.cxx
2 ** Lexer for Rust.
3 **
4 ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
5 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 **/
7// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
8// The License.txt file describes the conditions under which this software may be distributed.
9
10#include <stdlib.h>
11#include <string.h>
12#include <stdio.h>
13#include <stdarg.h>
14#include <assert.h>
15#include <ctype.h>
16
17#include <string>
18#include <string_view>
19#include <map>
20#include <functional>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25
26#include "PropSetSimple.h"
27#include "WordList.h"
28#include "LexAccessor.h"
29#include "Accessor.h"
30#include "StyleContext.h"
31#include "CharacterSet.h"
32#include "LexerModule.h"
33#include "OptionSet.h"
34#include "DefaultLexer.h"
35
36using namespace Scintilla;
37using namespace Lexilla;
38
// Number of keyword lists the lexer keeps (size of LexerRust::keywords).
static constexpr int NUM_RUST_KEYWORD_LISTS = 7;
// Maximum number of identifier characters copied out for keyword lookup.
static constexpr int MAX_RUST_IDENT_CHARS = 1023;
41
42static bool IsStreamCommentStyle(int style) {
43 return style == SCE_RUST_COMMENTBLOCK ||
44 style == SCE_RUST_COMMENTBLOCKDOC;
45}
46
// Options used for LexerRust.
// Every member carries its default as an in-class initializer, so a
// default-constructed object holds the same values the lexer starts with.
struct OptionsRust {
	bool fold = false;
	bool foldSyntaxBased = true;
	bool foldComment = false;
	bool foldCommentMultiline = true;
	bool foldCommentExplicit = true;
	// Both marker strings start out empty.
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere = false;
	bool foldCompact = true;
	// Integer option; starts at -1.
	int foldAtElseInt = -1;
	bool foldAtElse = false;
};
74
75static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
76 "Primary keywords and identifiers",
77 "Built in types",
78 "Other keywords",
79 "Keywords 4",
80 "Keywords 5",
81 "Keywords 6",
82 "Keywords 7",
83 0,
84 };
85
86struct OptionSetRust : public OptionSet<OptionsRust> {
87 OptionSetRust() {
88 DefineProperty("fold", &OptionsRust::fold);
89
90 DefineProperty("fold.comment", &OptionsRust::foldComment);
91
92 DefineProperty("fold.compact", &OptionsRust::foldCompact);
93
94 DefineProperty("fold.at.else", &OptionsRust::foldAtElse);
95
96 DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
97 "Set this property to 0 to disable syntax based folding.");
98
99 DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
100 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
101
102 DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
103 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
104
105 DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
106 "The string to use for explicit fold start points, replacing the standard //{.");
107
108 DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
109 "The string to use for explicit fold end points, replacing the standard //}.");
110
111 DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
112 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
113
114 DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
115 "This option enables Rust folding on a \"} else {\" line of an if statement.");
116
117 DefineWordListSets(rustWordLists);
118 }
119};
120
121class LexerRust : public DefaultLexer {
122 WordList keywords[NUM_RUST_KEYWORD_LISTS];
123 OptionsRust options;
124 OptionSetRust osRust;
125public:
126 LexerRust() : DefaultLexer("rust", SCLEX_RUST) {
127 }
128 virtual ~LexerRust() {
129 }
130 void SCI_METHOD Release() override {
131 delete this;
132 }
133 int SCI_METHOD Version() const override {
134 return lvRelease5;
135 }
136 const char * SCI_METHOD PropertyNames() override {
137 return osRust.PropertyNames();
138 }
139 int SCI_METHOD PropertyType(const char *name) override {
140 return osRust.PropertyType(name);
141 }
142 const char * SCI_METHOD DescribeProperty(const char *name) override {
143 return osRust.DescribeProperty(name);
144 }
145 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
146 const char * SCI_METHOD PropertyGet(const char *key) override {
147 return osRust.PropertyGet(key);
148 }
149 const char * SCI_METHOD DescribeWordListSets() override {
150 return osRust.DescribeWordListSets();
151 }
152 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
153 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
154 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
155 void * SCI_METHOD PrivateCall(int, void *) override {
156 return 0;
157 }
158 static ILexer5 *LexerFactoryRust() {
159 return new LexerRust();
160 }
161};
162
163Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
164 if (osRust.PropertySet(&options, key, val)) {
165 return 0;
166 }
167 return -1;
168}
169
170Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
171 Sci_Position firstModification = -1;
172 if (n < NUM_RUST_KEYWORD_LISTS) {
173 WordList *wordListN = &keywords[n];
174 WordList wlNew;
175 wlNew.Set(wl);
176 if (*wordListN != wlNew) {
177 wordListN->Set(wl);
178 firstModification = 0;
179 }
180 }
181 return firstModification;
182}
183
/* True for the four characters the lexer treats as whitespace:
 * space, tab, carriage return and line feed. */
static bool IsWhitespace(int c) {
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
187
188/* This isn't quite right for Unicode identifiers */
189static bool IsIdentifierStart(int ch) {
190 return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
191}
192
193/* This isn't quite right for Unicode identifiers */
194static bool IsIdentifierContinue(int ch) {
195 return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
196}
197
198static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
199 while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
200 if (pos == styler.LineEnd(styler.GetLine(pos)))
201 styler.SetLineState(styler.GetLine(pos), 0);
202 pos++;
203 }
204 styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
205}
206
207static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
208 for (Sci_Position ii = 0; ii < len; ii++)
209 s[ii] = styler[ii + start];
210 s[len] = '\0';
211}
212
213static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
214 Sci_Position start = pos;
215 while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
216 pos++;
217
218 if (styler.SafeGetCharAt(pos, '\0') == '!') {
219 pos++;
220 styler.ColourTo(pos - 1, SCE_RUST_MACRO);
221 } else {
222 char s[MAX_RUST_IDENT_CHARS + 1];
223 Sci_Position len = pos - start;
224 len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
225 GrabString(s, styler, start, len);
226 bool keyword = false;
227 for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
228 if (keywords[ii].InList(s)) {
229 styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
230 keyword = true;
231 break;
232 }
233 }
234 if (!keyword) {
235 styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
236 }
237 }
238}
239
240/* Scans a sequence of digits, returning true if it found any. */
241static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
242 Sci_Position old_pos = pos;
243 for (;;) {
244 int c = styler.SafeGetCharAt(pos, '\0');
245 if (IsADigit(c, base) || c == '_')
246 pos++;
247 else
248 break;
249 }
250 return old_pos != pos;
251}
252
253/* Scans an integer and floating point literals. */
254static void ScanNumber(Accessor& styler, Sci_Position& pos) {
255 int base = 10;
256 int c = styler.SafeGetCharAt(pos, '\0');
257 int n = styler.SafeGetCharAt(pos + 1, '\0');
258 bool error = false;
259 /* Scan the prefix, thus determining the base.
260 * 10 is default if there's no prefix. */
261 if (c == '0' && n == 'x') {
262 pos += 2;
263 base = 16;
264 } else if (c == '0' && n == 'b') {
265 pos += 2;
266 base = 2;
267 } else if (c == '0' && n == 'o') {
268 pos += 2;
269 base = 8;
270 }
271
272 /* Scan initial digits. The literal is malformed if there are none. */
273 error |= !ScanDigits(styler, pos, base);
274 /* See if there's an integer suffix. We mimic the Rust's lexer
275 * and munch it even if there was an error above. */
276 c = styler.SafeGetCharAt(pos, '\0');
277 if (c == 'u' || c == 'i') {
278 pos++;
279 c = styler.SafeGetCharAt(pos, '\0');
280 n = styler.SafeGetCharAt(pos + 1, '\0');
281 if (c == '8') {
282 pos++;
283 } else if (c == '1' && n == '6') {
284 pos += 2;
285 } else if (c == '3' && n == '2') {
286 pos += 2;
287 } else if (c == '6' && n == '4') {
288 pos += 2;
289 } else if (styler.Match(pos, "128")) {
290 pos += 3;
291 } else if (styler.Match(pos, "size")) {
292 pos += 4;
293 } else {
294 error = true;
295 }
296 /* See if it's a floating point literal. These literals have to be base 10.
297 */
298 } else if (!error) {
299 /* If there's a period, it's a floating point literal unless it's
300 * followed by an identifier (meaning this is a method call, e.g.
301 * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
302 */
303 n = styler.SafeGetCharAt(pos + 1, '\0');
304 if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
305 error |= base != 10;
306 pos++;
307 /* It's ok to have no digits after the period. */
308 ScanDigits(styler, pos, 10);
309 }
310
311 /* Look for the exponentiation. */
312 c = styler.SafeGetCharAt(pos, '\0');
313 if (c == 'e' || c == 'E') {
314 error |= base != 10;
315 pos++;
316 c = styler.SafeGetCharAt(pos, '\0');
317 if (c == '-' || c == '+')
318 pos++;
319 /* It is invalid to have no digits in the exponent. */
320 error |= !ScanDigits(styler, pos, 10);
321 }
322
323 /* Scan the floating point suffix. */
324 c = styler.SafeGetCharAt(pos, '\0');
325 if (c == 'f') {
326 error |= base != 10;
327 pos++;
328 c = styler.SafeGetCharAt(pos, '\0');
329 n = styler.SafeGetCharAt(pos + 1, '\0');
330 if (c == '3' && n == '2') {
331 pos += 2;
332 } else if (c == '6' && n == '4') {
333 pos += 2;
334 } else {
335 error = true;
336 }
337 }
338 }
339
340 if (error)
341 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
342 else
343 styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
344}
345
/* True when `c` is one of the characters styled as a single character operator. */
static bool IsOneCharOperator(int c) {
	switch (c) {
	case ';': case ',': case '(': case ')':
	case '{': case '}': case '[': case ']':
	case '@': case '#': case '~': case '+':
	case '*': case '/': case '^': case '%':
	case '.': case ':': case '!': case '<':
	case '>': case '=': case '-': case '&':
	case '|': case '$': case '?':
		return true;
	default:
		return false;
	}
}
355
/* True when the pair (c, n) forms one of the two character operators. */
static bool IsTwoCharOperator(int c, int n) {
	switch (c) {
	case '.':
		return n == '.';
	case ':':
		return n == ':';
	case '<':
		return n == '<' || n == '=';
	case '>':
		return n == '>' || n == '=';
	case '=':
		return n == '=' || n == '>';
	case '-':
		return n == '>' || n == '=';
	case '&':
		return n == '&' || n == '=';
	case '|':
		return n == '|' || n == '=';
	case '!':
	case '+':
	case '*':
	case '/':
	case '^':
	case '%':
		// These only combine with '=' (comparison or compound assignment).
		return n == '=';
	default:
		return false;
	}
}
368
/* True for the two three character operators, `<<=` and `>>=`. */
static bool IsThreeCharOperator(int c, int n, int n2) {
	if (n2 != '=' || c != n)
		return false;
	return c == '<' || c == '>';
}
373
/* True when `c` may follow a backslash as a simple escape in a character
 * literal: n, r, t, backslash, single quote, double quote or 0. */
static bool IsValidCharacterEscape(int c) {
	switch (c) {
	case 'n':
	case 'r':
	case 't':
	case '\\':
	case '\'':
	case '"':
	case '0':
		return true;
	default:
		return false;
	}
}
378
379static bool IsValidStringEscape(int c) {
380 return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
381}
382
383static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
384 for (;;) {
385 int c = styler.SafeGetCharAt(pos, '\0');
386 if (!IsADigit(c, 16))
387 break;
388 num_digits--;
389 pos++;
390 if (num_digits == 0 && stop_asap)
391 return true;
392 }
393 if (num_digits == 0) {
394 return true;
395 } else {
396 return false;
397 }
398}
399
400/* This is overly permissive for character literals in order to accept UTF-8 encoded
401 * character literals. */
402static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
403 pos++;
404 int c = styler.SafeGetCharAt(pos, '\0');
405 int n = styler.SafeGetCharAt(pos + 1, '\0');
406 bool done = false;
407 bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
408 bool valid_char = true;
409 bool first = true;
410 while (!done) {
411 switch (c) {
412 case '\\':
413 done = true;
414 if (IsValidCharacterEscape(n)) {
415 pos += 2;
416 } else if (n == 'x') {
417 pos += 2;
418 valid_char = ScanNumericEscape(styler, pos, 2, false);
419 } else if (n == 'u' && !ascii_only) {
420 pos += 2;
421 if (styler.SafeGetCharAt(pos, '\0') != '{') {
422 // old-style
423 valid_char = ScanNumericEscape(styler, pos, 4, false);
424 } else {
425 int n_digits = 0;
426 while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
427 }
428 if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
429 pos++;
430 else
431 valid_char = false;
432 }
433 } else if (n == 'U' && !ascii_only) {
434 pos += 2;
435 valid_char = ScanNumericEscape(styler, pos, 8, false);
436 } else {
437 valid_char = false;
438 }
439 break;
440 case '\'':
441 valid_char = !first;
442 done = true;
443 break;
444 case '\t':
445 case '\n':
446 case '\r':
447 case '\0':
448 valid_char = false;
449 done = true;
450 break;
451 default:
452 if (ascii_only && !IsASCII((char)c)) {
453 done = true;
454 valid_char = false;
455 } else if (!IsIdentifierContinue(c) && !first) {
456 done = true;
457 } else {
458 pos++;
459 }
460 break;
461 }
462 c = styler.SafeGetCharAt(pos, '\0');
463 n = styler.SafeGetCharAt(pos + 1, '\0');
464
465 first = false;
466 }
467 if (styler.SafeGetCharAt(pos, '\0') == '\'') {
468 valid_lifetime = false;
469 } else {
470 valid_char = false;
471 }
472 if (valid_lifetime) {
473 styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
474 } else if (valid_char) {
475 pos++;
476 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
477 } else {
478 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
479 }
480}
481
/* What is already known about a comment when scanning of it starts or resumes. */
enum CommentState {
	UnknownComment = 0,	// kind still has to be worked out from the text
	DocComment = 1,		// known to be a doc comment
	NotDocComment = 2	// known to be an ordinary comment
};
487
488/*
489 * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
490 * Otherwise it's a regular comment.
491 */
492static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
493 int c = styler.SafeGetCharAt(pos, '\0');
494 bool maybe_doc_comment = false;
495 if (c == '*') {
496 int n = styler.SafeGetCharAt(pos + 1, '\0');
497 if (n != '*' && n != '/') {
498 maybe_doc_comment = true;
499 }
500 } else if (c == '!') {
501 maybe_doc_comment = true;
502 }
503
504 for (;;) {
505 int n = styler.SafeGetCharAt(pos + 1, '\0');
506 if (pos == styler.LineEnd(styler.GetLine(pos)))
507 styler.SetLineState(styler.GetLine(pos), level);
508 if (c == '*') {
509 pos++;
510 if (n == '/') {
511 pos++;
512 level--;
513 if (level == 0) {
514 styler.SetLineState(styler.GetLine(pos), 0);
515 if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
516 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
517 else
518 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
519 break;
520 }
521 }
522 } else if (c == '/') {
523 pos++;
524 if (n == '*') {
525 pos++;
526 level++;
527 }
528 }
529 else if (pos < max) {
530 pos++;
531 }
532 if (pos >= max) {
533 if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
534 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
535 else
536 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
537 break;
538 }
539 c = styler.SafeGetCharAt(pos, '\0');
540 }
541}
542
543/*
544 * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
545 * Otherwise it's a normal line comment.
546 */
547static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
548 bool maybe_doc_comment = false;
549 int c = styler.SafeGetCharAt(pos, '\0');
550 if (c == '/') {
551 if (pos < max) {
552 pos++;
553 c = styler.SafeGetCharAt(pos, '\0');
554 if (c != '/') {
555 maybe_doc_comment = true;
556 }
557 }
558 } else if (c == '!') {
559 maybe_doc_comment = true;
560 }
561
562 pos = styler.LineEnd(styler.GetLine(pos));
563 styler.SetLineState(styler.GetLine(pos), SCE_RUST_DEFAULT);
564
565 if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
566 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
567 else
568 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
569}
570
571static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
572 pos++;
573 int c = styler.SafeGetCharAt(pos, '\0');
574 pos++;
575 if (c == '/')
576 ResumeLineComment(styler, pos, max, UnknownComment);
577 else if (c == '*')
578 ResumeBlockComment(styler, pos, max, UnknownComment, 1);
579}
580
581static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
582 int c = styler.SafeGetCharAt(pos, '\0');
583 bool error = false;
584 while (c != '"' && !error) {
585 if (pos >= max) {
586 error = true;
587 break;
588 }
589 if (pos == styler.LineEnd(styler.GetLine(pos)))
590 styler.SetLineState(styler.GetLine(pos), 0);
591 if (c == '\\') {
592 int n = styler.SafeGetCharAt(pos + 1, '\0');
593 if (IsValidStringEscape(n)) {
594 pos += 2;
595 } else if (n == 'x') {
596 pos += 2;
597 error = !ScanNumericEscape(styler, pos, 2, true);
598 } else if (n == 'u' && !ascii_only) {
599 pos += 2;
600 if (styler.SafeGetCharAt(pos, '\0') != '{') {
601 // old-style
602 error = !ScanNumericEscape(styler, pos, 4, true);
603 } else {
604 int n_digits = 0;
605 while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
606 }
607 if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
608 pos++;
609 else
610 error = true;
611 }
612 } else if (n == 'U' && !ascii_only) {
613 pos += 2;
614 error = !ScanNumericEscape(styler, pos, 8, true);
615 } else {
616 pos += 1;
617 error = true;
618 }
619 } else {
620 if (ascii_only && !IsASCII((char)c))
621 error = true;
622 else
623 pos++;
624 }
625 c = styler.SafeGetCharAt(pos, '\0');
626 }
627 if (!error)
628 pos++;
629 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
630}
631
632static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
633 for (;;) {
634 if (pos == styler.LineEnd(styler.GetLine(pos)))
635 styler.SetLineState(styler.GetLine(pos), num_hashes);
636
637 int c = styler.SafeGetCharAt(pos, '\0');
638 if (c == '"') {
639 pos++;
640 int trailing_num_hashes = 0;
641 while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
642 trailing_num_hashes++;
643 pos++;
644 }
645 if (trailing_num_hashes == num_hashes) {
646 styler.SetLineState(styler.GetLine(pos), 0);
647 break;
648 }
649 } else if (pos >= max) {
650 break;
651 } else {
652 if (ascii_only && !IsASCII((char)c))
653 break;
654 pos++;
655 }
656 }
657 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
658}
659
660static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
661 pos++;
662 int num_hashes = 0;
663 while (styler.SafeGetCharAt(pos, '\0') == '#') {
664 num_hashes++;
665 pos++;
666 }
667 if (styler.SafeGetCharAt(pos, '\0') != '"') {
668 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
669 } else {
670 pos++;
671 ResumeRawString(styler, pos, max, num_hashes, ascii_only);
672 }
673}
674
675void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
676 PropSetSimple props;
677 Accessor styler(pAccess, &props);
678 Sci_Position pos = startPos;
679 Sci_Position max = pos + length;
680
681 styler.StartAt(pos);
682 styler.StartSegment(pos);
683
684 if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
685 ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
686 } else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
687 ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
688 } else if (initStyle == SCE_RUST_STRING) {
689 ResumeString(styler, pos, max, false);
690 } else if (initStyle == SCE_RUST_BYTESTRING) {
691 ResumeString(styler, pos, max, true);
692 } else if (initStyle == SCE_RUST_STRINGR) {
693 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
694 } else if (initStyle == SCE_RUST_BYTESTRINGR) {
695 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
696 }
697
698 while (pos < max) {
699 int c = styler.SafeGetCharAt(pos, '\0');
700 int n = styler.SafeGetCharAt(pos + 1, '\0');
701 int n2 = styler.SafeGetCharAt(pos + 2, '\0');
702
703 if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
704 pos += 2;
705 ResumeLineComment(styler, pos, max, NotDocComment);
706 } else if (IsWhitespace(c)) {
707 ScanWhitespace(styler, pos, max);
708 } else if (c == '/' && (n == '/' || n == '*')) {
709 ScanComments(styler, pos, max);
710 } else if (c == 'r' && (n == '#' || n == '"')) {
711 ScanRawString(styler, pos, max, false);
712 } else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
713 pos++;
714 ScanRawString(styler, pos, max, true);
715 } else if (c == 'b' && n == '"') {
716 pos += 2;
717 ResumeString(styler, pos, max, true);
718 } else if (c == 'b' && n == '\'') {
719 pos++;
720 ScanCharacterLiteralOrLifetime(styler, pos, true);
721 } else if (IsIdentifierStart(c)) {
722 ScanIdentifier(styler, pos, keywords);
723 } else if (IsADigit(c)) {
724 ScanNumber(styler, pos);
725 } else if (IsThreeCharOperator(c, n, n2)) {
726 pos += 3;
727 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
728 } else if (IsTwoCharOperator(c, n)) {
729 pos += 2;
730 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
731 } else if (IsOneCharOperator(c)) {
732 pos++;
733 styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
734 } else if (c == '\'') {
735 ScanCharacterLiteralOrLifetime(styler, pos, false);
736 } else if (c == '"') {
737 pos++;
738 ResumeString(styler, pos, max, false);
739 } else {
740 pos++;
741 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
742 }
743 }
744 styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
745 styler.Flush();
746}
747
748void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
749
750 if (!options.fold)
751 return;
752
753 LexAccessor styler(pAccess);
754
755 Sci_PositionU endPos = startPos + length;
756 int visibleChars = 0;
757 bool inLineComment = false;
758 Sci_Position lineCurrent = styler.GetLine(startPos);
759 int levelCurrent = SC_FOLDLEVELBASE;
760 if (lineCurrent > 0)
761 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
762 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
763 int levelMinCurrent = levelCurrent;
764 int levelNext = levelCurrent;
765 char chNext = styler[startPos];
766 int styleNext = styler.StyleAt(startPos);
767 int style = initStyle;
768 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
769 for (Sci_PositionU i = startPos; i < endPos; i++) {
770 char ch = chNext;
771 chNext = styler.SafeGetCharAt(i + 1);
772 int stylePrev = style;
773 style = styleNext;
774 styleNext = styler.StyleAt(i + 1);
775 bool atEOL = i == (lineStartNext-1);
776 if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
777 inLineComment = true;
778 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
779 if (!IsStreamCommentStyle(stylePrev)) {
780 levelNext++;
781 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
782 // Comments don't end at end of line and the next character may be unstyled.
783 levelNext--;
784 }
785 }
786 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
787 if (userDefinedFoldMarkers) {
788 if (styler.Match(i, options.foldExplicitStart.c_str())) {
789 levelNext++;
790 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
791 levelNext--;
792 }
793 } else {
794 if ((ch == '/') && (chNext == '/')) {
795 char chNext2 = styler.SafeGetCharAt(i + 2);
796 if (chNext2 == '{') {
797 levelNext++;
798 } else if (chNext2 == '}') {
799 levelNext--;
800 }
801 }
802 }
803 }
804 if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
805 if (ch == '{') {
806 // Measure the minimum before a '{' to allow
807 // folding on "} else {"
808 if (levelMinCurrent > levelNext) {
809 levelMinCurrent = levelNext;
810 }
811 levelNext++;
812 } else if (ch == '}') {
813 levelNext--;
814 }
815 }
816 if (!IsASpace(ch))
817 visibleChars++;
818 if (atEOL || (i == endPos-1)) {
819 int levelUse = levelCurrent;
820 if (options.foldSyntaxBased && options.foldAtElse) {
821 levelUse = levelMinCurrent;
822 }
823 int lev = levelUse | levelNext << 16;
824 if (visibleChars == 0 && options.foldCompact)
825 lev |= SC_FOLDLEVELWHITEFLAG;
826 if (levelUse < levelNext)
827 lev |= SC_FOLDLEVELHEADERFLAG;
828 if (lev != styler.LevelAt(lineCurrent)) {
829 styler.SetLevel(lineCurrent, lev);
830 }
831 lineCurrent++;
832 lineStartNext = styler.LineStart(lineCurrent+1);
833 levelCurrent = levelNext;
834 levelMinCurrent = levelCurrent;
835 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
836 // There is an empty line at end of file so give it same level and empty
837 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
838 }
839 visibleChars = 0;
840 inLineComment = false;
841 }
842 }
843}
844
845LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);
846