1// Scintilla source code edit control
2// Encoding: UTF-8
3/** @file LexJulia.cxx
4 ** Lexer for Julia.
5 ** Reusing code from LexMatlab, LexPython and LexRust
6 **
7 ** Written by Bertrand Lacoste
8 **
9 **/
10// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
11// The License.txt file describes the conditions under which this software may be distributed.
12
13#include <cstdlib>
14#include <cassert>
15#include <cstring>
16
17#include <string>
18#include <string_view>
19#include <vector>
20#include <map>
21#include <algorithm>
22#include <functional>
23
24#include "ILexer.h"
25#include "Scintilla.h"
26#include "SciLexer.h"
27
28#include "StringCopy.h"
29#include "PropSetSimple.h"
30#include "WordList.h"
31#include "LexAccessor.h"
32#include "Accessor.h"
33#include "StyleContext.h"
34#include "CharacterSet.h"
35#include "CharacterCategory.h"
36#include "LexerModule.h"
37#include "OptionSet.h"
38#include "DefaultLexer.h"
39
40using namespace Scintilla;
41using namespace Lexilla;
42
// Maximum number of characters of an identifier token that are copied out for
// keyword lookup; the buffer that receives them is sized with one extra byte.
static const int MAX_JULIA_IDENT_CHARS = 1023;
44
// Option values consulted by LexerJulia, together with their defaults.
struct OptionsJulia {
    bool fold;                     // master switch for folding
    bool foldComment;              // bound to the "fold.comment" property
    bool foldCompact;              // bound to the "fold.compact" property
    bool foldDocstring;            // bound to "fold.julia.docstring"
    bool foldSyntaxBased;          // bound to "fold.julia.syntax.based"
    bool highlightTypeannotation;  // bound to "lexer.julia.highlight.typeannotation"
    bool highlightLexerror;        // bound to "lexer.julia.highlight.lexerror"

    // Folding features default to on (except compact folding); the extra
    // highlighting features default to off.
    OptionsJulia() :
        fold(true),
        foldComment(true),
        foldCompact(false),
        foldDocstring(true),
        foldSyntaxBased(true),
        highlightTypeannotation(false),
        highlightLexerror(false) {
    }
};
64
// Descriptions of the word lists accepted by the lexer, in index order.
// The array is terminated by a null pointer.
const char * const juliaWordLists[] = {
    "Primary keywords and identifiers",
    "Built in types",
    "Other keywords",
    "Built in functions",
    nullptr,
};
72
73struct OptionSetJulia : public OptionSet<OptionsJulia> {
74 OptionSetJulia() {
75 DefineProperty("fold", &OptionsJulia::fold);
76
77 DefineProperty("fold.compact", &OptionsJulia::foldCompact);
78
79 DefineProperty("fold.comment", &OptionsJulia::foldComment);
80
81 DefineProperty("fold.julia.docstring", &OptionsJulia::foldDocstring,
82 "Fold multiline triple-doublequote strings, usually used to document a function or type above the definition.");
83
84 DefineProperty("fold.julia.syntax.based", &OptionsJulia::foldSyntaxBased,
85 "Set this property to 0 to disable syntax based folding.");
86
87 DefineProperty("lexer.julia.highlight.typeannotation", &OptionsJulia::highlightTypeannotation,
88 "This option enables highlighting of the type identifier after `::`.");
89
90 DefineProperty("lexer.julia.highlight.lexerror", &OptionsJulia::highlightLexerror,
91 "This option enables highlighting of syntax error int character or number definition.");
92
93 DefineWordListSets(juliaWordLists);
94 }
95};
96
// Style metadata table passed to the DefaultLexer constructor by LexerJulia.
// Each row lists a style number, the name of the matching SCE_JULIA_* constant,
// a space separated list of tags and a human readable description.
LexicalClass juliaLexicalClasses[] = {
 // Lexer Julia SCLEX_JULIA SCE_JULIA_:
 0, "SCE_JULIA_DEFAULT", "default", "White space",
 1, "SCE_JULIA_COMMENT", "comment", "Comment",
 2, "SCE_JULIA_NUMBER", "literal numeric", "Number",
 3, "SCE_JULIA_KEYWORD1", "keyword", "Reserved keywords",
 4, "SCE_JULIA_KEYWORD2", "identifier", "Builtin type names",
 5, "SCE_JULIA_KEYWORD3", "identifier", "Constants",
 6, "SCE_JULIA_CHAR", "literal string character", "Single quoted string",
 7, "SCE_JULIA_OPERATOR", "operator", "Operator",
 8, "SCE_JULIA_BRACKET", "bracket operator", "Bracket operator",
 9, "SCE_JULIA_IDENTIFIER", "identifier", "Identifier",
 10, "SCE_JULIA_STRING", "literal string", "Double quoted String",
 11, "SCE_JULIA_SYMBOL", "literal string symbol", "Symbol",
 12, "SCE_JULIA_MACRO", "macro preprocessor", "Macro",
 13, "SCE_JULIA_STRINGINTERP", "literal string interpolated", "String interpolation",
 14, "SCE_JULIA_DOCSTRING", "literal string documentation", "Docstring",
 15, "SCE_JULIA_STRINGLITERAL", "literal string", "String literal prefix",
 16, "SCE_JULIA_COMMAND", "literal string command", "Command",
 17, "SCE_JULIA_COMMANDLITERAL", "literal string command", "Command literal prefix",
 18, "SCE_JULIA_TYPEANNOT", "identifier type", "Type annotation identifier",
 19, "SCE_JULIA_LEXERROR", "lexer error", "Lexing error",
 20, "SCE_JULIA_KEYWORD4", "identifier", "Builtin function names",
 21, "SCE_JULIA_TYPEOPERATOR", "operator type", "Type annotation operator",
};
122
123class LexerJulia : public DefaultLexer {
124 WordList keywords;
125 WordList identifiers2;
126 WordList identifiers3;
127 WordList identifiers4;
128 OptionsJulia options;
129 OptionSetJulia osJulia;
130public:
131 explicit LexerJulia() :
132 DefaultLexer("julia", SCLEX_JULIA, juliaLexicalClasses, ELEMENTS(juliaLexicalClasses)) {
133 }
134 virtual ~LexerJulia() {
135 }
136 void SCI_METHOD Release() override {
137 delete this;
138 }
139 int SCI_METHOD Version() const override {
140 return lvRelease5;
141 }
142 const char * SCI_METHOD PropertyNames() override {
143 return osJulia.PropertyNames();
144 }
145 int SCI_METHOD PropertyType(const char *name) override {
146 return osJulia.PropertyType(name);
147 }
148 const char * SCI_METHOD DescribeProperty(const char *name) override {
149 return osJulia.DescribeProperty(name);
150 }
151 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
152 const char * SCI_METHOD PropertyGet(const char *key) override {
153 return osJulia.PropertyGet(key);
154 }
155 const char * SCI_METHOD DescribeWordListSets() override {
156 return osJulia.DescribeWordListSets();
157 }
158 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
159 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
160 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
161 void * SCI_METHOD PrivateCall(int, void *) override {
162 return 0;
163 }
164
165 static ILexer5 *LexerFactoryJulia() {
166 return new LexerJulia();
167 }
168};
169
170Sci_Position SCI_METHOD LexerJulia::PropertySet(const char *key, const char *val) {
171 if (osJulia.PropertySet(&options, key, val)) {
172 return 0;
173 }
174 return -1;
175}
176
177Sci_Position SCI_METHOD LexerJulia::WordListSet(int n, const char *wl) {
178 WordList *wordListN = nullptr;
179 switch (n) {
180 case 0:
181 wordListN = &keywords;
182 break;
183 case 1:
184 wordListN = &identifiers2;
185 break;
186 case 2:
187 wordListN = &identifiers3;
188 break;
189 case 3:
190 wordListN = &identifiers4;
191 break;
192 }
193 Sci_Position firstModification = -1;
194 if (wordListN) {
195 WordList wlNew;
196 wlNew.Set(wl);
197 if (*wordListN != wlNew) {
198 wordListN->Set(wl);
199 firstModification = 0;
200 }
201 }
202 return firstModification;
203}
204
// True when `ch` is one of the ASCII operator characters
// % ^ & * - + = | < > / ~ and backslash.
static inline bool IsJuliaOperator(int ch) {
    switch (ch) {
    case '%': case '^': case '&': case '*':
    case '-': case '+': case '=': case '|':
    case '<': case '>': case '/': case '~':
    case '\\':
        return true;
    default:
        return false;
    }
}
214
// Recognises the non-ASCII unary operators handled by the lexer.
static inline bool IsJuliaUnaryOperator (int ch) {
    switch (ch) {
    case 0x00ac:
    case 0x00b1:
    case 0x2213:
    case 0x221a:
    case 0x221b:
    case 0x221c:
    case 0x22c6:
        return true;
    default:
        return false;
    }
}
224
// True for round, curly and square brackets (opening or closing).
static inline bool IsJuliaParen (int ch) {
    switch (ch) {
    case '(': case ')':
    case '{': case '}':
    case '[': case ']':
        return true;
    default:
        return false;
    }
}
232
// Unicode parsing from Julia source code:
// https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_extensions.c
// keep the same function name to be easy to find again
//
// Returns non-zero when the code point `wc` may start an identifier.
// The decision combines the character category reported by
// CategoriseCharacter() with explicit code point ranges:
//  - letter categories (Lu, Ll, Lt, Lm, Lo), letter numbers (Nl) and
//    currency symbols (Sc) are always accepted;
//  - "other symbols" (So) are accepted except for the listed exclusions;
//  - a whitelist of individual code points and ranges is accepted
//    regardless of category.
// The outer range tests such as `wc >= 0x2140 && wc <= 0x2a1c` only guard
// the nested comparisons; they do not accept code points by themselves.
static int is_wc_cat_id_start(uint32_t wc) {
 const CharacterCategory cat = CategoriseCharacter((int) wc);

 return (cat == ccLu || cat == ccLl ||
 cat == ccLt || cat == ccLm ||
 cat == ccLo || cat == ccNl ||
 cat == ccSc || // allow currency symbols
 // other symbols, but not arrows or replacement characters
 (cat == ccSo && !(wc >= 0x2190 && wc <= 0x21FF) &&
 wc != 0xfffc && wc != 0xfffd &&
 wc != 0x233f && // notslash
 wc != 0x00a6) || // broken bar

 // math symbol (category Sm) whitelist
 (wc >= 0x2140 && wc <= 0x2a1c &&
 ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄
 wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
 wc == 0x22a4 || wc == 0x22a5 || // ⊤ ⊥

 (wc >= 0x2202 && wc <= 0x2233 &&
 (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆
 wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏
 wc == 0x2210 || wc == 0x2211 || // ∐, ∑
 wc == 0x221e || wc == 0x221f || // ∞, ∟
 wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳

 (wc >= 0x22c0 && wc <= 0x22c3) || // N-ary big ops: ⋀, ⋁, ⋂, ⋃
 (wc >= 0x25F8 && wc <= 0x25ff) || // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿

 (wc >= 0x266f &&
 (wc == 0x266f || wc == 0x27d8 || wc == 0x27d9 || // ♯, ⟘, ⟙
 (wc >= 0x27c0 && wc <= 0x27c1) || // ⟀, ⟁
 (wc >= 0x29b0 && wc <= 0x29b4) || // ⦰, ⦱, ⦲, ⦳, ⦴
 (wc >= 0x2a00 && wc <= 0x2a06) || // ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆
 (wc >= 0x2a09 && wc <= 0x2a16) || // ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, ⨓, ⨔, ⨕, ⨖
 wc == 0x2a1b || wc == 0x2a1c)))) || // ⨛, ⨜

 (wc >= 0x1d6c1 && // variants of \nabla and \partial
 (wc == 0x1d6c1 || wc == 0x1d6db ||
 wc == 0x1d6fb || wc == 0x1d715 ||
 wc == 0x1d735 || wc == 0x1d74f ||
 wc == 0x1d76f || wc == 0x1d789 ||
 wc == 0x1d7a9 || wc == 0x1d7c3)) ||

 // super- and subscript +-=()
 (wc >= 0x207a && wc <= 0x207e) ||
 (wc >= 0x208a && wc <= 0x208e) ||

 // angle symbols
 (wc >= 0x2220 && wc <= 0x2222) || // ∠, ∡, ∢
 (wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯

 // Other_ID_Start
 wc == 0x2118 || wc == 0x212E || // ℘, ℮
 (wc >= 0x309B && wc <= 0x309C) || // katakana-hiragana sound marks

 // bold-digits and double-struck digits
 (wc >= 0x1D7CE && wc <= 0x1D7E1)); // 𝟎 through 𝟗 (inclusive), 𝟘 through 𝟡 (inclusive)
}
295
296static inline bool IsIdentifierFirstCharacter (int ch) {
297 if (IsASCII(ch)) {
298 return (bool) (isalpha(ch) || ch == '_');
299 }
300 if (ch < 0xA1 || ch > 0x10ffff) {
301 return false;
302 }
303
304 return is_wc_cat_id_start((uint32_t) ch);
305}
306
307static inline bool IsIdentifierCharacter (int ch) {
308 if (IsASCII(ch)) {
309 return (bool) (isalnum(ch) || ch == '_' || ch == '!');
310 }
311 if (ch < 0xA1 || ch > 0x10ffff) {
312 return false;
313 }
314
315 if (is_wc_cat_id_start((uint32_t) ch)) {
316 return true;
317 }
318
319 const CharacterCategory cat = CategoriseCharacter(ch);
320
321 if (cat == ccMn || cat == ccMc ||
322 cat == ccNd || cat == ccPc ||
323 cat == ccSk || cat == ccMe ||
324 cat == ccNo ||
325 // primes (single, double, triple, their reverses, and quadruple)
326 (ch >= 0x2032 && ch <= 0x2037) || (ch == 0x2057)) {
327 return true;
328 }
329 return false;
330}
331
// Code points accepted as operator suffix characters by jl_op_suffix_char(),
// in addition to the combining mark categories tested there.
// The entries are listed in ascending code point order.
// keep the same function name to be easy to find again
static const uint32_t opsuffs[] = {
 0x00b2, // ²
 0x00b3, // ³
 0x00b9, // ¹
 0x02b0, // ʰ
 0x02b2, // ʲ
 0x02b3, // ʳ
 0x02b7, // ʷ
 0x02b8, // ʸ
 0x02e1, // ˡ
 0x02e2, // ˢ
 0x02e3, // ˣ
 0x1d2c, // ᴬ
 0x1d2e, // ᴮ
 0x1d30, // ᴰ
 0x1d31, // ᴱ
 0x1d33, // ᴳ
 0x1d34, // ᴴ
 0x1d35, // ᴵ
 0x1d36, // ᴶ
 0x1d37, // ᴷ
 0x1d38, // ᴸ
 0x1d39, // ᴹ
 0x1d3a, // ᴺ
 0x1d3c, // ᴼ
 0x1d3e, // ᴾ
 0x1d3f, // ᴿ
 0x1d40, // ᵀ
 0x1d41, // ᵁ
 0x1d42, // ᵂ
 0x1d43, // ᵃ
 0x1d47, // ᵇ
 0x1d48, // ᵈ
 0x1d49, // ᵉ
 0x1d4d, // ᵍ
 0x1d4f, // ᵏ
 0x1d50, // ᵐ
 0x1d52, // ᵒ
 0x1d56, // ᵖ
 0x1d57, // ᵗ
 0x1d58, // ᵘ
 0x1d5b, // ᵛ
 0x1d5d, // ᵝ
 0x1d5e, // ᵞ
 0x1d5f, // ᵟ
 0x1d60, // ᵠ
 0x1d61, // ᵡ
 0x1d62, // ᵢ
 0x1d63, // ᵣ
 0x1d64, // ᵤ
 0x1d65, // ᵥ
 0x1d66, // ᵦ
 0x1d67, // ᵧ
 0x1d68, // ᵨ
 0x1d69, // ᵩ
 0x1d6a, // ᵪ
 0x1d9c, // ᶜ
 0x1da0, // ᶠ
 0x1da5, // ᶥ
 0x1da6, // ᶦ
 0x1dab, // ᶫ
 0x1db0, // ᶰ
 0x1db8, // ᶸ
 0x1dbb, // ᶻ
 0x1dbf, // ᶿ
 0x2032, // ′
 0x2033, // ″
 0x2034, // ‴
 0x2035, // ‵
 0x2036, // ‶
 0x2037, // ‷
 0x2057, // ⁗
 0x2070, // ⁰
 0x2071, // ⁱ
 0x2074, // ⁴
 0x2075, // ⁵
 0x2076, // ⁶
 0x2077, // ⁷
 0x2078, // ⁸
 0x2079, // ⁹
 0x207a, // ⁺
 0x207b, // ⁻
 0x207c, // ⁼
 0x207d, // ⁽
 0x207e, // ⁾
 0x207f, // ⁿ
 0x2080, // ₀
 0x2081, // ₁
 0x2082, // ₂
 0x2083, // ₃
 0x2084, // ₄
 0x2085, // ₅
 0x2086, // ₆
 0x2087, // ₇
 0x2088, // ₈
 0x2089, // ₉
 0x208a, // ₊
 0x208b, // ₋
 0x208c, // ₌
 0x208d, // ₍
 0x208e, // ₎
 0x2090, // ₐ
 0x2091, // ₑ
 0x2092, // ₒ
 0x2093, // ₓ
 0x2095, // ₕ
 0x2096, // ₖ
 0x2097, // ₗ
 0x2098, // ₘ
 0x2099, // ₙ
 0x209a, // ₚ
 0x209b, // ₛ
 0x209c, // ₜ
 0x2c7c, // ⱼ
 0x2c7d, // ⱽ
 0xa71b, // ꜛ
 0xa71c, // ꜜ
 0xa71d // ꜝ
};
// Number of entries in opsuffs.
static const size_t opsuffs_len = sizeof(opsuffs) / (sizeof(uint32_t));
453
454// keep the same function name to be easy to find again
455static bool jl_op_suffix_char(uint32_t wc) {
456 if (wc < 0xA1 || wc > 0x10ffff) {
457 return false;
458 }
459 const CharacterCategory cat = CategoriseCharacter((int) wc);
460 if (cat == ccMn || cat == ccMc ||
461 cat == ccMe) {
462 return true;
463 }
464
465 for (size_t i = 0; i < opsuffs_len; ++i) {
466 if (wc == opsuffs[i]) {
467 return true;
468 }
469 }
470 return false;
471}
472
473// keep the same function name to be easy to find again
474static bool never_id_char(uint32_t wc) {
475 const CharacterCategory cat = CategoriseCharacter((int) wc);
476 return (
477 // spaces and control characters:
478 (cat >= ccZs && cat <= ccCs) ||
479
480 // ASCII and Latin1 non-connector punctuation
481 (wc < 0xff &&
482 cat >= ccPd && cat <= ccPo) ||
483
484 wc == '`' ||
485
486 // mathematical brackets
487 (wc >= 0x27e6 && wc <= 0x27ef) ||
488 // angle, corner, and lenticular brackets
489 (wc >= 0x3008 && wc <= 0x3011) ||
490 // tortoise shell, square, and more lenticular brackets
491 (wc >= 0x3014 && wc <= 0x301b) ||
492 // fullwidth parens
493 (wc == 0xff08 || wc == 0xff09) ||
494 // fullwidth square brackets
495 (wc == 0xff3b || wc == 0xff3d));
496}
497
498
499static bool IsOperatorFirstCharacter (int ch) {
500 if (IsASCII(ch)) {
501 if (IsJuliaOperator(ch) ||
502 ch == '!' || ch == '?' ||
503 ch == ':' || ch == ';' ||
504 ch == ',' || ch == '.' ) {
505 return true;
506 }else {
507 return false;
508 }
509 } else if (is_wc_cat_id_start((uint32_t) ch)) {
510 return false;
511 } else if (IsJuliaUnaryOperator(ch) ||
512 ! never_id_char((uint32_t) ch)) {
513 return true;
514 }
515 return false;
516}
517
518static bool IsOperatorCharacter (int ch) {
519 if (IsOperatorFirstCharacter(ch) ||
520 (!IsASCII(ch) && jl_op_suffix_char((uint32_t) ch)) ) {
521 return true;
522 }
523 return false;
524}
525
// True when `str` is exactly "begin" or "end".
static bool CheckBoundsIndexing(char *str) {
    const bool isBegin = strcmp("begin", str) == 0;
    return isBegin || strcmp("end", str) == 0;
}
532
// Classifies a keyword for folding purposes.
// Returns 1 for a keyword that opens a block, -1 for "end" and 0 otherwise.
static int CheckKeywordFoldPoint(char *str) {
    static const char *const blockOpeners[] = {
        "if", "for", "while", "try", "do", "begin", "let",
        "baremodule", "quote", "module", "struct", "type",
        "macro", "function",
    };

    for (const char *opener : blockOpeners) {
        if (strcmp(opener, str) == 0) {
            return 1;
        }
    }
    return (strcmp("end", str) == 0) ? -1 : 0;
}
555
// True when `ch` introduces an exponent for a number written in `base`:
// 'e', 'E' or 'f' for base 10, 'p' or 'P' for base 16.
static bool IsNumberExpon(int ch, int base) {
    switch (base) {
    case 10:
        return ch == 'e' || ch == 'E' || ch == 'f';
    case 16:
        return ch == 'p' || ch == 'P';
    default:
        return false;
    }
}
563
564/* Scans a sequence of digits, returning true if it found any. */
565static bool ScanDigits(StyleContext& sc, int base, bool allow_sep) {
566 bool found = false;
567 for (;;) {
568 if (IsADigit(sc.chNext, base) || (allow_sep && sc.chNext == '_')) {
569 found = true;
570 sc.Forward();
571 } else {
572 break;
573 }
574 }
575 return found;
576}
577
578static inline bool ScanNHexas(StyleContext &sc, int max) {
579 int n = 0;
580 bool error = false;
581
582 sc.Forward();
583 if (!IsADigit(sc.ch, 16)) {
584 error = true;
585 } else {
586 while (IsADigit(sc.ch, 16) && n < max) {
587 sc.Forward();
588 n++;
589 }
590 }
591 return error;
592}
593
// Continues lexing while the context is in the character literal state.
//  sc       - style context, positioned after the opening quote.
//  lexerror - when true, malformed literals are restyled as
//             SCE_JULIA_LEXERROR; when false no validation is performed and
//             only escapes and the closing quote are handled.
// On leaving the literal the state is set to SCE_JULIA_DEFAULT.
static void resumeCharacter(StyleContext &sc, bool lexerror) {
 bool error = false;

 // ''' case
 if (sc.chPrev == '\'' && sc.ch == '\'' && sc.chNext == '\'') {
 sc.Forward();
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 return;
 } else if (lexerror && sc.chPrev == '\'' && sc.ch == '\'') {
 // Empty literal '' : flagged as an error when error highlighting is on
 sc.ChangeState(SCE_JULIA_LEXERROR);
 sc.ForwardSetState(SCE_JULIA_DEFAULT);

 // Escape characters
 } else if (sc.ch == '\\') {
 sc.Forward();
 if (sc.ch == '\'' || sc.ch == '\\' ) {
 sc.Forward();
 } else if (sc.ch == 'n' || sc.ch == 't' || sc.ch == 'a' ||
 sc.ch == 'b' || sc.ch == 'e' || sc.ch == 'f' ||
 sc.ch == 'r' || sc.ch == 'v' ) {
 sc.Forward();
 } else if (sc.ch == 'x') {
 // \x followed by up to 2 hexadecimal digits
 error |= ScanNHexas(sc, 2);
 } else if (sc.ch == 'u') {
 // \u followed by up to 4 hexadecimal digits
 error |= ScanNHexas(sc, 4);
 } else if (sc.ch == 'U') {
 // \U followed by up to 8 hexadecimal digits
 error |= ScanNHexas(sc, 8);
 } else if (IsADigit(sc.ch, 8)) {
 // Octal escape: at most 3 octal digits in total
 int n = 1;
 int max = 3;
 sc.Forward();
 while (IsADigit(sc.ch, 8) && n < max) {
 sc.Forward();
 n++;
 }
 }

 if (lexerror) {
 // Anything left before the closing quote is an error; skip to the
 // quote or to the end of the line
 if (sc.ch != '\'') {
 error = true;
 while (sc.ch != '\'' &&
 sc.ch != '\r' &&
 sc.ch != '\n') {
 sc.Forward();
 }
 }

 if (error) {
 sc.ChangeState(SCE_JULIA_LEXERROR);
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 }
 }
 } else if (lexerror) {
 if (sc.ch < 0x20 || sc.ch > 0x10ffff) {
 // Control characters and out of range values are not valid literals
 error = true;
 } else {
 // single character
 sc.Forward();

 // More than one character before the closing quote: skip to the
 // quote or to the end of the line
 if (sc.ch != '\'') {
 error = true;
 while (sc.ch != '\'' &&
 sc.ch != '\r' &&
 sc.ch != '\n') {
 sc.Forward();
 }
 }
 }

 if (error) {
 sc.ChangeState(SCE_JULIA_LEXERROR);
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 }
 }

 // closing quote
 if (sc.ch == '\'') {
 if (sc.chNext == '\'') {
 // Another quote follows: stay in the current state and step onto it
 sc.Forward();
 } else {
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 }
 }
}
678
679static inline bool IsACharacter(StyleContext &sc) {
680 return (sc.chPrev == '\'' && sc.chNext == '\'');
681}
682
683static void ScanParenInterpolation(StyleContext &sc) {
684 // TODO: no syntax highlighting inside a string interpolation
685
686 // Level of nested parenthesis
687 int interp_level = 0;
688
689 // If true, it is inside a string and parenthesis are not counted.
690 bool allow_paren_string = false;
691
692
693 // check for end of states
694 for (; sc.More(); sc.Forward()) {
695 // TODO: check corner cases for nested string interpolation
696 // TODO: check corner cases with Command inside interpolation
697
698 if ( sc.ch == '\"' && sc.chPrev != '\\') {
699 // Toggle the string environment (parenthesis are not counted inside a string)
700 allow_paren_string = !allow_paren_string;
701 } else if ( !allow_paren_string ) {
702 if ( sc.ch == '(' && !IsACharacter(sc) ) {
703 interp_level ++;
704 } else if ( sc.ch == ')' && !IsACharacter(sc) && interp_level > 0 ) {
705 interp_level --;
706 if (interp_level == 0) {
707 // Exit interpolation
708 return;
709 }
710 }
711 }
712 }
713}
714/*
715 * Start parsing a number, parse the base.
716 */
717static void initNumber (StyleContext &sc, int &base, bool &with_dot) {
718 base = 10;
719 with_dot = false;
720 sc.SetState(SCE_JULIA_NUMBER);
721 if (sc.ch == '0') {
722 if (sc.chNext == 'x') {
723 sc.Forward();
724 base = 16;
725 if (sc.chNext == '.') {
726 sc.Forward();
727 with_dot = true;
728 }
729 } else if (sc.chNext == 'o') {
730 sc.Forward();
731 base = 8;
732 } else if (sc.chNext == 'b') {
733 sc.Forward();
734 base = 2;
735 }
736 } else if (sc.ch == '.') {
737 with_dot = true;
738 }
739}
740
/*
 * Resume parsing a String or Command, bounded by the `quote` character (" or `)
 * The `triple` argument specifies if it is a triple-quote String or Command.
 * Interpolation is detected (with `$`), and parsed if `allow_interp` is true.
 * When `full_highlight` is true the `$` itself is included in the
 * interpolation style and `$identifier` interpolations are styled as well;
 * otherwise only the parenthesised part of `$(...)` is styled.
 * The state becomes SCE_JULIA_DEFAULT once the closing quote(s) are passed.
 */
static void resumeStringLike(StyleContext &sc, int quote, bool triple, bool allow_interp, bool full_highlight) {
 // State to return to once an interpolation has been styled
 int stylePrev = sc.state;
 // Set when the context was advanced past an interpolation
 bool checkcurrent = false;

 // Escape characters
 if (sc.ch == '\\') {
 // Skip the backslash so the escaped quote, backslash or dollar is not
 // interpreted on the next step
 if (sc.chNext == quote || sc.chNext == '\\' || sc.chNext == '$') {
 sc.Forward();
 }
 } else if (allow_interp && sc.ch == '$') {
 // If the interpolation is only of a variable, do not change state
 if (sc.chNext == '(') {
 if (full_highlight) {
 sc.SetState(SCE_JULIA_STRINGINTERP);
 } else {
 sc.ForwardSetState(SCE_JULIA_STRINGINTERP);
 }
 ScanParenInterpolation(sc);
 sc.ForwardSetState(stylePrev);

 checkcurrent = true;

 } else if (full_highlight && IsIdentifierFirstCharacter(sc.chNext)) {
 sc.SetState(SCE_JULIA_STRINGINTERP);
 // Step over the `$` and the first identifier character
 sc.Forward();
 sc.Forward();
 for (; sc.More(); sc.Forward()) {
 if (! IsIdentifierCharacter(sc.ch)) {
 break;
 }
 }
 sc.SetState(stylePrev);

 checkcurrent = true;
 }

 if (checkcurrent) {
 // Check that the current character is not a special char,
 // otherwise it will be skipped
 resumeStringLike(sc, quote, triple, allow_interp, full_highlight);
 }

 } else if (sc.ch == quote) {
 if (triple) {
 // Only three consecutive quotes end a triple-quoted literal
 if (sc.chNext == quote && sc.GetRelativeCharacter(2) == quote) {
 // Move to the end of the triple quotes
 Sci_PositionU nextIndex = sc.currentPos + 2;
 while (nextIndex > sc.currentPos && sc.More()) {
 sc.Forward();
 }
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 }
 } else {
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 }
 }
}
803
804static void resumeCommand(StyleContext &sc, bool triple, bool allow_interp) {
805 return resumeStringLike(sc, '`', triple, allow_interp, true);
806}
807
808static void resumeString(StyleContext &sc, bool triple, bool allow_interp) {
809 return resumeStringLike(sc, '"', triple, allow_interp, true);
810}
811
// Continues lexing while the context is in the number state.
//  base     - numeric base of the literal being scanned.
//  with_dot - in/out flag, true once a '.' has been accepted in the literal.
//  lexerror - when true, decimal digits that are invalid for `base` are
//             restyled as SCE_JULIA_LEXERROR.
static void resumeNumber (StyleContext &sc, int base, bool &with_dot, bool lexerror) {
 if (IsNumberExpon(sc.ch, base)) {
 // Exponent marker: consume an optional sign and the decimal digits of
 // the exponent; the number ends here in every case
 if (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-') {
 sc.Forward();
 // Capture all digits
 ScanDigits(sc, 10, false);
 sc.Forward();
 }
 sc.SetState(SCE_JULIA_DEFAULT);
 } else if (sc.ch == '.' && sc.chNext == '.') {
 // Interval operator `..`
 sc.SetState(SCE_JULIA_OPERATOR);
 sc.Forward();
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 } else if (sc.ch == '.' && !with_dot) {
 // First dot of the literal: continue with the fractional digits
 with_dot = true;
 ScanDigits(sc, base, true);
 } else if (IsADigit(sc.ch, base) || sc.ch == '_') {
 ScanDigits(sc, base, true);
 } else if (IsADigit(sc.ch) && !IsADigit(sc.ch, base)) {
 // Decimal digit that does not exist in this base (e.g. 9 in octal)
 if (lexerror) {
 sc.ChangeState(SCE_JULIA_LEXERROR);
 }
 ScanDigits(sc, 10, false);
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 } else {
 sc.SetState(SCE_JULIA_DEFAULT);
 }
}
841
// Continues lexing while the context is in the operator state, and decides
// whether the current character still belongs to the operator token.
static void resumeOperator (StyleContext &sc) {
 // `::`, `<:` or `>:` directly after an operator; `>:` is not matched when
 // the `>` ends `->` or `=>`
 if (sc.chNext == ':' && (sc.ch == ':' || sc.ch == '<' ||
 (sc.ch == '>' && (sc.chPrev != '-' && sc.chPrev != '=')))) {
 // Case `:a=>:b`
 sc.Forward();
 sc.ForwardSetState(SCE_JULIA_DEFAULT);
 } else if (sc.ch == ':') {
 // Case `foo(:baz,:baz)` or `:one+:two`
 // Let the default case switch decide if it is a symbol
 sc.SetState(SCE_JULIA_DEFAULT);
 } else if (sc.ch == '\'') {
 // End the operator so the quote is examined on its own by the caller
 sc.SetState(SCE_JULIA_DEFAULT);
 } else if ((sc.ch == '.' && sc.chPrev != '.') || IsIdentifierFirstCharacter(sc.ch) ||
 (! (sc.chPrev == '.' && IsOperatorFirstCharacter(sc.ch)) &&
 ! IsOperatorCharacter(sc.ch)) ) {
 // The operator ends on: a dot that does not follow a dot, an identifier
 // start, or a character that cannot continue the operator (unless it
 // is an operator start following a dot)
 sc.SetState(SCE_JULIA_DEFAULT);
 }
}
860
861void SCI_METHOD LexerJulia::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
862 PropSetSimple props;
863 Accessor styler(pAccess, &props);
864
865 Sci_Position pos = startPos;
866 styler.StartAt(pos);
867 styler.StartSegment(pos);
868
869 // use the line state of each line to store block/multiline states
870 Sci_Position curLine = styler.GetLine(startPos);
871 // Default is false for everything and 0 counters.
872 int lineState = (curLine > 0) ? styler.GetLineState(curLine-1) : 0;
873
874 bool transpose = (lineState >> 0) & 0x01; // 1 bit to know if ' is allowed to mean transpose
875 bool istripledocstring = (lineState >> 1) & 0x01; // 1 bit to know if we are in a triple doublequotes string
876 bool triple_backtick = (lineState >> 2) & 0x01; // 1 bit to know if we are in a triple backtick command
877 bool israwstring = (lineState >> 3) & 0x01; // 1 bit to know if we are in a raw string
878 int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter
879 int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter
880 int commentDepth = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter
881
882 // base for parsing number
883 int base = 10;
884 // number has a float dot ?
885 bool with_dot = false;
886
887 StyleContext sc(startPos, length, initStyle, styler);
888
889 for (; sc.More(); sc.Forward()) {
890
891 //// check for end of states
892 switch (sc.state) {
893 case SCE_JULIA_BRACKET:
894 sc.SetState(SCE_JULIA_DEFAULT);
895 break;
896 case SCE_JULIA_OPERATOR:
897 resumeOperator(sc);
898 break;
899 case SCE_JULIA_TYPEOPERATOR:
900 sc.SetState(SCE_JULIA_DEFAULT);
901 break;
902 case SCE_JULIA_TYPEANNOT:
903 if (! IsIdentifierCharacter(sc.ch)) {
904 sc.SetState(SCE_JULIA_DEFAULT);
905 }
906 break;
907 case SCE_JULIA_IDENTIFIER:
908 // String literal
909 if (sc.ch == '\"') {
910 // If the string literal has a prefix, interpolation is disabled
911 israwstring = true;
912 sc.ChangeState(SCE_JULIA_STRINGLITERAL);
913 sc.SetState(SCE_JULIA_DEFAULT);
914
915 } else if (sc.ch == '`') {
916 // If the string literal has a prefix, interpolation is disabled
917 israwstring = true;
918 sc.ChangeState(SCE_JULIA_COMMANDLITERAL);
919 sc.SetState(SCE_JULIA_DEFAULT);
920
921 // Continue if the character is an identifier character
922 } else if (! IsIdentifierCharacter(sc.ch)) {
923 char s[MAX_JULIA_IDENT_CHARS + 1];
924 sc.GetCurrent(s, sizeof(s));
925
926 // Treat the keywords differently if we are indexing or not
927 if ( indexing_level > 0 && CheckBoundsIndexing(s)) {
928 // Inside [], (), `begin` and `end` are numbers not block keywords
929 sc.ChangeState(SCE_JULIA_NUMBER);
930 transpose = false;
931
932 } else {
933 if (keywords.InList(s)) {
934 sc.ChangeState(SCE_JULIA_KEYWORD1);
935 transpose = false;
936 } else if (identifiers2.InList(s)) {
937 sc.ChangeState(SCE_JULIA_KEYWORD2);
938 transpose = false;
939 } else if (identifiers3.InList(s)) {
940 sc.ChangeState(SCE_JULIA_KEYWORD3);
941 transpose = false;
942 } else if (identifiers4.InList(s)) {
943 sc.ChangeState(SCE_JULIA_KEYWORD4);
944 // These identifiers can be used for variable names also,
945 // so transpose is not forbidden.
946 //transpose = false;
947 }
948 }
949 sc.SetState(SCE_JULIA_DEFAULT);
950
951 // TODO: recognize begin-end blocks inside list comprehension
952 // b = [(begin n%2; n*2 end) for n in 1:10]
953 // TODO: recognize better comprehension for-if to avoid problem with code-folding
954 // c = [(if isempty(a); missing else first(b) end) for (a, b) in zip(l1, l2)]
955 }
956 break;
957 case SCE_JULIA_NUMBER:
958 resumeNumber(sc, base, with_dot, options.highlightLexerror);
959 break;
960 case SCE_JULIA_CHAR:
961 resumeCharacter(sc, options.highlightLexerror);
962 break;
963 case SCE_JULIA_DOCSTRING:
964 resumeString(sc, true, !israwstring);
965 if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
966 israwstring = false;
967 }
968 break;
969 case SCE_JULIA_STRING:
970 resumeString(sc, false, !israwstring);
971 if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
972 israwstring = false;
973 }
974 break;
975 case SCE_JULIA_COMMAND:
976 resumeCommand(sc, triple_backtick, !israwstring);
977 break;
978 case SCE_JULIA_MACRO:
979 if (IsASpace(sc.ch) || ! IsIdentifierCharacter(sc.ch)) {
980 sc.SetState(SCE_JULIA_DEFAULT);
981 }
982 break;
983 case SCE_JULIA_SYMBOL:
984 if (! IsIdentifierCharacter(sc.ch)) {
985 sc.SetState(SCE_JULIA_DEFAULT);
986 }
987 break;
988 case SCE_JULIA_COMMENT:
989 if( commentDepth > 0 ) {
990 // end or start of a nested a block comment
991 if ( sc.ch == '=' && sc.chNext == '#') {
992 commentDepth --;
993 sc.Forward();
994
995 if (commentDepth == 0) {
996 sc.ForwardSetState(SCE_JULIA_DEFAULT);
997 }
998 } else if( sc.ch == '#' && sc.chNext == '=') {
999 commentDepth ++;
1000 sc.Forward();
1001 }
1002 } else {
1003 // single line comment
1004 if (sc.atLineEnd || sc.ch == '\r' || sc.ch == '\n') {
1005 sc.SetState(SCE_JULIA_DEFAULT);
1006 transpose = false;
1007 }
1008 }
1009 break;
1010 }
1011
1012 // check start of a new state
1013 if (sc.state == SCE_JULIA_DEFAULT) {
1014 if (sc.ch == '#') {
1015 sc.SetState(SCE_JULIA_COMMENT);
1016 // increment depth if we are a block comment
1017 if(sc.chNext == '=') {
1018 commentDepth ++;
1019 sc.Forward();
1020 }
1021 } else if (sc.ch == '!') {
1022 sc.SetState(SCE_JULIA_OPERATOR);
1023 } else if (sc.ch == '\'') {
1024 if (transpose) {
1025 sc.SetState(SCE_JULIA_OPERATOR);
1026 } else {
1027 sc.SetState(SCE_JULIA_CHAR);
1028 }
1029 } else if (sc.ch == '\"') {
1030 istripledocstring = (sc.chNext == '\"' && sc.GetRelativeCharacter(2) == '\"');
1031 if (istripledocstring) {
1032 sc.SetState(SCE_JULIA_DOCSTRING);
1033 // Move to the end of the triple quotes
1034 Sci_PositionU nextIndex = sc.currentPos + 2;
1035 while (nextIndex > sc.currentPos && sc.More()) {
1036 sc.Forward();
1037 }
1038 } else {
1039 sc.SetState(SCE_JULIA_STRING);
1040 }
1041 } else if (sc.ch == '`') {
1042 triple_backtick = (sc.chNext == '`' && sc.GetRelativeCharacter(2) == '`');
1043 sc.SetState(SCE_JULIA_COMMAND);
1044 if (triple_backtick) {
1045 // Move to the end of the triple backticks
1046 Sci_PositionU nextIndex = sc.currentPos + 2;
1047 while (nextIndex > sc.currentPos && sc.More()) {
1048 sc.Forward();
1049 }
1050 }
1051 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1052 initNumber(sc, base, with_dot);
1053 } else if (IsIdentifierFirstCharacter(sc.ch)) {
1054 sc.SetState(SCE_JULIA_IDENTIFIER);
1055 transpose = true;
1056 } else if (sc.ch == '@') {
1057 sc.SetState(SCE_JULIA_MACRO);
1058 transpose = false;
1059
1060 // Several parsing of operators, should keep the order of `if` blocks
1061 } else if ((sc.ch == ':' || sc.ch == '<' || sc.ch == '>') && sc.chNext == ':') {
1062 sc.SetState(SCE_JULIA_TYPEOPERATOR);
1063 sc.Forward();
1064 // Highlight the next identifier, if option is set
1065 if (options.highlightTypeannotation &&
1066 IsIdentifierFirstCharacter(sc.chNext)) {
1067 sc.ForwardSetState(SCE_JULIA_TYPEANNOT);
1068 }
1069 } else if (sc.ch == ':') {
1070 // TODO: improve detection of range
1071 // should be solved with begin-end parsing
1072 // `push!(arr, s1 :s2)` and `a[begin :end]
1073 if (IsIdentifierFirstCharacter(sc.chNext) &&
1074 ! IsIdentifierCharacter(sc.chPrev) &&
1075 sc.chPrev != ')' && sc.chPrev != ']' ) {
1076 sc.SetState(SCE_JULIA_SYMBOL);
1077 } else {
1078 sc.SetState(SCE_JULIA_OPERATOR);
1079 }
1080 } else if (IsJuliaParen(sc.ch)) {
1081 if (sc.ch == '[') {
1082 list_comprehension ++;
1083 indexing_level ++;
1084 } else if (sc.ch == ']' && (indexing_level > 0)) {
1085 list_comprehension --;
1086 indexing_level --;
1087 } else if (sc.ch == '(') {
1088 list_comprehension ++;
1089 } else if (sc.ch == ')' && (list_comprehension > 0)) {
1090 list_comprehension --;
1091 }
1092
1093 if (sc.ch == ')' || sc.ch == ']' || sc.ch == '}') {
1094 transpose = true;
1095 } else {
1096 transpose = false;
1097 }
1098 sc.SetState(SCE_JULIA_BRACKET);
1099 } else if (IsOperatorFirstCharacter(sc.ch)) {
1100 transpose = false;
1101 sc.SetState(SCE_JULIA_OPERATOR);
1102 } else {
1103 transpose = false;
1104 }
1105 }
1106
1107 // update the line information (used for line-by-line lexing and folding)
1108 if (sc.atLineEnd) {
1109 // set the line state to the current state
1110 curLine = styler.GetLine(sc.currentPos);
1111
1112 lineState = ((transpose ? 1 : 0) << 0) |
1113 ((istripledocstring ? 1 : 0) << 1) |
1114 ((triple_backtick ? 1 : 0) << 2) |
1115 ((israwstring ? 1 : 0) << 3) |
1116 ((indexing_level & 0x0F) << 4) |
1117 ((list_comprehension & 0x0F) << 8) |
1118 ((commentDepth & 0x0F) << 12);
1119 styler.SetLineState(curLine, lineState);
1120 }
1121 }
1122 sc.Complete();
1123}
1124
1125void SCI_METHOD LexerJulia::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1126
1127 if (!options.fold)
1128 return;
1129
1130 LexAccessor styler(pAccess);
1131
1132 Sci_PositionU endPos = startPos + length;
1133 int visibleChars = 0;
1134 Sci_Position lineCurrent = styler.GetLine(startPos);
1135 int levelCurrent = SC_FOLDLEVELBASE;
1136 int lineState = 0;
1137 if (lineCurrent > 0) {
1138 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1139 lineState = styler.GetLineState(lineCurrent-1);
1140 }
1141
1142 // level of nested brackets
1143 int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter
1144 // level of nested parenthesis or brackets
1145 int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter
1146 //int commentDepth = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter
1147
1148 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
1149 int levelNext = levelCurrent;
1150 char chNext = styler[startPos];
1151 int stylePrev = styler.StyleAt(startPos - 1);
1152 int styleNext = styler.StyleAt(startPos);
1153 int style = initStyle;
1154 char word[100];
1155 int wordlen = 0;
1156 for (Sci_PositionU i = startPos; i < endPos; i++) {
1157 char ch = chNext;
1158 chNext = styler.SafeGetCharAt(i + 1);
1159 style = styleNext;
1160 styleNext = styler.StyleAt(i + 1);
1161 bool atEOL = i == (lineStartNext-1);
1162
1163 // a start/end of comment block
1164 if (options.foldComment && style == SCE_JULIA_COMMENT) {
1165 // start of block comment
1166 if (ch == '#' && chNext == '=') {
1167 levelNext ++;
1168 }
1169 // end of block comment
1170 if (ch == '=' && chNext == '#' && levelNext > 0) {
1171 levelNext --;
1172 }
1173 }
1174
1175 // Syntax based folding, accounts for list comprehension
1176 if (options.foldSyntaxBased) {
1177 // list comprehension allow `for`, `if` and `begin` without `end`
1178 if (style == SCE_JULIA_BRACKET) {
1179 if (ch == '[') {
1180 list_comprehension ++;
1181 indexing_level ++;
1182 levelNext ++;
1183 } else if (ch == ']') {
1184 list_comprehension --;
1185 indexing_level --;
1186 levelNext --;
1187 } else if (ch == '(') {
1188 list_comprehension ++;
1189 levelNext ++;
1190 } else if (ch == ')') {
1191 list_comprehension --;
1192 levelNext --;
1193 }
1194 // check non-negative
1195 if (indexing_level < 0) {
1196 indexing_level = 0;
1197 }
1198 if (list_comprehension < 0) {
1199 list_comprehension = 0;
1200 }
1201 }
1202
1203 // keyword
1204 if (style == SCE_JULIA_KEYWORD1) {
1205 word[wordlen++] = static_cast<char>(ch);
1206 if (wordlen == 100) { // prevent overflow
1207 word[0] = '\0';
1208 wordlen = 1;
1209 }
1210 if (styleNext != SCE_JULIA_KEYWORD1) {
1211 word[wordlen] = '\0';
1212 wordlen = 0;
1213 if (list_comprehension <= 0 && indexing_level <= 0) {
1214 levelNext += CheckKeywordFoldPoint(word);
1215 }
1216 }
1217 }
1218 }
1219
1220 // Docstring
1221 if (options.foldDocstring) {
1222 if (stylePrev != SCE_JULIA_DOCSTRING && style == SCE_JULIA_DOCSTRING) {
1223 levelNext ++;
1224 } else if (style == SCE_JULIA_DOCSTRING && styleNext != SCE_JULIA_DOCSTRING) {
1225 levelNext --;
1226 }
1227 }
1228
1229 // check non-negative level
1230 if (levelNext < 0) {
1231 levelNext = 0;
1232 }
1233
1234 if (!IsASpace(ch)) {
1235 visibleChars++;
1236 }
1237 stylePrev = style;
1238
1239 if (atEOL || (i == endPos-1)) {
1240 int levelUse = levelCurrent;
1241 int lev = levelUse | levelNext << 16;
1242 if (visibleChars == 0 && options.foldCompact) {
1243 lev |= SC_FOLDLEVELWHITEFLAG;
1244 }
1245 if (levelUse < levelNext) {
1246 lev |= SC_FOLDLEVELHEADERFLAG;
1247 }
1248 if (lev != styler.LevelAt(lineCurrent)) {
1249 styler.SetLevel(lineCurrent, lev);
1250 }
1251 lineCurrent++;
1252 lineStartNext = styler.LineStart(lineCurrent+1);
1253 levelCurrent = levelNext;
1254 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
1255 // There is an empty line at end of file so give it same level and empty
1256 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1257 }
1258 visibleChars = 0;
1259 }
1260 }
1261}
1262
1263LexerModule lmJulia(SCLEX_JULIA, LexerJulia::LexerFactoryJulia, "julia", juliaWordLists);
1264