| 1 | // Scintilla source code edit control |
| 2 | // Encoding: UTF-8 |
| 3 | /** @file LexJulia.cxx |
| 4 | ** Lexer for Julia. |
| 5 | ** Reusing code from LexMatlab, LexPython and LexRust |
| 6 | ** |
| 7 | ** Written by Bertrand Lacoste |
| 8 | ** |
| 9 | **/ |
| 10 | // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> |
| 11 | // The License.txt file describes the conditions under which this software may be distributed. |
| 12 | |
| 13 | #include <cstdlib> |
| 14 | #include <cassert> |
| 15 | #include <cstring> |
| 16 | |
| 17 | #include <string> |
| 18 | #include <string_view> |
| 19 | #include <vector> |
| 20 | #include <map> |
| 21 | #include <algorithm> |
| 22 | #include <functional> |
| 23 | |
| 24 | #include "ILexer.h" |
| 25 | #include "Scintilla.h" |
| 26 | #include "SciLexer.h" |
| 27 | |
| 28 | #include "StringCopy.h" |
| 29 | #include "PropSetSimple.h" |
| 30 | #include "WordList.h" |
| 31 | #include "LexAccessor.h" |
| 32 | #include "Accessor.h" |
| 33 | #include "StyleContext.h" |
| 34 | #include "CharacterSet.h" |
| 35 | #include "CharacterCategory.h" |
| 36 | #include "LexerModule.h" |
| 37 | #include "OptionSet.h" |
| 38 | #include "DefaultLexer.h" |
| 39 | |
| 40 | using namespace Scintilla; |
| 41 | using namespace Lexilla; |
| 42 | |
// Capacity, excluding the terminating NUL, of the buffer that receives the
// text of an identifier before it is looked up in the word lists.
static constexpr int MAX_JULIA_IDENT_CHARS = 1023;
| 44 | |
// Options used for LexerJulia.
// Each member is bound to a property name by OptionSetJulia.
struct OptionsJulia {
	bool fold;                     // "fold"
	bool foldComment;              // "fold.comment"
	bool foldCompact;              // "fold.compact"
	bool foldDocstring;            // "fold.julia.docstring"
	bool foldSyntaxBased;          // "fold.julia.syntax.based"
	bool highlightTypeannotation;  // "lexer.julia.highlight.typeannotation"
	bool highlightLexerror;        // "lexer.julia.highlight.lexerror"

	// Defaults: all folding options enabled except compact folding,
	// both optional highlighting modes disabled.
	OptionsJulia() :
		fold(true),
		foldComment(true),
		foldCompact(false),
		foldDocstring(true),
		foldSyntaxBased(true),
		highlightTypeannotation(false),
		highlightLexerror(false) {
	}
};
| 64 | |
// Descriptions of the word lists understood by the Julia lexer.
// Handed to DefineWordListSets() by OptionSetJulia; the final null
// pointer terminates the list.
const char * const juliaWordLists[] = {
	"Primary keywords and identifiers",
	"Built in types",
	"Other keywords",
	"Built in functions",
	nullptr,
};
| 72 | |
| 73 | struct OptionSetJulia : public OptionSet<OptionsJulia> { |
| 74 | OptionSetJulia() { |
| 75 | DefineProperty("fold" , &OptionsJulia::fold); |
| 76 | |
| 77 | DefineProperty("fold.compact" , &OptionsJulia::foldCompact); |
| 78 | |
| 79 | DefineProperty("fold.comment" , &OptionsJulia::foldComment); |
| 80 | |
| 81 | DefineProperty("fold.julia.docstring" , &OptionsJulia::foldDocstring, |
| 82 | "Fold multiline triple-doublequote strings, usually used to document a function or type above the definition." ); |
| 83 | |
| 84 | DefineProperty("fold.julia.syntax.based" , &OptionsJulia::foldSyntaxBased, |
| 85 | "Set this property to 0 to disable syntax based folding." ); |
| 86 | |
| 87 | DefineProperty("lexer.julia.highlight.typeannotation" , &OptionsJulia::highlightTypeannotation, |
| 88 | "This option enables highlighting of the type identifier after `::`." ); |
| 89 | |
| 90 | DefineProperty("lexer.julia.highlight.lexerror" , &OptionsJulia::highlightLexerror, |
| 91 | "This option enables highlighting of syntax error int character or number definition." ); |
| 92 | |
| 93 | DefineWordListSets(juliaWordLists); |
| 94 | } |
| 95 | }; |
| 96 | |
| 97 | LexicalClass juliaLexicalClasses[] = { |
| 98 | // Lexer Julia SCLEX_JULIA SCE_JULIA_: |
| 99 | 0, "SCE_JULIA_DEFAULT" , "default" , "White space" , |
| 100 | 1, "SCE_JULIA_COMMENT" , "comment" , "Comment" , |
| 101 | 2, "SCE_JULIA_NUMBER" , "literal numeric" , "Number" , |
| 102 | 3, "SCE_JULIA_KEYWORD1" , "keyword" , "Reserved keywords" , |
| 103 | 4, "SCE_JULIA_KEYWORD2" , "identifier" , "Builtin type names" , |
| 104 | 5, "SCE_JULIA_KEYWORD3" , "identifier" , "Constants" , |
| 105 | 6, "SCE_JULIA_CHAR" , "literal string character" , "Single quoted string" , |
| 106 | 7, "SCE_JULIA_OPERATOR" , "operator" , "Operator" , |
| 107 | 8, "SCE_JULIA_BRACKET" , "bracket operator" , "Bracket operator" , |
| 108 | 9, "SCE_JULIA_IDENTIFIER" , "identifier" , "Identifier" , |
| 109 | 10, "SCE_JULIA_STRING" , "literal string" , "Double quoted String" , |
| 110 | 11, "SCE_JULIA_SYMBOL" , "literal string symbol" , "Symbol" , |
| 111 | 12, "SCE_JULIA_MACRO" , "macro preprocessor" , "Macro" , |
| 112 | 13, "SCE_JULIA_STRINGINTERP" , "literal string interpolated" , "String interpolation" , |
| 113 | 14, "SCE_JULIA_DOCSTRING" , "literal string documentation" , "Docstring" , |
| 114 | 15, "SCE_JULIA_STRINGLITERAL" , "literal string" , "String literal prefix" , |
| 115 | 16, "SCE_JULIA_COMMAND" , "literal string command" , "Command" , |
| 116 | 17, "SCE_JULIA_COMMANDLITERAL" , "literal string command" , "Command literal prefix" , |
| 117 | 18, "SCE_JULIA_TYPEANNOT" , "identifier type" , "Type annotation identifier" , |
| 118 | 19, "SCE_JULIA_LEXERROR" , "lexer error" , "Lexing error" , |
| 119 | 20, "SCE_JULIA_KEYWORD4" , "identifier" , "Builtin function names" , |
| 120 | 21, "SCE_JULIA_TYPEOPERATOR" , "operator type" , "Type annotation operator" , |
| 121 | }; |
| 122 | |
| 123 | class LexerJulia : public DefaultLexer { |
| 124 | WordList keywords; |
| 125 | WordList identifiers2; |
| 126 | WordList identifiers3; |
| 127 | WordList identifiers4; |
| 128 | OptionsJulia options; |
| 129 | OptionSetJulia osJulia; |
| 130 | public: |
| 131 | explicit LexerJulia() : |
| 132 | DefaultLexer("julia" , SCLEX_JULIA, juliaLexicalClasses, ELEMENTS(juliaLexicalClasses)) { |
| 133 | } |
| 134 | virtual ~LexerJulia() { |
| 135 | } |
| 136 | void SCI_METHOD Release() override { |
| 137 | delete this; |
| 138 | } |
| 139 | int SCI_METHOD Version() const override { |
| 140 | return lvRelease5; |
| 141 | } |
| 142 | const char * SCI_METHOD PropertyNames() override { |
| 143 | return osJulia.PropertyNames(); |
| 144 | } |
| 145 | int SCI_METHOD PropertyType(const char *name) override { |
| 146 | return osJulia.PropertyType(name); |
| 147 | } |
| 148 | const char * SCI_METHOD DescribeProperty(const char *name) override { |
| 149 | return osJulia.DescribeProperty(name); |
| 150 | } |
| 151 | Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
| 152 | const char * SCI_METHOD PropertyGet(const char *key) override { |
| 153 | return osJulia.PropertyGet(key); |
| 154 | } |
| 155 | const char * SCI_METHOD DescribeWordListSets() override { |
| 156 | return osJulia.DescribeWordListSets(); |
| 157 | } |
| 158 | Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
| 159 | void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
| 160 | void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
| 161 | void * SCI_METHOD PrivateCall(int, void *) override { |
| 162 | return 0; |
| 163 | } |
| 164 | |
| 165 | static ILexer5 *LexerFactoryJulia() { |
| 166 | return new LexerJulia(); |
| 167 | } |
| 168 | }; |
| 169 | |
| 170 | Sci_Position SCI_METHOD LexerJulia::PropertySet(const char *key, const char *val) { |
| 171 | if (osJulia.PropertySet(&options, key, val)) { |
| 172 | return 0; |
| 173 | } |
| 174 | return -1; |
| 175 | } |
| 176 | |
| 177 | Sci_Position SCI_METHOD LexerJulia::WordListSet(int n, const char *wl) { |
| 178 | WordList *wordListN = nullptr; |
| 179 | switch (n) { |
| 180 | case 0: |
| 181 | wordListN = &keywords; |
| 182 | break; |
| 183 | case 1: |
| 184 | wordListN = &identifiers2; |
| 185 | break; |
| 186 | case 2: |
| 187 | wordListN = &identifiers3; |
| 188 | break; |
| 189 | case 3: |
| 190 | wordListN = &identifiers4; |
| 191 | break; |
| 192 | } |
| 193 | Sci_Position firstModification = -1; |
| 194 | if (wordListN) { |
| 195 | WordList wlNew; |
| 196 | wlNew.Set(wl); |
| 197 | if (*wordListN != wlNew) { |
| 198 | wordListN->Set(wl); |
| 199 | firstModification = 0; |
| 200 | } |
| 201 | } |
| 202 | return firstModification; |
| 203 | } |
| 204 | |
// True for the ASCII characters treated as arithmetic, comparison,
// bitwise or assignment operator characters.
static inline bool IsJuliaOperator(int ch) {
	switch (ch) {
	case '%':
	case '^':
	case '&':
	case '*':
	case '-':
	case '+':
	case '=':
	case '|':
	case '<':
	case '>':
	case '/':
	case '~':
	case '\\':
		return true;
	default:
		return false;
	}
}
| 214 | |
// True for the non-ASCII unary operators.
static inline bool IsJuliaUnaryOperator(int ch) {
	switch (ch) {
	case 0x00ac:
	case 0x00b1:
	case 0x2213:
	case 0x221a:
	case 0x221b:
	case 0x221c:
	case 0x22c6:
		return true;
	default:
		return false;
	}
}
| 224 | |
// True for round, curly and square brackets, opening or closing.
static inline bool IsJuliaParen(int ch) {
	switch (ch) {
	case '(':
	case ')':
	case '{':
	case '}':
	case '[':
	case ']':
		return true;
	default:
		return false;
	}
}
| 232 | |
| 233 | // Unicode parsing from Julia source code: |
| 234 | // https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_extensions.c |
| 235 | // keep the same function name to be easy to find again |
| 236 | static int is_wc_cat_id_start(uint32_t wc) { |
| 237 | const CharacterCategory cat = CategoriseCharacter((int) wc); |
| 238 | |
| 239 | return (cat == ccLu || cat == ccLl || |
| 240 | cat == ccLt || cat == ccLm || |
| 241 | cat == ccLo || cat == ccNl || |
| 242 | cat == ccSc || // allow currency symbols |
| 243 | // other symbols, but not arrows or replacement characters |
| 244 | (cat == ccSo && !(wc >= 0x2190 && wc <= 0x21FF) && |
| 245 | wc != 0xfffc && wc != 0xfffd && |
| 246 | wc != 0x233f && // notslash |
| 247 | wc != 0x00a6) || // broken bar |
| 248 | |
| 249 | // math symbol (category Sm) whitelist |
| 250 | (wc >= 0x2140 && wc <= 0x2a1c && |
| 251 | ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄ |
| 252 | wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿ |
| 253 | wc == 0x22a4 || wc == 0x22a5 || // ⊤ ⊥ |
| 254 | |
| 255 | (wc >= 0x2202 && wc <= 0x2233 && |
| 256 | (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆ |
| 257 | wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏ |
| 258 | wc == 0x2210 || wc == 0x2211 || // ∐, ∑ |
| 259 | wc == 0x221e || wc == 0x221f || // ∞, ∟ |
| 260 | wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳ |
| 261 | |
| 262 | (wc >= 0x22c0 && wc <= 0x22c3) || // N-ary big ops: ⋀, ⋁, ⋂, ⋃ |
| 263 | (wc >= 0x25F8 && wc <= 0x25ff) || // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿ |
| 264 | |
| 265 | (wc >= 0x266f && |
| 266 | (wc == 0x266f || wc == 0x27d8 || wc == 0x27d9 || // ♯, ⟘, ⟙ |
| 267 | (wc >= 0x27c0 && wc <= 0x27c1) || // ⟀, ⟁ |
| 268 | (wc >= 0x29b0 && wc <= 0x29b4) || // ⦰, ⦱, ⦲, ⦳, ⦴ |
| 269 | (wc >= 0x2a00 && wc <= 0x2a06) || // ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆ |
| 270 | (wc >= 0x2a09 && wc <= 0x2a16) || // ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, ⨓, ⨔, ⨕, ⨖ |
| 271 | wc == 0x2a1b || wc == 0x2a1c)))) || // ⨛, ⨜ |
| 272 | |
| 273 | (wc >= 0x1d6c1 && // variants of \nabla and \partial |
| 274 | (wc == 0x1d6c1 || wc == 0x1d6db || |
| 275 | wc == 0x1d6fb || wc == 0x1d715 || |
| 276 | wc == 0x1d735 || wc == 0x1d74f || |
| 277 | wc == 0x1d76f || wc == 0x1d789 || |
| 278 | wc == 0x1d7a9 || wc == 0x1d7c3)) || |
| 279 | |
| 280 | // super- and subscript +-=() |
| 281 | (wc >= 0x207a && wc <= 0x207e) || |
| 282 | (wc >= 0x208a && wc <= 0x208e) || |
| 283 | |
| 284 | // angle symbols |
| 285 | (wc >= 0x2220 && wc <= 0x2222) || // ∠, ∡, ∢ |
| 286 | (wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯ |
| 287 | |
| 288 | // Other_ID_Start |
| 289 | wc == 0x2118 || wc == 0x212E || // ℘, ℮ |
| 290 | (wc >= 0x309B && wc <= 0x309C) || // katakana-hiragana sound marks |
| 291 | |
| 292 | // bold-digits and double-struck digits |
| 293 | (wc >= 0x1D7CE && wc <= 0x1D7E1)); // 𝟎 through 𝟗 (inclusive), 𝟘 through 𝟡 (inclusive) |
| 294 | } |
| 295 | |
| 296 | static inline bool IsIdentifierFirstCharacter (int ch) { |
| 297 | if (IsASCII(ch)) { |
| 298 | return (bool) (isalpha(ch) || ch == '_'); |
| 299 | } |
| 300 | if (ch < 0xA1 || ch > 0x10ffff) { |
| 301 | return false; |
| 302 | } |
| 303 | |
| 304 | return is_wc_cat_id_start((uint32_t) ch); |
| 305 | } |
| 306 | |
| 307 | static inline bool IsIdentifierCharacter (int ch) { |
| 308 | if (IsASCII(ch)) { |
| 309 | return (bool) (isalnum(ch) || ch == '_' || ch == '!'); |
| 310 | } |
| 311 | if (ch < 0xA1 || ch > 0x10ffff) { |
| 312 | return false; |
| 313 | } |
| 314 | |
| 315 | if (is_wc_cat_id_start((uint32_t) ch)) { |
| 316 | return true; |
| 317 | } |
| 318 | |
| 319 | const CharacterCategory cat = CategoriseCharacter(ch); |
| 320 | |
| 321 | if (cat == ccMn || cat == ccMc || |
| 322 | cat == ccNd || cat == ccPc || |
| 323 | cat == ccSk || cat == ccMe || |
| 324 | cat == ccNo || |
| 325 | // primes (single, double, triple, their reverses, and quadruple) |
| 326 | (ch >= 0x2032 && ch <= 0x2037) || (ch == 0x2057)) { |
| 327 | return true; |
| 328 | } |
| 329 | return false; |
| 330 | } |
| 331 | |
// keep the same name to be easy to find again
// Code points accepted as operator suffixes (superscripts, subscripts,
// primes and modifier letters), listed in ascending order.
static const uint32_t opsuffs[] = {
	// ² ³ ¹
	0x00b2, 0x00b3, 0x00b9,
	// ʰ ʲ ʳ ʷ ʸ ˡ ˢ ˣ
	0x02b0, 0x02b2, 0x02b3, 0x02b7, 0x02b8, 0x02e1, 0x02e2, 0x02e3,
	// ᴬ ᴮ ᴰ ᴱ ᴳ ᴴ ᴵ ᴶ
	0x1d2c, 0x1d2e, 0x1d30, 0x1d31, 0x1d33, 0x1d34, 0x1d35, 0x1d36,
	// ᴷ ᴸ ᴹ ᴺ ᴼ ᴾ ᴿ ᵀ
	0x1d37, 0x1d38, 0x1d39, 0x1d3a, 0x1d3c, 0x1d3e, 0x1d3f, 0x1d40,
	// ᵁ ᵂ ᵃ ᵇ ᵈ ᵉ ᵍ ᵏ
	0x1d41, 0x1d42, 0x1d43, 0x1d47, 0x1d48, 0x1d49, 0x1d4d, 0x1d4f,
	// ᵐ ᵒ ᵖ ᵗ ᵘ ᵛ ᵝ ᵞ
	0x1d50, 0x1d52, 0x1d56, 0x1d57, 0x1d58, 0x1d5b, 0x1d5d, 0x1d5e,
	// ᵟ ᵠ ᵡ ᵢ ᵣ ᵤ ᵥ ᵦ
	0x1d5f, 0x1d60, 0x1d61, 0x1d62, 0x1d63, 0x1d64, 0x1d65, 0x1d66,
	// ᵧ ᵨ ᵩ ᵪ
	0x1d67, 0x1d68, 0x1d69, 0x1d6a,
	// ᶜ ᶠ ᶥ ᶦ ᶫ ᶰ ᶸ ᶻ ᶿ
	0x1d9c, 0x1da0, 0x1da5, 0x1da6, 0x1dab, 0x1db0, 0x1db8, 0x1dbb, 0x1dbf,
	// ′ ″ ‴ ‵ ‶ ‷ ⁗
	0x2032, 0x2033, 0x2034, 0x2035, 0x2036, 0x2037, 0x2057,
	// ⁰ ⁱ ⁴ ⁵ ⁶ ⁷ ⁸ ⁹
	0x2070, 0x2071, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079,
	// ⁺ ⁻ ⁼ ⁽ ⁾ ⁿ
	0x207a, 0x207b, 0x207c, 0x207d, 0x207e, 0x207f,
	// ₀ ₁ ₂ ₃ ₄ ₅ ₆ ₇ ₈ ₉
	0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087, 0x2088, 0x2089,
	// ₊ ₋ ₌ ₍ ₎
	0x208a, 0x208b, 0x208c, 0x208d, 0x208e,
	// ₐ ₑ ₒ ₓ ₕ ₖ ₗ ₘ ₙ ₚ ₛ ₜ
	0x2090, 0x2091, 0x2092, 0x2093, 0x2095, 0x2096,
	0x2097, 0x2098, 0x2099, 0x209a, 0x209b, 0x209c,
	// ⱼ ⱽ
	0x2c7c, 0x2c7d,
	// ꜛ ꜜ ꜝ
	0xa71b, 0xa71c, 0xa71d
};
// Number of entries in opsuffs
static const size_t opsuffs_len = sizeof(opsuffs) / sizeof(opsuffs[0]);
| 453 | |
| 454 | // keep the same function name to be easy to find again |
| 455 | static bool jl_op_suffix_char(uint32_t wc) { |
| 456 | if (wc < 0xA1 || wc > 0x10ffff) { |
| 457 | return false; |
| 458 | } |
| 459 | const CharacterCategory cat = CategoriseCharacter((int) wc); |
| 460 | if (cat == ccMn || cat == ccMc || |
| 461 | cat == ccMe) { |
| 462 | return true; |
| 463 | } |
| 464 | |
| 465 | for (size_t i = 0; i < opsuffs_len; ++i) { |
| 466 | if (wc == opsuffs[i]) { |
| 467 | return true; |
| 468 | } |
| 469 | } |
| 470 | return false; |
| 471 | } |
| 472 | |
| 473 | // keep the same function name to be easy to find again |
| 474 | static bool never_id_char(uint32_t wc) { |
| 475 | const CharacterCategory cat = CategoriseCharacter((int) wc); |
| 476 | return ( |
| 477 | // spaces and control characters: |
| 478 | (cat >= ccZs && cat <= ccCs) || |
| 479 | |
| 480 | // ASCII and Latin1 non-connector punctuation |
| 481 | (wc < 0xff && |
| 482 | cat >= ccPd && cat <= ccPo) || |
| 483 | |
| 484 | wc == '`' || |
| 485 | |
| 486 | // mathematical brackets |
| 487 | (wc >= 0x27e6 && wc <= 0x27ef) || |
| 488 | // angle, corner, and lenticular brackets |
| 489 | (wc >= 0x3008 && wc <= 0x3011) || |
| 490 | // tortoise shell, square, and more lenticular brackets |
| 491 | (wc >= 0x3014 && wc <= 0x301b) || |
| 492 | // fullwidth parens |
| 493 | (wc == 0xff08 || wc == 0xff09) || |
| 494 | // fullwidth square brackets |
| 495 | (wc == 0xff3b || wc == 0xff3d)); |
| 496 | } |
| 497 | |
| 498 | |
| 499 | static bool IsOperatorFirstCharacter (int ch) { |
| 500 | if (IsASCII(ch)) { |
| 501 | if (IsJuliaOperator(ch) || |
| 502 | ch == '!' || ch == '?' || |
| 503 | ch == ':' || ch == ';' || |
| 504 | ch == ',' || ch == '.' ) { |
| 505 | return true; |
| 506 | }else { |
| 507 | return false; |
| 508 | } |
| 509 | } else if (is_wc_cat_id_start((uint32_t) ch)) { |
| 510 | return false; |
| 511 | } else if (IsJuliaUnaryOperator(ch) || |
| 512 | ! never_id_char((uint32_t) ch)) { |
| 513 | return true; |
| 514 | } |
| 515 | return false; |
| 516 | } |
| 517 | |
| 518 | static bool IsOperatorCharacter (int ch) { |
| 519 | if (IsOperatorFirstCharacter(ch) || |
| 520 | (!IsASCII(ch) && jl_op_suffix_char((uint32_t) ch)) ) { |
| 521 | return true; |
| 522 | } |
| 523 | return false; |
| 524 | } |
| 525 | |
// True when `str` is exactly "begin" or "end", the two words that are
// restyled as numbers when they occur inside an indexing expression.
static bool CheckBoundsIndexing(char *str) {
	return strcmp("begin", str) == 0 || strcmp("end", str) == 0;
}
| 532 | |
// Classifies a keyword for folding:
// returns 1 for a keyword that opens a block, -1 for "end", 0 otherwise.
static int CheckKeywordFoldPoint(char *str) {
	static const char * const blockOpeners[] = {
		"if", "for", "while", "try", "do", "begin", "let",
		"baremodule", "quote", "module", "struct", "type",
		"macro", "function",
	};

	for (const char *opener : blockOpeners) {
		if (strcmp(opener, str) == 0) {
			return 1;
		}
	}
	return (strcmp("end", str) == 0) ? -1 : 0;
}
| 555 | |
// True when `ch` introduces an exponent for a number written in `base`:
// e, E or f in base 10; p or P in base 16. Other bases have no exponent.
static bool IsNumberExpon(int ch, int base) {
	switch (base) {
	case 10:
		return ch == 'e' || ch == 'E' || ch == 'f';
	case 16:
		return ch == 'p' || ch == 'P';
	default:
		return false;
	}
}
| 563 | |
| 564 | /* Scans a sequence of digits, returning true if it found any. */ |
| 565 | static bool ScanDigits(StyleContext& sc, int base, bool allow_sep) { |
| 566 | bool found = false; |
| 567 | for (;;) { |
| 568 | if (IsADigit(sc.chNext, base) || (allow_sep && sc.chNext == '_')) { |
| 569 | found = true; |
| 570 | sc.Forward(); |
| 571 | } else { |
| 572 | break; |
| 573 | } |
| 574 | } |
| 575 | return found; |
| 576 | } |
| 577 | |
| 578 | static inline bool ScanNHexas(StyleContext &sc, int max) { |
| 579 | int n = 0; |
| 580 | bool error = false; |
| 581 | |
| 582 | sc.Forward(); |
| 583 | if (!IsADigit(sc.ch, 16)) { |
| 584 | error = true; |
| 585 | } else { |
| 586 | while (IsADigit(sc.ch, 16) && n < max) { |
| 587 | sc.Forward(); |
| 588 | n++; |
| 589 | } |
| 590 | } |
| 591 | return error; |
| 592 | } |
| 593 | |
| 594 | static void resumeCharacter(StyleContext &sc, bool lexerror) { |
| 595 | bool error = false; |
| 596 | |
| 597 | // ''' case |
| 598 | if (sc.chPrev == '\'' && sc.ch == '\'' && sc.chNext == '\'') { |
| 599 | sc.Forward(); |
| 600 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 601 | return; |
| 602 | } else if (lexerror && sc.chPrev == '\'' && sc.ch == '\'') { |
| 603 | sc.ChangeState(SCE_JULIA_LEXERROR); |
| 604 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 605 | |
| 606 | // Escape characters |
| 607 | } else if (sc.ch == '\\') { |
| 608 | sc.Forward(); |
| 609 | if (sc.ch == '\'' || sc.ch == '\\' ) { |
| 610 | sc.Forward(); |
| 611 | } else if (sc.ch == 'n' || sc.ch == 't' || sc.ch == 'a' || |
| 612 | sc.ch == 'b' || sc.ch == 'e' || sc.ch == 'f' || |
| 613 | sc.ch == 'r' || sc.ch == 'v' ) { |
| 614 | sc.Forward(); |
| 615 | } else if (sc.ch == 'x') { |
| 616 | error |= ScanNHexas(sc, 2); |
| 617 | } else if (sc.ch == 'u') { |
| 618 | error |= ScanNHexas(sc, 4); |
| 619 | } else if (sc.ch == 'U') { |
| 620 | error |= ScanNHexas(sc, 8); |
| 621 | } else if (IsADigit(sc.ch, 8)) { |
| 622 | int n = 1; |
| 623 | int max = 3; |
| 624 | sc.Forward(); |
| 625 | while (IsADigit(sc.ch, 8) && n < max) { |
| 626 | sc.Forward(); |
| 627 | n++; |
| 628 | } |
| 629 | } |
| 630 | |
| 631 | if (lexerror) { |
| 632 | if (sc.ch != '\'') { |
| 633 | error = true; |
| 634 | while (sc.ch != '\'' && |
| 635 | sc.ch != '\r' && |
| 636 | sc.ch != '\n') { |
| 637 | sc.Forward(); |
| 638 | } |
| 639 | } |
| 640 | |
| 641 | if (error) { |
| 642 | sc.ChangeState(SCE_JULIA_LEXERROR); |
| 643 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 644 | } |
| 645 | } |
| 646 | } else if (lexerror) { |
| 647 | if (sc.ch < 0x20 || sc.ch > 0x10ffff) { |
| 648 | error = true; |
| 649 | } else { |
| 650 | // single character |
| 651 | sc.Forward(); |
| 652 | |
| 653 | if (sc.ch != '\'') { |
| 654 | error = true; |
| 655 | while (sc.ch != '\'' && |
| 656 | sc.ch != '\r' && |
| 657 | sc.ch != '\n') { |
| 658 | sc.Forward(); |
| 659 | } |
| 660 | } |
| 661 | } |
| 662 | |
| 663 | if (error) { |
| 664 | sc.ChangeState(SCE_JULIA_LEXERROR); |
| 665 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 666 | } |
| 667 | } |
| 668 | |
| 669 | // closing quote |
| 670 | if (sc.ch == '\'') { |
| 671 | if (sc.chNext == '\'') { |
| 672 | sc.Forward(); |
| 673 | } else { |
| 674 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 675 | } |
| 676 | } |
| 677 | } |
| 678 | |
| 679 | static inline bool IsACharacter(StyleContext &sc) { |
| 680 | return (sc.chPrev == '\'' && sc.chNext == '\''); |
| 681 | } |
| 682 | |
| 683 | static void ScanParenInterpolation(StyleContext &sc) { |
| 684 | // TODO: no syntax highlighting inside a string interpolation |
| 685 | |
| 686 | // Level of nested parenthesis |
| 687 | int interp_level = 0; |
| 688 | |
| 689 | // If true, it is inside a string and parenthesis are not counted. |
| 690 | bool allow_paren_string = false; |
| 691 | |
| 692 | |
| 693 | // check for end of states |
| 694 | for (; sc.More(); sc.Forward()) { |
| 695 | // TODO: check corner cases for nested string interpolation |
| 696 | // TODO: check corner cases with Command inside interpolation |
| 697 | |
| 698 | if ( sc.ch == '\"' && sc.chPrev != '\\') { |
| 699 | // Toggle the string environment (parenthesis are not counted inside a string) |
| 700 | allow_paren_string = !allow_paren_string; |
| 701 | } else if ( !allow_paren_string ) { |
| 702 | if ( sc.ch == '(' && !IsACharacter(sc) ) { |
| 703 | interp_level ++; |
| 704 | } else if ( sc.ch == ')' && !IsACharacter(sc) && interp_level > 0 ) { |
| 705 | interp_level --; |
| 706 | if (interp_level == 0) { |
| 707 | // Exit interpolation |
| 708 | return; |
| 709 | } |
| 710 | } |
| 711 | } |
| 712 | } |
| 713 | } |
| 714 | /* |
| 715 | * Start parsing a number, parse the base. |
| 716 | */ |
| 717 | static void initNumber (StyleContext &sc, int &base, bool &with_dot) { |
| 718 | base = 10; |
| 719 | with_dot = false; |
| 720 | sc.SetState(SCE_JULIA_NUMBER); |
| 721 | if (sc.ch == '0') { |
| 722 | if (sc.chNext == 'x') { |
| 723 | sc.Forward(); |
| 724 | base = 16; |
| 725 | if (sc.chNext == '.') { |
| 726 | sc.Forward(); |
| 727 | with_dot = true; |
| 728 | } |
| 729 | } else if (sc.chNext == 'o') { |
| 730 | sc.Forward(); |
| 731 | base = 8; |
| 732 | } else if (sc.chNext == 'b') { |
| 733 | sc.Forward(); |
| 734 | base = 2; |
| 735 | } |
| 736 | } else if (sc.ch == '.') { |
| 737 | with_dot = true; |
| 738 | } |
| 739 | } |
| 740 | |
| 741 | /* |
| 742 | * Resume parsing a String or Command, bounded by the `quote` character (\" or \`) |
| 743 | * The `triple` argument specifies if it is a triple-quote String or Command. |
| 744 | * Interpolation is detected (with `$`), and parsed if `allow_interp` is true. |
| 745 | */ |
| 746 | static void resumeStringLike(StyleContext &sc, int quote, bool triple, bool allow_interp, bool full_highlight) { |
| 747 | int stylePrev = sc.state; |
| 748 | bool checkcurrent = false; |
| 749 | |
| 750 | // Escape characters |
| 751 | if (sc.ch == '\\') { |
| 752 | if (sc.chNext == quote || sc.chNext == '\\' || sc.chNext == '$') { |
| 753 | sc.Forward(); |
| 754 | } |
| 755 | } else if (allow_interp && sc.ch == '$') { |
| 756 | // If the interpolation is only of a variable, do not change state |
| 757 | if (sc.chNext == '(') { |
| 758 | if (full_highlight) { |
| 759 | sc.SetState(SCE_JULIA_STRINGINTERP); |
| 760 | } else { |
| 761 | sc.ForwardSetState(SCE_JULIA_STRINGINTERP); |
| 762 | } |
| 763 | ScanParenInterpolation(sc); |
| 764 | sc.ForwardSetState(stylePrev); |
| 765 | |
| 766 | checkcurrent = true; |
| 767 | |
| 768 | } else if (full_highlight && IsIdentifierFirstCharacter(sc.chNext)) { |
| 769 | sc.SetState(SCE_JULIA_STRINGINTERP); |
| 770 | sc.Forward(); |
| 771 | sc.Forward(); |
| 772 | for (; sc.More(); sc.Forward()) { |
| 773 | if (! IsIdentifierCharacter(sc.ch)) { |
| 774 | break; |
| 775 | } |
| 776 | } |
| 777 | sc.SetState(stylePrev); |
| 778 | |
| 779 | checkcurrent = true; |
| 780 | } |
| 781 | |
| 782 | if (checkcurrent) { |
| 783 | // Check that the current character is not a special char, |
| 784 | // otherwise it will be skipped |
| 785 | resumeStringLike(sc, quote, triple, allow_interp, full_highlight); |
| 786 | } |
| 787 | |
| 788 | } else if (sc.ch == quote) { |
| 789 | if (triple) { |
| 790 | if (sc.chNext == quote && sc.GetRelativeCharacter(2) == quote) { |
| 791 | // Move to the end of the triple quotes |
| 792 | Sci_PositionU nextIndex = sc.currentPos + 2; |
| 793 | while (nextIndex > sc.currentPos && sc.More()) { |
| 794 | sc.Forward(); |
| 795 | } |
| 796 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 797 | } |
| 798 | } else { |
| 799 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 800 | } |
| 801 | } |
| 802 | } |
| 803 | |
| 804 | static void resumeCommand(StyleContext &sc, bool triple, bool allow_interp) { |
| 805 | return resumeStringLike(sc, '`', triple, allow_interp, true); |
| 806 | } |
| 807 | |
| 808 | static void resumeString(StyleContext &sc, bool triple, bool allow_interp) { |
| 809 | return resumeStringLike(sc, '"', triple, allow_interp, true); |
| 810 | } |
| 811 | |
| 812 | static void (StyleContext &sc, int base, bool &with_dot, bool lexerror) { |
| 813 | if (IsNumberExpon(sc.ch, base)) { |
| 814 | if (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-') { |
| 815 | sc.Forward(); |
| 816 | // Capture all digits |
| 817 | ScanDigits(sc, 10, false); |
| 818 | sc.Forward(); |
| 819 | } |
| 820 | sc.SetState(SCE_JULIA_DEFAULT); |
| 821 | } else if (sc.ch == '.' && sc.chNext == '.') { |
| 822 | // Interval operator `..` |
| 823 | sc.SetState(SCE_JULIA_OPERATOR); |
| 824 | sc.Forward(); |
| 825 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 826 | } else if (sc.ch == '.' && !with_dot) { |
| 827 | with_dot = true; |
| 828 | ScanDigits(sc, base, true); |
| 829 | } else if (IsADigit(sc.ch, base) || sc.ch == '_') { |
| 830 | ScanDigits(sc, base, true); |
| 831 | } else if (IsADigit(sc.ch) && !IsADigit(sc.ch, base)) { |
| 832 | if (lexerror) { |
| 833 | sc.ChangeState(SCE_JULIA_LEXERROR); |
| 834 | } |
| 835 | ScanDigits(sc, 10, false); |
| 836 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 837 | } else { |
| 838 | sc.SetState(SCE_JULIA_DEFAULT); |
| 839 | } |
| 840 | } |
| 841 | |
| 842 | static void resumeOperator (StyleContext &sc) { |
| 843 | if (sc.chNext == ':' && (sc.ch == ':' || sc.ch == '<' || |
| 844 | (sc.ch == '>' && (sc.chPrev != '-' && sc.chPrev != '=')))) { |
| 845 | // Case `:a=>:b` |
| 846 | sc.Forward(); |
| 847 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 848 | } else if (sc.ch == ':') { |
| 849 | // Case `foo(:baz,:baz)` or `:one+:two` |
| 850 | // Let the default case switch decide if it is a symbol |
| 851 | sc.SetState(SCE_JULIA_DEFAULT); |
| 852 | } else if (sc.ch == '\'') { |
| 853 | sc.SetState(SCE_JULIA_DEFAULT); |
| 854 | } else if ((sc.ch == '.' && sc.chPrev != '.') || IsIdentifierFirstCharacter(sc.ch) || |
| 855 | (! (sc.chPrev == '.' && IsOperatorFirstCharacter(sc.ch)) && |
| 856 | ! IsOperatorCharacter(sc.ch)) ) { |
| 857 | sc.SetState(SCE_JULIA_DEFAULT); |
| 858 | } |
| 859 | } |
| 860 | |
| 861 | void SCI_METHOD LexerJulia::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
| 862 | PropSetSimple props; |
| 863 | Accessor styler(pAccess, &props); |
| 864 | |
| 865 | Sci_Position pos = startPos; |
| 866 | styler.StartAt(pos); |
| 867 | styler.StartSegment(pos); |
| 868 | |
| 869 | // use the line state of each line to store block/multiline states |
| 870 | Sci_Position curLine = styler.GetLine(startPos); |
| 871 | // Default is false for everything and 0 counters. |
| 872 | int lineState = (curLine > 0) ? styler.GetLineState(curLine-1) : 0; |
| 873 | |
| 874 | bool transpose = (lineState >> 0) & 0x01; // 1 bit to know if ' is allowed to mean transpose |
| 875 | bool istripledocstring = (lineState >> 1) & 0x01; // 1 bit to know if we are in a triple doublequotes string |
| 876 | bool triple_backtick = (lineState >> 2) & 0x01; // 1 bit to know if we are in a triple backtick command |
| 877 | bool israwstring = (lineState >> 3) & 0x01; // 1 bit to know if we are in a raw string |
| 878 | int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter |
| 879 | int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter |
| 880 | int = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter |
| 881 | |
| 882 | // base for parsing number |
| 883 | int base = 10; |
| 884 | // number has a float dot ? |
| 885 | bool with_dot = false; |
| 886 | |
| 887 | StyleContext sc(startPos, length, initStyle, styler); |
| 888 | |
| 889 | for (; sc.More(); sc.Forward()) { |
| 890 | |
| 891 | //// check for end of states |
| 892 | switch (sc.state) { |
| 893 | case SCE_JULIA_BRACKET: |
| 894 | sc.SetState(SCE_JULIA_DEFAULT); |
| 895 | break; |
| 896 | case SCE_JULIA_OPERATOR: |
| 897 | resumeOperator(sc); |
| 898 | break; |
| 899 | case SCE_JULIA_TYPEOPERATOR: |
| 900 | sc.SetState(SCE_JULIA_DEFAULT); |
| 901 | break; |
| 902 | case SCE_JULIA_TYPEANNOT: |
| 903 | if (! IsIdentifierCharacter(sc.ch)) { |
| 904 | sc.SetState(SCE_JULIA_DEFAULT); |
| 905 | } |
| 906 | break; |
| 907 | case SCE_JULIA_IDENTIFIER: |
| 908 | // String literal |
| 909 | if (sc.ch == '\"') { |
| 910 | // If the string literal has a prefix, interpolation is disabled |
| 911 | israwstring = true; |
| 912 | sc.ChangeState(SCE_JULIA_STRINGLITERAL); |
| 913 | sc.SetState(SCE_JULIA_DEFAULT); |
| 914 | |
| 915 | } else if (sc.ch == '`') { |
| 916 | // If the string literal has a prefix, interpolation is disabled |
| 917 | israwstring = true; |
| 918 | sc.ChangeState(SCE_JULIA_COMMANDLITERAL); |
| 919 | sc.SetState(SCE_JULIA_DEFAULT); |
| 920 | |
| 921 | // Continue if the character is an identifier character |
| 922 | } else if (! IsIdentifierCharacter(sc.ch)) { |
| 923 | char s[MAX_JULIA_IDENT_CHARS + 1]; |
| 924 | sc.GetCurrent(s, sizeof(s)); |
| 925 | |
| 926 | // Treat the keywords differently if we are indexing or not |
| 927 | if ( indexing_level > 0 && CheckBoundsIndexing(s)) { |
| 928 | // Inside [], (), `begin` and `end` are numbers not block keywords |
| 929 | sc.ChangeState(SCE_JULIA_NUMBER); |
| 930 | transpose = false; |
| 931 | |
| 932 | } else { |
| 933 | if (keywords.InList(s)) { |
| 934 | sc.ChangeState(SCE_JULIA_KEYWORD1); |
| 935 | transpose = false; |
| 936 | } else if (identifiers2.InList(s)) { |
| 937 | sc.ChangeState(SCE_JULIA_KEYWORD2); |
| 938 | transpose = false; |
| 939 | } else if (identifiers3.InList(s)) { |
| 940 | sc.ChangeState(SCE_JULIA_KEYWORD3); |
| 941 | transpose = false; |
| 942 | } else if (identifiers4.InList(s)) { |
| 943 | sc.ChangeState(SCE_JULIA_KEYWORD4); |
| 944 | // These identifiers can be used for variable names also, |
| 945 | // so transpose is not forbidden. |
| 946 | //transpose = false; |
| 947 | } |
| 948 | } |
| 949 | sc.SetState(SCE_JULIA_DEFAULT); |
| 950 | |
| 951 | // TODO: recognize begin-end blocks inside list comprehension |
| 952 | // b = [(begin n%2; n*2 end) for n in 1:10] |
| 953 | // TODO: recognize better comprehension for-if to avoid problem with code-folding |
| 954 | // c = [(if isempty(a); missing else first(b) end) for (a, b) in zip(l1, l2)] |
| 955 | } |
| 956 | break; |
| 957 | case SCE_JULIA_NUMBER: |
| 958 | resumeNumber(sc, base, with_dot, options.highlightLexerror); |
| 959 | break; |
| 960 | case SCE_JULIA_CHAR: |
| 961 | resumeCharacter(sc, options.highlightLexerror); |
| 962 | break; |
| 963 | case SCE_JULIA_DOCSTRING: |
| 964 | resumeString(sc, true, !israwstring); |
| 965 | if (sc.state == SCE_JULIA_DEFAULT && israwstring) { |
| 966 | israwstring = false; |
| 967 | } |
| 968 | break; |
| 969 | case SCE_JULIA_STRING: |
| 970 | resumeString(sc, false, !israwstring); |
| 971 | if (sc.state == SCE_JULIA_DEFAULT && israwstring) { |
| 972 | israwstring = false; |
| 973 | } |
| 974 | break; |
| 975 | case SCE_JULIA_COMMAND: |
| 976 | resumeCommand(sc, triple_backtick, !israwstring); |
| 977 | break; |
| 978 | case SCE_JULIA_MACRO: |
| 979 | if (IsASpace(sc.ch) || ! IsIdentifierCharacter(sc.ch)) { |
| 980 | sc.SetState(SCE_JULIA_DEFAULT); |
| 981 | } |
| 982 | break; |
| 983 | case SCE_JULIA_SYMBOL: |
| 984 | if (! IsIdentifierCharacter(sc.ch)) { |
| 985 | sc.SetState(SCE_JULIA_DEFAULT); |
| 986 | } |
| 987 | break; |
| 988 | case SCE_JULIA_COMMENT: |
| 989 | if( commentDepth > 0 ) { |
| 990 | // end or start of a nested a block comment |
| 991 | if ( sc.ch == '=' && sc.chNext == '#') { |
| 992 | commentDepth --; |
| 993 | sc.Forward(); |
| 994 | |
| 995 | if (commentDepth == 0) { |
| 996 | sc.ForwardSetState(SCE_JULIA_DEFAULT); |
| 997 | } |
| 998 | } else if( sc.ch == '#' && sc.chNext == '=') { |
| 999 | commentDepth ++; |
| 1000 | sc.Forward(); |
| 1001 | } |
| 1002 | } else { |
| 1003 | // single line comment |
| 1004 | if (sc.atLineEnd || sc.ch == '\r' || sc.ch == '\n') { |
| 1005 | sc.SetState(SCE_JULIA_DEFAULT); |
| 1006 | transpose = false; |
| 1007 | } |
| 1008 | } |
| 1009 | break; |
| 1010 | } |
| 1011 | |
| 1012 | // check start of a new state |
| 1013 | if (sc.state == SCE_JULIA_DEFAULT) { |
| 1014 | if (sc.ch == '#') { |
| 1015 | sc.SetState(SCE_JULIA_COMMENT); |
| 1016 | // increment depth if we are a block comment |
| 1017 | if(sc.chNext == '=') { |
| 1018 | commentDepth ++; |
| 1019 | sc.Forward(); |
| 1020 | } |
| 1021 | } else if (sc.ch == '!') { |
| 1022 | sc.SetState(SCE_JULIA_OPERATOR); |
| 1023 | } else if (sc.ch == '\'') { |
| 1024 | if (transpose) { |
| 1025 | sc.SetState(SCE_JULIA_OPERATOR); |
| 1026 | } else { |
| 1027 | sc.SetState(SCE_JULIA_CHAR); |
| 1028 | } |
| 1029 | } else if (sc.ch == '\"') { |
| 1030 | istripledocstring = (sc.chNext == '\"' && sc.GetRelativeCharacter(2) == '\"'); |
| 1031 | if (istripledocstring) { |
| 1032 | sc.SetState(SCE_JULIA_DOCSTRING); |
| 1033 | // Move to the end of the triple quotes |
| 1034 | Sci_PositionU nextIndex = sc.currentPos + 2; |
| 1035 | while (nextIndex > sc.currentPos && sc.More()) { |
| 1036 | sc.Forward(); |
| 1037 | } |
| 1038 | } else { |
| 1039 | sc.SetState(SCE_JULIA_STRING); |
| 1040 | } |
| 1041 | } else if (sc.ch == '`') { |
| 1042 | triple_backtick = (sc.chNext == '`' && sc.GetRelativeCharacter(2) == '`'); |
| 1043 | sc.SetState(SCE_JULIA_COMMAND); |
| 1044 | if (triple_backtick) { |
| 1045 | // Move to the end of the triple backticks |
| 1046 | Sci_PositionU nextIndex = sc.currentPos + 2; |
| 1047 | while (nextIndex > sc.currentPos && sc.More()) { |
| 1048 | sc.Forward(); |
| 1049 | } |
| 1050 | } |
| 1051 | } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) { |
| 1052 | initNumber(sc, base, with_dot); |
| 1053 | } else if (IsIdentifierFirstCharacter(sc.ch)) { |
| 1054 | sc.SetState(SCE_JULIA_IDENTIFIER); |
| 1055 | transpose = true; |
| 1056 | } else if (sc.ch == '@') { |
| 1057 | sc.SetState(SCE_JULIA_MACRO); |
| 1058 | transpose = false; |
| 1059 | |
| 1060 | // Several parsing of operators, should keep the order of `if` blocks |
| 1061 | } else if ((sc.ch == ':' || sc.ch == '<' || sc.ch == '>') && sc.chNext == ':') { |
| 1062 | sc.SetState(SCE_JULIA_TYPEOPERATOR); |
| 1063 | sc.Forward(); |
| 1064 | // Highlight the next identifier, if option is set |
| 1065 | if (options.highlightTypeannotation && |
| 1066 | IsIdentifierFirstCharacter(sc.chNext)) { |
| 1067 | sc.ForwardSetState(SCE_JULIA_TYPEANNOT); |
| 1068 | } |
| 1069 | } else if (sc.ch == ':') { |
| 1070 | // TODO: improve detection of range |
| 1071 | // should be solved with begin-end parsing |
| 1072 | // `push!(arr, s1 :s2)` and `a[begin :end] |
| 1073 | if (IsIdentifierFirstCharacter(sc.chNext) && |
| 1074 | ! IsIdentifierCharacter(sc.chPrev) && |
| 1075 | sc.chPrev != ')' && sc.chPrev != ']' ) { |
| 1076 | sc.SetState(SCE_JULIA_SYMBOL); |
| 1077 | } else { |
| 1078 | sc.SetState(SCE_JULIA_OPERATOR); |
| 1079 | } |
| 1080 | } else if (IsJuliaParen(sc.ch)) { |
| 1081 | if (sc.ch == '[') { |
| 1082 | list_comprehension ++; |
| 1083 | indexing_level ++; |
| 1084 | } else if (sc.ch == ']' && (indexing_level > 0)) { |
| 1085 | list_comprehension --; |
| 1086 | indexing_level --; |
| 1087 | } else if (sc.ch == '(') { |
| 1088 | list_comprehension ++; |
| 1089 | } else if (sc.ch == ')' && (list_comprehension > 0)) { |
| 1090 | list_comprehension --; |
| 1091 | } |
| 1092 | |
| 1093 | if (sc.ch == ')' || sc.ch == ']' || sc.ch == '}') { |
| 1094 | transpose = true; |
| 1095 | } else { |
| 1096 | transpose = false; |
| 1097 | } |
| 1098 | sc.SetState(SCE_JULIA_BRACKET); |
| 1099 | } else if (IsOperatorFirstCharacter(sc.ch)) { |
| 1100 | transpose = false; |
| 1101 | sc.SetState(SCE_JULIA_OPERATOR); |
| 1102 | } else { |
| 1103 | transpose = false; |
| 1104 | } |
| 1105 | } |
| 1106 | |
| 1107 | // update the line information (used for line-by-line lexing and folding) |
| 1108 | if (sc.atLineEnd) { |
| 1109 | // set the line state to the current state |
| 1110 | curLine = styler.GetLine(sc.currentPos); |
| 1111 | |
| 1112 | lineState = ((transpose ? 1 : 0) << 0) | |
| 1113 | ((istripledocstring ? 1 : 0) << 1) | |
| 1114 | ((triple_backtick ? 1 : 0) << 2) | |
| 1115 | ((israwstring ? 1 : 0) << 3) | |
| 1116 | ((indexing_level & 0x0F) << 4) | |
| 1117 | ((list_comprehension & 0x0F) << 8) | |
| 1118 | ((commentDepth & 0x0F) << 12); |
| 1119 | styler.SetLineState(curLine, lineState); |
| 1120 | } |
| 1121 | } |
| 1122 | sc.Complete(); |
| 1123 | } |
| 1124 | |
| 1125 | void SCI_METHOD LexerJulia::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { |
| 1126 | |
| 1127 | if (!options.fold) |
| 1128 | return; |
| 1129 | |
| 1130 | LexAccessor styler(pAccess); |
| 1131 | |
| 1132 | Sci_PositionU endPos = startPos + length; |
| 1133 | int visibleChars = 0; |
| 1134 | Sci_Position lineCurrent = styler.GetLine(startPos); |
| 1135 | int levelCurrent = SC_FOLDLEVELBASE; |
| 1136 | int lineState = 0; |
| 1137 | if (lineCurrent > 0) { |
| 1138 | levelCurrent = styler.LevelAt(lineCurrent-1) >> 16; |
| 1139 | lineState = styler.GetLineState(lineCurrent-1); |
| 1140 | } |
| 1141 | |
| 1142 | // level of nested brackets |
| 1143 | int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter |
| 1144 | // level of nested parenthesis or brackets |
| 1145 | int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter |
| 1146 | //int commentDepth = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter |
| 1147 | |
| 1148 | Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1); |
| 1149 | int levelNext = levelCurrent; |
| 1150 | char chNext = styler[startPos]; |
| 1151 | int stylePrev = styler.StyleAt(startPos - 1); |
| 1152 | int styleNext = styler.StyleAt(startPos); |
| 1153 | int style = initStyle; |
| 1154 | char word[100]; |
| 1155 | int wordlen = 0; |
| 1156 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
| 1157 | char ch = chNext; |
| 1158 | chNext = styler.SafeGetCharAt(i + 1); |
| 1159 | style = styleNext; |
| 1160 | styleNext = styler.StyleAt(i + 1); |
| 1161 | bool atEOL = i == (lineStartNext-1); |
| 1162 | |
| 1163 | // a start/end of comment block |
| 1164 | if (options.foldComment && style == SCE_JULIA_COMMENT) { |
| 1165 | // start of block comment |
| 1166 | if (ch == '#' && chNext == '=') { |
| 1167 | levelNext ++; |
| 1168 | } |
| 1169 | // end of block comment |
| 1170 | if (ch == '=' && chNext == '#' && levelNext > 0) { |
| 1171 | levelNext --; |
| 1172 | } |
| 1173 | } |
| 1174 | |
| 1175 | // Syntax based folding, accounts for list comprehension |
| 1176 | if (options.foldSyntaxBased) { |
| 1177 | // list comprehension allow `for`, `if` and `begin` without `end` |
| 1178 | if (style == SCE_JULIA_BRACKET) { |
| 1179 | if (ch == '[') { |
| 1180 | list_comprehension ++; |
| 1181 | indexing_level ++; |
| 1182 | levelNext ++; |
| 1183 | } else if (ch == ']') { |
| 1184 | list_comprehension --; |
| 1185 | indexing_level --; |
| 1186 | levelNext --; |
| 1187 | } else if (ch == '(') { |
| 1188 | list_comprehension ++; |
| 1189 | levelNext ++; |
| 1190 | } else if (ch == ')') { |
| 1191 | list_comprehension --; |
| 1192 | levelNext --; |
| 1193 | } |
| 1194 | // check non-negative |
| 1195 | if (indexing_level < 0) { |
| 1196 | indexing_level = 0; |
| 1197 | } |
| 1198 | if (list_comprehension < 0) { |
| 1199 | list_comprehension = 0; |
| 1200 | } |
| 1201 | } |
| 1202 | |
| 1203 | // keyword |
| 1204 | if (style == SCE_JULIA_KEYWORD1) { |
| 1205 | word[wordlen++] = static_cast<char>(ch); |
| 1206 | if (wordlen == 100) { // prevent overflow |
| 1207 | word[0] = '\0'; |
| 1208 | wordlen = 1; |
| 1209 | } |
| 1210 | if (styleNext != SCE_JULIA_KEYWORD1) { |
| 1211 | word[wordlen] = '\0'; |
| 1212 | wordlen = 0; |
| 1213 | if (list_comprehension <= 0 && indexing_level <= 0) { |
| 1214 | levelNext += CheckKeywordFoldPoint(word); |
| 1215 | } |
| 1216 | } |
| 1217 | } |
| 1218 | } |
| 1219 | |
| 1220 | // Docstring |
| 1221 | if (options.foldDocstring) { |
| 1222 | if (stylePrev != SCE_JULIA_DOCSTRING && style == SCE_JULIA_DOCSTRING) { |
| 1223 | levelNext ++; |
| 1224 | } else if (style == SCE_JULIA_DOCSTRING && styleNext != SCE_JULIA_DOCSTRING) { |
| 1225 | levelNext --; |
| 1226 | } |
| 1227 | } |
| 1228 | |
| 1229 | // check non-negative level |
| 1230 | if (levelNext < 0) { |
| 1231 | levelNext = 0; |
| 1232 | } |
| 1233 | |
| 1234 | if (!IsASpace(ch)) { |
| 1235 | visibleChars++; |
| 1236 | } |
| 1237 | stylePrev = style; |
| 1238 | |
| 1239 | if (atEOL || (i == endPos-1)) { |
| 1240 | int levelUse = levelCurrent; |
| 1241 | int lev = levelUse | levelNext << 16; |
| 1242 | if (visibleChars == 0 && options.foldCompact) { |
| 1243 | lev |= SC_FOLDLEVELWHITEFLAG; |
| 1244 | } |
| 1245 | if (levelUse < levelNext) { |
| 1246 | lev |= SC_FOLDLEVELHEADERFLAG; |
| 1247 | } |
| 1248 | if (lev != styler.LevelAt(lineCurrent)) { |
| 1249 | styler.SetLevel(lineCurrent, lev); |
| 1250 | } |
| 1251 | lineCurrent++; |
| 1252 | lineStartNext = styler.LineStart(lineCurrent+1); |
| 1253 | levelCurrent = levelNext; |
| 1254 | if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) { |
| 1255 | // There is an empty line at end of file so give it same level and empty |
| 1256 | styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG); |
| 1257 | } |
| 1258 | visibleChars = 0; |
| 1259 | } |
| 1260 | } |
| 1261 | } |
| 1262 | |
| 1263 | LexerModule lmJulia(SCLEX_JULIA, LexerJulia::LexerFactoryJulia, "julia" , juliaWordLists); |
| 1264 | |