1 | // Scintilla source code edit control |
2 | /** @file LexLua.cxx |
3 | ** Lexer for Lua language. |
4 | ** |
5 | ** Written by Paul Winwood. |
6 | ** Folder by Alexey Yutkin. |
7 | ** Modified by Marcos E. Wurzius & Philippe Lhoste |
8 | **/ |
9 | |
10 | #include <stdlib.h> |
11 | #include <string.h> |
12 | #include <stdio.h> |
13 | #include <stdarg.h> |
14 | #include <assert.h> |
15 | #include <ctype.h> |
16 | |
17 | #include <string> |
18 | #include <string_view> |
19 | |
20 | #include "ILexer.h" |
21 | #include "Scintilla.h" |
22 | #include "SciLexer.h" |
23 | |
24 | #include "StringCopy.h" |
25 | #include "WordList.h" |
26 | #include "LexAccessor.h" |
27 | #include "Accessor.h" |
28 | #include "StyleContext.h" |
29 | #include "CharacterSet.h" |
30 | #include "LexerModule.h" |
31 | |
32 | using namespace Lexilla; |
33 | |
34 | // Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ], |
35 | // return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on. |
36 | // The maximum number of '=' characters allowed is 254. |
37 | static int LongDelimCheck(StyleContext &sc) { |
38 | int sep = 1; |
39 | while (sc.GetRelative(sep) == '=' && sep < 0xFF) |
40 | sep++; |
41 | if (sc.GetRelative(sep) == sc.ch) |
42 | return sep; |
43 | return 0; |
44 | } |
45 | |
// Lexer for Lua: assigns SCE_LUA_* styles to the range that starts at
// startPos and spans length.
//  startPos     - position where styling starts
//  length       - extent of the range to style
//  initStyle    - style in effect at the start of the range
//  keywordlists - eight word lists; list 0 yields SCE_LUA_WORD and
//                 lists 1 to 7 yield SCE_LUA_WORD2 to SCE_LUA_WORD8
//  styler       - accessor used to read the text and to store styles and
//                 per-line states
// The per-line state stores the long-bracket level (as returned by
// LongDelimCheck) in bits 0-7 and the "\z whitespace skip is active"
// flag in bit 8 (0x100).
static void ColouriseLuaDoc(
	Sci_PositionU startPos,
	Sci_Position length,
	int initStyle,
	WordList *keywordlists[],
	Accessor &styler) {

	const WordList &keywords = *keywordlists[0];
	const WordList &keywords2 = *keywordlists[1];
	const WordList &keywords3 = *keywordlists[2];
	const WordList &keywords4 = *keywordlists[3];
	const WordList &keywords5 = *keywordlists[4];
	const WordList &keywords6 = *keywordlists[5];
	const WordList &keywords7 = *keywordlists[6];
	const WordList &keywords8 = *keywordlists[7];

	// Accepts accented characters
	CharacterSet setWordStart(CharacterSet::setAlpha, "_" , true);
	CharacterSet setWord(CharacterSet::setAlphaNum, "_" , true);
	// Not exactly following number definition (several dots are seen as OK, etc.)
	// but probably enough in most cases. [pP] is for hex floats.
	CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefpABCDEFP" );
	CharacterSet setExponent(CharacterSet::setNone, "eEpP" );
	CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#&|" );
	// Characters that are skipped when they directly follow a backslash in a string
	CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\" );

	Sci_Position currentLine = styler.GetLine(startPos);
	// Initialize long string [[ ... ]] or block comment --[[ ... ]],
	// if we are inside such a string. Block comment was introduced in Lua 5.0,
	// blocks with separators [=[ ... ]=] in Lua 5.1.
	// Continuation of a string (\z whitespace escaping) is controlled by stringWs.
	int sepCount = 0;
	int stringWs = 0;
	if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT ||
		initStyle == SCE_LUA_STRING || initStyle == SCE_LUA_CHARACTER) {
		// Restore both values from the state saved for the previous line
		const int lineState = styler.GetLineState(currentLine - 1);
		sepCount = lineState & 0xFF;
		stringWs = lineState & 0x100;
	}

	// results of identifier/keyword matching
	Sci_Position idenPos = 0;		// length scanned for the whole dotted identifier
	Sci_Position idenWordPos = 0;	// length of the longest prefix found in a word list
	int idenStyle = SCE_LUA_IDENTIFIER;	// style to give to that prefix
	bool foundGoto = false;			// set when the identifier is the keyword "goto"

	// Do not leak onto next line
	if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
		initStyle = SCE_LUA_DEFAULT;
	}

	StyleContext sc(startPos, length, initStyle, styler);
	if (startPos == 0 && sc.ch == '#' && sc.chNext == '!') {
		// shbang line: "#!" is a comment only if located at the start of the script
		sc.SetState(SCE_LUA_COMMENTLINE);
	}
	for (; sc.More(); sc.Forward()) {
		if (sc.atLineEnd) {
			// Update the line state, so it can be seen by next line
			currentLine = styler.GetLine(sc.currentPos);
			switch (sc.state) {
			case SCE_LUA_LITERALSTRING:
			case SCE_LUA_COMMENT:
			case SCE_LUA_STRING:
			case SCE_LUA_CHARACTER:
				// Inside a literal string, block comment or string, we set the line state
				styler.SetLineState(currentLine, stringWs | sepCount);
				break;
			default:
				// Reset the line state
				styler.SetLineState(currentLine, 0);
				break;
			}
		}
		if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
			// Prevent SCE_LUA_STRINGEOL from leaking back to previous line
			sc.SetState(SCE_LUA_STRING);
		}

		// Handle string line continuation: a backslash directly followed by a
		// line end keeps the quoted string open, so the line end characters
		// are stepped over here and never reach the unterminated string check.
		if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
			sc.ch == '\\') {
			if (sc.chNext == '\n' || sc.chNext == '\r') {
				sc.Forward();
				if (sc.ch == '\r' && sc.chNext == '\n') {
					sc.Forward();
				}
				continue;
			}
		}

		// Determine if the current state should terminate.
		if (sc.state == SCE_LUA_OPERATOR) {
			if (sc.ch == ':' && sc.chPrev == ':') { // :: <label> :: forward scan
				sc.Forward();
				// ln is an offset relative to the current position; the text is
				// only inspected here and is styled once the construct is confirmed.
				Sci_Position ln = 0;
				while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
					ln++;
				Sci_Position ws1 = ln;	// width of the blanks before the label
				if (setWordStart.Contains(sc.GetRelative(ln))) {
					int c, i = 0;
					char s[100];
					while (setWord.Contains(c = sc.GetRelative(ln))) { // get potential label
						// Only the first 90 characters are kept for the keyword test
						if (i < 90)
							s[i++] = static_cast<char>(c);
						ln++;
					}
					s[i] = '\0'; Sci_Position lbl = ln;	// lbl: offset just past the label
					if (!keywords.InList(s)) {
						while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
							ln++;
						Sci_Position ws2 = ln - lbl;	// width of the blanks after the label
						if (sc.GetRelative(ln) == ':' && sc.GetRelative(ln + 1) == ':') {
							// final :: found, complete valid label construct
							// Restyle the opening :: as label, then style blanks as
							// default and the name plus closing :: as label.
							sc.ChangeState(SCE_LUA_LABEL);
							if (ws1) {
								sc.SetState(SCE_LUA_DEFAULT);
								sc.ForwardBytes(ws1);
							}
							sc.SetState(SCE_LUA_LABEL);
							sc.ForwardBytes(lbl - ws1);
							if (ws2) {
								sc.SetState(SCE_LUA_DEFAULT);
								sc.ForwardBytes(ws2);
							}
							sc.SetState(SCE_LUA_LABEL);
							sc.ForwardBytes(2);
						}
					}
				}
			}
			sc.SetState(SCE_LUA_DEFAULT);
		} else if (sc.state == SCE_LUA_NUMBER) {
			// We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
			if (!setNumber.Contains(sc.ch)) {
				sc.SetState(SCE_LUA_DEFAULT);
			} else if (sc.ch == '-' || sc.ch == '+') {
				// A sign belongs to the number only directly after an exponent marker
				if (!setExponent.Contains(sc.chPrev))
					sc.SetState(SCE_LUA_DEFAULT);
			}
		} else if (sc.state == SCE_LUA_IDENTIFIER) {
			// The loop has already moved one character forward since the
			// identifier state was entered, hence the decrements below.
			idenPos--; // commit already-scanned identifier/word parts
			if (idenWordPos > 0) {
				// The matched prefix gets the word style, any remainder stays identifier
				idenWordPos--;
				sc.ChangeState(idenStyle);
				sc.ForwardBytes(idenWordPos);
				idenPos -= idenWordPos;
				if (idenPos > 0) {
					sc.SetState(SCE_LUA_IDENTIFIER);
					sc.ForwardBytes(idenPos);
				}
			} else {
				sc.ForwardBytes(idenPos);
			}
			sc.SetState(SCE_LUA_DEFAULT);
			if (foundGoto) { // goto <label> forward scan
				while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
					sc.Forward();
				if (setWordStart.Contains(sc.ch)) {
					sc.SetState(SCE_LUA_LABEL);
					sc.Forward();
					while (setWord.Contains(sc.ch))
						sc.Forward();
					char s[100];
					sc.GetCurrent(s, sizeof(s));
					if (keywords.InList(s)) // labels cannot be keywords
						sc.ChangeState(SCE_LUA_WORD);
				}
				sc.SetState(SCE_LUA_DEFAULT);
			}
		} else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
			if (sc.atLineEnd) {
				sc.ForwardSetState(SCE_LUA_DEFAULT);
			}
		} else if (sc.state == SCE_LUA_STRING) {
			// The \z skip stays active only while IsASpace() characters follow
			if (stringWs) {
				if (!IsASpace(sc.ch))
					stringWs = 0;
			}
			if (sc.ch == '\\') {
				if (setEscapeSkip.Contains(sc.chNext)) {
					// Escaped quote or backslash: step over it so it cannot end the string
					sc.Forward();
				} else if (sc.chNext == 'z') {
					// \z: start skipping whitespace, which may span line ends
					sc.Forward();
					stringWs = 0x100;
				}
			} else if (sc.ch == '\"') {
				sc.ForwardSetState(SCE_LUA_DEFAULT);
			} else if (stringWs == 0 && sc.atLineEnd) {
				// Line end reached without a closing quote: flag as unterminated
				sc.ChangeState(SCE_LUA_STRINGEOL);
				sc.ForwardSetState(SCE_LUA_DEFAULT);
			}
		} else if (sc.state == SCE_LUA_CHARACTER) {
			// Same handling as SCE_LUA_STRING, with a single quote as terminator
			if (stringWs) {
				if (!IsASpace(sc.ch))
					stringWs = 0;
			}
			if (sc.ch == '\\') {
				if (setEscapeSkip.Contains(sc.chNext)) {
					sc.Forward();
				} else if (sc.chNext == 'z') {
					sc.Forward();
					stringWs = 0x100;
				}
			} else if (sc.ch == '\'') {
				sc.ForwardSetState(SCE_LUA_DEFAULT);
			} else if (stringWs == 0 && sc.atLineEnd) {
				sc.ChangeState(SCE_LUA_STRINGEOL);
				sc.ForwardSetState(SCE_LUA_DEFAULT);
			}
		} else if (sc.ch == ']' && (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT)) {
			// A long string or block comment ends only at a closing delimiter
			// of the same level as the one that opened it.
			const int sep = LongDelimCheck(sc);
			if (sep == sepCount) { // ]=]-style delim
				sc.Forward(sep);
				sc.ForwardSetState(SCE_LUA_DEFAULT);
			}
		}

		// Determine if a new state should be entered.
		if (sc.state == SCE_LUA_DEFAULT) {
			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
				sc.SetState(SCE_LUA_NUMBER);
				if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
					// Step onto the 'x' so it is never tested against setNumber,
					// which does not contain it
					sc.Forward();
				}
			} else if (setWordStart.Contains(sc.ch)) {
				// For matching various identifiers with dots and colons, multiple
				// matches are done as identifier segments are added. Longest match is
				// set to a word style. The non-matched part is in identifier style.
				// Nothing is styled here: the scan uses relative offsets and the
				// result is committed on the next iteration (SCE_LUA_IDENTIFIER branch).
				std::string ident;
				idenPos = 0;
				idenWordPos = 0;
				idenStyle = SCE_LUA_IDENTIFIER;
				foundGoto = false;
				int cNext;
				do {
					int c;
					const Sci_Position idenPosOld = idenPos;
					std::string identSeg;
					identSeg += static_cast<char>(sc.GetRelative(idenPos++));
					while (setWord.Contains(c = sc.GetRelative(idenPos))) {
						identSeg += static_cast<char>(c);
						idenPos++;
					}
					if (keywords.InList(identSeg.c_str()) && (idenPosOld > 0)) {
						// A keyword after a separator is not part of the identifier:
						// back up to the separator and drop it from ident as well.
						idenPos = idenPosOld - 1; // keywords cannot mix
						ident.pop_back();
						break;
					}
					ident += identSeg;
					const char* s = ident.c_str();
					int newStyle = SCE_LUA_IDENTIFIER;
					if (keywords.InList(s)) {
						newStyle = SCE_LUA_WORD;
					} else if (keywords2.InList(s)) {
						newStyle = SCE_LUA_WORD2;
					} else if (keywords3.InList(s)) {
						newStyle = SCE_LUA_WORD3;
					} else if (keywords4.InList(s)) {
						newStyle = SCE_LUA_WORD4;
					} else if (keywords5.InList(s)) {
						newStyle = SCE_LUA_WORD5;
					} else if (keywords6.InList(s)) {
						newStyle = SCE_LUA_WORD6;
					} else if (keywords7.InList(s)) {
						newStyle = SCE_LUA_WORD7;
					} else if (keywords8.InList(s)) {
						newStyle = SCE_LUA_WORD8;
					}
					if (newStyle != SCE_LUA_IDENTIFIER) {
						// Remember the longest prefix that matched a word list so far
						idenStyle = newStyle;
						idenWordPos = idenPos;
					}
					if (idenStyle == SCE_LUA_WORD) // keywords cannot mix
						break;
					// c is the character that ended the segment; continue only when
					// it is '.' or ':' and is directly followed by a word start.
					cNext = sc.GetRelative(idenPos + 1);
					if ((c == '.' || c == ':') && setWordStart.Contains(cNext)) {
						ident += static_cast<char>(c);
						idenPos++;
					} else {
						cNext = 0;
					}
				} while (cNext);
				if ((idenStyle == SCE_LUA_WORD) && (ident.compare("goto" ) == 0)) {
					foundGoto = true;
				}
				sc.SetState(SCE_LUA_IDENTIFIER);
			} else if (sc.ch == '\"') {
				sc.SetState(SCE_LUA_STRING);
				stringWs = 0;
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_LUA_CHARACTER);
				stringWs = 0;
			} else if (sc.ch == '[') {
				// Either a plain '[' operator or the start of a long string
				sepCount = LongDelimCheck(sc);
				if (sepCount == 0) {
					sc.SetState(SCE_LUA_OPERATOR);
				} else {
					sc.SetState(SCE_LUA_LITERALSTRING);
					sc.Forward(sepCount);
				}
			} else if (sc.Match('-', '-')) {
				// Line comment, unless "--" is followed by a long-bracket opener
				sc.SetState(SCE_LUA_COMMENTLINE);
				if (sc.Match("--[" )) {
					sc.Forward(2);
					sepCount = LongDelimCheck(sc);
					if (sepCount > 0) {
						sc.ChangeState(SCE_LUA_COMMENT);
						sc.Forward(sepCount);
					}
				} else {
					sc.Forward();
				}
			} else if (sc.atLineStart && sc.Match('$')) {
				sc.SetState(SCE_LUA_PREPROCESSOR); // Obsolete since Lua 4.0, but still in old code
			} else if (setLuaOperator.Contains(sc.ch)) {
				sc.SetState(SCE_LUA_OPERATOR);
			}
		}
	}

	sc.Complete();
}
369 | |
370 | static void FoldLuaDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *[], |
371 | Accessor &styler) { |
372 | const Sci_PositionU lengthDoc = startPos + length; |
373 | int visibleChars = 0; |
374 | Sci_Position lineCurrent = styler.GetLine(startPos); |
375 | int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; |
376 | int levelCurrent = levelPrev; |
377 | char chNext = styler[startPos]; |
378 | const bool foldCompact = styler.GetPropertyInt("fold.compact" , 1) != 0; |
379 | int style = initStyle; |
380 | int styleNext = styler.StyleAt(startPos); |
381 | |
382 | for (Sci_PositionU i = startPos; i < lengthDoc; i++) { |
383 | const char ch = chNext; |
384 | chNext = styler.SafeGetCharAt(i + 1); |
385 | const int stylePrev = style; |
386 | style = styleNext; |
387 | styleNext = styler.StyleAt(i + 1); |
388 | const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
389 | if (style == SCE_LUA_WORD) { |
390 | if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') { |
391 | char s[10] = "" ; |
392 | for (Sci_PositionU j = 0; j < 8; j++) { |
393 | if (!iswordchar(styler[i + j])) { |
394 | break; |
395 | } |
396 | s[j] = styler[i + j]; |
397 | s[j + 1] = '\0'; |
398 | } |
399 | |
400 | if ((strcmp(s, "if" ) == 0) || (strcmp(s, "do" ) == 0) || (strcmp(s, "function" ) == 0) || (strcmp(s, "repeat" ) == 0)) { |
401 | levelCurrent++; |
402 | } |
403 | if ((strcmp(s, "end" ) == 0) || (strcmp(s, "elseif" ) == 0) || (strcmp(s, "until" ) == 0)) { |
404 | levelCurrent--; |
405 | } |
406 | } |
407 | } else if (style == SCE_LUA_OPERATOR) { |
408 | if (ch == '{' || ch == '(') { |
409 | levelCurrent++; |
410 | } else if (ch == '}' || ch == ')') { |
411 | levelCurrent--; |
412 | } |
413 | } else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) { |
414 | if (stylePrev != style) { |
415 | levelCurrent++; |
416 | } else if (styleNext != style) { |
417 | levelCurrent--; |
418 | } |
419 | } |
420 | |
421 | if (atEOL) { |
422 | int lev = levelPrev; |
423 | if (visibleChars == 0 && foldCompact) { |
424 | lev |= SC_FOLDLEVELWHITEFLAG; |
425 | } |
426 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) { |
427 | lev |= SC_FOLDLEVELHEADERFLAG; |
428 | } |
429 | if (lev != styler.LevelAt(lineCurrent)) { |
430 | styler.SetLevel(lineCurrent, lev); |
431 | } |
432 | lineCurrent++; |
433 | levelPrev = levelCurrent; |
434 | visibleChars = 0; |
435 | } |
436 | if (!isspacechar(ch)) { |
437 | visibleChars++; |
438 | } |
439 | } |
440 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
441 | |
442 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
443 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
444 | } |
445 | |
// Descriptions of the eight Lua word lists, in the same order as the
// keywordlists[] entries read by ColouriseLuaDoc.
// The array is terminated by a null pointer.
static const char * const luaWordListDesc[] = {
	"Keywords",
	"Basic functions",
	"String, (table) & math functions",
	"(coroutines), I/O & system facilities",
	"user1",
	"user2",
	"user3",
	"user4",
	nullptr
};
457 | |
458 | namespace { |
459 | |
460 | LexicalClass lexicalClasses[] = { |
461 | // Lexer Lua SCLEX_LUA SCE_LUA_: |
462 | 0, "SCE_LUA_DEFAULT" , "default" , "White space: Visible only in View Whitespace mode (or if it has a back colour)" , |
463 | 1, "SCE_LUA_COMMENT" , "comment" , "Block comment (Lua 5.0)" , |
464 | 2, "SCE_LUA_COMMENTLINE" , "comment line" , "Line comment" , |
465 | 3, "SCE_LUA_COMMENTDOC" , "comment documentation" , "Doc comment -- Not used in Lua (yet?)" , |
466 | 4, "SCE_LUA_NUMBER" , "literal numeric" , "Number" , |
467 | 5, "SCE_LUA_WORD" , "keyword" , "Keyword" , |
468 | 6, "SCE_LUA_STRING" , "literal string" , "(Double quoted) String" , |
469 | 7, "SCE_LUA_CHARACTER" , "literal string character" , "Character (Single quoted string)" , |
470 | 8, "SCE_LUA_LITERALSTRING" , "literal string" , "Literal string" , |
471 | 9, "SCE_LUA_PREPROCESSOR" , "preprocessor" , "Preprocessor (obsolete in Lua 4.0 and up)" , |
472 | 10, "SCE_LUA_OPERATOR" , "operator" , "Operators" , |
473 | 11, "SCE_LUA_IDENTIFIER" , "identifier" , "Identifier (everything else...)" , |
474 | 12, "SCE_LUA_STRINGEOL" , "error literal string" , "End of line where string is not closed" , |
475 | 13, "SCE_LUA_WORD2" , "identifier" , "Other keywords" , |
476 | 14, "SCE_LUA_WORD3" , "identifier" , "Other keywords" , |
477 | 15, "SCE_LUA_WORD4" , "identifier" , "Other keywords" , |
478 | 16, "SCE_LUA_WORD5" , "identifier" , "Other keywords" , |
479 | 17, "SCE_LUA_WORD6" , "identifier" , "Other keywords" , |
480 | 18, "SCE_LUA_WORD7" , "identifier" , "Other keywords" , |
481 | 19, "SCE_LUA_WORD8" , "identifier" , "Other keywords" , |
482 | 20, "SCE_LUA_LABEL" , "label" , "Labels" , |
483 | }; |
484 | |
485 | } |
486 | |
487 | LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua" , FoldLuaDoc, luaWordListDesc, lexicalClasses, ELEMENTS(lexicalClasses)); |
488 | |