1// Scintilla source code edit control
2/** @file LexLua.cxx
3 ** Lexer for Lua language.
4 **
5 ** Written by Paul Winwood.
6 ** Folder by Alexey Yutkin.
7 ** Modified by Marcos E. Wurzius & Philippe Lhoste
8 **/
9
10#include <stdlib.h>
11#include <string.h>
12#include <stdio.h>
13#include <stdarg.h>
14#include <assert.h>
15#include <ctype.h>
16
17#include <string>
18#include <string_view>
19
20#include "ILexer.h"
21#include "Scintilla.h"
22#include "SciLexer.h"
23
24#include "StringCopy.h"
25#include "WordList.h"
26#include "LexAccessor.h"
27#include "Accessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31
32using namespace Lexilla;
33
34// Test for [=[ ... ]=] delimiters, returns 0 if it's only a [ or ],
35// return 1 for [[ or ]], returns >=2 for [=[ or ]=] and so on.
36// The maximum number of '=' characters allowed is 254.
37static int LongDelimCheck(StyleContext &sc) {
38 int sep = 1;
39 while (sc.GetRelative(sep) == '=' && sep < 0xFF)
40 sep++;
41 if (sc.GetRelative(sep) == sc.ch)
42 return sep;
43 return 0;
44}
45
46static void ColouriseLuaDoc(
47 Sci_PositionU startPos,
48 Sci_Position length,
49 int initStyle,
50 WordList *keywordlists[],
51 Accessor &styler) {
52
53 const WordList &keywords = *keywordlists[0];
54 const WordList &keywords2 = *keywordlists[1];
55 const WordList &keywords3 = *keywordlists[2];
56 const WordList &keywords4 = *keywordlists[3];
57 const WordList &keywords5 = *keywordlists[4];
58 const WordList &keywords6 = *keywordlists[5];
59 const WordList &keywords7 = *keywordlists[6];
60 const WordList &keywords8 = *keywordlists[7];
61
62 // Accepts accented characters
63 CharacterSet setWordStart(CharacterSet::setAlpha, "_", true);
64 CharacterSet setWord(CharacterSet::setAlphaNum, "_", true);
65 // Not exactly following number definition (several dots are seen as OK, etc.)
66 // but probably enough in most cases. [pP] is for hex floats.
67 CharacterSet setNumber(CharacterSet::setDigits, ".-+abcdefpABCDEFP");
68 CharacterSet setExponent(CharacterSet::setNone, "eEpP");
69 CharacterSet setLuaOperator(CharacterSet::setNone, "*/-+()={}~[];<>,.^%:#&|");
70 CharacterSet setEscapeSkip(CharacterSet::setNone, "\"'\\");
71
72 Sci_Position currentLine = styler.GetLine(startPos);
73 // Initialize long string [[ ... ]] or block comment --[[ ... ]],
74 // if we are inside such a string. Block comment was introduced in Lua 5.0,
75 // blocks with separators [=[ ... ]=] in Lua 5.1.
76 // Continuation of a string (\z whitespace escaping) is controlled by stringWs.
77 int sepCount = 0;
78 int stringWs = 0;
79 if (initStyle == SCE_LUA_LITERALSTRING || initStyle == SCE_LUA_COMMENT ||
80 initStyle == SCE_LUA_STRING || initStyle == SCE_LUA_CHARACTER) {
81 const int lineState = styler.GetLineState(currentLine - 1);
82 sepCount = lineState & 0xFF;
83 stringWs = lineState & 0x100;
84 }
85
86 // results of identifier/keyword matching
87 Sci_Position idenPos = 0;
88 Sci_Position idenWordPos = 0;
89 int idenStyle = SCE_LUA_IDENTIFIER;
90 bool foundGoto = false;
91
92 // Do not leak onto next line
93 if (initStyle == SCE_LUA_STRINGEOL || initStyle == SCE_LUA_COMMENTLINE || initStyle == SCE_LUA_PREPROCESSOR) {
94 initStyle = SCE_LUA_DEFAULT;
95 }
96
97 StyleContext sc(startPos, length, initStyle, styler);
98 if (startPos == 0 && sc.ch == '#' && sc.chNext == '!') {
99 // shbang line: "#!" is a comment only if located at the start of the script
100 sc.SetState(SCE_LUA_COMMENTLINE);
101 }
102 for (; sc.More(); sc.Forward()) {
103 if (sc.atLineEnd) {
104 // Update the line state, so it can be seen by next line
105 currentLine = styler.GetLine(sc.currentPos);
106 switch (sc.state) {
107 case SCE_LUA_LITERALSTRING:
108 case SCE_LUA_COMMENT:
109 case SCE_LUA_STRING:
110 case SCE_LUA_CHARACTER:
111 // Inside a literal string, block comment or string, we set the line state
112 styler.SetLineState(currentLine, stringWs | sepCount);
113 break;
114 default:
115 // Reset the line state
116 styler.SetLineState(currentLine, 0);
117 break;
118 }
119 }
120 if (sc.atLineStart && (sc.state == SCE_LUA_STRING)) {
121 // Prevent SCE_LUA_STRINGEOL from leaking back to previous line
122 sc.SetState(SCE_LUA_STRING);
123 }
124
125 // Handle string line continuation
126 if ((sc.state == SCE_LUA_STRING || sc.state == SCE_LUA_CHARACTER) &&
127 sc.ch == '\\') {
128 if (sc.chNext == '\n' || sc.chNext == '\r') {
129 sc.Forward();
130 if (sc.ch == '\r' && sc.chNext == '\n') {
131 sc.Forward();
132 }
133 continue;
134 }
135 }
136
137 // Determine if the current state should terminate.
138 if (sc.state == SCE_LUA_OPERATOR) {
139 if (sc.ch == ':' && sc.chPrev == ':') { // :: <label> :: forward scan
140 sc.Forward();
141 Sci_Position ln = 0;
142 while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
143 ln++;
144 Sci_Position ws1 = ln;
145 if (setWordStart.Contains(sc.GetRelative(ln))) {
146 int c, i = 0;
147 char s[100];
148 while (setWord.Contains(c = sc.GetRelative(ln))) { // get potential label
149 if (i < 90)
150 s[i++] = static_cast<char>(c);
151 ln++;
152 }
153 s[i] = '\0'; Sci_Position lbl = ln;
154 if (!keywords.InList(s)) {
155 while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs
156 ln++;
157 Sci_Position ws2 = ln - lbl;
158 if (sc.GetRelative(ln) == ':' && sc.GetRelative(ln + 1) == ':') {
159 // final :: found, complete valid label construct
160 sc.ChangeState(SCE_LUA_LABEL);
161 if (ws1) {
162 sc.SetState(SCE_LUA_DEFAULT);
163 sc.ForwardBytes(ws1);
164 }
165 sc.SetState(SCE_LUA_LABEL);
166 sc.ForwardBytes(lbl - ws1);
167 if (ws2) {
168 sc.SetState(SCE_LUA_DEFAULT);
169 sc.ForwardBytes(ws2);
170 }
171 sc.SetState(SCE_LUA_LABEL);
172 sc.ForwardBytes(2);
173 }
174 }
175 }
176 }
177 sc.SetState(SCE_LUA_DEFAULT);
178 } else if (sc.state == SCE_LUA_NUMBER) {
179 // We stop the number definition on non-numerical non-dot non-eEpP non-sign non-hexdigit char
180 if (!setNumber.Contains(sc.ch)) {
181 sc.SetState(SCE_LUA_DEFAULT);
182 } else if (sc.ch == '-' || sc.ch == '+') {
183 if (!setExponent.Contains(sc.chPrev))
184 sc.SetState(SCE_LUA_DEFAULT);
185 }
186 } else if (sc.state == SCE_LUA_IDENTIFIER) {
187 idenPos--; // commit already-scanned identitier/word parts
188 if (idenWordPos > 0) {
189 idenWordPos--;
190 sc.ChangeState(idenStyle);
191 sc.ForwardBytes(idenWordPos);
192 idenPos -= idenWordPos;
193 if (idenPos > 0) {
194 sc.SetState(SCE_LUA_IDENTIFIER);
195 sc.ForwardBytes(idenPos);
196 }
197 } else {
198 sc.ForwardBytes(idenPos);
199 }
200 sc.SetState(SCE_LUA_DEFAULT);
201 if (foundGoto) { // goto <label> forward scan
202 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
203 sc.Forward();
204 if (setWordStart.Contains(sc.ch)) {
205 sc.SetState(SCE_LUA_LABEL);
206 sc.Forward();
207 while (setWord.Contains(sc.ch))
208 sc.Forward();
209 char s[100];
210 sc.GetCurrent(s, sizeof(s));
211 if (keywords.InList(s)) // labels cannot be keywords
212 sc.ChangeState(SCE_LUA_WORD);
213 }
214 sc.SetState(SCE_LUA_DEFAULT);
215 }
216 } else if (sc.state == SCE_LUA_COMMENTLINE || sc.state == SCE_LUA_PREPROCESSOR) {
217 if (sc.atLineEnd) {
218 sc.ForwardSetState(SCE_LUA_DEFAULT);
219 }
220 } else if (sc.state == SCE_LUA_STRING) {
221 if (stringWs) {
222 if (!IsASpace(sc.ch))
223 stringWs = 0;
224 }
225 if (sc.ch == '\\') {
226 if (setEscapeSkip.Contains(sc.chNext)) {
227 sc.Forward();
228 } else if (sc.chNext == 'z') {
229 sc.Forward();
230 stringWs = 0x100;
231 }
232 } else if (sc.ch == '\"') {
233 sc.ForwardSetState(SCE_LUA_DEFAULT);
234 } else if (stringWs == 0 && sc.atLineEnd) {
235 sc.ChangeState(SCE_LUA_STRINGEOL);
236 sc.ForwardSetState(SCE_LUA_DEFAULT);
237 }
238 } else if (sc.state == SCE_LUA_CHARACTER) {
239 if (stringWs) {
240 if (!IsASpace(sc.ch))
241 stringWs = 0;
242 }
243 if (sc.ch == '\\') {
244 if (setEscapeSkip.Contains(sc.chNext)) {
245 sc.Forward();
246 } else if (sc.chNext == 'z') {
247 sc.Forward();
248 stringWs = 0x100;
249 }
250 } else if (sc.ch == '\'') {
251 sc.ForwardSetState(SCE_LUA_DEFAULT);
252 } else if (stringWs == 0 && sc.atLineEnd) {
253 sc.ChangeState(SCE_LUA_STRINGEOL);
254 sc.ForwardSetState(SCE_LUA_DEFAULT);
255 }
256 } else if (sc.ch == ']' && (sc.state == SCE_LUA_LITERALSTRING || sc.state == SCE_LUA_COMMENT)) {
257 const int sep = LongDelimCheck(sc);
258 if (sep == sepCount) { // ]=]-style delim
259 sc.Forward(sep);
260 sc.ForwardSetState(SCE_LUA_DEFAULT);
261 }
262 }
263
264 // Determine if a new state should be entered.
265 if (sc.state == SCE_LUA_DEFAULT) {
266 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
267 sc.SetState(SCE_LUA_NUMBER);
268 if (sc.ch == '0' && toupper(sc.chNext) == 'X') {
269 sc.Forward();
270 }
271 } else if (setWordStart.Contains(sc.ch)) {
272 // For matching various identifiers with dots and colons, multiple
273 // matches are done as identifier segments are added. Longest match is
274 // set to a word style. The non-matched part is in identifier style.
275 std::string ident;
276 idenPos = 0;
277 idenWordPos = 0;
278 idenStyle = SCE_LUA_IDENTIFIER;
279 foundGoto = false;
280 int cNext;
281 do {
282 int c;
283 const Sci_Position idenPosOld = idenPos;
284 std::string identSeg;
285 identSeg += static_cast<char>(sc.GetRelative(idenPos++));
286 while (setWord.Contains(c = sc.GetRelative(idenPos))) {
287 identSeg += static_cast<char>(c);
288 idenPos++;
289 }
290 if (keywords.InList(identSeg.c_str()) && (idenPosOld > 0)) {
291 idenPos = idenPosOld - 1; // keywords cannot mix
292 ident.pop_back();
293 break;
294 }
295 ident += identSeg;
296 const char* s = ident.c_str();
297 int newStyle = SCE_LUA_IDENTIFIER;
298 if (keywords.InList(s)) {
299 newStyle = SCE_LUA_WORD;
300 } else if (keywords2.InList(s)) {
301 newStyle = SCE_LUA_WORD2;
302 } else if (keywords3.InList(s)) {
303 newStyle = SCE_LUA_WORD3;
304 } else if (keywords4.InList(s)) {
305 newStyle = SCE_LUA_WORD4;
306 } else if (keywords5.InList(s)) {
307 newStyle = SCE_LUA_WORD5;
308 } else if (keywords6.InList(s)) {
309 newStyle = SCE_LUA_WORD6;
310 } else if (keywords7.InList(s)) {
311 newStyle = SCE_LUA_WORD7;
312 } else if (keywords8.InList(s)) {
313 newStyle = SCE_LUA_WORD8;
314 }
315 if (newStyle != SCE_LUA_IDENTIFIER) {
316 idenStyle = newStyle;
317 idenWordPos = idenPos;
318 }
319 if (idenStyle == SCE_LUA_WORD) // keywords cannot mix
320 break;
321 cNext = sc.GetRelative(idenPos + 1);
322 if ((c == '.' || c == ':') && setWordStart.Contains(cNext)) {
323 ident += static_cast<char>(c);
324 idenPos++;
325 } else {
326 cNext = 0;
327 }
328 } while (cNext);
329 if ((idenStyle == SCE_LUA_WORD) && (ident.compare("goto") == 0)) {
330 foundGoto = true;
331 }
332 sc.SetState(SCE_LUA_IDENTIFIER);
333 } else if (sc.ch == '\"') {
334 sc.SetState(SCE_LUA_STRING);
335 stringWs = 0;
336 } else if (sc.ch == '\'') {
337 sc.SetState(SCE_LUA_CHARACTER);
338 stringWs = 0;
339 } else if (sc.ch == '[') {
340 sepCount = LongDelimCheck(sc);
341 if (sepCount == 0) {
342 sc.SetState(SCE_LUA_OPERATOR);
343 } else {
344 sc.SetState(SCE_LUA_LITERALSTRING);
345 sc.Forward(sepCount);
346 }
347 } else if (sc.Match('-', '-')) {
348 sc.SetState(SCE_LUA_COMMENTLINE);
349 if (sc.Match("--[")) {
350 sc.Forward(2);
351 sepCount = LongDelimCheck(sc);
352 if (sepCount > 0) {
353 sc.ChangeState(SCE_LUA_COMMENT);
354 sc.Forward(sepCount);
355 }
356 } else {
357 sc.Forward();
358 }
359 } else if (sc.atLineStart && sc.Match('$')) {
360 sc.SetState(SCE_LUA_PREPROCESSOR); // Obsolete since Lua 4.0, but still in old code
361 } else if (setLuaOperator.Contains(sc.ch)) {
362 sc.SetState(SCE_LUA_OPERATOR);
363 }
364 }
365 }
366
367 sc.Complete();
368}
369
370static void FoldLuaDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *[],
371 Accessor &styler) {
372 const Sci_PositionU lengthDoc = startPos + length;
373 int visibleChars = 0;
374 Sci_Position lineCurrent = styler.GetLine(startPos);
375 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
376 int levelCurrent = levelPrev;
377 char chNext = styler[startPos];
378 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
379 int style = initStyle;
380 int styleNext = styler.StyleAt(startPos);
381
382 for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
383 const char ch = chNext;
384 chNext = styler.SafeGetCharAt(i + 1);
385 const int stylePrev = style;
386 style = styleNext;
387 styleNext = styler.StyleAt(i + 1);
388 const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
389 if (style == SCE_LUA_WORD) {
390 if (ch == 'i' || ch == 'd' || ch == 'f' || ch == 'e' || ch == 'r' || ch == 'u') {
391 char s[10] = "";
392 for (Sci_PositionU j = 0; j < 8; j++) {
393 if (!iswordchar(styler[i + j])) {
394 break;
395 }
396 s[j] = styler[i + j];
397 s[j + 1] = '\0';
398 }
399
400 if ((strcmp(s, "if") == 0) || (strcmp(s, "do") == 0) || (strcmp(s, "function") == 0) || (strcmp(s, "repeat") == 0)) {
401 levelCurrent++;
402 }
403 if ((strcmp(s, "end") == 0) || (strcmp(s, "elseif") == 0) || (strcmp(s, "until") == 0)) {
404 levelCurrent--;
405 }
406 }
407 } else if (style == SCE_LUA_OPERATOR) {
408 if (ch == '{' || ch == '(') {
409 levelCurrent++;
410 } else if (ch == '}' || ch == ')') {
411 levelCurrent--;
412 }
413 } else if (style == SCE_LUA_LITERALSTRING || style == SCE_LUA_COMMENT) {
414 if (stylePrev != style) {
415 levelCurrent++;
416 } else if (styleNext != style) {
417 levelCurrent--;
418 }
419 }
420
421 if (atEOL) {
422 int lev = levelPrev;
423 if (visibleChars == 0 && foldCompact) {
424 lev |= SC_FOLDLEVELWHITEFLAG;
425 }
426 if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
427 lev |= SC_FOLDLEVELHEADERFLAG;
428 }
429 if (lev != styler.LevelAt(lineCurrent)) {
430 styler.SetLevel(lineCurrent, lev);
431 }
432 lineCurrent++;
433 levelPrev = levelCurrent;
434 visibleChars = 0;
435 }
436 if (!isspacechar(ch)) {
437 visibleChars++;
438 }
439 }
440 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
441
442 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
443 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
444}
445
// Descriptions of the eight keyword lists accepted by the Lua lexer, in list
// order, followed by a null terminator.
static const char * const luaWordListDesc[] = {
	"Keywords",                                 // list 0
	"Basic functions",                          // list 1
	"String, (table) & math functions",         // list 2
	"(coroutines), I/O & system facilities",    // list 3
	"user1",                                    // list 4
	"user2",                                    // list 5
	"user3",                                    // list 6
	"user4",                                    // list 7
	nullptr                                     // end-of-list marker
};
457
458namespace {
459
460LexicalClass lexicalClasses[] = {
461 // Lexer Lua SCLEX_LUA SCE_LUA_:
462 0, "SCE_LUA_DEFAULT", "default", "White space: Visible only in View Whitespace mode (or if it has a back colour)",
463 1, "SCE_LUA_COMMENT", "comment", "Block comment (Lua 5.0)",
464 2, "SCE_LUA_COMMENTLINE", "comment line", "Line comment",
465 3, "SCE_LUA_COMMENTDOC", "comment documentation", "Doc comment -- Not used in Lua (yet?)",
466 4, "SCE_LUA_NUMBER", "literal numeric", "Number",
467 5, "SCE_LUA_WORD", "keyword", "Keyword",
468 6, "SCE_LUA_STRING", "literal string", "(Double quoted) String",
469 7, "SCE_LUA_CHARACTER", "literal string character", "Character (Single quoted string)",
470 8, "SCE_LUA_LITERALSTRING", "literal string", "Literal string",
471 9, "SCE_LUA_PREPROCESSOR", "preprocessor", "Preprocessor (obsolete in Lua 4.0 and up)",
472 10, "SCE_LUA_OPERATOR", "operator", "Operators",
473 11, "SCE_LUA_IDENTIFIER", "identifier", "Identifier (everything else...)",
474 12, "SCE_LUA_STRINGEOL", "error literal string", "End of line where string is not closed",
475 13, "SCE_LUA_WORD2", "identifier", "Other keywords",
476 14, "SCE_LUA_WORD3", "identifier", "Other keywords",
477 15, "SCE_LUA_WORD4", "identifier", "Other keywords",
478 16, "SCE_LUA_WORD5", "identifier", "Other keywords",
479 17, "SCE_LUA_WORD6", "identifier", "Other keywords",
480 18, "SCE_LUA_WORD7", "identifier", "Other keywords",
481 19, "SCE_LUA_WORD8", "identifier", "Other keywords",
482 20, "SCE_LUA_LABEL", "label", "Labels",
483};
484
485}
486
// Lexer module for Lua: ties the SCLEX_LUA id and the name "lua" to the
// colouriser, the folder, the word-list descriptions and the lexical class
// table defined in this file.
LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc, luaWordListDesc, lexicalClasses, ELEMENTS(lexicalClasses));
488