1 | // Scintilla source code edit control |
2 | /** @file LexHollywood.cxx |
3 | ** Lexer for Hollywood |
4 | ** Written by Andreas Falkenhahn, based on the BlitzBasic/PureBasic/Lua lexers |
5 | ** Thanks to Nicholai Benalal |
6 | ** For more information on Hollywood, see http://www.hollywood-mal.com/ |
7 | ** Mail me (andreas <at> airsoftsoftwair <dot> de) for any bugs. |
8 | ** This code is subject to the same license terms as the rest of the Scintilla project: |
9 | ** The License.txt file describes the conditions under which this software may be distributed. |
10 | **/ |
11 | |
12 | #include <stdlib.h> |
13 | #include <string.h> |
14 | #include <stdio.h> |
15 | #include <stdarg.h> |
16 | #include <assert.h> |
17 | #include <ctype.h> |
18 | |
19 | #include <string> |
20 | #include <string_view> |
21 | #include <map> |
22 | #include <functional> |
23 | |
24 | #include "ILexer.h" |
25 | #include "Scintilla.h" |
26 | #include "SciLexer.h" |
27 | |
28 | #include "WordList.h" |
29 | #include "LexAccessor.h" |
30 | #include "StyleContext.h" |
31 | #include "CharacterSet.h" |
32 | #include "LexerModule.h" |
33 | #include "OptionSet.h" |
34 | #include "DefaultLexer.h" |
35 | |
36 | using namespace Scintilla; |
37 | using namespace Lexilla; |
38 | |
/* Character classification table for the 7-bit ASCII range (indices 0..127).
 * Each entry is a combination of the following bits:
 *   1  - whitespace
 *   2  - operator
 *   4  - identifier
 *   8  - decimal digit
 *   16 - hex digit
 *   32 - bin digit (reserved: no entry in the table currently sets this bit)
 *   64 - letter
 */
static const int character_classification[128] =
{
	0, // NUL ($0)
	0, // SOH ($1)
	0, // STX ($2)
	0, // ETX ($3)
	0, // EOT ($4)
	0, // ENQ ($5)
	0, // ACK ($6)
	0, // BEL ($7)
	0, // BS ($8)
	1, // HT ($9)
	1, // LF ($A)
	0, // VT ($B)
	0, // FF ($C)
	1, // CR ($D)
	0, // SO ($E)
	0, // SI ($F)
	0, // DLE ($10)
	0, // DC1 ($11)
	0, // DC2 ($12)
	0, // DC3 ($13)
	0, // DC4 ($14)
	0, // NAK ($15)
	0, // SYN ($16)
	0, // ETB ($17)
	0, // CAN ($18)
	0, // EM ($19)
	0, // SUB ($1A)
	0, // ESC ($1B)
	0, // FS ($1C)
	0, // GS ($1D)
	0, // RS ($1E)
	0, // US ($1F)
	1, // space ($20)
	4, // ! ($21)
	0, // " ($22)
	0, // # ($23)
	4, // $ ($24)
	2, // % ($25)
	2, // & ($26)
	2, // ' ($27)
	2, // ( ($28)
	2, // ) ($29)
	2, // * ($2A)
	2, // + ($2B)
	2, // , ($2C)
	2, // - ($2D)
	// NB: "." is classified as an identifier character although it is also an
	// operator and part of decimal numbers. This is what allows plugin commands,
	// which always contain a "." in their names (e.g. pdf.OpenDocument()), to be
	// looked up as a single word. Numbers such as 3.1415 and .123 are handled
	// explicitly in the lexer. The collateral damage is that "." is never styled
	// as SCE_HOLLYWOOD_OPERATOR.
	4, // . ($2E)
	2, // / ($2F)
	28, // 0 ($30)
	28, // 1 ($31)
	28, // 2 ($32)
	28, // 3 ($33)
	28, // 4 ($34)
	28, // 5 ($35)
	28, // 6 ($36)
	28, // 7 ($37)
	28, // 8 ($38)
	28, // 9 ($39)
	2, // : ($3A)
	2, // ; ($3B)
	2, // < ($3C)
	2, // = ($3D)
	2, // > ($3E)
	2, // ? ($3F)
	0, // @ ($40)
	84, // A ($41)
	84, // B ($42)
	84, // C ($43)
	84, // D ($44)
	84, // E ($45)
	84, // F ($46)
	68, // G ($47)
	68, // H ($48)
	68, // I ($49)
	68, // J ($4A)
	68, // K ($4B)
	68, // L ($4C)
	68, // M ($4D)
	68, // N ($4E)
	68, // O ($4F)
	68, // P ($50)
	68, // Q ($51)
	68, // R ($52)
	68, // S ($53)
	68, // T ($54)
	68, // U ($55)
	68, // V ($56)
	68, // W ($57)
	68, // X ($58)
	68, // Y ($59)
	68, // Z ($5A)
	2, // [ ($5B)
	2, // \ ($5C)
	2, // ] ($5D)
	2, // ^ ($5E)
	68, // _ ($5F)
	2, // ` ($60)
	84, // a ($61)
	84, // b ($62)
	84, // c ($63)
	84, // d ($64)
	84, // e ($65)
	84, // f ($66)
	68, // g ($67)
	68, // h ($68)
	68, // i ($69)
	68, // j ($6A)
	68, // k ($6B)
	68, // l ($6C)
	68, // m ($6D)
	68, // n ($6E)
	68, // o ($6F)
	68, // p ($70)
	68, // q ($71)
	68, // r ($72)
	68, // s ($73)
	68, // t ($74)
	68, // u ($75)
	68, // v ($76)
	68, // w ($77)
	68, // x ($78)
	68, // y ($79)
	68, // z ($7A)
	2, // { ($7B)
	2, // | ($7C)
	2, // } ($7D)
	2, // ~ ($7E)
	0, // DEL ($7F)
};

// Returns the classification bits for c, or 0 when c is outside the table.
// The lower bound matters: a plain char holding a byte >= 0x80 becomes a
// negative int on platforms where char is signed, and would otherwise be
// used as a negative array index.
static int ClassifyChar(int c) {
	return (c >= 0 && c < 128) ? character_classification[c] : 0;
}

// True for characters flagged as whitespace (HT, LF, CR and space).
static bool IsSpace(int c) {
	return (ClassifyChar(c) & 1) != 0;
}

// True for characters flagged as operators.
static bool IsOperator(int c) {
	return (ClassifyChar(c) & 2) != 0;
}

// True for characters that may appear in an identifier (this includes
// digits, '.', '!', '$' and '_').
static bool IsIdentifier(int c) {
	return (ClassifyChar(c) & 4) != 0;
}

// True for the decimal digits '0'..'9'.
static bool IsDigit(int c) {
	return (ClassifyChar(c) & 8) != 0;
}

// True for '0'..'9', 'a'..'f' and 'A'..'F'.
static bool IsHexDigit(int c) {
	return (ClassifyChar(c) & 16) != 0;
}
205 | |
// Maps the ASCII letters 'A'..'Z' to 'a'..'z'; any other value is returned unchanged.
static int LowerCase(int c)
{
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? c + ('a' - 'A') : c;
}
212 | |
// Returns the fold level change caused by a token: +1 for "function",
// -1 for "endfunction" and 0 for anything else. The comparison is exact
// (case-sensitive), so callers are expected to pass an already lower-cased token.
static int CheckHollywoodFoldPoint(char const *token) {
	const bool opensFold = (strcmp(token, "function") == 0);
	if (opensFold)
		return 1;
	const bool closesFold = (strcmp(token, "endfunction") == 0);
	return closesFold ? -1 : 0;
}
222 | |
223 | // An individual named option for use in an OptionSet |
224 | |
// Options used for LexerHollywood. Both options start out disabled.
struct OptionsHollywood {
	bool fold;         // when false, Fold() returns without touching any fold level
	bool foldCompact;  // when true, Fold() flags lines without visible characters with SC_FOLDLEVELWHITEFLAG
	OptionsHollywood() : fold(false), foldCompact(false) {
	}
};
234 | |
// Human-readable descriptions of the four word lists, in word list index
// order. The array is terminated by a null pointer.
static const char * const hollywoodWordListDesc[] = {
	"Hollywood keywords",
	"Hollywood standard API functions",
	"Hollywood plugin API functions",
	"Hollywood plugin methods",
	nullptr
};
242 | |
243 | struct OptionSetHollywood : public OptionSet<OptionsHollywood> { |
244 | OptionSetHollywood(const char * const wordListDescriptions[]) { |
245 | DefineProperty("fold" , &OptionsHollywood::fold); |
246 | DefineProperty("fold.compact" , &OptionsHollywood::foldCompact); |
247 | DefineWordListSets(wordListDescriptions); |
248 | } |
249 | }; |
250 | |
251 | class LexerHollywood : public DefaultLexer { |
252 | int (*CheckFoldPoint)(char const *); |
253 | WordList keywordlists[4]; |
254 | OptionsHollywood options; |
255 | OptionSetHollywood osHollywood; |
256 | public: |
257 | LexerHollywood(int (*CheckFoldPoint_)(char const *), const char * const wordListDescriptions[]) : |
258 | DefaultLexer("hollywood" , SCLEX_HOLLYWOOD), |
259 | CheckFoldPoint(CheckFoldPoint_), |
260 | osHollywood(wordListDescriptions) { |
261 | } |
262 | virtual ~LexerHollywood() { |
263 | } |
264 | void SCI_METHOD Release() override { |
265 | delete this; |
266 | } |
267 | int SCI_METHOD Version() const override { |
268 | return lvRelease5; |
269 | } |
270 | const char * SCI_METHOD PropertyNames() override { |
271 | return osHollywood.PropertyNames(); |
272 | } |
273 | int SCI_METHOD PropertyType(const char *name) override { |
274 | return osHollywood.PropertyType(name); |
275 | } |
276 | const char * SCI_METHOD DescribeProperty(const char *name) override { |
277 | return osHollywood.DescribeProperty(name); |
278 | } |
279 | Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; |
280 | const char * SCI_METHOD PropertyGet(const char* key) override { |
281 | return osHollywood.PropertyGet(key); |
282 | } |
283 | const char * SCI_METHOD DescribeWordListSets() override { |
284 | return osHollywood.DescribeWordListSets(); |
285 | } |
286 | Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; |
287 | void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
288 | void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; |
289 | |
290 | void * SCI_METHOD PrivateCall(int, void *) override { |
291 | return 0; |
292 | } |
293 | static ILexer5 *LexerFactoryHollywood() { |
294 | return new LexerHollywood(CheckHollywoodFoldPoint, hollywoodWordListDesc); |
295 | } |
296 | }; |
297 | |
298 | Sci_Position SCI_METHOD LexerHollywood::PropertySet(const char *key, const char *val) { |
299 | if (osHollywood.PropertySet(&options, key, val)) { |
300 | return 0; |
301 | } |
302 | return -1; |
303 | } |
304 | |
305 | Sci_Position SCI_METHOD LexerHollywood::WordListSet(int n, const char *wl) { |
306 | WordList *wordListN = 0; |
307 | switch (n) { |
308 | case 0: |
309 | wordListN = &keywordlists[0]; |
310 | break; |
311 | case 1: |
312 | wordListN = &keywordlists[1]; |
313 | break; |
314 | case 2: |
315 | wordListN = &keywordlists[2]; |
316 | break; |
317 | case 3: |
318 | wordListN = &keywordlists[3]; |
319 | break; |
320 | } |
321 | Sci_Position firstModification = -1; |
322 | if (wordListN) { |
323 | WordList wlNew; |
324 | wlNew.Set(wl); |
325 | if (*wordListN != wlNew) { |
326 | wordListN->Set(wl); |
327 | firstModification = 0; |
328 | } |
329 | } |
330 | return firstModification; |
331 | } |
332 | |
// Styles the text range [startPos, startPos + length), starting in initStyle.
//
// Each loop iteration looks at one character and works in two steps:
//   1. if the current state is not DEFAULT, decide whether that state ends at
//      this character;
//   2. if the state is (now) DEFAULT, decide whether this character starts a
//      new state.
void SCI_METHOD LexerHollywood::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	LexAccessor styler(pAccess);

	styler.StartAt(startPos);
	// Toggled on every double quote that is not preceded by a backslash and
	// reset at each line start, independently of the current style. It is only
	// consulted below to decide whether "*/" and "]]" may close a block.
	bool inString = false;

	StyleContext sc(startPos, length, initStyle, styler);

	// Can't use sc.More() here else we miss the last character
	for (; ; sc.Forward())
	{
		if (sc.atLineStart) inString = false;

		if (sc.ch == '\"' && sc.chPrev != '\\') inString = !inString;

		// Step 1: check whether the current state ends here.
		if (sc.state == SCE_HOLLYWOOD_IDENTIFIER) {
			if (!IsIdentifier(sc.ch)) {
				char s[100];
				// Style applied when the word is found in keywordlists[i].
				int kstates[4] = {
					SCE_HOLLYWOOD_KEYWORD,
					SCE_HOLLYWOOD_STDAPI,
					SCE_HOLLYWOOD_PLUGINAPI,
					SCE_HOLLYWOOD_PLUGINMETHOD,
				};
				sc.GetCurrentLowered(s, sizeof(s));
				// All four lists are checked without breaking out of the loop,
				// so a word present in several lists gets the style of the
				// last list that contains it.
				for (int i = 0; i < 4; i++) {
					if (keywordlists[i].InList(s)) {
						sc.ChangeState(kstates[i]);
					}
				}
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_OPERATOR) {

			// always reset to default on operators because otherwise
			// comments won't be recognized in sequences like "+/* Hello*/"
			// --> "+/*" would be recognized as a sequence of operators

			// if (!IsOperator(sc.ch)) sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			sc.SetState(SCE_HOLLYWOOD_DEFAULT);

		} else if (sc.state == SCE_HOLLYWOOD_PREPROCESSOR) {
			// Entered on '@'; lasts while identifier characters follow.
			if (!IsIdentifier(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_CONSTANT) {
			// Entered on '#'; lasts while identifier characters follow.
			if (!IsIdentifier(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_NUMBER) {
			// Decimal digits and any number of '.' characters continue the number.
			if (!IsDigit(sc.ch) && sc.ch != '.')
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_HEXNUMBER) {
			if (!IsHexDigit(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_STRING) {
			// NOTE(review): unlike the inString tracking above, this test does
			// not look at sc.chPrev, so a quote preceded by a backslash also
			// ends the string style - confirm whether that is intended.
			if (sc.ch == '"') {
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
			// Strings never continue past the end of the line.
			if (sc.atLineEnd) {
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_COMMENT) {
			// Line comments run to the end of the line.
			if (sc.atLineEnd) {
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_COMMENTBLOCK) {
			// "*/" closes the comment unless an unmatched quote was seen on this line.
			if (sc.Match("*/" ) && !inString) {
				sc.Forward();
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_STRINGBLOCK) {
			// "]]" closes the block string unless an unmatched quote was seen on this line.
			if (sc.Match("]]" ) && !inString) {
				sc.Forward();
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
		}

		// Step 2: in the default state, check whether a new state starts here.
		// The order of the tests matters: comment, string and number prefixes
		// are tested before the generic operator and identifier classes.
		if (sc.state == SCE_HOLLYWOOD_DEFAULT) {
			if (sc.Match(';')) {
				sc.SetState(SCE_HOLLYWOOD_COMMENT);
			} else if (sc.Match("/*" )) {
				sc.SetState(SCE_HOLLYWOOD_COMMENTBLOCK);
				// Step onto the '*' so the opener's '*' cannot pair with a following '/'.
				sc.Forward();
			} else if (sc.Match("[[" )) {
				sc.SetState(SCE_HOLLYWOOD_STRINGBLOCK);
				sc.Forward();
			} else if (sc.Match('"')) {
				sc.SetState(SCE_HOLLYWOOD_STRING);
			} else if (sc.Match('$')) {
				// '$' introduces a hexadecimal number.
				sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
			} else if (sc.Match("0x" ) || sc.Match("0X" )) { // must be before IsDigit() because of 0x
				sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
				sc.Forward();
			} else if (sc.ch == '.' && (sc.chNext >= '0' && sc.chNext <= '9')) { // ".1234" style numbers
				sc.SetState(SCE_HOLLYWOOD_NUMBER);
				sc.Forward();
			} else if (IsDigit(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_NUMBER);
			} else if (sc.Match('#')) {
				sc.SetState(SCE_HOLLYWOOD_CONSTANT);
			} else if (sc.Match('@')) {
				sc.SetState(SCE_HOLLYWOOD_PREPROCESSOR);
			} else if (IsOperator(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_OPERATOR);
			} else if (IsIdentifier(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_IDENTIFIER);
			}
		}

		// The end-of-range test sits at the bottom so that the loop body also
		// runs once for the position after the last character.
		if (!sc.More())
			break;
	}
	sc.Complete();
}
446 | |
447 | void SCI_METHOD LexerHollywood::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) { |
448 | |
449 | if (!options.fold) |
450 | return; |
451 | |
452 | LexAccessor styler(pAccess); |
453 | |
454 | Sci_PositionU lengthDoc = startPos + length; |
455 | int visibleChars = 0; |
456 | Sci_Position lineCurrent = styler.GetLine(startPos); |
457 | int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; |
458 | int levelCurrent = levelPrev; |
459 | char chNext = styler[startPos]; |
460 | int styleNext = styler.StyleAt(startPos); |
461 | int done = 0; |
462 | char word[256]; |
463 | int wordlen = 0; |
464 | |
465 | for (Sci_PositionU i = startPos; i < lengthDoc; i++) { |
466 | char ch = chNext; |
467 | chNext = styler.SafeGetCharAt(i + 1); |
468 | int style = styleNext; |
469 | styleNext = styler.StyleAt(i + 1); |
470 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
471 | if (!done) { |
472 | if (wordlen) { // are we scanning a token already? |
473 | word[wordlen] = static_cast<char>(LowerCase(ch)); |
474 | if (!IsIdentifier(ch)) { // done with token |
475 | word[wordlen] = '\0'; |
476 | levelCurrent += CheckFoldPoint(word); |
477 | done = 1; |
478 | } else if (wordlen < 255) { |
479 | wordlen++; |
480 | } |
481 | } else { // start scanning at first non-whitespace character |
482 | if (!IsSpace(ch)) { |
483 | if (style != SCE_HOLLYWOOD_COMMENTBLOCK && IsIdentifier(ch)) { |
484 | word[0] = static_cast<char>(LowerCase(ch)); |
485 | wordlen = 1; |
486 | } else // done with this line |
487 | done = 1; |
488 | } |
489 | } |
490 | } |
491 | |
492 | if (atEOL) { |
493 | int lev = levelPrev; |
494 | if (visibleChars == 0 && options.foldCompact) { |
495 | lev |= SC_FOLDLEVELWHITEFLAG; |
496 | } |
497 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) { |
498 | lev |= SC_FOLDLEVELHEADERFLAG; |
499 | } |
500 | if (lev != styler.LevelAt(lineCurrent)) { |
501 | styler.SetLevel(lineCurrent, lev); |
502 | } |
503 | lineCurrent++; |
504 | levelPrev = levelCurrent; |
505 | visibleChars = 0; |
506 | done = 0; |
507 | wordlen = 0; |
508 | } |
509 | if (!IsSpace(ch)) { |
510 | visibleChars++; |
511 | } |
512 | } |
513 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
514 | |
515 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
516 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
517 | } |
518 | |
// Module descriptor for the Hollywood lexer: ties the id SCLEX_HOLLYWOOD and
// the name "hollywood" to the lexer factory and the word list descriptions.
LexerModule lmHollywood(SCLEX_HOLLYWOOD, LexerHollywood::LexerFactoryHollywood, "hollywood" , hollywoodWordListDesc);
520 | |