1// Scintilla source code edit control
2/** @file LexHollywood.cxx
3 ** Lexer for Hollywood
4 ** Written by Andreas Falkenhahn, based on the BlitzBasic/PureBasic/Lua lexers
5 ** Thanks to Nicholai Benalal
6 ** For more information on Hollywood, see http://www.hollywood-mal.com/
7 ** Mail me (andreas <at> airsoftsoftwair <dot> de) for any bugs.
8 ** This code is subject to the same license terms as the rest of the Scintilla project:
9 ** The License.txt file describes the conditions under which this software may be distributed.
10 **/
11
12#include <stdlib.h>
13#include <string.h>
14#include <stdio.h>
15#include <stdarg.h>
16#include <assert.h>
17#include <ctype.h>
18
19#include <string>
20#include <string_view>
21#include <map>
22#include <functional>
23
24#include "ILexer.h"
25#include "Scintilla.h"
26#include "SciLexer.h"
27
28#include "WordList.h"
29#include "LexAccessor.h"
30#include "StyleContext.h"
31#include "CharacterSet.h"
32#include "LexerModule.h"
33#include "OptionSet.h"
34#include "DefaultLexer.h"
35
36using namespace Scintilla;
37using namespace Lexilla;
38
/* Character classification bits, one table entry per 7-bit ASCII code:
 *  1 - whitespace
 *  2 - operator
 *  4 - identifier
 *  8 - decimal digit
 * 16 - hex digit
 * 32 - bin digit (no entry in the table below sets this bit)
 * 64 - letter
 *
 * The predicates further down only test bits 1, 2, 4, 8 and 16.
 */
static const int character_classification[128] =
{
	0, // NUL ($0)
	0, // SOH ($1)
	0, // STX ($2)
	0, // ETX ($3)
	0, // EOT ($4)
	0, // ENQ ($5)
	0, // ACK ($6)
	0, // BEL ($7)
	0, // BS ($8)
	1, // HT ($9)
	1, // LF ($A)
	0, // VT ($B)
	0, // FF ($C)
	1, // CR ($D)
	0, // SO ($E)
	0, // SI ($F)
	0, // DLE ($10)
	0, // DC1 ($11)
	0, // DC2 ($12)
	0, // DC3 ($13)
	0, // DC4 ($14)
	0, // NAK ($15)
	0, // SYN ($16)
	0, // ETB ($17)
	0, // CAN ($18)
	0, // EM ($19)
	0, // SUB ($1A)
	0, // ESC ($1B)
	0, // FS ($1C)
	0, // GS ($1D)
	0, // RS ($1E)
	0, // US ($1F)
	1, // space ($20)
	4, // ! ($21)
	0, // " ($22)
	0, // # ($23)
	4, // $ ($24)
	2, // % ($25)
	2, // & ($26)
	2, // ' ($27)
	2, // ( ($28)
	2, // ) ($29)
	2, // * ($2A)
	2, // + ($2B)
	2, // , ($2C)
	2, // - ($2D)
	// NB: we treat "." as an identifier although it is also an operator and a decimal digit
	// the reason why we treat it as an identifier is to support syntax highlighting for
	// plugin commands which always use a "." in their names, e.g. pdf.OpenDocument();
	// we handle the decimal digit case manually below so that 3.1415 and .123 is styled correctly
	// the collateral damage of treating "." as an identifier is that "." is never styled
	// SCE_HOLLYWOOD_OPERATOR
	4, // . ($2E)
	2, // / ($2F)
	28, // 0 ($30)
	28, // 1 ($31)
	28, // 2 ($32)
	28, // 3 ($33)
	28, // 4 ($34)
	28, // 5 ($35)
	28, // 6 ($36)
	28, // 7 ($37)
	28, // 8 ($38)
	28, // 9 ($39)
	2, // : ($3A)
	2, // ; ($3B)
	2, // < ($3C)
	2, // = ($3D)
	2, // > ($3E)
	2, // ? ($3F)
	0, // @ ($40)
	84, // A ($41)
	84, // B ($42)
	84, // C ($43)
	84, // D ($44)
	84, // E ($45)
	84, // F ($46)
	68, // G ($47)
	68, // H ($48)
	68, // I ($49)
	68, // J ($4A)
	68, // K ($4B)
	68, // L ($4C)
	68, // M ($4D)
	68, // N ($4E)
	68, // O ($4F)
	68, // P ($50)
	68, // Q ($51)
	68, // R ($52)
	68, // S ($53)
	68, // T ($54)
	68, // U ($55)
	68, // V ($56)
	68, // W ($57)
	68, // X ($58)
	68, // Y ($59)
	68, // Z ($5A)
	2, // [ ($5B)
	2, // \ ($5C)
	2, // ] ($5D)
	2, // ^ ($5E)
	68, // _ ($5F)
	2, // ` ($60)
	84, // a ($61)
	84, // b ($62)
	84, // c ($63)
	84, // d ($64)
	84, // e ($65)
	84, // f ($66)
	68, // g ($67)
	68, // h ($68)
	68, // i ($69)
	68, // j ($6A)
	68, // k ($6B)
	68, // l ($6C)
	68, // m ($6D)
	68, // n ($6E)
	68, // o ($6F)
	68, // p ($70)
	68, // q ($71)
	68, // r ($72)
	68, // s ($73)
	68, // t ($74)
	68, // u ($75)
	68, // v ($76)
	68, // w ($77)
	68, // x ($78)
	68, // y ($79)
	68, // z ($7A)
	2, // { ($7B)
	2, // | ($7C)
	2, // } ($7D)
	2, // ~ ($7E)
	0, // DEL ($7F)
};

// Shared lookup for the predicates below.
// Returns true when c is inside the table (0..127) and its entry has any bit of mask set.
// The lower bound matters: callers may pass a plain char, which is negative for
// bytes >= 0x80 on platforms where char is signed, and must not index before the table.
static bool HasCharacterClass(int c, int mask) {
	return c >= 0 && c < 128 && (character_classification[c] & mask) != 0;
}

// True for characters flagged as whitespace (bit 1).
static bool IsSpace(int c) {
	return HasCharacterClass(c, 1);
}

// True for characters flagged as operators (bit 2).
static bool IsOperator(int c) {
	return HasCharacterClass(c, 2);
}

// True for characters flagged as identifier characters (bit 4).
static bool IsIdentifier(int c) {
	return HasCharacterClass(c, 4);
}

// True for characters flagged as decimal digits (bit 8).
static bool IsDigit(int c) {
	return HasCharacterClass(c, 8);
}

// True for characters flagged as hexadecimal digits (bit 16).
static bool IsHexDigit(int c) {
	return HasCharacterClass(c, 16);
}
205
// Maps ASCII 'A'..'Z' onto 'a'..'z'; any other value is returned unchanged.
static int LowerCase(int c)
{
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? c + ('a' - 'A') : c;
}
212
// Fold-level change contributed by a token: +1 for "function", -1 for
// "endfunction", 0 for anything else. The comparison is case-sensitive.
static int CheckHollywoodFoldPoint(char const *token) {
	if (strcmp(token, "function") == 0)
		return 1;
	return (strcmp(token, "endfunction") == 0) ? -1 : 0;
}
222
223// An individual named option for use in an OptionSet
224
225// Options used for LexerHollywood
struct OptionsHollywood {
	bool fold;         // bound to the "fold" property; Fold() does nothing while false
	bool foldCompact;  // bound to the "fold.compact" property
	// Both options start out switched off.
	OptionsHollywood() : fold(false), foldCompact(false) {
	}
};
234
// Descriptions of the four keyword sets, in WordListSet() index order.
// The list is terminated by a null pointer.
static const char * const hollywoodWordListDesc[] = {
	"Hollywood keywords",
	"Hollywood standard API functions",
	"Hollywood plugin API functions",
	"Hollywood plugin methods",
	nullptr
};
242
243struct OptionSetHollywood : public OptionSet<OptionsHollywood> {
244 OptionSetHollywood(const char * const wordListDescriptions[]) {
245 DefineProperty("fold", &OptionsHollywood::fold);
246 DefineProperty("fold.compact", &OptionsHollywood::foldCompact);
247 DefineWordListSets(wordListDescriptions);
248 }
249};
250
251class LexerHollywood : public DefaultLexer {
252 int (*CheckFoldPoint)(char const *);
253 WordList keywordlists[4];
254 OptionsHollywood options;
255 OptionSetHollywood osHollywood;
256public:
257 LexerHollywood(int (*CheckFoldPoint_)(char const *), const char * const wordListDescriptions[]) :
258 DefaultLexer("hollywood", SCLEX_HOLLYWOOD),
259 CheckFoldPoint(CheckFoldPoint_),
260 osHollywood(wordListDescriptions) {
261 }
262 virtual ~LexerHollywood() {
263 }
264 void SCI_METHOD Release() override {
265 delete this;
266 }
267 int SCI_METHOD Version() const override {
268 return lvRelease5;
269 }
270 const char * SCI_METHOD PropertyNames() override {
271 return osHollywood.PropertyNames();
272 }
273 int SCI_METHOD PropertyType(const char *name) override {
274 return osHollywood.PropertyType(name);
275 }
276 const char * SCI_METHOD DescribeProperty(const char *name) override {
277 return osHollywood.DescribeProperty(name);
278 }
279 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
280 const char * SCI_METHOD PropertyGet(const char* key) override {
281 return osHollywood.PropertyGet(key);
282 }
283 const char * SCI_METHOD DescribeWordListSets() override {
284 return osHollywood.DescribeWordListSets();
285 }
286 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
287 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
288 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
289
290 void * SCI_METHOD PrivateCall(int, void *) override {
291 return 0;
292 }
293 static ILexer5 *LexerFactoryHollywood() {
294 return new LexerHollywood(CheckHollywoodFoldPoint, hollywoodWordListDesc);
295 }
296};
297
298Sci_Position SCI_METHOD LexerHollywood::PropertySet(const char *key, const char *val) {
299 if (osHollywood.PropertySet(&options, key, val)) {
300 return 0;
301 }
302 return -1;
303}
304
305Sci_Position SCI_METHOD LexerHollywood::WordListSet(int n, const char *wl) {
306 WordList *wordListN = 0;
307 switch (n) {
308 case 0:
309 wordListN = &keywordlists[0];
310 break;
311 case 1:
312 wordListN = &keywordlists[1];
313 break;
314 case 2:
315 wordListN = &keywordlists[2];
316 break;
317 case 3:
318 wordListN = &keywordlists[3];
319 break;
320 }
321 Sci_Position firstModification = -1;
322 if (wordListN) {
323 WordList wlNew;
324 wlNew.Set(wl);
325 if (*wordListN != wlNew) {
326 wordListN->Set(wl);
327 firstModification = 0;
328 }
329 }
330 return firstModification;
331}
332
// Styles the `length` characters starting at startPos, beginning in style initStyle.
// Every pass of the loop has two phases: first the current state is checked for
// termination at sc.ch, then, if the state is (now) SCE_HOLLYWOOD_DEFAULT, the
// character is examined to see whether it starts a new token.
void SCI_METHOD LexerHollywood::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	LexAccessor styler(pAccess);

	styler.StartAt(startPos);
	// Tracks whether an odd number of unescaped double quotes has been seen on the
	// current line; only consulted by the block comment / block string branches.
	bool inString = false;

	StyleContext sc(startPos, length, initStyle, styler);

	// Can't use sc.More() here else we miss the last character
	for (; ; sc.Forward())
	{
		// The quote tracking never carries over to the next line.
		if (sc.atLineStart) inString = false;

		// Toggle on every double quote that is not preceded by a backslash.
		if (sc.ch == '\"' && sc.chPrev != '\\') inString = !inString;

		if (sc.state == SCE_HOLLYWOOD_IDENTIFIER) {
			if (!IsIdentifier(sc.ch)) {
				// The identifier has ended: look it up in the keyword lists.
				char s[100];
				// Style for a hit in keywordlists[i]
				int kstates[4] = {
					SCE_HOLLYWOOD_KEYWORD,
					SCE_HOLLYWOOD_STDAPI,
					SCE_HOLLYWOOD_PLUGINAPI,
					SCE_HOLLYWOOD_PLUGINMETHOD,
				};
				sc.GetCurrentLowered(s, sizeof(s));
				// There is no break, so when several lists contain the word the
				// last matching list determines the style.
				for (int i = 0; i < 4; i++) {
					if (keywordlists[i].InList(s)) {
						sc.ChangeState(kstates[i]);
					}
				}
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_OPERATOR) {

			// always reset to default on operators because otherwise
			// comments won't be recognized in sequences like "+/* Hello*/"
			// --> "+/*" would be recognized as a sequence of operators

			// if (!IsOperator(sc.ch)) sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			sc.SetState(SCE_HOLLYWOOD_DEFAULT);

		} else if (sc.state == SCE_HOLLYWOOD_PREPROCESSOR) {
			// A preprocessor token runs as long as identifier characters follow.
			if (!IsIdentifier(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_CONSTANT) {
			// A constant runs as long as identifier characters follow.
			if (!IsIdentifier(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_NUMBER) {
			// Decimal digits and any number of '.' characters continue the number.
			if (!IsDigit(sc.ch) && sc.ch != '.')
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_HEXNUMBER) {
			if (!IsHexDigit(sc.ch))
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
		} else if (sc.state == SCE_HOLLYWOOD_STRING) {
			// NOTE(review): unlike the inString toggle above, this test does not look
			// at sc.chPrev, so a quote preceded by a backslash also ends the string
			// style - confirm this is intended.
			if (sc.ch == '"') {
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
			// A quoted string never continues past the end of the line.
			if (sc.atLineEnd) {
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_COMMENT) {
			// Line comments end with the line.
			if (sc.atLineEnd) {
				sc.SetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_COMMENTBLOCK) {
			// "*/" closes the block only while inString is false.
			if (sc.Match("*/") && !inString) {
				sc.Forward();
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
		} else if (sc.state == SCE_HOLLYWOOD_STRINGBLOCK) {
			// "]]" closes the block only while inString is false.
			if (sc.Match("]]") && !inString) {
				sc.Forward();
				sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
			}
		}

		// Second phase: decide whether the current character opens a new token.
		// The order of the tests matters (see the 0x case).
		if (sc.state == SCE_HOLLYWOOD_DEFAULT) {
			if (sc.Match(';')) {
				sc.SetState(SCE_HOLLYWOOD_COMMENT);
			} else if (sc.Match("/*")) {
				sc.SetState(SCE_HOLLYWOOD_COMMENTBLOCK);
				sc.Forward();	// step over the second opener character
			} else if (sc.Match("[[")) {
				sc.SetState(SCE_HOLLYWOOD_STRINGBLOCK);
				sc.Forward();	// step over the second opener character
			} else if (sc.Match('"')) {
				sc.SetState(SCE_HOLLYWOOD_STRING);
			} else if (sc.Match('$')) {
				sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
			} else if (sc.Match("0x") || sc.Match("0X")) { // must be before IsDigit() because of 0x
				sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
				sc.Forward();	// step over the 'x' / 'X'
			} else if (sc.ch == '.' && (sc.chNext >= '0' && sc.chNext <= '9')) { // ".1234" style numbers
				sc.SetState(SCE_HOLLYWOOD_NUMBER);
				sc.Forward();
			} else if (IsDigit(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_NUMBER);
			} else if (sc.Match('#')) {
				sc.SetState(SCE_HOLLYWOOD_CONSTANT);
			} else if (sc.Match('@')) {
				sc.SetState(SCE_HOLLYWOOD_PREPROCESSOR);
			} else if (IsOperator(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_OPERATOR);
			} else if (IsIdentifier(sc.ch)) {
				sc.SetState(SCE_HOLLYWOOD_IDENTIFIER);
			}
		}

		// Leave only after the final position has gone through both phases above.
		if (!sc.More())
			break;
	}
	sc.Complete();
}
446
447void SCI_METHOD LexerHollywood::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
448
449 if (!options.fold)
450 return;
451
452 LexAccessor styler(pAccess);
453
454 Sci_PositionU lengthDoc = startPos + length;
455 int visibleChars = 0;
456 Sci_Position lineCurrent = styler.GetLine(startPos);
457 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
458 int levelCurrent = levelPrev;
459 char chNext = styler[startPos];
460 int styleNext = styler.StyleAt(startPos);
461 int done = 0;
462 char word[256];
463 int wordlen = 0;
464
465 for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
466 char ch = chNext;
467 chNext = styler.SafeGetCharAt(i + 1);
468 int style = styleNext;
469 styleNext = styler.StyleAt(i + 1);
470 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
471 if (!done) {
472 if (wordlen) { // are we scanning a token already?
473 word[wordlen] = static_cast<char>(LowerCase(ch));
474 if (!IsIdentifier(ch)) { // done with token
475 word[wordlen] = '\0';
476 levelCurrent += CheckFoldPoint(word);
477 done = 1;
478 } else if (wordlen < 255) {
479 wordlen++;
480 }
481 } else { // start scanning at first non-whitespace character
482 if (!IsSpace(ch)) {
483 if (style != SCE_HOLLYWOOD_COMMENTBLOCK && IsIdentifier(ch)) {
484 word[0] = static_cast<char>(LowerCase(ch));
485 wordlen = 1;
486 } else // done with this line
487 done = 1;
488 }
489 }
490 }
491
492 if (atEOL) {
493 int lev = levelPrev;
494 if (visibleChars == 0 && options.foldCompact) {
495 lev |= SC_FOLDLEVELWHITEFLAG;
496 }
497 if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
498 lev |= SC_FOLDLEVELHEADERFLAG;
499 }
500 if (lev != styler.LevelAt(lineCurrent)) {
501 styler.SetLevel(lineCurrent, lev);
502 }
503 lineCurrent++;
504 levelPrev = levelCurrent;
505 visibleChars = 0;
506 done = 0;
507 wordlen = 0;
508 }
509 if (!IsSpace(ch)) {
510 visibleChars++;
511 }
512 }
513 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
514
515 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
516 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
517}
518
// Module object for the Hollywood lexer: associates SCLEX_HOLLYWOOD and the name
// "hollywood" with the lexer factory and the keyword set descriptions.
LexerModule lmHollywood(SCLEX_HOLLYWOOD, LexerHollywood::LexerFactoryHollywood, "hollywood", hollywoodWordListDesc);
520