1// Scintilla source code edit control
2/** @file LexBasic.cxx
3 ** Lexer for BlitzBasic and PureBasic.
4 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
7// The License.txt file describes the conditions under which this software may be distributed.
8
// This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics
// and derivatives. Once they diverge enough, we might want to split it into multiple
// lexers for more code clarity.
12//
13// Mail me (elias <at> users <dot> sf <dot> net) for any bugs.
14
15// Folding only works for simple things like functions or types.
16
// You may want to have a look at my ctags lexer as well if, in addition to coloring
// and folding, you need to extract things like label tags in your editor.
19
20#include <stdlib.h>
21#include <string.h>
22#include <stdio.h>
23#include <stdarg.h>
24#include <assert.h>
25#include <ctype.h>
26
27#include <string>
28#include <string_view>
29#include <map>
30#include <functional>
31
32#include "ILexer.h"
33#include "Scintilla.h"
34#include "SciLexer.h"
35
36#include "WordList.h"
37#include "LexAccessor.h"
38#include "StyleContext.h"
39#include "CharacterSet.h"
40#include "LexerModule.h"
41#include "OptionSet.h"
42#include "DefaultLexer.h"
43
44using namespace Scintilla;
45using namespace Lexilla;
46
/* Classification table for the 128 seven-bit ASCII characters, indexed by
 * character code. Each entry is a bitwise OR of these flags:
 *   1  - whitespace
 *   2  - operator
 *   4  - identifier
 *   8  - decimal digit
 *   16 - hex digit
 *   32 - bin digit
 *   64 - letter
 * Note that '.' carries both the operator and the decimal digit bit (10).
 * The table is only ever read, so it is declared const.
 */
static const int character_classification[128] =
{
	0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
	60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  68,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  0
};
67
68static bool IsSpace(int c) {
69 return c < 128 && (character_classification[c] & 1);
70}
71
72static bool IsOperator(int c) {
73 return c < 128 && (character_classification[c] & 2);
74}
75
76static bool IsIdentifier(int c) {
77 return c < 128 && (character_classification[c] & 4);
78}
79
80static bool IsDigit(int c) {
81 return c < 128 && (character_classification[c] & 8);
82}
83
84static bool IsHexDigit(int c) {
85 return c < 128 && (character_classification[c] & 16);
86}
87
88static bool IsBinDigit(int c) {
89 return c < 128 && (character_classification[c] & 32);
90}
91
92static bool IsLetter(int c) {
93 return c < 128 && (character_classification[c] & 64);
94}
95
// Maps 'A'..'Z' onto 'a'..'z'; every other value is returned unchanged.
static int LowerCase(int c)
{
	const bool isUpper = ('A' <= c) && (c <= 'Z');
	return isUpper ? ('a' + c - 'A') : c;
}
102
103static int CheckBlitzFoldPoint(char const *token, int &level) {
104 if (!strcmp(token, "function") ||
105 !strcmp(token, "type")) {
106 level |= SC_FOLDLEVELHEADERFLAG;
107 return 1;
108 }
109 if (!strcmp(token, "end function") ||
110 !strcmp(token, "end type")) {
111 return -1;
112 }
113 return 0;
114}
115
116static int CheckPureFoldPoint(char const *token, int &level) {
117 if (!strcmp(token, "procedure") ||
118 !strcmp(token, "enumeration") ||
119 !strcmp(token, "interface") ||
120 !strcmp(token, "structure")) {
121 level |= SC_FOLDLEVELHEADERFLAG;
122 return 1;
123 }
124 if (!strcmp(token, "endprocedure") ||
125 !strcmp(token, "endenumeration") ||
126 !strcmp(token, "endinterface") ||
127 !strcmp(token, "endstructure")) {
128 return -1;
129 }
130 return 0;
131}
132
133static int CheckFreeFoldPoint(char const *token, int &level) {
134 if (!strcmp(token, "function") ||
135 !strcmp(token, "sub") ||
136 !strcmp(token, "enum") ||
137 !strcmp(token, "type") ||
138 !strcmp(token, "union") ||
139 !strcmp(token, "property") ||
140 !strcmp(token, "destructor") ||
141 !strcmp(token, "constructor")) {
142 level |= SC_FOLDLEVELHEADERFLAG;
143 return 1;
144 }
145 if (!strcmp(token, "end function") ||
146 !strcmp(token, "end sub") ||
147 !strcmp(token, "end enum") ||
148 !strcmp(token, "end type") ||
149 !strcmp(token, "end union") ||
150 !strcmp(token, "end property") ||
151 !strcmp(token, "end destructor") ||
152 !strcmp(token, "end constructor")) {
153 return -1;
154 }
155 return 0;
156}
157
158// An individual named option for use in an OptionSet
159
// Option values used by LexerBasic. The defaults enable syntax based and
// compact folding once folding itself is switched on, and leave the explicit
// fold marker strings empty.
struct OptionsBasic {
	bool fold;
	bool foldSyntaxBased;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldCompact;
	OptionsBasic() :
		fold(false),
		foldSyntaxBased(true),
		foldCommentExplicit(false),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
179
// Null-terminated descriptions of the four BlitzBasic word lists.
static const char * const blitzbasicWordListDesc[] = {
	"BlitzBasic Keywords",
	"user1",
	"user2",
	"user3",
	nullptr
};
187
// Null-terminated descriptions of the four PureBasic word lists.
static const char * const purebasicWordListDesc[] = {
	"PureBasic Keywords",
	"PureBasic PreProcessor Keywords",
	"user defined 1",
	"user defined 2",
	nullptr
};
195
// Null-terminated descriptions of the four FreeBasic word lists.
static const char * const freebasicWordListDesc[] = {
	"FreeBasic Keywords",
	"FreeBasic PreProcessor Keywords",
	"user defined 1",
	"user defined 2",
	nullptr
};
203
204struct OptionSetBasic : public OptionSet<OptionsBasic> {
205 OptionSetBasic(const char * const wordListDescriptions[]) {
206 DefineProperty("fold", &OptionsBasic::fold);
207
208 DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased,
209 "Set this property to 0 to disable syntax based folding.");
210
211 DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit,
212 "This option enables folding explicit fold points when using the Basic lexer. "
213 "Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start "
214 "and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded.");
215
216 DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart,
217 "The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB).");
218
219 DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd,
220 "The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB).");
221
222 DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere,
223 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
224
225 DefineProperty("fold.compact", &OptionsBasic::foldCompact);
226
227 DefineWordListSets(wordListDescriptions);
228 }
229};
230
231class LexerBasic : public DefaultLexer {
232 char comment_char;
233 int (*CheckFoldPoint)(char const *, int &);
234 WordList keywordlists[4];
235 OptionsBasic options;
236 OptionSetBasic osBasic;
237public:
238 LexerBasic(const char *languageName_, int language_, char comment_char_,
239 int (*CheckFoldPoint_)(char const *, int &), const char * const wordListDescriptions[]) :
240 DefaultLexer(languageName_, language_),
241 comment_char(comment_char_),
242 CheckFoldPoint(CheckFoldPoint_),
243 osBasic(wordListDescriptions) {
244 }
245 virtual ~LexerBasic() {
246 }
247 void SCI_METHOD Release() override {
248 delete this;
249 }
250 int SCI_METHOD Version() const override {
251 return lvRelease5;
252 }
253 const char * SCI_METHOD PropertyNames() override {
254 return osBasic.PropertyNames();
255 }
256 int SCI_METHOD PropertyType(const char *name) override {
257 return osBasic.PropertyType(name);
258 }
259 const char * SCI_METHOD DescribeProperty(const char *name) override {
260 return osBasic.DescribeProperty(name);
261 }
262 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
263 const char * SCI_METHOD PropertyGet(const char *key) override {
264 return osBasic.PropertyGet(key);
265 }
266 const char * SCI_METHOD DescribeWordListSets() override {
267 return osBasic.DescribeWordListSets();
268 }
269 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
270 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
271 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
272
273 void * SCI_METHOD PrivateCall(int, void *) override {
274 return 0;
275 }
276 static ILexer5 *LexerFactoryBlitzBasic() {
277 return new LexerBasic("blitzbasic", SCLEX_BLITZBASIC, ';', CheckBlitzFoldPoint, blitzbasicWordListDesc);
278 }
279 static ILexer5 *LexerFactoryPureBasic() {
280 return new LexerBasic("purebasic", SCLEX_PUREBASIC, ';', CheckPureFoldPoint, purebasicWordListDesc);
281 }
282 static ILexer5 *LexerFactoryFreeBasic() {
283 return new LexerBasic("freebasic", SCLEX_FREEBASIC, '\'', CheckFreeFoldPoint, freebasicWordListDesc );
284 }
285};
286
287Sci_Position SCI_METHOD LexerBasic::PropertySet(const char *key, const char *val) {
288 if (osBasic.PropertySet(&options, key, val)) {
289 return 0;
290 }
291 return -1;
292}
293
294Sci_Position SCI_METHOD LexerBasic::WordListSet(int n, const char *wl) {
295 WordList *wordListN = 0;
296 switch (n) {
297 case 0:
298 wordListN = &keywordlists[0];
299 break;
300 case 1:
301 wordListN = &keywordlists[1];
302 break;
303 case 2:
304 wordListN = &keywordlists[2];
305 break;
306 case 3:
307 wordListN = &keywordlists[3];
308 break;
309 }
310 Sci_Position firstModification = -1;
311 if (wordListN) {
312 WordList wlNew;
313 wlNew.Set(wl);
314 if (*wordListN != wlNew) {
315 wordListN->Set(wl);
316 firstModification = 0;
317 }
318 }
319 return firstModification;
320}
321
322void SCI_METHOD LexerBasic::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
323 LexAccessor styler(pAccess);
324
325 bool wasfirst = true, isfirst = true; // true if first token in a line
326 styler.StartAt(startPos);
327 int styleBeforeKeyword = SCE_B_DEFAULT;
328
329 StyleContext sc(startPos, length, initStyle, styler);
330
331 // Can't use sc.More() here else we miss the last character
332 for (; ; sc.Forward()) {
333 if (sc.state == SCE_B_IDENTIFIER) {
334 if (!IsIdentifier(sc.ch)) {
335 // Labels
336 if (wasfirst && sc.Match(':')) {
337 sc.ChangeState(SCE_B_LABEL);
338 sc.ForwardSetState(SCE_B_DEFAULT);
339 } else {
340 char s[100];
341 int kstates[4] = {
342 SCE_B_KEYWORD,
343 SCE_B_KEYWORD2,
344 SCE_B_KEYWORD3,
345 SCE_B_KEYWORD4,
346 };
347 sc.GetCurrentLowered(s, sizeof(s));
348 for (int i = 0; i < 4; i++) {
349 if (keywordlists[i].InList(s)) {
350 sc.ChangeState(kstates[i]);
351 }
352 }
353 // Types, must set them as operator else they will be
354 // matched as number/constant
355 if (sc.Match('.') || sc.Match('$') || sc.Match('%') ||
356 sc.Match('#')) {
357 sc.SetState(SCE_B_OPERATOR);
358 } else {
359 sc.SetState(SCE_B_DEFAULT);
360 }
361 }
362 }
363 } else if (sc.state == SCE_B_OPERATOR) {
364 if (!IsOperator(sc.ch) || sc.Match('#'))
365 sc.SetState(SCE_B_DEFAULT);
366 } else if (sc.state == SCE_B_LABEL) {
367 if (!IsIdentifier(sc.ch))
368 sc.SetState(SCE_B_DEFAULT);
369 } else if (sc.state == SCE_B_CONSTANT) {
370 if (!IsIdentifier(sc.ch))
371 sc.SetState(SCE_B_DEFAULT);
372 } else if (sc.state == SCE_B_NUMBER) {
373 if (!IsDigit(sc.ch))
374 sc.SetState(SCE_B_DEFAULT);
375 } else if (sc.state == SCE_B_HEXNUMBER) {
376 if (!IsHexDigit(sc.ch))
377 sc.SetState(SCE_B_DEFAULT);
378 } else if (sc.state == SCE_B_BINNUMBER) {
379 if (!IsBinDigit(sc.ch))
380 sc.SetState(SCE_B_DEFAULT);
381 } else if (sc.state == SCE_B_STRING) {
382 if (sc.ch == '"') {
383 sc.ForwardSetState(SCE_B_DEFAULT);
384 }
385 if (sc.atLineEnd) {
386 sc.ChangeState(SCE_B_ERROR);
387 sc.SetState(SCE_B_DEFAULT);
388 }
389 } else if (sc.state == SCE_B_COMMENT || sc.state == SCE_B_PREPROCESSOR) {
390 if (sc.atLineEnd) {
391 sc.SetState(SCE_B_DEFAULT);
392 }
393 } else if (sc.state == SCE_B_DOCLINE) {
394 if (sc.atLineEnd) {
395 sc.SetState(SCE_B_DEFAULT);
396 } else if (sc.ch == '\\' || sc.ch == '@') {
397 if (IsLetter(sc.chNext) && sc.chPrev != '\\') {
398 styleBeforeKeyword = sc.state;
399 sc.SetState(SCE_B_DOCKEYWORD);
400 };
401 }
402 } else if (sc.state == SCE_B_DOCKEYWORD) {
403 if (IsSpace(sc.ch)) {
404 sc.SetState(styleBeforeKeyword);
405 } else if (sc.atLineEnd && styleBeforeKeyword == SCE_B_DOCLINE) {
406 sc.SetState(SCE_B_DEFAULT);
407 }
408 } else if (sc.state == SCE_B_COMMENTBLOCK) {
409 if (sc.Match("\'/")) {
410 sc.Forward();
411 sc.ForwardSetState(SCE_B_DEFAULT);
412 }
413 } else if (sc.state == SCE_B_DOCBLOCK) {
414 if (sc.Match("\'/")) {
415 sc.Forward();
416 sc.ForwardSetState(SCE_B_DEFAULT);
417 } else if (sc.ch == '\\' || sc.ch == '@') {
418 if (IsLetter(sc.chNext) && sc.chPrev != '\\') {
419 styleBeforeKeyword = sc.state;
420 sc.SetState(SCE_B_DOCKEYWORD);
421 };
422 }
423 }
424
425 if (sc.atLineStart)
426 isfirst = true;
427
428 if (sc.state == SCE_B_DEFAULT || sc.state == SCE_B_ERROR) {
429 if (isfirst && sc.Match('.') && comment_char != '\'') {
430 sc.SetState(SCE_B_LABEL);
431 } else if (isfirst && sc.Match('#')) {
432 wasfirst = isfirst;
433 sc.SetState(SCE_B_IDENTIFIER);
434 } else if (sc.Match(comment_char)) {
435 // Hack to make deprecated QBASIC '$Include show
436 // up in freebasic with SCE_B_PREPROCESSOR.
437 if (comment_char == '\'' && sc.Match(comment_char, '$'))
438 sc.SetState(SCE_B_PREPROCESSOR);
439 else if (sc.Match("\'*") || sc.Match("\'!")) {
440 sc.SetState(SCE_B_DOCLINE);
441 } else {
442 sc.SetState(SCE_B_COMMENT);
443 }
444 } else if (sc.Match("/\'")) {
445 if (sc.Match("/\'*") || sc.Match("/\'!")) { // Support of gtk-doc/Doxygen doc. style
446 sc.SetState(SCE_B_DOCBLOCK);
447 } else {
448 sc.SetState(SCE_B_COMMENTBLOCK);
449 }
450 sc.Forward(); // Eat the ' so it isn't used for the end of the comment
451 } else if (sc.Match('"')) {
452 sc.SetState(SCE_B_STRING);
453 } else if (IsDigit(sc.ch)) {
454 sc.SetState(SCE_B_NUMBER);
455 } else if (sc.Match('$') || sc.Match("&h") || sc.Match("&H") || sc.Match("&o") || sc.Match("&O")) {
456 sc.SetState(SCE_B_HEXNUMBER);
457 } else if (sc.Match('%') || sc.Match("&b") || sc.Match("&B")) {
458 sc.SetState(SCE_B_BINNUMBER);
459 } else if (sc.Match('#')) {
460 sc.SetState(SCE_B_CONSTANT);
461 } else if (IsOperator(sc.ch)) {
462 sc.SetState(SCE_B_OPERATOR);
463 } else if (IsIdentifier(sc.ch)) {
464 wasfirst = isfirst;
465 sc.SetState(SCE_B_IDENTIFIER);
466 } else if (!IsSpace(sc.ch)) {
467 sc.SetState(SCE_B_ERROR);
468 }
469 }
470
471 if (!IsSpace(sc.ch))
472 isfirst = false;
473
474 if (!sc.More())
475 break;
476 }
477 sc.Complete();
478}
479
480
481void SCI_METHOD LexerBasic::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
482
483 if (!options.fold)
484 return;
485
486 LexAccessor styler(pAccess);
487
488 Sci_Position line = styler.GetLine(startPos);
489 int level = styler.LevelAt(line);
490 int go = 0, done = 0;
491 Sci_Position endPos = startPos + length;
492 char word[256];
493 int wordlen = 0;
494 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
495 int cNext = styler[startPos];
496
497 // Scan for tokens at the start of the line (they may include
498 // whitespace, for tokens like "End Function"
499 for (Sci_Position i = startPos; i < endPos; i++) {
500 int c = cNext;
501 cNext = styler.SafeGetCharAt(i + 1);
502 bool atEOL = (c == '\r' && cNext != '\n') || (c == '\n');
503 if (options.foldSyntaxBased && !done && !go) {
504 if (wordlen) { // are we scanning a token already?
505 word[wordlen] = static_cast<char>(LowerCase(c));
506 if (!IsIdentifier(c)) { // done with token
507 word[wordlen] = '\0';
508 go = CheckFoldPoint(word, level);
509 if (!go) {
510 // Treat any whitespace as single blank, for
511 // things like "End Function".
512 if (IsSpace(c) && IsIdentifier(word[wordlen - 1])) {
513 word[wordlen] = ' ';
514 if (wordlen < 255)
515 wordlen++;
516 }
517 else // done with this line
518 done = 1;
519 }
520 } else if (wordlen < 255) {
521 wordlen++;
522 }
523 } else { // start scanning at first non-whitespace character
524 if (!IsSpace(c)) {
525 if (IsIdentifier(c)) {
526 word[0] = static_cast<char>(LowerCase(c));
527 wordlen = 1;
528 } else // done with this line
529 done = 1;
530 }
531 }
532 }
533 if (options.foldCommentExplicit && ((styler.StyleAt(i) == SCE_B_COMMENT) || options.foldExplicitAnywhere)) {
534 if (userDefinedFoldMarkers) {
535 if (styler.Match(i, options.foldExplicitStart.c_str())) {
536 level |= SC_FOLDLEVELHEADERFLAG;
537 go = 1;
538 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
539 go = -1;
540 }
541 } else {
542 if (c == comment_char) {
543 if (cNext == '{') {
544 level |= SC_FOLDLEVELHEADERFLAG;
545 go = 1;
546 } else if (cNext == '}') {
547 go = -1;
548 }
549 }
550 }
551 }
552 if (atEOL) { // line end
553 if (!done && wordlen == 0 && options.foldCompact) // line was only space
554 level |= SC_FOLDLEVELWHITEFLAG;
555 if (level != styler.LevelAt(line))
556 styler.SetLevel(line, level);
557 level += go;
558 line++;
559 // reset state
560 wordlen = 0;
561 level &= ~SC_FOLDLEVELHEADERFLAG;
562 level &= ~SC_FOLDLEVELWHITEFLAG;
563 go = 0;
564 done = 0;
565 }
566 }
567}
568
569LexerModule lmBlitzBasic(SCLEX_BLITZBASIC, LexerBasic::LexerFactoryBlitzBasic, "blitzbasic", blitzbasicWordListDesc);
570
571LexerModule lmPureBasic(SCLEX_PUREBASIC, LexerBasic::LexerFactoryPureBasic, "purebasic", purebasicWordListDesc);
572
573LexerModule lmFreeBasic(SCLEX_FREEBASIC, LexerBasic::LexerFactoryFreeBasic, "freebasic", freebasicWordListDesc);
574