LexBasic.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexBasic.cxx]

1	// Scintilla source code edit control
/** @file LexBasic.cxx
 ** Lexer for BlitzBasic and PureBasic.
 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
6	// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
7	// The License.txt file describes the conditions under which this software may be distributed.
8
// This tries to be a unified Lexer/Folder for all the BlitzBasic/BlitzMax/PureBasic basics
// and derivatives. Once they diverge enough, we might want to split it into multiple
// lexers for more code clarity.
12	//
13	// Mail me (elias <at> users <dot> sf <dot> net) for any bugs.
14
15	// Folding only works for simple things like functions or types.
16
// You may want to have a look at my ctags lexer as well if, in addition to coloring
// and folding, you need to extract things like label tags in your editor.
19
20	#include <stdlib.h>
21	#include <string.h>
22	#include <stdio.h>
23	#include <stdarg.h>
24	#include <assert.h>
25	#include <ctype.h>
26
27	#include <string>
28	#include <string_view>
29	#include <map>
30	#include <functional>
31
32	#include "ILexer.h"
33	#include "Scintilla.h"
34	#include "SciLexer.h"
35
36	#include "WordList.h"
37	#include "LexAccessor.h"
38	#include "StyleContext.h"
39	#include "CharacterSet.h"
40	#include "LexerModule.h"
41	#include "OptionSet.h"
42	#include "DefaultLexer.h"
43
44	using namespace Scintilla;
45	using namespace Lexilla;
46
/* Bits:
 * 1 - whitespace
 * 2 - operator
 * 4 - identifier
 * 8 - decimal digit
 * 16 - hex digit
 * 32 - bin digit
 * 64 - letter
 */
// Classification bit mask for each 7-bit ASCII code; only ever read.
static const int character_classification[128] =
{
	0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  10, 2,
	60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  68,
	2,  84, 84, 84, 84, 84, 84, 68, 68, 68, 68, 68, 68, 68, 68, 68,
	68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2,  2,  2,  2,  0
};

// Returns true when c is inside the table and has any bit of mask set.
// The lower bound matters: a negative c (for example a byte >= 0x80 held in
// a signed char and widened to int) would otherwise index before the table.
static bool HasCharacterClass(int c, int mask) {
	return c >= 0 && c < 128 && (character_classification[c] & mask) != 0;
}

// True for characters carrying the whitespace bit (1).
static bool IsSpace(int c) {
	return HasCharacterClass(c, 1);
}

// True for characters carrying the operator bit (2).
static bool IsOperator(int c) {
	return HasCharacterClass(c, 2);
}

// True for characters carrying the identifier bit (4).
static bool IsIdentifier(int c) {
	return HasCharacterClass(c, 4);
}

// True for characters carrying the decimal digit bit (8); note '.' has it too.
static bool IsDigit(int c) {
	return HasCharacterClass(c, 8);
}

// True for characters carrying the hex digit bit (16).
static bool IsHexDigit(int c) {
	return HasCharacterClass(c, 16);
}

// True for characters carrying the binary digit bit (32).
static bool IsBinDigit(int c) {
	return HasCharacterClass(c, 32);
}

// True for characters carrying the letter bit (64); note '_' has it too.
static bool IsLetter(int c) {
	return HasCharacterClass(c, 64);
}
95
// Maps ASCII 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static int LowerCase(int c)
{
	const bool isUpperAscii = (c >= 'A') && (c <= 'Z');
	return isUpperAscii ? ('a' + c - 'A') : c;
}
102
103	static int CheckBlitzFoldPoint(char const token, int* &level) {
104	if (!strcmp(token, "function") \|\|
105	!strcmp(token, "type")) {
106	level \|= SC_FOLDLEVELHEADERFLAG;
107	return `1`;
108	}
109	if (!strcmp(token, "end function") \|\|
110	!strcmp(token, "end type")) {
111	return -`1`;
112	}
113	return `0`;
114	}
115
116	static int CheckPureFoldPoint(char const token, int* &level) {
117	if (!strcmp(token, "procedure") \|\|
118	!strcmp(token, "enumeration") \|\|
119	!strcmp(token, "interface") \|\|
120	!strcmp(token, "structure")) {
121	level \|= SC_FOLDLEVELHEADERFLAG;
122	return `1`;
123	}
124	if (!strcmp(token, "endprocedure") \|\|
125	!strcmp(token, "endenumeration") \|\|
126	!strcmp(token, "endinterface") \|\|
127	!strcmp(token, "endstructure")) {
128	return -`1`;
129	}
130	return `0`;
131	}
132
133	static int CheckFreeFoldPoint(char const token, int* &level) {
134	if (!strcmp(token, "function") \|\|
135	!strcmp(token, "sub") \|\|
136	!strcmp(token, "enum") \|\|
137	!strcmp(token, "type") \|\|
138	!strcmp(token, "union") \|\|
139	!strcmp(token, "property") \|\|
140	!strcmp(token, "destructor") \|\|
141	!strcmp(token, "constructor")) {
142	level \|= SC_FOLDLEVELHEADERFLAG;
143	return `1`;
144	}
145	if (!strcmp(token, "end function") \|\|
146	!strcmp(token, "end sub") \|\|
147	!strcmp(token, "end enum") \|\|
148	!strcmp(token, "end type") \|\|
149	!strcmp(token, "end union") \|\|
150	!strcmp(token, "end property") \|\|
151	!strcmp(token, "end destructor") \|\|
152	!strcmp(token, "end constructor")) {
153	return -`1`;
154	}
155	return `0`;
156	}
157
158	// An individual named option for use in an OptionSet
159
// Option values for LexerBasic, each initialised to its default.
struct OptionsBasic {
	bool fold = false;
	bool foldSyntaxBased = true;
	bool foldCommentExplicit = false;
	// Empty strings: Fold() only uses user-defined markers when both are non-empty.
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere = false;
	bool foldCompact = true;
};
179
// Null-terminated word list descriptions for the BlitzBasic lexer.
static const char * const blitzbasicWordListDesc[] = {
	"BlitzBasic Keywords",
	"user1",
	"user2",
	"user3",
	nullptr
};

// Null-terminated word list descriptions for the PureBasic lexer.
static const char * const purebasicWordListDesc[] = {
	"PureBasic Keywords",
	"PureBasic PreProcessor Keywords",
	"user defined 1",
	"user defined 2",
	nullptr
};

// Null-terminated word list descriptions for the FreeBasic lexer.
static const char * const freebasicWordListDesc[] = {
	"FreeBasic Keywords",
	"FreeBasic PreProcessor Keywords",
	"user defined 1",
	"user defined 2",
	nullptr
};
203
204	struct OptionSetBasic : public OptionSet<OptionsBasic> {
205	OptionSetBasic(const char * const wordListDescriptions[]) {
206	DefineProperty("fold", &OptionsBasic::fold);
207
208	DefineProperty("fold.basic.syntax.based", &OptionsBasic::foldSyntaxBased,
209	"Set this property to 0 to disable syntax based folding.");
210
211	DefineProperty("fold.basic.comment.explicit", &OptionsBasic::foldCommentExplicit,
212	"This option enables folding explicit fold points when using the Basic lexer. "
213	"Explicit fold points allows adding extra folding by placing a ;{ (BB/PB) or '{ (FB) comment at the start "
214	"and a ;} (BB/PB) or '} (FB) at the end of a section that should be folded.");
215
216	DefineProperty("fold.basic.explicit.start", &OptionsBasic::foldExplicitStart,
217	"The string to use for explicit fold start points, replacing the standard ;{ (BB/PB) or '{ (FB).");
218
219	DefineProperty("fold.basic.explicit.end", &OptionsBasic::foldExplicitEnd,
220	"The string to use for explicit fold end points, replacing the standard ;} (BB/PB) or '} (FB).");
221
222	DefineProperty("fold.basic.explicit.anywhere", &OptionsBasic::foldExplicitAnywhere,
223	"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
224
225	DefineProperty("fold.compact", &OptionsBasic::foldCompact);
226
227	DefineWordListSets(wordListDescriptions);
228	}
229	};
230
231	class LexerBasic : public DefaultLexer {
232	char comment_char;
233	int (CheckFoldPoint)(char* const , int* &);
234	WordList keywordlists[`4`];
235	OptionsBasic options;
236	OptionSetBasic osBasic;
237	public:
238	LexerBasic(const char languageName_, int* language_, char comment_char_,
239	int (CheckFoldPoint_)(char* const , int* &), const char * const wordListDescriptions[]) :
240	DefaultLexer (languageName_, language_),
241	comment_char(comment_char_),
242	CheckFoldPoint(CheckFoldPoint_),
243	osBasic (wordListDescriptions) {
244	}
245	virtual ~LexerBasic() {
246	}
247	void SCI_METHOD Release() override {
248	delete this;
249	}
250	int SCI_METHOD Version() const override {
251	return lvRelease5;
252	}
253	const char * SCI_METHOD PropertyNames() override {
254	return osBasic.PropertyNames();
255	}
256	int SCI_METHOD PropertyType(const char *name) override {
257	return osBasic.PropertyType(name);
258	}
259	const char * SCI_METHOD DescribeProperty(const char *name) override {
260	return osBasic.DescribeProperty(name);
261	}
262	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override;
263	const char * SCI_METHOD PropertyGet(const char *key) override {
264	return osBasic.PropertyGet(key);
265	}
266	const char * SCI_METHOD DescribeWordListSets() override {
267	return osBasic.DescribeWordListSets();
268	}
269	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
270	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
271	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
272
273	void * SCI_METHOD PrivateCall(int, void *) override {
274	return `0`;
275	}
276	static ILexer5 *LexerFactoryBlitzBasic() {
277	return new LexerBasic ("blitzbasic", SCLEX_BLITZBASIC, `';'`, CheckBlitzFoldPoint, blitzbasicWordListDesc);
278	}
279	static ILexer5 *LexerFactoryPureBasic() {
280	return new LexerBasic ("purebasic", SCLEX_PUREBASIC, `';'`, CheckPureFoldPoint, purebasicWordListDesc);
281	}
282	static ILexer5 *LexerFactoryFreeBasic() {
283	return new LexerBasic ("freebasic", SCLEX_FREEBASIC, `'\''`, CheckFreeFoldPoint, freebasicWordListDesc );
284	}
285	};
286
287	Sci_Position SCI_METHOD LexerBasic::PropertySet(const char key, const* char *val) {
288	if (osBasic.PropertySet(&options, key, val)) {
289	return `0`;
290	}
291	return -`1`;
292	}
293
294	Sci_Position SCI_METHOD LexerBasic::WordListSet(int n, const char *wl) {
295	WordList *wordListN = `0`;
296	switch (n) {
297	case `0`:
298	wordListN = &keywordlists[`0`];
299	break;
300	case `1`:
301	wordListN = &keywordlists[`1`];
302	break;
303	case `2`:
304	wordListN = &keywordlists[`2`];
305	break;
306	case `3`:
307	wordListN = &keywordlists[`3`];
308	break;
309	}
310	Sci_Position firstModification = -`1`;
311	if (wordListN) {
312	WordList wlNew;
313	wlNew.Set(wl);
314	if (*wordListN != wlNew) {
315	wordListN->Set(wl);
316	firstModification = `0`;
317	}
318	}
319	return firstModification;
320	}
321
322	void SCI_METHOD LexerBasic::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
323	LexAccessor styler(pAccess);
324
325	bool wasfirst = true, isfirst = true; // true if first token in a line
326	styler.StartAt(startPos);
327	int styleBeforeKeyword = SCE_B_DEFAULT;
328
329	StyleContext sc(startPos, length, initStyle, styler);
330
331	// Can't use sc.More() here else we miss the last character
332	for (; ; sc.Forward()) {
333	if (sc.state == SCE_B_IDENTIFIER) {
334	if (!IsIdentifier(sc.ch)) {
335	// Labels
336	if (wasfirst && sc.Match(`':'`)) {
337	sc.ChangeState(SCE_B_LABEL);
338	sc.ForwardSetState(SCE_B_DEFAULT);
339	} else {
340	char s[`100`];
341	int kstates[`4`] = {
342	SCE_B_KEYWORD,
343	SCE_B_KEYWORD2,
344	SCE_B_KEYWORD3,
345	SCE_B_KEYWORD4,
346	};
347	sc.GetCurrentLowered(s, sizeof(s));
348	for (int i = `0`; i < `4`; i++) {
349	if (keywordlists[i].InList(s)) {
350	sc.ChangeState(kstates[i]);
351	}
352	}
353	// Types, must set them as operator else they will be
354	// matched as number/constant
355	if (sc.Match(`'.'`) \|\| sc.Match(`'$'`) \|\| sc.Match(`'%'`) \|\|
356	sc.Match(`'#'`)) {
357	sc.SetState(SCE_B_OPERATOR);
358	} else {
359	sc.SetState(SCE_B_DEFAULT);
360	}
361	}
362	}
363	} else if (sc.state == SCE_B_OPERATOR) {
364	if (!IsOperator(sc.ch) \|\| sc.Match(`'#'`))
365	sc.SetState(SCE_B_DEFAULT);
366	} else if (sc.state == SCE_B_LABEL) {
367	if (!IsIdentifier(sc.ch))
368	sc.SetState(SCE_B_DEFAULT);
369	} else if (sc.state == SCE_B_CONSTANT) {
370	if (!IsIdentifier(sc.ch))
371	sc.SetState(SCE_B_DEFAULT);
372	} else if (sc.state == SCE_B_NUMBER) {
373	if (!IsDigit(sc.ch))
374	sc.SetState(SCE_B_DEFAULT);
375	} else if (sc.state == SCE_B_HEXNUMBER) {
376	if (!IsHexDigit(sc.ch))
377	sc.SetState(SCE_B_DEFAULT);
378	} else if (sc.state == SCE_B_BINNUMBER) {
379	if (!IsBinDigit(sc.ch))
380	sc.SetState(SCE_B_DEFAULT);
381	} else if (sc.state == SCE_B_STRING) {
382	if (sc.ch == `'"'`) {
383	sc.ForwardSetState(SCE_B_DEFAULT);
384	}
385	if (sc.atLineEnd) {
386	sc.ChangeState(SCE_B_ERROR);
387	sc.SetState(SCE_B_DEFAULT);
388	}
389	} else if (sc.state == SCE_B_COMMENT \|\| sc.state == SCE_B_PREPROCESSOR) {
390	if (sc.atLineEnd) {
391	sc.SetState(SCE_B_DEFAULT);
392	}
393	} else if (sc.state == SCE_B_DOCLINE) {
394	if (sc.atLineEnd) {
395	sc.SetState(SCE_B_DEFAULT);
396	} else if (sc.ch == `'\\'` \|\| sc.ch == `'@'`) {
397	if (IsLetter(sc.chNext) && sc.chPrev != `'\\'`) {
398	styleBeforeKeyword = sc.state;
399	sc.SetState(SCE_B_DOCKEYWORD);
400	};
401	}
402	} else if (sc.state == SCE_B_DOCKEYWORD) {
403	if (IsSpace(sc.ch)) {
404	sc.SetState(styleBeforeKeyword);
405	} else if (sc.atLineEnd && styleBeforeKeyword == SCE_B_DOCLINE) {
406	sc.SetState(SCE_B_DEFAULT);
407	}
408	} else if (sc.state == SCE_B_COMMENTBLOCK) {
409	if (sc.Match("\'/")) {
410	sc.Forward();
411	sc.ForwardSetState(SCE_B_DEFAULT);
412	}
413	} else if (sc.state == SCE_B_DOCBLOCK) {
414	if (sc.Match("\'/")) {
415	sc.Forward();
416	sc.ForwardSetState(SCE_B_DEFAULT);
417	} else if (sc.ch == `'\\'` \|\| sc.ch == `'@'`) {
418	if (IsLetter(sc.chNext) && sc.chPrev != `'\\'`) {
419	styleBeforeKeyword = sc.state;
420	sc.SetState(SCE_B_DOCKEYWORD);
421	};
422	}
423	}
424
425	if (sc.atLineStart)
426	isfirst = true;
427
428	if (sc.state == SCE_B_DEFAULT \|\| sc.state == SCE_B_ERROR) {
429	if (isfirst && sc.Match(`'.'`) && comment_char != `'\''`) {
430	sc.SetState(SCE_B_LABEL);
431	} else if (isfirst && sc.Match(`'#'`)) {
432	wasfirst = isfirst;
433	sc.SetState(SCE_B_IDENTIFIER);
434	} else if (sc.Match(comment_char)) {
435	// Hack to make deprecated QBASIC '$Include show
436	// up in freebasic with SCE_B_PREPROCESSOR.
437	if (comment_char == `'\''` && sc.Match(comment_char, `'$'`))
438	sc.SetState(SCE_B_PREPROCESSOR);
439	else if (sc.Match("\'*") \|\| sc.Match("\'!")) {
440	sc.SetState(SCE_B_DOCLINE);
441	} else {
442	sc.SetState(SCE_B_COMMENT);
443	}
444	} else if (sc.Match("/\'")) {
445	if (sc.Match("/\'") \|\| sc.Match("/\'!")) { // Support of gtk-doc/Doxygen doc. style*
446	sc.SetState(SCE_B_DOCBLOCK);
447	} else {
448	sc.SetState(SCE_B_COMMENTBLOCK);
449	}
450	sc.Forward(); // Eat the ' so it isn't used for the end of the comment
451	} else if (sc.Match(`'"'`)) {
452	sc.SetState(SCE_B_STRING);
453	} else if (IsDigit(sc.ch)) {
454	sc.SetState(SCE_B_NUMBER);
455	} else if (sc.Match(`'$'`) \|\| sc.Match("&h") \|\| sc.Match("&H") \|\| sc.Match("&o") \|\| sc.Match("&O")) {
456	sc.SetState(SCE_B_HEXNUMBER);
457	} else if (sc.Match(`'%'`) \|\| sc.Match("&b") \|\| sc.Match("&B")) {
458	sc.SetState(SCE_B_BINNUMBER);
459	} else if (sc.Match(`'#'`)) {
460	sc.SetState(SCE_B_CONSTANT);
461	} else if (IsOperator(sc.ch)) {
462	sc.SetState(SCE_B_OPERATOR);
463	} else if (IsIdentifier(sc.ch)) {
464	wasfirst = isfirst;
465	sc.SetState(SCE_B_IDENTIFIER);
466	} else if (!IsSpace(sc.ch)) {
467	sc.SetState(SCE_B_ERROR);
468	}
469	}
470
471	if (!IsSpace(sc.ch))
472	isfirst = false;
473
474	if (!sc.More())
475	break;
476	}
477	sc.Complete();
478	}
479
480
481	void SCI_METHOD LexerBasic::Fold(Sci_PositionU startPos, Sci_Position length, int / initStyle /, IDocument *pAccess) {
482
483	if (!options.fold)
484	return;
485
486	LexAccessor styler(pAccess);
487
488	Sci_Position line = styler.GetLine(startPos);
489	int level = styler.LevelAt(line);
490	int go = `0`, done = `0`;
491	Sci_Position endPos = startPos + length;
492	char word[`256`];
493	int wordlen = `0`;
494	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
495	int cNext = styler [startPos];
496
497	// Scan for tokens at the start of the line (they may include
498	// whitespace, for tokens like "End Function"
499	for (Sci_Position i = startPos; i < endPos; i++) {
500	int c = cNext;
501	cNext = styler.SafeGetCharAt(i + `1`);
502	bool atEOL = (c == `'\r'` && cNext != `'\n'`) \|\| (c == `'\n'`);
503	if (options.foldSyntaxBased && !done && !go) {
504	if (wordlen) { // are we scanning a token already?
505	word[wordlen] = static_cast<char>(LowerCase(c));
506	if (!IsIdentifier(c)) { // done with token
507	word[wordlen] = `'\0'`;
508	go = CheckFoldPoint(word, level);
509	if (!go) {
510	// Treat any whitespace as single blank, for
511	// things like "End Function".
512	if (IsSpace(c) && IsIdentifier(word[wordlen - `1`])) {
513	word[wordlen] = `' '`;
514	if (wordlen < `255`)
515	wordlen++;
516	}
517	else // done with this line
518	done = `1`;
519	}
520	} else if (wordlen < `255`) {
521	wordlen++;
522	}
523	} else { // start scanning at first non-whitespace character
524	if (!IsSpace(c)) {
525	if (IsIdentifier(c)) {
526	word[`0`] = static_cast<char>(LowerCase(c));
527	wordlen = `1`;
528	} else // done with this line
529	done = `1`;
530	}
531	}
532	}
533	if (options.foldCommentExplicit && ((styler.StyleAt(i) == SCE_B_COMMENT) \|\| options.foldExplicitAnywhere)) {
534	if (userDefinedFoldMarkers) {
535	if (styler.Match(i, options.foldExplicitStart.c_str())) {
536	level \|= SC_FOLDLEVELHEADERFLAG;
537	go = `1`;
538	} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
539	go = -`1`;
540	}
541	} else {
542	if (c == comment_char) {
543	if (cNext == `'{'`) {
544	level \|= SC_FOLDLEVELHEADERFLAG;
545	go = `1`;
546	} else if (cNext == `'}'`) {
547	go = -`1`;
548	}
549	}
550	}
551	}
552	if (atEOL) { // line end
553	if (!done && wordlen == `0` && options.foldCompact) // line was only space
554	level \|= SC_FOLDLEVELWHITEFLAG;
555	if (level != styler.LevelAt(line))
556	styler.SetLevel(line, level);
557	level += go;
558	line++;
559	// reset state
560	wordlen = `0`;
561	level &= ~SC_FOLDLEVELHEADERFLAG;
562	level &= ~SC_FOLDLEVELWHITEFLAG;
563	go = `0`;
564	done = `0`;
565	}
566	}
567	}
568
569	LexerModule lmBlitzBasic(SCLEX_BLITZBASIC, LexerBasic::LexerFactoryBlitzBasic, "blitzbasic", blitzbasicWordListDesc);
570
571	LexerModule lmPureBasic(SCLEX_PUREBASIC, LexerBasic::LexerFactoryPureBasic, "purebasic", purebasicWordListDesc);
572
573	LexerModule lmFreeBasic(SCLEX_FREEBASIC, LexerBasic::LexerFactoryFreeBasic, "freebasic", freebasicWordListDesc);
574

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexBasic.cxx