LexAsm.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexAsm.cxx]

1	// Scintilla source code edit control
2	/** @file LexAsm.cxx
3	** Lexer for Assembler, just for the MASM syntax
4	** Written by The Black Horus
5	** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6	** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7	** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
8	**/
9	// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
10	// The License.txt file describes the conditions under which this software may be distributed.
11
12	#include <stdlib.h>
13	#include <string.h>
14	#include <stdio.h>
15	#include <stdarg.h>
16	#include <assert.h>
17	#include <ctype.h>
18
19	#include <string>
20	#include <string_view>
21	#include <map>
22	#include <set>
23	#include <functional>
24
25	#include "ILexer.h"
26	#include "Scintilla.h"
27	#include "SciLexer.h"
28
29	#include "WordList.h"
30	#include "LexAccessor.h"
31	#include "StyleContext.h"
32	#include "CharacterSet.h"
33	#include "LexerModule.h"
34	#include "OptionSet.h"
35	#include "DefaultLexer.h"
36
37	using namespace Scintilla;
38	using namespace Lexilla;
39
// Returns true when ch may appear inside an assembler word: an ASCII letter
// or digit, or one of '.', '_' and '?'.
// Only values in 0..0x7F are considered. The lower bound matters because
// passing a negative value (other than EOF) to isalnum() is undefined.
static inline bool IsAWordChar(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '.' ||
		ch == '_' || ch == '?');
}
44
// Returns true when ch may begin an assembler word: an ASCII letter or digit,
// or one of '_', '.', '%', '@', '$' and '?'.
// Only values in 0..0x7F are considered. The lower bound matters because
// passing a negative value (other than EOF) to isalnum() is undefined.
static inline bool IsAWordStart(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?');
}
49
// Returns true when ch is one of the characters styled as an operator.
// ASCII letters and digits are rejected first; the isalnum() call is guarded
// so that it is never reached with a negative or non-ASCII value.
static inline bool IsAsmOperator(const int ch) {
	if ((ch >= 0) && (ch < 0x80) && isalnum(ch))
		return false;
	// '.' left out as it is used to make up numbers
	switch (ch) {
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
62
63	static bool IsStreamCommentStyle(int style) {
64	return style == SCE_ASM_COMMENTDIRECTIVE \|\| style == SCE_ASM_COMMENTBLOCK;
65	}
66
// Maps 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static inline int LowerCase(int c) {
	if (c >= 'A' && c <= 'Z')
		return 'a' + c - 'A';
	return c;
}
72
73	// An individual named option for use in an OptionSet
74
75	// Options used for LexerAsm
// Options used for LexerAsm.
// Each member is bound to a property name in OptionSetAsm. The defaults are
// given as default member initializers; the std::string members start empty.
struct OptionsAsm {
	std::string delimiter;
	bool fold = false;
	bool foldSyntaxBased = true;
	bool foldCommentMultiline = false;
	bool foldCommentExplicit = false;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere = false;
	bool foldCompact = true;
	std::string commentChar;
};
100
// Descriptions of the eight word lists, in the index order used by
// LexerAsm::WordListSet. The list is terminated by a null pointer.
static const char * const asmWordListDesc[] = {
	"CPU instructions",
	"FPU instructions",
	"Registers",
	"Directives",
	"Directive operands",
	"Extended instructions",
	"Directives4Foldstart",
	"Directives4Foldend",
	nullptr
};
112
113	struct OptionSetAsm : public OptionSet<OptionsAsm> {
114	OptionSetAsm() {
115	DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
116	"Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
117
118	DefineProperty("fold", &OptionsAsm::fold);
119
120	DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
121	"Set this property to 0 to disable syntax based folding.");
122
123	DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
124	"Set this property to 1 to enable folding multi-line comments.");
125
126	DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
127	"This option enables folding explicit fold points when using the Asm lexer. "
128	"Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
129	"at the end of a section that should fold.");
130
131	DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
132	"The string to use for explicit fold start points, replacing the standard ;{.");
133
134	DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
135	"The string to use for explicit fold end points, replacing the standard ;}.");
136
137	DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
138	"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
139
140	DefineProperty("fold.compact", &OptionsAsm::foldCompact);
141
142	DefineProperty("lexer.as.comment.character", &OptionsAsm::commentChar,
143	"Overrides the default comment character (which is ';' for asm and '#' for as).");
144
145	DefineWordListSets(asmWordListDesc);
146	}
147	};
148
149	class LexerAsm : public DefaultLexer {
150	WordList cpuInstruction;
151	WordList mathInstruction;
152	WordList registers;
153	WordList directive;
154	WordList directiveOperand;
155	WordList extInstruction;
156	WordList directives4foldstart;
157	WordList directives4foldend;
158	OptionsAsm options;
159	OptionSetAsm osAsm;
160	int commentChar;
161	public:
162	LexerAsm(const char languageName_, int* language_, int commentChar_) : DefaultLexer (languageName_, language_) {
163	commentChar = commentChar_;
164	}
165	virtual ~LexerAsm() {
166	}
167	void SCI_METHOD Release() override {
168	delete this;
169	}
170	int SCI_METHOD Version() const override {
171	return lvRelease5;
172	}
173	const char * SCI_METHOD PropertyNames() override {
174	return osAsm.PropertyNames();
175	}
176	int SCI_METHOD PropertyType(const char *name) override {
177	return osAsm.PropertyType(name);
178	}
179	const char * SCI_METHOD DescribeProperty(const char *name) override {
180	return osAsm.DescribeProperty(name);
181	}
182	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override;
183	const char * SCI_METHOD PropertyGet(const char *key) override {
184	return osAsm.PropertyGet(key);
185	}
186	const char * SCI_METHOD DescribeWordListSets() override {
187	return osAsm.DescribeWordListSets();
188	}
189	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
190	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
191	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
192
193	void * SCI_METHOD PrivateCall(int, void *) override {
194	return `0`;
195	}
196
197	static ILexer5 *LexerFactoryAsm() {
198	return new LexerAsm ("asm", SCLEX_ASM, `';'`);
199	}
200
201	static ILexer5 *LexerFactoryAs() {
202	return new LexerAsm ("as", SCLEX_AS, `'#'`);
203	}
204	};
205
206	Sci_Position SCI_METHOD LexerAsm::PropertySet(const char key, const* char *val) {
207	if (osAsm.PropertySet(&options, key, val)) {
208	return `0`;
209	}
210	return -`1`;
211	}
212
213	Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
214	WordList *wordListN = `0`;
215	switch (n) {
216	case `0`:
217	wordListN = &cpuInstruction;
218	break;
219	case `1`:
220	wordListN = &mathInstruction;
221	break;
222	case `2`:
223	wordListN = &registers;
224	break;
225	case `3`:
226	wordListN = &directive;
227	break;
228	case `4`:
229	wordListN = &directiveOperand;
230	break;
231	case `5`:
232	wordListN = &extInstruction;
233	break;
234	case `6`:
235	wordListN = &directives4foldstart;
236	break;
237	case `7`:
238	wordListN = &directives4foldend;
239	break;
240	}
241	Sci_Position firstModification = -`1`;
242	if (wordListN) {
243	WordList wlNew;
244	wlNew.Set(wl);
245	if (*wordListN != wlNew) {
246	wordListN->Set(wl);
247	firstModification = `0`;
248	}
249	}
250	return firstModification;
251	}
252
253	void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
254	LexAccessor styler(pAccess);
255
256	const char commentCharacter = options.commentChar.empty() ?
257	commentChar : options.commentChar.front();
258
259	// Do not leak onto next line
260	if (initStyle == SCE_ASM_STRINGEOL)
261	initStyle = SCE_ASM_DEFAULT;
262
263	StyleContext sc(startPos, length, initStyle, styler);
264
265	for (; sc.More(); sc.Forward())
266	{
267
268	// Prevent SCE_ASM_STRINGEOL from leaking back to previous line
269	if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
270	sc.SetState(SCE_ASM_STRING);
271	} else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
272	sc.SetState(SCE_ASM_CHARACTER);
273	}
274
275	// Handle line continuation generically.
276	if (sc.ch == `'\\'`) {
277	if (sc.chNext == `'\n'` \|\| sc.chNext == `'\r'`) {
278	sc.Forward();
279	if (sc.ch == `'\r'` && sc.chNext == `'\n'`) {
280	sc.Forward();
281	}
282	continue;
283	}
284	}
285
286	// Determine if the current state should terminate.
287	if (sc.state == SCE_ASM_OPERATOR) {
288	if (!IsAsmOperator(sc.ch)) {
289	sc.SetState(SCE_ASM_DEFAULT);
290	}
291	} else if (sc.state == SCE_ASM_NUMBER) {
292	if (!IsAWordChar(sc.ch)) {
293	sc.SetState(SCE_ASM_DEFAULT);
294	}
295	} else if (sc.state == SCE_ASM_IDENTIFIER) {
296	if (!IsAWordChar(sc.ch) ) {
297	char s[`100`];
298	sc.GetCurrentLowered(s, sizeof(s));
299	bool IsDirective = false;
300
301	if (cpuInstruction.InList(s)) {
302	sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
303	} else if (mathInstruction.InList(s)) {
304	sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
305	} else if (registers.InList(s)) {
306	sc.ChangeState(SCE_ASM_REGISTER);
307	} else if (directive.InList(s)) {
308	sc.ChangeState(SCE_ASM_DIRECTIVE);
309	IsDirective = true;
310	} else if (directiveOperand.InList(s)) {
311	sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
312	} else if (extInstruction.InList(s)) {
313	sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
314	}
315	sc.SetState(SCE_ASM_DEFAULT);
316	if (IsDirective && !strcmp(s, "comment")) {
317	char delimiter = options.delimiter.empty() ? `'~'` : options.delimiter.c_str()[`0`];
318	while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
319	sc.ForwardSetState(SCE_ASM_DEFAULT);
320	}
321	if (sc.ch == delimiter) {
322	sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
323	}
324	}
325	}
326	} else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
327	char delimiter = options.delimiter.empty() ? `'~'` : options.delimiter.c_str()[`0`];
328	if (sc.ch == delimiter) {
329	while (!sc.atLineEnd) {
330	sc.Forward();
331	}
332	sc.SetState(SCE_ASM_DEFAULT);
333	}
334	} else if (sc.state == SCE_ASM_COMMENT ) {
335	if (sc.atLineEnd) {
336	sc.SetState(SCE_ASM_DEFAULT);
337	}
338	} else if (sc.state == SCE_ASM_STRING) {
339	if (sc.ch == `'\\'`) {
340	if (sc.chNext == `'\"'` \|\| sc.chNext == `'\''` \|\| sc.chNext == `'\\'`) {
341	sc.Forward();
342	}
343	} else if (sc.ch == `'\"'`) {
344	sc.ForwardSetState(SCE_ASM_DEFAULT);
345	} else if (sc.atLineEnd) {
346	sc.ChangeState(SCE_ASM_STRINGEOL);
347	sc.ForwardSetState(SCE_ASM_DEFAULT);
348	}
349	} else if (sc.state == SCE_ASM_CHARACTER) {
350	if (sc.ch == `'\\'`) {
351	if (sc.chNext == `'\"'` \|\| sc.chNext == `'\''` \|\| sc.chNext == `'\\'`) {
352	sc.Forward();
353	}
354	} else if (sc.ch == `'\''`) {
355	sc.ForwardSetState(SCE_ASM_DEFAULT);
356	} else if (sc.atLineEnd) {
357	sc.ChangeState(SCE_ASM_STRINGEOL);
358	sc.ForwardSetState(SCE_ASM_DEFAULT);
359	}
360	}
361
362	// Determine if a new state should be entered.
363	if (sc.state == SCE_ASM_DEFAULT) {
364	if (sc.ch == commentCharacter) {
365	sc.SetState(SCE_ASM_COMMENT);
366	} else if (IsASCII(sc.ch) && (isdigit(sc.ch) \|\| (sc.ch == `'.'` && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
367	sc.SetState(SCE_ASM_NUMBER);
368	} else if (IsAWordStart(sc.ch)) {
369	sc.SetState(SCE_ASM_IDENTIFIER);
370	} else if (sc.ch == `'\"'`) {
371	sc.SetState(SCE_ASM_STRING);
372	} else if (sc.ch == `'\''`) {
373	sc.SetState(SCE_ASM_CHARACTER);
374	} else if (IsAsmOperator(sc.ch)) {
375	sc.SetState(SCE_ASM_OPERATOR);
376	}
377	}
378
379	}
380	sc.Complete();
381	}
382
383	// Store both the current line's fold level and the next line's in the
384	// level store to make it easy to pick up with each increment
385	// and to make it possible to fiddle the current level for "else".
386
387	void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
388
389	if (!options.fold)
390	return;
391
392	LexAccessor styler(pAccess);
393
394	Sci_PositionU endPos = startPos + length;
395	int visibleChars = `0`;
396	Sci_Position lineCurrent = styler.GetLine(startPos);
397	int levelCurrent = SC_FOLDLEVELBASE;
398	if (lineCurrent > `0`)
399	levelCurrent = styler.LevelAt(lineCurrent-`1`) >> `16`;
400	int levelNext = levelCurrent;
401	char chNext = styler [startPos];
402	int styleNext = styler.StyleAt(startPos);
403	int style = initStyle;
404	char word[`100`];
405	int wordlen = `0`;
406	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
407	for (Sci_PositionU i = startPos; i < endPos; i++) {
408	char ch = chNext;
409	chNext = styler.SafeGetCharAt(i + `1`);
410	int stylePrev = style;
411	style = styleNext;
412	styleNext = styler.StyleAt(i + `1`);
413	bool atEOL = (ch == `'\r'` && chNext != `'\n'`) \|\| (ch == `'\n'`);
414	if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
415	if (!IsStreamCommentStyle(stylePrev)) {
416	levelNext++;
417	} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
418	// Comments don't end at end of line and the next character may be unstyled.
419	levelNext--;
420	}
421	}
422	if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) \|\| options.foldExplicitAnywhere)) {
423	if (userDefinedFoldMarkers) {
424	if (styler.Match(i, options.foldExplicitStart.c_str())) {
425	levelNext++;
426	} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
427	levelNext--;
428	}
429	} else {
430	if (ch == `';'`) {
431	if (chNext == `'{'`) {
432	levelNext++;
433	} else if (chNext == `'}'`) {
434	levelNext--;
435	}
436	}
437	}
438	}
439	if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
440	word[wordlen++] = static_cast<char>(LowerCase(ch));
441	if (wordlen == `100`) { // prevent overflow
442	word[`0`] = `'\0'`;
443	wordlen = `1`;
444	}
445	if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
446	word[wordlen] = `'\0'`;
447	wordlen = `0`;
448	if (directives4foldstart.InList(word)) {
449	levelNext++;
450	} else if (directives4foldend.InList(word)){
451	levelNext--;
452	}
453	}
454	}
455	if (!IsASpace(ch))
456	visibleChars++;
457	if (atEOL \|\| (i == endPos-`1`)) {
458	int levelUse = levelCurrent;
459	int lev = levelUse \| levelNext << `16`;
460	if (visibleChars == `0` && options.foldCompact)
461	lev \|= SC_FOLDLEVELWHITEFLAG;
462	if (levelUse < levelNext)
463	lev \|= SC_FOLDLEVELHEADERFLAG;
464	if (lev != styler.LevelAt(lineCurrent)) {
465	styler.SetLevel(lineCurrent, lev);
466	}
467	lineCurrent++;
468	levelCurrent = levelNext;
469	if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - `1`))) {
470	// There is an empty line at end of file so give it same level and empty
471	styler.SetLevel(lineCurrent, (levelCurrent \| levelCurrent << `16`) \| SC_FOLDLEVELWHITEFLAG);
472	}
473	visibleChars = `0`;
474	}
475	}
476	}
477
478	LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
479	LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);
480
481

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexAsm.cxx