1// Scintilla source code edit control
2/** @file LexCIL.cxx
3 ** Lexer for Common Intermediate Language
4 ** Written by Jad Altahan (github.com/xv)
5 ** CIL manual: https://www.ecma-international.org/publications/standards/Ecma-335.htm
6 **/
7// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
8// The License.txt file describes the conditions under which this software may be distributed.
9
10#include <stdlib.h>
11#include <string.h>
12#include <stdio.h>
13#include <stdarg.h>
14#include <assert.h>
15#include <ctype.h>
16
17#include <string>
18#include <string_view>
19#include <map>
20#include <algorithm>
21#include <functional>
22
23#include "ILexer.h"
24#include "Scintilla.h"
25#include "SciLexer.h"
26
27#include "StringCopy.h"
28#include "WordList.h"
29#include "LexAccessor.h"
30#include "Accessor.h"
31#include "StyleContext.h"
32#include "CharacterSet.h"
33#include "LexerModule.h"
34#include "OptionSet.h"
35#include "DefaultLexer.h"
36
37using namespace Scintilla;
38using namespace Lexilla;
39
40namespace {
41 // Use an unnamed namespace to protect the functions and classes from name conflicts
42
/**
 * Tells whether @a ch may appear inside a word (keyword, identifier or label).
 * Only ASCII letters, ASCII digits, '_' and '.' qualify; every other value,
 * including all code points at or above 0x80, is rejected.
 */
bool IsAWordChar(const int ch) {
	// isalnum() is only defined for EOF and values representable as
	// unsigned char, so negative input is filtered out before the call.
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.');
}
46
/**
 * Tells whether @a ch is one of the single-character operators
 * `! % & * + - / < = > @ ^ | ~ ( ) [ ] { }`.
 * Anything outside the non-NUL ASCII range is never an operator.
 */
bool IsOperator(const int ch) {
	// strchr() converts its second argument to char. Without this range
	// check a code point such as U+0121 would alias '!' (0x21), and 0 would
	// match the string's terminating NUL and be reported as an operator.
	if (ch <= 0 || ch >= 0x80) {
		return false;
	}

	return strchr("!%&*+-/<=>@^|~()[]{}", ch) != nullptr;
}
57
58constexpr bool IsStreamCommentStyle(const int style) noexcept {
59 return style == SCE_CIL_COMMENT;
60}
61
/// Folding options of the CIL lexer, constructed with their default values.
struct OptionsCIL {
	bool fold;                  // Fold() returns immediately when this is false
	bool foldComment;           // must be set for multi-line comments to fold
	bool foldCommentMultiline;  // second switch required for multi-line comment folding
	bool foldCompact;           // lines without visible characters get the white flag

	OptionsCIL() :
		fold(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCompact(true) {
	}
};
75
// Descriptions of the keyword lists, in the order of the list index passed to
// LexerCIL::WordListSet(). The array ends with a null sentinel.
static const char *const cilWordListDesc[] = {
	"Primary CIL keywords",  // list 0
	"Metadata",              // list 1
	"Opcode instructions",   // list 2
	nullptr
};
82
83struct OptionSetCIL : public OptionSet<OptionsCIL> {
84 OptionSetCIL() {
85 DefineProperty("fold", &OptionsCIL::fold);
86 DefineProperty("fold.comment", &OptionsCIL::foldComment);
87
88 DefineProperty("fold.cil.comment.multiline", &OptionsCIL::foldCommentMultiline,
89 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
90
91 DefineProperty("fold.compact", &OptionsCIL::foldCompact);
92
93 DefineWordListSets(cilWordListDesc);
94 }
95};
96
97LexicalClass lexicalClasses[] = {
98 // Lexer CIL SCLEX_CIL SCE_CIL_:
99 0, "SCE_CIL_DEFAULT", "default", "White space",
100 1, "SCE_CIL_COMMENT", "comment", "Multi-line comment",
101 2, "SCE_CIL_COMMENTLINE", "comment line", "Line comment",
102 3, "SCE_CIL_WORD", "keyword", "Keyword 1",
103 4, "SCE_CIL_WORD2", "keyword", "Keyword 2",
104 5, "SCE_CIL_WORD3", "keyword", "Keyword 3",
105 6, "SCE_CIL_STRING", "literal string", "Double quoted string",
106 7, "SCE_CIL_LABEL", "label", "Code label",
107 8, "SCE_CIL_OPERATOR", "operator", "Operators",
108 9, "SCE_CIL_STRINGEOL", "error literal string", "String is not closed",
109 10, "SCE_CIL_IDENTIFIER", "identifier", "Identifiers",
110};
111
112}
113
114class LexerCIL : public DefaultLexer {
115 WordList keywords, keywords2, keywords3;
116 OptionsCIL options;
117 OptionSetCIL osCIL;
118
119public:
120 LexerCIL() : DefaultLexer("cil", SCLEX_CIL, lexicalClasses, ELEMENTS(lexicalClasses)) { }
121
122 virtual ~LexerCIL() { }
123
124 void SCI_METHOD Release() override {
125 delete this;
126 }
127
128 int SCI_METHOD Version() const override {
129 return lvRelease5;
130 }
131
132 const char * SCI_METHOD PropertyNames() override {
133 return osCIL.PropertyNames();
134 }
135
136 int SCI_METHOD PropertyType(const char *name) override {
137 return osCIL.PropertyType(name);
138 }
139
140 const char * SCI_METHOD DescribeProperty(const char *name) override {
141 return osCIL.DescribeProperty(name);
142 }
143
144 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
145
146 const char * SCI_METHOD PropertyGet(const char* key) override {
147 return osCIL.PropertyGet(key);
148 }
149
150 const char * SCI_METHOD DescribeWordListSets() override {
151 return osCIL.DescribeWordListSets();
152 }
153
154 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
155
156 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
157 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
158
159 void * SCI_METHOD PrivateCall(int, void *) override {
160 return 0;
161 }
162
163 int SCI_METHOD LineEndTypesSupported() override {
164 return SC_LINE_END_TYPE_UNICODE;
165 }
166
167 int SCI_METHOD PrimaryStyleFromStyle(int style) override {
168 return style;
169 }
170
171 static ILexer5 *LexerFactoryCIL() {
172 return new LexerCIL();
173 }
174};
175
176Sci_Position SCI_METHOD LexerCIL::PropertySet(const char *key, const char *val) {
177 if (osCIL.PropertySet(&options, key, val)) {
178 return 0;
179 }
180
181 return -1;
182}
183
184Sci_Position SCI_METHOD LexerCIL::WordListSet(int n, const char *wl) {
185 WordList *wordListN = 0;
186
187 switch (n) {
188 case 0:
189 wordListN = &keywords;
190 break;
191 case 1:
192 wordListN = &keywords2;
193 break;
194 case 2:
195 wordListN = &keywords3;
196 break;
197 }
198
199 Sci_Position firstModification = -1;
200
201 if (wordListN) {
202 WordList wlNew;
203 wlNew.Set(wl);
204
205 if (*wordListN != wlNew) {
206 wordListN->Set(wl);
207 firstModification = 0;
208 }
209 }
210
211 return firstModification;
212}
213
214void SCI_METHOD LexerCIL::Lex(Sci_PositionU startPos, Sci_Position length,
215 int initStyle, IDocument *pAccess) {
216 if (initStyle == SCE_CIL_STRINGEOL) {
217 initStyle = SCE_CIL_DEFAULT;
218 }
219
220 Accessor styler(pAccess, NULL);
221 StyleContext sc(startPos, length, initStyle, styler);
222
223 bool identAtLineStart = false, // Checks if an identifier is at line start (ignoring spaces)
224 canStyleLabels = false; // Checks if conditions are met to style SCE_CIL_LABEL
225
226 for (; sc.More(); sc.Forward()) {
227 if (sc.atLineStart) {
228 if (sc.state == SCE_CIL_STRING) {
229 sc.SetState(SCE_CIL_STRING);
230 }
231
232 identAtLineStart = true;
233 }
234
235 // Handle string line continuation
236 if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r') &&
237 (sc.state == SCE_CIL_STRING)) {
238 sc.Forward();
239
240 if (sc.ch == '\r' && sc.chNext == '\n') {
241 sc.Forward();
242 }
243
244 continue;
245 }
246
247 switch (sc.state) {
248 case SCE_CIL_OPERATOR:
249 sc.SetState(SCE_CIL_DEFAULT);
250 break;
251 case SCE_CIL_IDENTIFIER:
252 if (!IsAWordChar(sc.ch)) {
253 if (canStyleLabels && (sc.ch == ':' && sc.chNext != ':')) {
254 sc.ChangeState(SCE_CIL_LABEL);
255 sc.ForwardSetState(SCE_CIL_DEFAULT);
256 } else {
257 char kwSize[100];
258 sc.GetCurrent(kwSize, sizeof(kwSize));
259 int style = SCE_CIL_IDENTIFIER;
260
261 if (keywords.InList(kwSize)) {
262 style = SCE_CIL_WORD;
263 } else if (keywords2.InList(kwSize)) {
264 style = SCE_CIL_WORD2;
265 } else if (keywords3.InList(kwSize)) {
266 style = SCE_CIL_WORD3;
267 }
268
269 sc.ChangeState(style);
270 sc.SetState(SCE_CIL_DEFAULT);
271 }
272 }
273 break;
274 case SCE_CIL_COMMENT:
275 if (sc.Match('*', '/')) {
276 sc.Forward();
277 sc.ForwardSetState(SCE_CIL_DEFAULT);
278 }
279 break;
280 case SCE_CIL_COMMENTLINE:
281 if (sc.atLineStart) {
282 sc.SetState(SCE_CIL_DEFAULT);
283 }
284 break;
285 case SCE_CIL_STRING:
286 if (sc.ch == '\\') {
287 if (sc.chNext == '"' || sc.chNext == '\\') {
288 sc.Forward();
289 }
290 } else if (sc.ch == '"') {
291 sc.ForwardSetState(SCE_CIL_DEFAULT);
292 } else if (sc.atLineEnd) {
293 sc.ChangeState(SCE_CIL_STRINGEOL);
294 sc.ForwardSetState(SCE_CIL_DEFAULT);
295 }
296 break;
297 }
298
299 if (sc.state == SCE_CIL_DEFAULT) {
300 // String
301 if (sc.ch == '"') {
302 sc.SetState(SCE_CIL_STRING);
303 }
304 // Keyword
305 else if (IsAWordChar(sc.ch)) {
306 // Allow setting SCE_CIL_LABEL style only if the label is the
307 // first token in the line and does not start with a dot or a digit
308 canStyleLabels = identAtLineStart && !(sc.ch == '.' || IsADigit(sc.ch));
309 sc.SetState(SCE_CIL_IDENTIFIER);
310 }
311 // Multi-line comment
312 else if (sc.Match('/', '*')) {
313 sc.SetState(SCE_CIL_COMMENT);
314 sc.Forward();
315 }
316 // Line comment
317 else if (sc.Match('/', '/')) {
318 sc.SetState(SCE_CIL_COMMENTLINE);
319 }
320 // Operators
321 else if (IsOperator(sc.ch)) {
322 sc.SetState(SCE_CIL_OPERATOR);
323 }
324 }
325
326 if (!IsASpace(sc.ch)) {
327 identAtLineStart = false;
328 }
329 }
330
331 sc.Complete();
332}
333
334void SCI_METHOD LexerCIL::Fold(Sci_PositionU startPos, Sci_Position length,
335 int initStyle, IDocument *pAccess) {
336 if (!options.fold) {
337 return;
338 }
339
340 LexAccessor styler(pAccess);
341
342 const Sci_PositionU endPos = startPos + length;
343 Sci_Position lineCurrent = styler.GetLine(startPos);
344
345 int levelCurrent = SC_FOLDLEVELBASE;
346 if (lineCurrent > 0)
347 levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
348
349 int style = initStyle;
350 int styleNext = styler.StyleAt(startPos);
351 int levelNext = levelCurrent;
352 int visibleChars = 0;
353
354 char chNext = styler[startPos];
355
356 for (Sci_PositionU i = startPos; i < endPos; i++) {
357 const char ch = chNext;
358 int stylePrev = style;
359
360 chNext = styler.SafeGetCharAt(i + 1);
361 style = styleNext;
362 styleNext = styler.StyleAt(i + 1);
363
364 const bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
365
366 if (options.foldComment &&
367 options.foldCommentMultiline && IsStreamCommentStyle(style)) {
368 if (!IsStreamCommentStyle(stylePrev)) {
369 levelNext++;
370 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
371 levelNext--;
372 }
373 }
374
375 if (style == SCE_CIL_OPERATOR) {
376 if (ch == '{') {
377 levelNext++;
378 } else if (ch == '}') {
379 levelNext--;
380 }
381 }
382
383 if (!IsASpace(ch)) {
384 visibleChars++;
385 }
386
387 if (atEOL || (i == endPos - 1)) {
388 int lev = levelCurrent | levelNext << 16;
389 if (visibleChars == 0 && options.foldCompact)
390 lev |= SC_FOLDLEVELWHITEFLAG;
391 if (levelCurrent < levelNext)
392 lev |= SC_FOLDLEVELHEADERFLAG;
393 if (lev != styler.LevelAt(lineCurrent)) {
394 styler.SetLevel(lineCurrent, lev);
395 }
396
397 lineCurrent++;
398 levelCurrent = levelNext;
399
400 if (options.foldCompact &&
401 i == static_cast<Sci_PositionU>(styler.Length() - 1)) {
402 styler.SetLevel(lineCurrent, lev | SC_FOLDLEVELWHITEFLAG);
403 }
404
405 visibleChars = 0;
406 }
407 }
408}
409
410LexerModule lmCIL(SCLEX_CIL, LexerCIL::LexerFactoryCIL, "cil", cilWordListDesc);