1/** @file LexD.cxx
2 ** Lexer for D.
3 **
4 ** Copyright (c) 2006 by Waldemar Augustyn <waldemar@wdmsys.com>
5 ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 **/
7// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
8// The License.txt file describes the conditions under which this software may be distributed.
9
10#include <stdlib.h>
11#include <string.h>
12#include <stdio.h>
13#include <stdarg.h>
14#include <assert.h>
15#include <ctype.h>
16
17#include <string>
18#include <string_view>
19#include <map>
20#include <functional>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25
26#include "WordList.h"
27#include "LexAccessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31#include "OptionSet.h"
32#include "DefaultLexer.h"
33
34using namespace Scintilla;
35using namespace Lexilla;
36
/* Nested comments require keeping the value of the nesting level for every
   position in the document. But since Scintilla always styles line by line,
   we only need to store one value per line. That non-negative number is the
   nesting level at the end of the line.
*/
42
43// Underscore, letter, digit and universal alphas from C99 Appendix D.
44
45static bool IsWordStart(int ch) {
46 return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
47}
48
49static bool IsWord(int ch) {
50 return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
51}
52
53static bool IsDoxygen(int ch) {
54 if (IsASCII(ch) && islower(ch))
55 return true;
56 if (ch == '$' || ch == '@' || ch == '\\' ||
57 ch == '&' || ch == '#' || ch == '<' || ch == '>' ||
58 ch == '{' || ch == '}' || ch == '[' || ch == ']')
59 return true;
60 return false;
61}
62
// Reports whether ch is one of the characters accepted directly after a
// closing string quote: 'c', 'w' or 'd'.
static bool IsStringSuffix(int ch) {
	switch (ch) {
	case 'c':
	case 'w':
	case 'd':
		return true;
	default:
		return false;
	}
}
66
67static bool IsStreamCommentStyle(int style) {
68 return style == SCE_D_COMMENT ||
69 style == SCE_D_COMMENTDOC ||
70 style == SCE_D_COMMENTDOCKEYWORD ||
71 style == SCE_D_COMMENTDOCKEYWORDERROR;
72}
73
// Options used for LexerD.
// Each member is registered as an individual named option in OptionSetD.
struct OptionsD {
	bool fold;                      // master switch: Fold() returns immediately when false
	bool foldSyntaxBased;           // fold on '{' and '}' operators
	bool foldComment;               // enables both comment-folding variants below
	bool foldCommentMultiline;      // fold stream and nested comments
	bool foldCommentExplicit;       // fold on explicit fold markers
	std::string foldExplicitStart;  // user-defined start marker (used only when both markers are non-empty)
	std::string foldExplicitEnd;    // user-defined end marker (used only when both markers are non-empty)
	bool foldExplicitAnywhere;      // accept explicit markers outside line comments
	bool foldCompact;               // flag lines without visible characters as white
	int foldAtElseInt;              // negative: defer to foldAtElse; otherwise non-zero enables
	bool foldAtElse;                // fallback used while foldAtElseInt is negative

	// Establishes the defaults; the two marker strings start out empty.
	OptionsD() :
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldCompact(true),
		foldAtElseInt(-1),
		foldAtElse(false) {
	}
};
103
// Descriptions of the keyword lists, in the index order accepted by
// LexerD::WordListSet; the array is terminated by a null pointer.
static const char * const dWordLists[] = {
	"Primary keywords and identifiers",    // list 0
	"Secondary keywords and identifiers",  // list 1
	"Documentation comment keywords",      // list 2
	"Type definitions and aliases",        // list 3
	"Keywords 5",                          // list 4
	"Keywords 6",                          // list 5
	"Keywords 7",                          // list 6
	nullptr,
};
114
// Binds each property name understood by the D lexer to the OptionsD member
// that stores its value, and registers the keyword-list descriptions.
// "fold", "fold.comment", "fold.compact" and "fold.at.else" are registered
// without a description string; the D-specific properties carry one.
struct OptionSetD : public OptionSet<OptionsD> {
	OptionSetD() {
		DefineProperty("fold", &OptionsD::fold);

		DefineProperty("fold.d.syntax.based", &OptionsD::foldSyntaxBased,
			"Set this property to 0 to disable syntax based folding.");

		DefineProperty("fold.comment", &OptionsD::foldComment);

		DefineProperty("fold.d.comment.multiline", &OptionsD::foldCommentMultiline,
			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");

		DefineProperty("fold.d.comment.explicit", &OptionsD::foldCommentExplicit,
			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");

		// Fold() only honours the two custom markers when both are non-empty.
		DefineProperty("fold.d.explicit.start", &OptionsD::foldExplicitStart,
			"The string to use for explicit fold start points, replacing the standard //{.");

		DefineProperty("fold.d.explicit.end", &OptionsD::foldExplicitEnd,
			"The string to use for explicit fold end points, replacing the standard //}.");

		DefineProperty("fold.d.explicit.anywhere", &OptionsD::foldExplicitAnywhere,
			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");

		DefineProperty("fold.compact", &OptionsD::foldCompact);

		// Integer form: Fold() uses it when it is >= 0 and otherwise falls back
		// to the boolean "fold.at.else" property registered below.
		DefineProperty("lexer.d.fold.at.else", &OptionsD::foldAtElseInt,
			"This option enables D folding on a \"} else {\" line of an if statement.");

		DefineProperty("fold.at.else", &OptionsD::foldAtElse);

		DefineWordListSets(dWordLists);
	}
};
149
150class LexerD : public DefaultLexer {
151 bool caseSensitive;
152 WordList keywords;
153 WordList keywords2;
154 WordList keywords3;
155 WordList keywords4;
156 WordList keywords5;
157 WordList keywords6;
158 WordList keywords7;
159 OptionsD options;
160 OptionSetD osD;
161public:
162 LexerD(bool caseSensitive_) :
163 DefaultLexer("D", SCLEX_D),
164 caseSensitive(caseSensitive_) {
165 }
166 virtual ~LexerD() {
167 }
168 void SCI_METHOD Release() override {
169 delete this;
170 }
171 int SCI_METHOD Version() const override {
172 return lvRelease5;
173 }
174 const char * SCI_METHOD PropertyNames() override {
175 return osD.PropertyNames();
176 }
177 int SCI_METHOD PropertyType(const char *name) override {
178 return osD.PropertyType(name);
179 }
180 const char * SCI_METHOD DescribeProperty(const char *name) override {
181 return osD.DescribeProperty(name);
182 }
183 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
184 const char * SCI_METHOD PropertyGet(const char *key) override {
185 return osD.PropertyGet(key);
186 }
187 const char * SCI_METHOD DescribeWordListSets() override {
188 return osD.DescribeWordListSets();
189 }
190 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
191 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
192 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
193
194 void * SCI_METHOD PrivateCall(int, void *) override {
195 return 0;
196 }
197
198 static ILexer5 *LexerFactoryD() {
199 return new LexerD(true);
200 }
201 static ILexer5 *LexerFactoryDInsensitive() {
202 return new LexerD(false);
203 }
204};
205
206Sci_Position SCI_METHOD LexerD::PropertySet(const char *key, const char *val) {
207 if (osD.PropertySet(&options, key, val)) {
208 return 0;
209 }
210 return -1;
211}
212
213Sci_Position SCI_METHOD LexerD::WordListSet(int n, const char *wl) {
214 WordList *wordListN = 0;
215 switch (n) {
216 case 0:
217 wordListN = &keywords;
218 break;
219 case 1:
220 wordListN = &keywords2;
221 break;
222 case 2:
223 wordListN = &keywords3;
224 break;
225 case 3:
226 wordListN = &keywords4;
227 break;
228 case 4:
229 wordListN = &keywords5;
230 break;
231 case 5:
232 wordListN = &keywords6;
233 break;
234 case 6:
235 wordListN = &keywords7;
236 break;
237 }
238 Sci_Position firstModification = -1;
239 if (wordListN) {
240 WordList wlNew;
241 wlNew.Set(wl);
242 if (*wordListN != wlNew) {
243 wordListN->Set(wl);
244 firstModification = 0;
245 }
246 }
247 return firstModification;
248}
249
// Assigns SCE_D_* styles to the text described by startPos/length, starting in
// initStyle, by walking it one character at a time with a StyleContext.
// Each iteration first decides whether the current state ends, then, if the
// state is SCE_D_DEFAULT, whether a new token starts at the current character.
// The depth of /+ +/ nested comments is kept in curNcLevel and written to the
// per-line state so that lexing can resume on any line.
void SCI_METHOD LexerD::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	LexAccessor styler(pAccess);

	// Style to return to when a doc-comment keyword ends
	// (SCE_D_COMMENTDOC or SCE_D_COMMENTLINEDOC once one has been entered).
	int styleBeforeDCKeyword = SCE_D_DEFAULT;

	StyleContext sc(startPos, length, initStyle, styler);

	Sci_Position curLine = styler.GetLine(startPos);
	// Resume with the nesting depth stored for the previous line; 0 on the first line.
	int curNcLevel = curLine > 0? styler.GetLineState(curLine-1): 0;
	bool numFloat = false; // Float literals have '+' and '-' signs
	bool numHex = false;

	for (; sc.More(); sc.Forward()) {

		if (sc.atLineStart) {
			// Seed this line's state with the depth inherited from the lines above;
			// it is rewritten below whenever a /+ or +/ is met on this line.
			curLine = styler.GetLine(sc.currentPos);
			styler.SetLineState(curLine, curNcLevel);
		}

		// Determine if the current state should terminate.
		switch (sc.state) {
			case SCE_D_OPERATOR:
				// The operator state is left again on the very next character examined.
				sc.SetState(SCE_D_DEFAULT);
				break;
			case SCE_D_NUMBER:
				// Each 'continue' keeps the number state and skips the new-state checks below.
				// We accept almost anything because of hex. and number suffixes
				if (IsASCII(sc.ch) && (isalnum(sc.ch) || sc.ch == '_')) {
					continue;
				} else if (sc.ch == '.' && sc.chNext != '.' && !numFloat) {
					// Don't parse 0..2 as number.
					// Only the first '.' is accepted; numFloat blocks a second one.
					numFloat=true;
					continue;
				} else if ( ( sc.ch == '-' || sc.ch == '+' ) && (	/*sign and*/
					( !numHex && ( sc.chPrev == 'e' || sc.chPrev == 'E' ) ) || /*decimal or*/
					( sc.chPrev == 'p' || sc.chPrev == 'P' ) ) ) { /*hex*/
					// Parse exponent sign in float literals: 2e+10 0x2e+10
					continue;
				} else {
					sc.SetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_IDENTIFIER:
				if (!IsWord(sc.ch)) {
					// The identifier is complete: fetch its text (lower-cased when the
					// lexer is case-insensitive) and restyle it if it is in a keyword list.
					char s[1000];
					if (caseSensitive) {
						sc.GetCurrent(s, sizeof(s));
					} else {
						sc.GetCurrentLowered(s, sizeof(s));
					}
					// The first matching list wins; keywords3 (doc keywords) is not consulted here.
					if (keywords.InList(s)) {
						sc.ChangeState(SCE_D_WORD);
					} else if (keywords2.InList(s)) {
						sc.ChangeState(SCE_D_WORD2);
					} else if (keywords4.InList(s)) {
						sc.ChangeState(SCE_D_TYPEDEF);
					} else if (keywords5.InList(s)) {
						sc.ChangeState(SCE_D_WORD5);
					} else if (keywords6.InList(s)) {
						sc.ChangeState(SCE_D_WORD6);
					} else if (keywords7.InList(s)) {
						sc.ChangeState(SCE_D_WORD7);
					}
					sc.SetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_COMMENT:
				if (sc.Match('*', '/')) {
					// Step over the '*' so that the '/' is still styled as comment.
					sc.Forward();
					sc.ForwardSetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_COMMENTDOC:
				if (sc.Match('*', '/')) {
					sc.Forward();
					sc.ForwardSetState(SCE_D_DEFAULT);
				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
					// Verify that we have the conditions to mark a comment-doc-keyword
					if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
						styleBeforeDCKeyword = SCE_D_COMMENTDOC;
						sc.SetState(SCE_D_COMMENTDOCKEYWORD);
					}
				}
				break;
			case SCE_D_COMMENTLINE:
				// Line comments end at the start of the next line.
				if (sc.atLineStart) {
					sc.SetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_COMMENTLINEDOC:
				if (sc.atLineStart) {
					sc.SetState(SCE_D_DEFAULT);
				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
					// Verify that we have the conditions to mark a comment-doc-keyword
					if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
						styleBeforeDCKeyword = SCE_D_COMMENTLINEDOC;
						sc.SetState(SCE_D_COMMENTDOCKEYWORD);
					}
				}
				break;
			case SCE_D_COMMENTDOCKEYWORD:
				if ((styleBeforeDCKeyword == SCE_D_COMMENTDOC) && sc.Match('*', '/')) {
					// The block comment closes in the middle of the keyword: mark the
					// keyword as an error and leave the comment.
					sc.ChangeState(SCE_D_COMMENTDOCKEYWORDERROR);
					sc.Forward();
					sc.ForwardSetState(SCE_D_DEFAULT);
				} else if (!IsDoxygen(sc.ch)) {
					char s[100];
					if (caseSensitive) {
						sc.GetCurrent(s, sizeof(s));
					} else {
						sc.GetCurrentLowered(s, sizeof(s));
					}
					// s + 1 skips the leading '@' or '\\'. The keyword is only accepted when
					// it is followed by white space and is listed in keywords3.
					if (!IsASpace(sc.ch) || !keywords3.InList(s + 1)) {
						sc.ChangeState(SCE_D_COMMENTDOCKEYWORDERROR);
					}
					sc.SetState(styleBeforeDCKeyword);
				}
				break;
			case SCE_D_COMMENTNESTED:
				if (sc.Match('+', '/')) {
					// Closing marker: one level less (never below zero), recorded at once
					// in the line state.
					if (curNcLevel > 0)
						curNcLevel -= 1;
					curLine = styler.GetLine(sc.currentPos);
					styler.SetLineState(curLine, curNcLevel);
					sc.Forward();
					// Only the outermost +/ ends the comment state.
					if (curNcLevel == 0) {
						sc.ForwardSetState(SCE_D_DEFAULT);
					}
				} else if (sc.Match('/','+')) {
					// Opening marker inside a nested comment: one level deeper.
					curNcLevel += 1;
					curLine = styler.GetLine(sc.currentPos);
					styler.SetLineState(curLine, curNcLevel);
					sc.Forward();
				}
				break;
			case SCE_D_STRING:
				if (sc.ch == '\\') {
					// Skip an escaped quote or backslash so it cannot end the string.
					if (sc.chNext == '"' || sc.chNext == '\\') {
						sc.Forward();
					}
				} else if (sc.ch == '"') {
					// Include a trailing c/w/d suffix character in the string style.
					if(IsStringSuffix(sc.chNext))
						sc.Forward();
					sc.ForwardSetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_CHARACTER:
				if (sc.atLineEnd) {
					// Unterminated character literal: restyle it as SCE_D_STRINGEOL.
					sc.ChangeState(SCE_D_STRINGEOL);
				} else if (sc.ch == '\\') {
					if (sc.chNext == '\'' || sc.chNext == '\\') {
						sc.Forward();
					}
				} else if (sc.ch == '\'') {
					// Char has no suffixes
					sc.ForwardSetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_STRINGEOL:
				if (sc.atLineStart) {
					sc.SetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_STRINGB:
				// Backquoted string: no escape handling, ends at the next '`'.
				if (sc.ch == '`') {
					if(IsStringSuffix(sc.chNext))
						sc.Forward();
					sc.ForwardSetState(SCE_D_DEFAULT);
				}
				break;
			case SCE_D_STRINGR:
				// r"", x"" and q"" strings: no escape handling, ends at the next '"'.
				if (sc.ch == '"') {
					if(IsStringSuffix(sc.chNext))
						sc.Forward();
					sc.ForwardSetState(SCE_D_DEFAULT);
				}
				break;
		}

		// Determine if a new state should be entered.
		if (sc.state == SCE_D_DEFAULT) {
			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
				sc.SetState(SCE_D_NUMBER);
				// A literal that starts with '.' has already used up its decimal point.
				numFloat = sc.ch == '.';
				// Remember hex literal
				numHex = sc.ch == '0' && ( sc.chNext == 'x' || sc.chNext == 'X' );
			} else if ( (sc.ch == 'r' || sc.ch == 'x' || sc.ch == 'q')
				&& sc.chNext == '"' ) {
				// Limited support for hex and delimited strings: parse as r""
				sc.SetState(SCE_D_STRINGR);
				// Step onto the opening quote so it is not taken as the closing one.
				sc.Forward();
			} else if (IsWordStart(sc.ch) || sc.ch == '$') {
				sc.SetState(SCE_D_IDENTIFIER);
			} else if (sc.Match('/','+')) {
				// Entering a nested comment: record the new depth for this line.
				curNcLevel += 1;
				curLine = styler.GetLine(sc.currentPos);
				styler.SetLineState(curLine, curNcLevel);
				sc.SetState(SCE_D_COMMENTNESTED);
				sc.Forward();
			} else if (sc.Match('/', '*')) {
				if (sc.Match("/**") || sc.Match("/*!")) {   // Support of Qt/Doxygen doc. style
					sc.SetState(SCE_D_COMMENTDOC);
				} else {
					sc.SetState(SCE_D_COMMENT);
				}
				sc.Forward();   // Eat the * so it isn't used for the end of the comment
			} else if (sc.Match('/', '/')) {
				// Exactly three slashes, or "//!", start a doc line comment;
				// four or more slashes are an ordinary line comment.
				if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
					// Support of Qt/Doxygen doc. style
					sc.SetState(SCE_D_COMMENTLINEDOC);
				else
					sc.SetState(SCE_D_COMMENTLINE);
			} else if (sc.ch == '"') {
				sc.SetState(SCE_D_STRING);
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_D_CHARACTER);
			} else if (sc.ch == '`') {
				sc.SetState(SCE_D_STRINGB);
			} else if (isoperator(static_cast<char>(sc.ch))) {
				sc.SetState(SCE_D_OPERATOR);
				if (sc.ch == '.' && sc.chNext == '.') sc.Forward(); // Range operator
			}
		}
	}
	sc.Complete();
}
475
// Store both the current line's fold level and the next line's in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "} else {".
479
480void SCI_METHOD LexerD::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
481
482 if (!options.fold)
483 return;
484
485 LexAccessor styler(pAccess);
486
487 Sci_PositionU endPos = startPos + length;
488 int visibleChars = 0;
489 Sci_Position lineCurrent = styler.GetLine(startPos);
490 int levelCurrent = SC_FOLDLEVELBASE;
491 if (lineCurrent > 0)
492 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
493 int levelMinCurrent = levelCurrent;
494 int levelNext = levelCurrent;
495 char chNext = styler[startPos];
496 int styleNext = styler.StyleAt(startPos);
497 int style = initStyle;
498 bool foldAtElse = options.foldAtElseInt >= 0 ? options.foldAtElseInt != 0 : options.foldAtElse;
499 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
500 for (Sci_PositionU i = startPos; i < endPos; i++) {
501 char ch = chNext;
502 chNext = styler.SafeGetCharAt(i + 1);
503 int stylePrev = style;
504 style = styleNext;
505 styleNext = styler.StyleAt(i + 1);
506 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
507 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style)) {
508 if (!IsStreamCommentStyle(stylePrev)) {
509 levelNext++;
510 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
511 // Comments don't end at end of line and the next character may be unstyled.
512 levelNext--;
513 }
514 }
515 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_D_COMMENTLINE) || options.foldExplicitAnywhere)) {
516 if (userDefinedFoldMarkers) {
517 if (styler.Match(i, options.foldExplicitStart.c_str())) {
518 levelNext++;
519 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
520 levelNext--;
521 }
522 } else {
523 if ((ch == '/') && (chNext == '/')) {
524 char chNext2 = styler.SafeGetCharAt(i + 2);
525 if (chNext2 == '{') {
526 levelNext++;
527 } else if (chNext2 == '}') {
528 levelNext--;
529 }
530 }
531 }
532 }
533 if (options.foldSyntaxBased && (style == SCE_D_OPERATOR)) {
534 if (ch == '{') {
535 // Measure the minimum before a '{' to allow
536 // folding on "} else {"
537 if (levelMinCurrent > levelNext) {
538 levelMinCurrent = levelNext;
539 }
540 levelNext++;
541 } else if (ch == '}') {
542 levelNext--;
543 }
544 }
545 if (atEOL || (i == endPos-1)) {
546 if (options.foldComment && options.foldCommentMultiline) { // Handle nested comments
547 int nc;
548 nc = styler.GetLineState(lineCurrent);
549 nc -= lineCurrent>0? styler.GetLineState(lineCurrent-1): 0;
550 levelNext += nc;
551 }
552 int levelUse = levelCurrent;
553 if (options.foldSyntaxBased && foldAtElse) {
554 levelUse = levelMinCurrent;
555 }
556 int lev = levelUse | levelNext << 16;
557 if (visibleChars == 0 && options.foldCompact)
558 lev |= SC_FOLDLEVELWHITEFLAG;
559 if (levelUse < levelNext)
560 lev |= SC_FOLDLEVELHEADERFLAG;
561 if (lev != styler.LevelAt(lineCurrent)) {
562 styler.SetLevel(lineCurrent, lev);
563 }
564 lineCurrent++;
565 levelCurrent = levelNext;
566 levelMinCurrent = levelCurrent;
567 visibleChars = 0;
568 }
569 if (!IsASpace(ch))
570 visibleChars++;
571 }
572}
573
// Lexer module for D: ties SCLEX_D and the name "d" to the case-sensitive
// factory and the keyword-list descriptions.
LexerModule lmD(SCLEX_D, LexerD::LexerFactoryD, "d", dWordLists);
575