1// Scintilla source code edit control
2// Nimrod lexer
3// (c) 2009 Andreas Rumpf
4/** @file LexNimrod.cxx
5 ** Lexer for Nimrod.
6 **/
7// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
8// The License.txt file describes the conditions under which this software may be distributed.
9
10#include <stdlib.h>
11#include <string.h>
12#include <stdio.h>
13#include <stdarg.h>
14#include <assert.h>
15#include <ctype.h>
16
17#include <string>
18#include <string_view>
19
20#include "ILexer.h"
21#include "Scintilla.h"
22#include "SciLexer.h"
23
24#include "WordList.h"
25#include "LexAccessor.h"
26#include "Accessor.h"
27#include "StyleContext.h"
28#include "CharacterSet.h"
29#include "LexerModule.h"
30
31using namespace Lexilla;
32
/**
 * Tell whether ch can be part of an identifier.
 *
 * Letters, digits, '_' and every byte with the high bit set are accepted.
 * Callers in this file pass plain `char` values; where `char` is signed a
 * high-bit byte arrives here as a negative int, so negative values are
 * treated the same as values >= 0x80. This also keeps out-of-range values
 * away from isalnum(), whose behaviour is undefined for them.
 *
 * @param ch character code, either 0..255 or a sign-extended `char`.
 * @return true when ch is a word character.
 */
static inline bool IsAWordChar(int ch) {
	if (ch < 0 || ch >= 0x80)
		return true;
	return isalnum(ch) || ch == '_';
}
36
37static Sci_Position tillEndOfTripleQuote(Accessor &styler, Sci_Position pos, Sci_Position max) {
38 /* search for """ */
39 for (;;) {
40 if (styler.SafeGetCharAt(pos, '\0') == '\0') return pos;
41 if (pos >= max) return pos;
42 if (styler.Match(pos, "\"\"\"")) {
43 return pos + 2;
44 }
45 pos++;
46 }
47}
48
/* Both line end characters are tested because SciTE allows changing the line ending. */
#define CR 13
#define LF 10

/**
 * Tell whether ch is one of the two line end characters (CR or LF).
 */
static bool inline isNewLine(int ch) {
	switch (ch) {
	case CR:
	case LF:
		return true;
	default:
		return false;
	}
}
55
56static Sci_Position scanString(Accessor &styler, Sci_Position pos, Sci_Position max, bool rawMode) {
57 for (;;) {
58 if (pos >= max) return pos;
59 char ch = styler.SafeGetCharAt(pos, '\0');
60 if (ch == CR || ch == LF || ch == '\0') return pos;
61 if (ch == '"') return pos;
62 if (ch == '\\' && !rawMode) {
63 pos += 2;
64 } else {
65 pos++;
66 }
67 }
68}
69
70static Sci_Position scanChar(Accessor &styler, Sci_Position pos, Sci_Position max) {
71 for (;;) {
72 if (pos >= max) return pos;
73 char ch = styler.SafeGetCharAt(pos, '\0');
74 if (ch == CR || ch == LF || ch == '\0') return pos;
75 if (ch == '\'' && !isalnum(styler.SafeGetCharAt(pos+1, '\0')) )
76 return pos;
77 if (ch == '\\') {
78 pos += 2;
79 } else {
80 pos++;
81 }
82 }
83}
84
85static Sci_Position scanIdent(Accessor &styler, Sci_Position pos, WordList &keywords) {
86 char buf[100]; /* copy to lowercase and ignore underscores */
87 Sci_Position i = 0;
88
89 for (;;) {
90 char ch = styler.SafeGetCharAt(pos, '\0');
91 if (!IsAWordChar(ch)) break;
92 if (ch != '_' && i < ((int)sizeof(buf))-1) {
93 buf[i] = static_cast<char>(tolower(ch));
94 i++;
95 }
96 pos++;
97 }
98 buf[i] = '\0';
99 /* look for keyword */
100 if (keywords.InList(buf)) {
101 styler.ColourTo(pos-1, SCE_P_WORD);
102 } else {
103 styler.ColourTo(pos-1, SCE_P_IDENTIFIER);
104 }
105 return pos;
106}
107
108static Sci_Position scanNumber(Accessor &styler, Sci_Position pos) {
109 char ch, ch2;
110 ch = styler.SafeGetCharAt(pos, '\0');
111 ch2 = styler.SafeGetCharAt(pos+1, '\0');
112 if (ch == '0' && (ch2 == 'b' || ch2 == 'B')) {
113 /* binary number: */
114 pos += 2;
115 for (;;) {
116 ch = styler.SafeGetCharAt(pos, '\0');
117 if (ch == '_' || (ch >= '0' && ch <= '1')) ++pos;
118 else break;
119 }
120 } else if (ch == '0' &&
121 (ch2 == 'o' || ch2 == 'O' || ch2 == 'c' || ch2 == 'C')) {
122 /* octal number: */
123 pos += 2;
124 for (;;) {
125 ch = styler.SafeGetCharAt(pos, '\0');
126 if (ch == '_' || (ch >= '0' && ch <= '7')) ++pos;
127 else break;
128 }
129 } else if (ch == '0' && (ch2 == 'x' || ch2 == 'X')) {
130 /* hexadecimal number: */
131 pos += 2;
132 for (;;) {
133 ch = styler.SafeGetCharAt(pos, '\0');
134 if (ch == '_' || (ch >= '0' && ch <= '9')
135 || (ch >= 'a' && ch <= 'f')
136 || (ch >= 'A' && ch <= 'F')) ++pos;
137 else break;
138 }
139 } else {
140 // skip decimal part:
141 for (;;) {
142 ch = styler.SafeGetCharAt(pos, '\0');
143 if (ch == '_' || (ch >= '0' && ch <= '9')) ++pos;
144 else break;
145 }
146 ch2 = styler.SafeGetCharAt(pos+1, '\0');
147 if (ch == '.' && ch2 >= '0' && ch2 <= '9') {
148 ++pos; // skip '.'
149 for (;;) {
150 ch = styler.SafeGetCharAt(pos, '\0');
151 if (ch == '_' || (ch >= '0' && ch <= '9')) ++pos;
152 else break;
153 }
154 }
155 if (ch == 'e' || ch == 'E') {
156 ++pos;
157 ch = styler.SafeGetCharAt(pos, '\0');
158 if (ch == '-' || ch == '+') ++pos;
159 for (;;) {
160 ch = styler.SafeGetCharAt(pos, '\0');
161 if (ch == '_' || (ch >= '0' && ch <= '9')) ++pos;
162 else break;
163 }
164 }
165 }
166 if (ch == '\'') {
167 /* a type suffix: */
168 pos++;
169 for (;;) {
170 ch = styler.SafeGetCharAt(pos);
171 if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z')
172 || (ch >= 'a' && ch <= 'z') || ch == '_') ++pos;
173 else break;
174 }
175 }
176 styler.ColourTo(pos-1, SCE_P_NUMBER);
177 return pos;
178}
179
180/* rewritten from scratch, because I couldn't get rid of the bugs...
181 (A character based approach sucks!)
182*/
183static void ColouriseNimrodDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
184 WordList *keywordlists[], Accessor &styler) {
185 Sci_Position pos = startPos;
186 Sci_Position max = startPos + length;
187 char ch;
188 WordList &keywords = *keywordlists[0];
189
190 styler.StartAt(startPos);
191 styler.StartSegment(startPos);
192
193 switch (initStyle) {
194 /* check where we are: */
195 case SCE_P_TRIPLEDOUBLE:
196 pos = tillEndOfTripleQuote(styler, pos, max);
197 styler.ColourTo(pos, SCE_P_TRIPLEDOUBLE);
198 pos++;
199 break;
200 default: /* nothing to do: */
201 break;
202 }
203 while (pos < max) {
204 ch = styler.SafeGetCharAt(pos, '\0');
205 switch (ch) {
206 case '\0': return;
207 case '#': {
208 bool doccomment = (styler.SafeGetCharAt(pos+1) == '#');
209 while (pos < max && !isNewLine(styler.SafeGetCharAt(pos, LF))) pos++;
210 if (doccomment)
211 styler.ColourTo(pos, SCE_C_COMMENTLINEDOC);
212 else
213 styler.ColourTo(pos, SCE_P_COMMENTLINE);
214 } break;
215 case 'r': case 'R': {
216 if (styler.SafeGetCharAt(pos+1) == '"') {
217 pos = scanString(styler, pos+2, max, true);
218 styler.ColourTo(pos, SCE_P_STRING);
219 pos++;
220 } else {
221 pos = scanIdent(styler, pos, keywords);
222 }
223 } break;
224 case '"':
225 if (styler.Match(pos+1, "\"\"")) {
226 pos = tillEndOfTripleQuote(styler, pos+3, max);
227 styler.ColourTo(pos, SCE_P_TRIPLEDOUBLE);
228 } else {
229 pos = scanString(styler, pos+1, max, false);
230 styler.ColourTo(pos, SCE_P_STRING);
231 }
232 pos++;
233 break;
234 case '\'':
235 pos = scanChar(styler, pos+1, max);
236 styler.ColourTo(pos, SCE_P_CHARACTER);
237 pos++;
238 break;
239 default: // identifers, numbers, operators, whitespace
240 if (ch >= '0' && ch <= '9') {
241 pos = scanNumber(styler, pos);
242 } else if (IsAWordChar(ch)) {
243 pos = scanIdent(styler, pos, keywords);
244 } else if (ch == '`') {
245 pos++;
246 while (pos < max) {
247 ch = styler.SafeGetCharAt(pos, LF);
248 if (ch == '`') {
249 ++pos;
250 break;
251 }
252 if (ch == CR || ch == LF) break;
253 ++pos;
254 }
255 styler.ColourTo(pos, SCE_P_IDENTIFIER);
256 } else if (strchr("()[]{}:=;-\\/&%$!+<>|^?,.*~@", ch)) {
257 styler.ColourTo(pos, SCE_P_OPERATOR);
258 pos++;
259 } else {
260 styler.ColourTo(pos, SCE_P_DEFAULT);
261 pos++;
262 }
263 break;
264 }
265 }
266}
267
268static bool IsCommentLine(Sci_Position line, Accessor &styler) {
269 Sci_Position pos = styler.LineStart(line);
270 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
271 for (Sci_Position i = pos; i < eol_pos; i++) {
272 char ch = styler[i];
273 if (ch == '#')
274 return true;
275 else if (ch != ' ' && ch != '\t')
276 return false;
277 }
278 return false;
279}
280
281static bool IsQuoteLine(Sci_Position line, Accessor &styler) {
282 int style = styler.StyleAt(styler.LineStart(line)) & 31;
283 return ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
284}
285
286
/**
 * Compute fold levels for the lines covering [startPos, startPos + length).
 *
 * Fold levels are derived from line indentation as reported by
 * styler.IndentAmount(). Two integer properties enable additional folding:
 * "fold.comment.nimrod" for runs of comment lines and "fold.quotes.nimrod"
 * for triple quoted strings (detected through the SCE_P_TRIPLE /
 * SCE_P_TRIPLEDOUBLE styles at the start of a line).
 *
 * The initStyle and word list parameters are not used.
 */
static void FoldNimrodDoc(Sci_PositionU startPos, Sci_Position length,
                          int /*initStyle - unused*/,
                          WordList *[], Accessor &styler) {
	const Sci_Position maxPos = startPos + length;
	const Sci_Position maxLines = styler.GetLine(maxPos - 1);             // Requested last line
	const Sci_Position docLines = styler.GetLine(styler.Length() - 1);  // Available last line
	const bool foldComment = styler.GetPropertyInt("fold.comment.nimrod") != 0;
	const bool foldQuotes = styler.GetPropertyInt("fold.quotes.nimrod") != 0;

	// Backtrack to previous non-blank line so we can determine indent level
	// for any white space lines (needed esp. within triple quoted strings)
	// and so we can fix any preceding fold level (which is why we go back
	// at least one line in all cases)
	int spaceFlags = 0;
	Sci_Position lineCurrent = styler.GetLine(startPos);
	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
	while (lineCurrent > 0) {
		lineCurrent--;
		indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
		// Stop on the first line that is neither blank, nor a comment line,
		// nor a line starting inside a triple quoted string.
		if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
		        (!IsCommentLine(lineCurrent, styler)) &&
		        (!IsQuoteLine(lineCurrent, styler)))
			break;
	}
	// Indentation level of the most recent line that is neither a comment
	// nor the continuation of a triple quoted string (updated in the loop).
	int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;

	// Set up initial loop state
	startPos = styler.LineStart(lineCurrent);
	// Style (low five bits) of the last position of the preceding line, if any
	int prev_state = SCE_P_DEFAULT & 31;
	if (lineCurrent >= 1)
		prev_state = styler.StyleAt(startPos - 1) & 31;
	int prevQuote = foldQuotes && ((prev_state == SCE_P_TRIPLE) ||
	                               (prev_state == SCE_P_TRIPLEDOUBLE));
	int prevComment = 0;
	if (lineCurrent >= 1)
		prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler);

	// Process all characters to end of requested range or end of any triple quote
	// or comment that hangs over the end of the range.  Cap processing in all cases
	// to end of document (in case of unclosed quote or comment at end).
	while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) ||
	                                      prevQuote || prevComment)) {

		// Gather info
		int lev = indentCurrent;
		Sci_Position lineNext = lineCurrent + 1;
		int indentNext = indentCurrent;
		// Whether the NEXT line starts inside a triple quoted string
		int quote = false;
		if (lineNext <= docLines) {
			// Information about next line is only available if not at end of document
			indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
			int style = styler.StyleAt(styler.LineStart(lineNext)) & 31;
			quote = foldQuotes && ((style == SCE_P_TRIPLE) || (style == SCE_P_TRIPLEDOUBLE));
		}
		const int quote_start = (quote && !prevQuote);
		const int quote_continue = (quote && prevQuote);
		const int comment = foldComment && IsCommentLine(lineCurrent, styler);
		// A comment block starts on a comment line that does not follow a
		// comment block line, is followed by another comment line and whose
		// level value is above SC_FOLDLEVELBASE.
		const int comment_start = (comment && !prevComment && (lineNext <= docLines) &&
		                           IsCommentLine(lineNext, styler) &&
		                           (lev > SC_FOLDLEVELBASE));
		const int comment_continue = (comment && prevComment);
		// Remember the indentation of ordinary lines only; lines inside a
		// triple quoted string and comment lines keep the remembered level.
		if ((!quote || !prevQuote) && !comment)
			indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
		// Inside a string the next line takes the remembered level rather
		// than its own indentation
		if (quote)
			indentNext = indentCurrentLevel;
		// Blank lines take the remembered level as well
		if (indentNext & SC_FOLDLEVELWHITEFLAG)
			indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;

		if (quote_start) {
			// Place fold point at start of triple quoted string
			lev |= SC_FOLDLEVELHEADERFLAG;
		} else if (quote_continue || prevQuote) {
			// Add level to rest of lines in the string
			lev = lev + 1;
		} else if (comment_start) {
			// Place fold point at start of a block of comments
			lev |= SC_FOLDLEVELHEADERFLAG;
		} else if (comment_continue) {
			// Add level to rest of lines in the block
			lev = lev + 1;
		}

		// Skip past any blank lines for next indent level info; we skip also
		// comments (all comments, not just those starting in column 0)
		// which effectively folds them into surrounding code rather
		// than screwing up folding.

		while (!quote &&
		        (lineNext < docLines) &&
		        ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
		         (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {

			lineNext++;
			indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
		}

		// Level of the first line after the skipped run, and the larger of
		// that level and the remembered level of the code before the run
		const int levelAfterComments = indentNext & SC_FOLDLEVELNUMBERMASK;
		const int levelBeforeComments =
		    Maximum(indentCurrentLevel,levelAfterComments);

		// Now set all the indent levels on the lines we skipped
		// Do this from end to start.  Once we encounter one line
		// which is indented more than the line after the end of
		// the comment-block, use the level of the block before

		Sci_Position skipLine = lineNext;
		int skipLevel = levelAfterComments;

		while (--skipLine > lineCurrent) {
			int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);

			if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
				skipLevel = levelBeforeComments;

			// Keep the blank-line flag of the skipped line
			int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;

			styler.SetLevel(skipLine, skipLevel | whiteFlag);
		}

		// Set fold header on non-quote/non-comment line
		// (when the following line is indented more deeply than this one)
		if (!quote && !comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG) ) {
			if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) <
			        (indentNext & SC_FOLDLEVELNUMBERMASK))
				lev |= SC_FOLDLEVELHEADERFLAG;
		}

		// Keep track of triple quote and block comment state of previous line
		prevQuote = quote;
		prevComment = comment_start || comment_continue;

		// Set fold level for this line and move to next line
		styler.SetLevel(lineCurrent, lev);
		indentCurrent = indentNext;
		lineCurrent = lineNext;
	}

	// NOTE: Cannot set level of last line here because indentCurrent doesn't have
	// header flag set; the loop above is crafted to take care of this case!
	//styler.SetLevel(lineCurrent, indentCurrent);
}
427
/**
 * Descriptions of the keyword lists used by this lexer, terminated by a
 * null entry. The single list holds the keywords that are looked up when
 * identifiers are coloured.
 */
static const char * const nimrodWordListDesc[] = {
	"Keywords",
	nullptr
};
432
// Lexer module object for the language id SCLEX_NIMROD, named "nimrod":
// ties together the colouriser, the folder and the keyword list descriptions
// defined above.
LexerModule lmNimrod(SCLEX_NIMROD, ColouriseNimrodDoc, "nimrod", FoldNimrodDoc,
                     nimrodWordListDesc);
435