1 | // Scintilla source code edit control |
2 | /** @file LexKVIrc.cxx |
3 | ** Lexer for KVIrc script. |
4 | **/ |
5 | // Copyright 2013 by OmegaPhil <OmegaPhil+scintilla@gmail.com>, based in |
6 | // part from LexPython Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org> |
7 | // and LexCmake Copyright 2007 by Cristian Adam <cristian [dot] adam [at] gmx [dot] net> |
8 | |
9 | // The License.txt file describes the conditions under which this software may be distributed. |
10 | |
11 | #include <stdlib.h> |
12 | #include <string.h> |
13 | #include <stdio.h> |
14 | #include <stdarg.h> |
15 | #include <assert.h> |
16 | #include <ctype.h> |
17 | |
18 | #include <string> |
19 | #include <string_view> |
20 | |
21 | #include "ILexer.h" |
22 | #include "Scintilla.h" |
23 | #include "SciLexer.h" |
24 | |
25 | #include "WordList.h" |
26 | #include "LexAccessor.h" |
27 | #include "Accessor.h" |
28 | #include "StyleContext.h" |
29 | #include "CharacterSet.h" |
30 | #include "LexerModule.h" |
31 | |
32 | using namespace Lexilla; |
33 | |
34 | |
35 | /* KVIrc Script syntactic rules: http://www.kvirc.net/doc/doc_syntactic_rules.html */ |
36 | |
37 | /* Utility functions */ |
/**
 * Reports whether ch may appear inside a word.
 *
 * A word character is an ASCII letter or digit, '_', '.' or ':'.
 * The keyword list includes modules, i.e. words including '.', and
 * alias namespaces include ':'.
 *
 * @param ch Character value to classify.
 * @return true when ch is a word character, false otherwise (including
 *         for any negative or non-ASCII value).
 */
static inline bool IsAWordChar(int ch) {

    /* isalnum() is only defined for values representable as unsigned
     * char (or EOF), so anything outside the ASCII range is rejected
     * before the ctype call is made */
    if (ch < 0 || ch >= 0x80)
        return false;

    return isalnum(ch) || ch == '_' || ch == '.' || ch == ':';
}
/**
 * Reports whether ch may begin a word.
 *
 * A word may start with an ASCII letter or digit, or with '_'.
 * Functions (which start with '$') are treated separately to keywords,
 * so '$' is not accepted here.
 *
 * @param ch Character value to classify.
 * @return true when ch can start a word, false otherwise (including
 *         for any negative or non-ASCII value).
 */
static inline bool IsAWordStart(int ch) {

    /* isalnum() is only defined for values representable as unsigned
     * char (or EOF), so anything outside the ASCII range is rejected
     * before the ctype call is made */
    if (ch < 0 || ch >= 0x80)
        return false;

    return isalnum(ch) || ch == '_';
}
50 | |
51 | /* Interface function called by Scintilla to request some text to be |
52 | syntax highlighted */ |
53 | static void ColouriseKVIrcDoc(Sci_PositionU startPos, Sci_Position length, |
54 | int initStyle, WordList *keywordlists[], |
55 | Accessor &styler) |
56 | { |
57 | /* Fetching style context */ |
58 | StyleContext sc(startPos, length, initStyle, styler); |
59 | |
60 | /* Accessing keywords and function-marking keywords */ |
61 | WordList &keywords = *keywordlists[0]; |
62 | WordList &functionKeywords = *keywordlists[1]; |
63 | |
64 | /* Looping for all characters - only automatically moving forward |
65 | * when asked for (transitions leaving strings and keywords do this |
66 | * already) */ |
67 | bool next = true; |
68 | for( ; sc.More(); next ? sc.Forward() : (void)0 ) |
69 | { |
70 | /* Resetting next */ |
71 | next = true; |
72 | |
73 | /* Dealing with different states */ |
74 | switch (sc.state) |
75 | { |
76 | case SCE_KVIRC_DEFAULT: |
77 | |
78 | /* Detecting single-line comments |
79 | * Unfortunately KVIrc script allows raw '#<channel |
80 | * name>' to be used, and appending # to an array returns |
81 | * its length... |
82 | * Going for a compromise where single line comments not |
83 | * starting on a newline are allowed in all cases except |
84 | * when they are preceeded with an opening bracket or comma |
85 | * (this will probably be the most common style a valid |
86 | * string-less channel name will be used with), with the |
87 | * array length case included |
88 | */ |
89 | if ( |
90 | (sc.ch == '#' && sc.atLineStart) || |
91 | (sc.ch == '#' && ( |
92 | sc.chPrev != '(' && sc.chPrev != ',' && |
93 | sc.chPrev != ']') |
94 | ) |
95 | ) |
96 | { |
97 | sc.SetState(SCE_KVIRC_COMMENT); |
98 | break; |
99 | } |
100 | |
101 | /* Detecting multi-line comments */ |
102 | if (sc.Match('/', '*')) |
103 | { |
104 | sc.SetState(SCE_KVIRC_COMMENTBLOCK); |
105 | break; |
106 | } |
107 | |
108 | /* Detecting strings */ |
109 | if (sc.ch == '"') |
110 | { |
111 | sc.SetState(SCE_KVIRC_STRING); |
112 | break; |
113 | } |
114 | |
115 | /* Detecting functions */ |
116 | if (sc.ch == '$') |
117 | { |
118 | sc.SetState(SCE_KVIRC_FUNCTION); |
119 | break; |
120 | } |
121 | |
122 | /* Detecting variables */ |
123 | if (sc.ch == '%') |
124 | { |
125 | sc.SetState(SCE_KVIRC_VARIABLE); |
126 | break; |
127 | } |
128 | |
129 | /* Detecting numbers - isdigit is unsafe as it does not |
130 | * validate, use CharacterSet.h functions */ |
131 | if (IsADigit(sc.ch)) |
132 | { |
133 | sc.SetState(SCE_KVIRC_NUMBER); |
134 | break; |
135 | } |
136 | |
137 | /* Detecting words */ |
138 | if (IsAWordStart(sc.ch) && IsAWordChar(sc.chNext)) |
139 | { |
140 | sc.SetState(SCE_KVIRC_WORD); |
141 | sc.Forward(); |
142 | break; |
143 | } |
144 | |
145 | /* Detecting operators */ |
146 | if (isoperator(sc.ch)) |
147 | { |
148 | sc.SetState(SCE_KVIRC_OPERATOR); |
149 | break; |
150 | } |
151 | |
152 | break; |
153 | |
154 | case SCE_KVIRC_COMMENT: |
155 | |
156 | /* Breaking out of single line comment when a newline |
157 | * is introduced */ |
158 | if (sc.ch == '\r' || sc.ch == '\n') |
159 | { |
160 | sc.SetState(SCE_KVIRC_DEFAULT); |
161 | break; |
162 | } |
163 | |
164 | break; |
165 | |
166 | case SCE_KVIRC_COMMENTBLOCK: |
167 | |
168 | /* Detecting end of multi-line comment */ |
169 | if (sc.Match('*', '/')) |
170 | { |
171 | // Moving the current position forward two characters |
172 | // so that '*/' is included in the comment |
173 | sc.Forward(2); |
174 | sc.SetState(SCE_KVIRC_DEFAULT); |
175 | |
176 | /* Comment has been exited and the current position |
177 | * moved forward, yet the new current character |
178 | * has yet to be defined - loop without moving |
179 | * forward again */ |
180 | next = false; |
181 | break; |
182 | } |
183 | |
184 | break; |
185 | |
186 | case SCE_KVIRC_STRING: |
187 | |
188 | /* Detecting end of string - closing speechmarks */ |
189 | if (sc.ch == '"') |
190 | { |
191 | /* Allowing escaped speechmarks to pass */ |
192 | if (sc.chPrev == '\\') |
193 | break; |
194 | |
195 | /* Moving the current position forward to capture the |
196 | * terminating speechmarks, and ending string */ |
197 | sc.ForwardSetState(SCE_KVIRC_DEFAULT); |
198 | |
199 | /* String has been exited and the current position |
200 | * moved forward, yet the new current character |
201 | * has yet to be defined - loop without moving |
202 | * forward again */ |
203 | next = false; |
204 | break; |
205 | } |
206 | |
207 | /* Functions and variables are now highlighted in strings |
208 | * Detecting functions */ |
209 | if (sc.ch == '$') |
210 | { |
211 | /* Allowing escaped functions to pass */ |
212 | if (sc.chPrev == '\\') |
213 | break; |
214 | |
215 | sc.SetState(SCE_KVIRC_STRING_FUNCTION); |
216 | break; |
217 | } |
218 | |
219 | /* Detecting variables */ |
220 | if (sc.ch == '%') |
221 | { |
222 | /* Allowing escaped variables to pass */ |
223 | if (sc.chPrev == '\\') |
224 | break; |
225 | |
226 | sc.SetState(SCE_KVIRC_STRING_VARIABLE); |
227 | break; |
228 | } |
229 | |
230 | /* Breaking out of a string when a newline is introduced */ |
231 | if (sc.ch == '\r' || sc.ch == '\n') |
232 | { |
233 | /* Allowing escaped newlines */ |
234 | if (sc.chPrev == '\\') |
235 | break; |
236 | |
237 | sc.SetState(SCE_KVIRC_DEFAULT); |
238 | break; |
239 | } |
240 | |
241 | break; |
242 | |
243 | case SCE_KVIRC_FUNCTION: |
244 | case SCE_KVIRC_VARIABLE: |
245 | |
246 | /* Detecting the end of a function/variable (word) */ |
247 | if (!IsAWordChar(sc.ch)) |
248 | { |
249 | sc.SetState(SCE_KVIRC_DEFAULT); |
250 | |
251 | /* Word has been exited yet the current character |
252 | * has yet to be defined - loop without moving |
253 | * forward again */ |
254 | next = false; |
255 | break; |
256 | } |
257 | |
258 | break; |
259 | |
260 | case SCE_KVIRC_STRING_FUNCTION: |
261 | case SCE_KVIRC_STRING_VARIABLE: |
262 | |
263 | /* A function or variable in a string |
264 | * Detecting the end of a function/variable (word) */ |
265 | if (!IsAWordChar(sc.ch)) |
266 | { |
267 | sc.SetState(SCE_KVIRC_STRING); |
268 | |
269 | /* Word has been exited yet the current character |
270 | * has yet to be defined - loop without moving |
271 | * forward again */ |
272 | next = false; |
273 | break; |
274 | } |
275 | |
276 | break; |
277 | |
278 | case SCE_KVIRC_NUMBER: |
279 | |
280 | /* Detecting the end of a number */ |
281 | if (!IsADigit(sc.ch)) |
282 | { |
283 | sc.SetState(SCE_KVIRC_DEFAULT); |
284 | |
285 | /* Number has been exited yet the current character |
286 | * has yet to be defined - loop without moving |
287 | * forward */ |
288 | next = false; |
289 | break; |
290 | } |
291 | |
292 | break; |
293 | |
294 | case SCE_KVIRC_OPERATOR: |
295 | |
296 | /* Because '%' is an operator but is also the marker for |
297 | * a variable, I need to always treat operators as single |
298 | * character strings and therefore redo their detection |
299 | * after every character */ |
300 | sc.SetState(SCE_KVIRC_DEFAULT); |
301 | |
302 | /* Operator has been exited yet the current character |
303 | * has yet to be defined - loop without moving |
304 | * forward */ |
305 | next = false; |
306 | break; |
307 | |
308 | case SCE_KVIRC_WORD: |
309 | |
310 | /* Detecting the end of a word */ |
311 | if (!IsAWordChar(sc.ch)) |
312 | { |
313 | /* Checking if the word was actually a keyword - |
314 | * fetching the current word, NULL-terminated like |
315 | * the keyword list */ |
316 | char s[100]; |
317 | Sci_Position wordLen = sc.currentPos - styler.GetStartSegment(); |
318 | if (wordLen > 99) |
319 | wordLen = 99; /* Include '\0' in buffer */ |
320 | Sci_Position i; |
321 | for( i = 0; i < wordLen; ++i ) |
322 | { |
323 | s[i] = styler.SafeGetCharAt( styler.GetStartSegment() + i ); |
324 | } |
325 | s[wordLen] = '\0'; |
326 | |
327 | /* Actually detecting keywords and fixing the state */ |
328 | if (keywords.InList(s)) |
329 | { |
330 | /* The SetState call actually commits the |
331 | * previous keyword state */ |
332 | sc.ChangeState(SCE_KVIRC_KEYWORD); |
333 | } |
334 | else if (functionKeywords.InList(s)) |
335 | { |
336 | // Detecting function keywords and fixing the state |
337 | sc.ChangeState(SCE_KVIRC_FUNCTION_KEYWORD); |
338 | } |
339 | |
340 | /* Transitioning to default and committing the previous |
341 | * word state */ |
342 | sc.SetState(SCE_KVIRC_DEFAULT); |
343 | |
344 | /* Word has been exited yet the current character |
345 | * has yet to be defined - loop without moving |
346 | * forward again */ |
347 | next = false; |
348 | break; |
349 | } |
350 | |
351 | break; |
352 | } |
353 | } |
354 | |
355 | /* Indicating processing is complete */ |
356 | sc.Complete(); |
357 | } |
358 | |
359 | static void FoldKVIrcDoc(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/, |
360 | WordList *[], Accessor &styler) |
361 | { |
362 | /* Based on CMake's folder */ |
363 | |
364 | /* Exiting if folding isnt enabled */ |
365 | if ( styler.GetPropertyInt("fold" ) == 0 ) |
366 | return; |
367 | |
368 | /* Obtaining current line number*/ |
369 | Sci_Position currentLine = styler.GetLine(startPos); |
370 | |
371 | /* Obtaining starting character - indentation is done on a line basis, |
372 | * not character */ |
373 | Sci_PositionU safeStartPos = styler.LineStart( currentLine ); |
374 | |
375 | /* Initialising current level - this is defined as indentation level |
376 | * in the low 12 bits, with flag bits in the upper four bits. |
377 | * It looks like two indentation states are maintained in the returned |
378 | * 32bit value - 'nextLevel' in the most-significant bits, 'currentLevel' |
379 | * in the least-significant bits. Since the next level is the most |
380 | * up to date, this must refer to the current state of indentation. |
381 | * So the code bitshifts the old current level out of existence to |
382 | * get at the actual current state of indentation |
383 | * Based on the LexerCPP.cxx line 958 comment */ |
384 | int currentLevel = SC_FOLDLEVELBASE; |
385 | if (currentLine > 0) |
386 | currentLevel = styler.LevelAt(currentLine - 1) >> 16; |
387 | int nextLevel = currentLevel; |
388 | |
389 | // Looping for characters in range |
390 | for (Sci_PositionU i = safeStartPos; i < startPos + length; ++i) |
391 | { |
392 | /* Folding occurs after syntax highlighting, meaning Scintilla |
393 | * already knows where the comments are |
394 | * Fetching the current state */ |
395 | int state = styler.StyleAt(i) & 31; |
396 | |
397 | switch( styler.SafeGetCharAt(i) ) |
398 | { |
399 | case '{': |
400 | |
401 | /* Indenting only when the braces are not contained in |
402 | * a comment */ |
403 | if (state != SCE_KVIRC_COMMENT && |
404 | state != SCE_KVIRC_COMMENTBLOCK) |
405 | ++nextLevel; |
406 | break; |
407 | |
408 | case '}': |
409 | |
410 | /* Outdenting only when the braces are not contained in |
411 | * a comment */ |
412 | if (state != SCE_KVIRC_COMMENT && |
413 | state != SCE_KVIRC_COMMENTBLOCK) |
414 | --nextLevel; |
415 | break; |
416 | |
417 | case '\n': |
418 | case '\r': |
419 | |
420 | /* Preparing indentation information to return - combining |
421 | * current and next level data */ |
422 | int lev = currentLevel | nextLevel << 16; |
423 | |
424 | /* If the next level increases the indent level, mark the |
425 | * current line as a fold point - current level data is |
426 | * in the least significant bits */ |
427 | if (nextLevel > currentLevel ) |
428 | lev |= SC_FOLDLEVELHEADERFLAG; |
429 | |
430 | /* Updating indentation level if needed */ |
431 | if (lev != styler.LevelAt(currentLine)) |
432 | styler.SetLevel(currentLine, lev); |
433 | |
434 | /* Updating variables */ |
435 | ++currentLine; |
436 | currentLevel = nextLevel; |
437 | |
438 | /* Dealing with problematic Windows newlines - |
439 | * incrementing to avoid the extra newline breaking the |
440 | * fold point */ |
441 | if (styler.SafeGetCharAt(i) == '\r' && |
442 | styler.SafeGetCharAt(i + 1) == '\n') |
443 | ++i; |
444 | break; |
445 | } |
446 | } |
447 | |
448 | /* At this point the data has ended, so presumably the end of the line? |
449 | * Preparing indentation information to return - combining current |
450 | * and next level data */ |
451 | int lev = currentLevel | nextLevel << 16; |
452 | |
453 | /* If the next level increases the indent level, mark the current |
454 | * line as a fold point - current level data is in the least |
455 | * significant bits */ |
456 | if (nextLevel > currentLevel ) |
457 | lev |= SC_FOLDLEVELHEADERFLAG; |
458 | |
459 | /* Updating indentation level if needed */ |
460 | if (lev != styler.LevelAt(currentLine)) |
461 | styler.SetLevel(currentLine, lev); |
462 | } |
463 | |
/* Descriptions of the word lists used by this lexer, in index order;
 * the list is terminated by a null pointer */
static const char *const kvircWordListDesc[] = {
    "primary",            /* word list 0 */
    "function_keywords",  /* word list 1 */
    NULL                  /* terminator */
};
470 | |
471 | |
472 | /* Registering functions and wordlists */ |
473 | LexerModule lmKVIrc(SCLEX_KVIRC, ColouriseKVIrcDoc, "kvirc" , FoldKVIrcDoc, |
474 | kvircWordListDesc); |
475 | |