1// Scintilla source code edit control
2/** @file LexKVIrc.cxx
3 ** Lexer for KVIrc script.
4 **/
5// Copyright 2013 by OmegaPhil <OmegaPhil+scintilla@gmail.com>, based in
6// part from LexPython Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
7// and LexCmake Copyright 2007 by Cristian Adam <cristian [dot] adam [at] gmx [dot] net>
8
9// The License.txt file describes the conditions under which this software may be distributed.
10
11#include <stdlib.h>
12#include <string.h>
13#include <stdio.h>
14#include <stdarg.h>
15#include <assert.h>
16#include <ctype.h>
17
18#include <string>
19#include <string_view>
20
21#include "ILexer.h"
22#include "Scintilla.h"
23#include "SciLexer.h"
24
25#include "WordList.h"
26#include "LexAccessor.h"
27#include "Accessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31
32using namespace Lexilla;
33
34
35/* KVIrc Script syntactic rules: http://www.kvirc.net/doc/doc_syntactic_rules.html */
36
37/* Utility functions */
/**
 * Tells whether a character may appear inside a KVIrc word.
 *
 * The keyword list includes modules, i.e. words containing '.', and
 * alias namespaces contain ':', so both are accepted alongside ASCII
 * letters, digits and '_'.
 *
 * @param ch Character value to classify.
 * @return true when ch is an ASCII word character, false otherwise
 *         (including for negative values and values of 0x80 or above).
 */
static inline bool IsAWordChar(int ch) {

    /* isalnum() is only defined for values representable as unsigned
     * char (or EOF), so out-of-range values are rejected before it is
     * consulted */
    if (ch < 0 || ch >= 0x80)
        return false;

    return isalnum(ch) || ch == '_' || ch == '.' || ch == ':';
}
/**
 * Tells whether a character may begin a KVIrc word.
 *
 * Functions (which start with '$') are treated separately to keywords,
 * so only ASCII letters, digits and '_' are accepted here.
 *
 * @param ch Character value to classify.
 * @return true when ch is an ASCII letter, digit or '_', false otherwise
 *         (including for negative values and values of 0x80 or above).
 */
static inline bool IsAWordStart(int ch) {

    /* isalnum() is only defined for values representable as unsigned
     * char (or EOF), so out-of-range values are rejected before it is
     * consulted */
    if (ch < 0 || ch >= 0x80)
        return false;

    return isalnum(ch) || ch == '_';
}
50
51/* Interface function called by Scintilla to request some text to be
52 syntax highlighted */
53static void ColouriseKVIrcDoc(Sci_PositionU startPos, Sci_Position length,
54 int initStyle, WordList *keywordlists[],
55 Accessor &styler)
56{
57 /* Fetching style context */
58 StyleContext sc(startPos, length, initStyle, styler);
59
60 /* Accessing keywords and function-marking keywords */
61 WordList &keywords = *keywordlists[0];
62 WordList &functionKeywords = *keywordlists[1];
63
64 /* Looping for all characters - only automatically moving forward
65 * when asked for (transitions leaving strings and keywords do this
66 * already) */
67 bool next = true;
68 for( ; sc.More(); next ? sc.Forward() : (void)0 )
69 {
70 /* Resetting next */
71 next = true;
72
73 /* Dealing with different states */
74 switch (sc.state)
75 {
76 case SCE_KVIRC_DEFAULT:
77
78 /* Detecting single-line comments
79 * Unfortunately KVIrc script allows raw '#<channel
80 * name>' to be used, and appending # to an array returns
81 * its length...
82 * Going for a compromise where single line comments not
83 * starting on a newline are allowed in all cases except
84 * when they are preceeded with an opening bracket or comma
85 * (this will probably be the most common style a valid
86 * string-less channel name will be used with), with the
87 * array length case included
88 */
89 if (
90 (sc.ch == '#' && sc.atLineStart) ||
91 (sc.ch == '#' && (
92 sc.chPrev != '(' && sc.chPrev != ',' &&
93 sc.chPrev != ']')
94 )
95 )
96 {
97 sc.SetState(SCE_KVIRC_COMMENT);
98 break;
99 }
100
101 /* Detecting multi-line comments */
102 if (sc.Match('/', '*'))
103 {
104 sc.SetState(SCE_KVIRC_COMMENTBLOCK);
105 break;
106 }
107
108 /* Detecting strings */
109 if (sc.ch == '"')
110 {
111 sc.SetState(SCE_KVIRC_STRING);
112 break;
113 }
114
115 /* Detecting functions */
116 if (sc.ch == '$')
117 {
118 sc.SetState(SCE_KVIRC_FUNCTION);
119 break;
120 }
121
122 /* Detecting variables */
123 if (sc.ch == '%')
124 {
125 sc.SetState(SCE_KVIRC_VARIABLE);
126 break;
127 }
128
129 /* Detecting numbers - isdigit is unsafe as it does not
130 * validate, use CharacterSet.h functions */
131 if (IsADigit(sc.ch))
132 {
133 sc.SetState(SCE_KVIRC_NUMBER);
134 break;
135 }
136
137 /* Detecting words */
138 if (IsAWordStart(sc.ch) && IsAWordChar(sc.chNext))
139 {
140 sc.SetState(SCE_KVIRC_WORD);
141 sc.Forward();
142 break;
143 }
144
145 /* Detecting operators */
146 if (isoperator(sc.ch))
147 {
148 sc.SetState(SCE_KVIRC_OPERATOR);
149 break;
150 }
151
152 break;
153
154 case SCE_KVIRC_COMMENT:
155
156 /* Breaking out of single line comment when a newline
157 * is introduced */
158 if (sc.ch == '\r' || sc.ch == '\n')
159 {
160 sc.SetState(SCE_KVIRC_DEFAULT);
161 break;
162 }
163
164 break;
165
166 case SCE_KVIRC_COMMENTBLOCK:
167
168 /* Detecting end of multi-line comment */
169 if (sc.Match('*', '/'))
170 {
171 // Moving the current position forward two characters
172 // so that '*/' is included in the comment
173 sc.Forward(2);
174 sc.SetState(SCE_KVIRC_DEFAULT);
175
176 /* Comment has been exited and the current position
177 * moved forward, yet the new current character
178 * has yet to be defined - loop without moving
179 * forward again */
180 next = false;
181 break;
182 }
183
184 break;
185
186 case SCE_KVIRC_STRING:
187
188 /* Detecting end of string - closing speechmarks */
189 if (sc.ch == '"')
190 {
191 /* Allowing escaped speechmarks to pass */
192 if (sc.chPrev == '\\')
193 break;
194
195 /* Moving the current position forward to capture the
196 * terminating speechmarks, and ending string */
197 sc.ForwardSetState(SCE_KVIRC_DEFAULT);
198
199 /* String has been exited and the current position
200 * moved forward, yet the new current character
201 * has yet to be defined - loop without moving
202 * forward again */
203 next = false;
204 break;
205 }
206
207 /* Functions and variables are now highlighted in strings
208 * Detecting functions */
209 if (sc.ch == '$')
210 {
211 /* Allowing escaped functions to pass */
212 if (sc.chPrev == '\\')
213 break;
214
215 sc.SetState(SCE_KVIRC_STRING_FUNCTION);
216 break;
217 }
218
219 /* Detecting variables */
220 if (sc.ch == '%')
221 {
222 /* Allowing escaped variables to pass */
223 if (sc.chPrev == '\\')
224 break;
225
226 sc.SetState(SCE_KVIRC_STRING_VARIABLE);
227 break;
228 }
229
230 /* Breaking out of a string when a newline is introduced */
231 if (sc.ch == '\r' || sc.ch == '\n')
232 {
233 /* Allowing escaped newlines */
234 if (sc.chPrev == '\\')
235 break;
236
237 sc.SetState(SCE_KVIRC_DEFAULT);
238 break;
239 }
240
241 break;
242
243 case SCE_KVIRC_FUNCTION:
244 case SCE_KVIRC_VARIABLE:
245
246 /* Detecting the end of a function/variable (word) */
247 if (!IsAWordChar(sc.ch))
248 {
249 sc.SetState(SCE_KVIRC_DEFAULT);
250
251 /* Word has been exited yet the current character
252 * has yet to be defined - loop without moving
253 * forward again */
254 next = false;
255 break;
256 }
257
258 break;
259
260 case SCE_KVIRC_STRING_FUNCTION:
261 case SCE_KVIRC_STRING_VARIABLE:
262
263 /* A function or variable in a string
264 * Detecting the end of a function/variable (word) */
265 if (!IsAWordChar(sc.ch))
266 {
267 sc.SetState(SCE_KVIRC_STRING);
268
269 /* Word has been exited yet the current character
270 * has yet to be defined - loop without moving
271 * forward again */
272 next = false;
273 break;
274 }
275
276 break;
277
278 case SCE_KVIRC_NUMBER:
279
280 /* Detecting the end of a number */
281 if (!IsADigit(sc.ch))
282 {
283 sc.SetState(SCE_KVIRC_DEFAULT);
284
285 /* Number has been exited yet the current character
286 * has yet to be defined - loop without moving
287 * forward */
288 next = false;
289 break;
290 }
291
292 break;
293
294 case SCE_KVIRC_OPERATOR:
295
296 /* Because '%' is an operator but is also the marker for
297 * a variable, I need to always treat operators as single
298 * character strings and therefore redo their detection
299 * after every character */
300 sc.SetState(SCE_KVIRC_DEFAULT);
301
302 /* Operator has been exited yet the current character
303 * has yet to be defined - loop without moving
304 * forward */
305 next = false;
306 break;
307
308 case SCE_KVIRC_WORD:
309
310 /* Detecting the end of a word */
311 if (!IsAWordChar(sc.ch))
312 {
313 /* Checking if the word was actually a keyword -
314 * fetching the current word, NULL-terminated like
315 * the keyword list */
316 char s[100];
317 Sci_Position wordLen = sc.currentPos - styler.GetStartSegment();
318 if (wordLen > 99)
319 wordLen = 99; /* Include '\0' in buffer */
320 Sci_Position i;
321 for( i = 0; i < wordLen; ++i )
322 {
323 s[i] = styler.SafeGetCharAt( styler.GetStartSegment() + i );
324 }
325 s[wordLen] = '\0';
326
327 /* Actually detecting keywords and fixing the state */
328 if (keywords.InList(s))
329 {
330 /* The SetState call actually commits the
331 * previous keyword state */
332 sc.ChangeState(SCE_KVIRC_KEYWORD);
333 }
334 else if (functionKeywords.InList(s))
335 {
336 // Detecting function keywords and fixing the state
337 sc.ChangeState(SCE_KVIRC_FUNCTION_KEYWORD);
338 }
339
340 /* Transitioning to default and committing the previous
341 * word state */
342 sc.SetState(SCE_KVIRC_DEFAULT);
343
344 /* Word has been exited yet the current character
345 * has yet to be defined - loop without moving
346 * forward again */
347 next = false;
348 break;
349 }
350
351 break;
352 }
353 }
354
355 /* Indicating processing is complete */
356 sc.Complete();
357}
358
359static void FoldKVIrcDoc(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/,
360 WordList *[], Accessor &styler)
361{
362 /* Based on CMake's folder */
363
364 /* Exiting if folding isnt enabled */
365 if ( styler.GetPropertyInt("fold") == 0 )
366 return;
367
368 /* Obtaining current line number*/
369 Sci_Position currentLine = styler.GetLine(startPos);
370
371 /* Obtaining starting character - indentation is done on a line basis,
372 * not character */
373 Sci_PositionU safeStartPos = styler.LineStart( currentLine );
374
375 /* Initialising current level - this is defined as indentation level
376 * in the low 12 bits, with flag bits in the upper four bits.
377 * It looks like two indentation states are maintained in the returned
378 * 32bit value - 'nextLevel' in the most-significant bits, 'currentLevel'
379 * in the least-significant bits. Since the next level is the most
380 * up to date, this must refer to the current state of indentation.
381 * So the code bitshifts the old current level out of existence to
382 * get at the actual current state of indentation
383 * Based on the LexerCPP.cxx line 958 comment */
384 int currentLevel = SC_FOLDLEVELBASE;
385 if (currentLine > 0)
386 currentLevel = styler.LevelAt(currentLine - 1) >> 16;
387 int nextLevel = currentLevel;
388
389 // Looping for characters in range
390 for (Sci_PositionU i = safeStartPos; i < startPos + length; ++i)
391 {
392 /* Folding occurs after syntax highlighting, meaning Scintilla
393 * already knows where the comments are
394 * Fetching the current state */
395 int state = styler.StyleAt(i) & 31;
396
397 switch( styler.SafeGetCharAt(i) )
398 {
399 case '{':
400
401 /* Indenting only when the braces are not contained in
402 * a comment */
403 if (state != SCE_KVIRC_COMMENT &&
404 state != SCE_KVIRC_COMMENTBLOCK)
405 ++nextLevel;
406 break;
407
408 case '}':
409
410 /* Outdenting only when the braces are not contained in
411 * a comment */
412 if (state != SCE_KVIRC_COMMENT &&
413 state != SCE_KVIRC_COMMENTBLOCK)
414 --nextLevel;
415 break;
416
417 case '\n':
418 case '\r':
419
420 /* Preparing indentation information to return - combining
421 * current and next level data */
422 int lev = currentLevel | nextLevel << 16;
423
424 /* If the next level increases the indent level, mark the
425 * current line as a fold point - current level data is
426 * in the least significant bits */
427 if (nextLevel > currentLevel )
428 lev |= SC_FOLDLEVELHEADERFLAG;
429
430 /* Updating indentation level if needed */
431 if (lev != styler.LevelAt(currentLine))
432 styler.SetLevel(currentLine, lev);
433
434 /* Updating variables */
435 ++currentLine;
436 currentLevel = nextLevel;
437
438 /* Dealing with problematic Windows newlines -
439 * incrementing to avoid the extra newline breaking the
440 * fold point */
441 if (styler.SafeGetCharAt(i) == '\r' &&
442 styler.SafeGetCharAt(i + 1) == '\n')
443 ++i;
444 break;
445 }
446 }
447
448 /* At this point the data has ended, so presumably the end of the line?
449 * Preparing indentation information to return - combining current
450 * and next level data */
451 int lev = currentLevel | nextLevel << 16;
452
453 /* If the next level increases the indent level, mark the current
454 * line as a fold point - current level data is in the least
455 * significant bits */
456 if (nextLevel > currentLevel )
457 lev |= SC_FOLDLEVELHEADERFLAG;
458
459 /* Updating indentation level if needed */
460 if (lev != styler.LevelAt(currentLine))
461 styler.SetLevel(currentLine, lev);
462}
463
/* Descriptions of the word lists accepted by the lexer: the first entry
 * names the primary keyword list and the second the function keyword
 * list. A null entry terminates the array. */
static const char *const kvircWordListDesc[] = {
    "primary",
    "function_keywords",
    nullptr
};
470
471
472/* Registering functions and wordlists */
473LexerModule lmKVIrc(SCLEX_KVIRC, ColouriseKVIrcDoc, "kvirc", FoldKVIrcDoc,
474 kvircWordListDesc);
475