// Scintilla source code edit control
// Encoding: UTF-8
/** @file LexMatlab.cxx
 ** Lexer for Matlab.
 ** Written by José Fonseca
 **
 ** Changes by Christoph Dalitz 2003/12/04:
 ** - added support for Octave
 ** - Strings can now be included both in single or double quotes
 **
 ** Changes by John Donoghue 2012/04/02
 ** - added block comment (and nested block comments)
 ** - added ... displayed as a comment
 ** - removed unused IsAWord functions
 ** - added some comments
 **
 ** Changes by John Donoghue 2014/08/01
 ** - fix allowed transpose ' after {} operator
 **
 ** Changes by John Donoghue 2016/11/15
 ** - update matlab code folding
 **
 ** Changes by John Donoghue 2017/01/18
 ** - update matlab block comment detection
 **/
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
28
29#include <stdlib.h>
30#include <string.h>
31#include <stdio.h>
32#include <stdarg.h>
33#include <assert.h>
34#include <ctype.h>
35
36#include <string>
37#include <string_view>
38
39#include "ILexer.h"
40#include "Scintilla.h"
41#include "SciLexer.h"
42
43#include "WordList.h"
44#include "LexAccessor.h"
45#include "Accessor.h"
46#include "StyleContext.h"
47#include "CharacterSet.h"
48#include "LexerModule.h"
49
50using namespace Lexilla;
51
// Comment-start predicate for Matlab: true only for '%'.
static bool IsMatlabCommentChar(int c) {
	const bool isPercent = ('%' == c);
	return isPercent;
}
55
// Comment-start predicate for Octave: true for '%' and for '#'.
static bool IsOctaveCommentChar(int c) {
	switch (c) {
	case '%':
	case '#':
		return true;
	default:
		return false;
	}
}
59
// Maps 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static inline int LowerCase(int c) {
	const bool isUpper = ('A' <= c) && (c <= 'Z');
	return isUpper ? (c - 'A' + 'a') : c;
}
65
// Classifies a keyword for folding.
//
// str: NUL-terminated keyword text; it is only read, never modified, so it is
//      taken as a pointer to const (callers passing a mutable buffer still work).
//
// Returns  1 when the keyword opens a fold level
//             (if, for, switch, while, try, do, parfor, function),
//         -1 when it closes one (any word starting with "end", or "until"),
//          0 otherwise.
static int CheckKeywordFoldPoint(const char *str) {
	static const char *const foldOpeners[] = {
		"if", "for", "switch", "while", "try", "do", "parfor", "function"
	};
	for (const char *opener : foldOpeners) {
		if (strcmp(opener, str) == 0)
			return 1;
	}
	// Prefix match on "end" so that words such as "endif" or "endfunction"
	// close a level as well as a bare "end".
	if (strncmp("end", str, 3) == 0 ||
		strcmp("until", str) == 0)
		return -1;
	return 0;
}
81
82static bool IsSpaceToEOL(Sci_Position startPos, Accessor &styler) {
83 Sci_Position line = styler.GetLine(startPos);
84 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
85 for (Sci_Position i = startPos; i < eol_pos; i++) {
86 char ch = styler[i];
87 if(!IsASpace(ch)) return false;
88 }
89 return true;
90}
91
92static void ColouriseMatlabOctaveDoc(
93 Sci_PositionU startPos, Sci_Position length, int initStyle,
94 WordList *keywordlists[], Accessor &styler,
95 bool (*IsCommentChar)(int),
96 bool ismatlab) {
97
98 WordList &keywords = *keywordlists[0];
99
100 styler.StartAt(startPos);
101
102 // boolean for when the ' is allowed to be transpose vs the start/end
103 // of a string
104 bool transpose = false;
105
106 // count of brackets as boolean for when end could be an operator not a keyword
107 int allow_end_op = 0;
108
109 // approximate position of first non space character in a line
110 int nonSpaceColumn = -1;
111 // approximate column position of the current character in a line
112 int column = 0;
113
114 // use the line state of each line to store the block comment depth
115 Sci_Position curLine = styler.GetLine(startPos);
116 int commentDepth = curLine > 0 ? styler.GetLineState(curLine-1) : 0;
117
118
119 StyleContext sc(startPos, length, initStyle, styler);
120
121 for (; sc.More(); sc.Forward(), column++) {
122
123 if(sc.atLineStart) {
124 // set the line state to the current commentDepth
125 curLine = styler.GetLine(sc.currentPos);
126 styler.SetLineState(curLine, commentDepth);
127
128 // reset the column to 0, nonSpace to -1 (not set)
129 column = 0;
130 nonSpaceColumn = -1;
131 }
132
133 // save the column position of first non space character in a line
134 if((nonSpaceColumn == -1) && (! IsASpace(sc.ch)))
135 {
136 nonSpaceColumn = column;
137 }
138
139 // check for end of states
140 if (sc.state == SCE_MATLAB_OPERATOR) {
141 if (sc.chPrev == '.') {
142 if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\' || sc.ch == '^') {
143 sc.ForwardSetState(SCE_MATLAB_DEFAULT);
144 transpose = false;
145 } else if (sc.ch == '\'') {
146 sc.ForwardSetState(SCE_MATLAB_DEFAULT);
147 transpose = true;
148 } else if(sc.ch == '.' && sc.chNext == '.') {
149 // we werent an operator, but a '...'
150 sc.ChangeState(SCE_MATLAB_COMMENT);
151 transpose = false;
152 } else {
153 sc.SetState(SCE_MATLAB_DEFAULT);
154 }
155 } else {
156 sc.SetState(SCE_MATLAB_DEFAULT);
157 }
158 } else if (sc.state == SCE_MATLAB_KEYWORD) {
159 if (!isalnum(sc.ch) && sc.ch != '_') {
160 char s[100];
161 sc.GetCurrent(s, sizeof(s));
162
163 if (keywords.InList(s)) {
164 if (strcmp ("end", s) == 0 && allow_end_op) {
165 sc.ChangeState(SCE_MATLAB_NUMBER);
166 }
167 sc.SetState(SCE_MATLAB_DEFAULT);
168 transpose = false;
169 } else {
170 sc.ChangeState(SCE_MATLAB_IDENTIFIER);
171 sc.SetState(SCE_MATLAB_DEFAULT);
172 transpose = true;
173 }
174 }
175 } else if (sc.state == SCE_MATLAB_NUMBER) {
176 if (!isdigit(sc.ch) && sc.ch != '.'
177 && !(sc.ch == 'e' || sc.ch == 'E')
178 && !((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E'))) {
179 sc.SetState(SCE_MATLAB_DEFAULT);
180 transpose = true;
181 }
182 } else if (sc.state == SCE_MATLAB_STRING) {
183 if (sc.ch == '\'') {
184 if (sc.chNext == '\'') {
185 sc.Forward();
186 } else {
187 sc.ForwardSetState(SCE_MATLAB_DEFAULT);
188 }
189 }
190 } else if (sc.state == SCE_MATLAB_DOUBLEQUOTESTRING) {
191 if (sc.ch == '\\' && !ismatlab) {
192 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
193 sc.Forward();
194 }
195 } else if (sc.ch == '\"') {
196 sc.ForwardSetState(SCE_MATLAB_DEFAULT);
197 }
198 } else if (sc.state == SCE_MATLAB_COMMAND) {
199 if (sc.atLineEnd) {
200 sc.SetState(SCE_MATLAB_DEFAULT);
201 transpose = false;
202 }
203 } else if (sc.state == SCE_MATLAB_COMMENT) {
204 // end or start of a nested a block comment?
205 if( IsCommentChar(sc.ch) && sc.chNext == '}' && nonSpaceColumn == column && IsSpaceToEOL(sc.currentPos+2, styler)) {
206 if(commentDepth > 0) commentDepth --;
207
208 curLine = styler.GetLine(sc.currentPos);
209 styler.SetLineState(curLine, commentDepth);
210 sc.Forward();
211
212 if (commentDepth == 0) {
213 sc.ForwardSetState(SCE_D_DEFAULT);
214 transpose = false;
215 }
216 }
217 else if( IsCommentChar(sc.ch) && sc.chNext == '{' && nonSpaceColumn == column && IsSpaceToEOL(sc.currentPos+2, styler))
218 {
219 commentDepth ++;
220
221 curLine = styler.GetLine(sc.currentPos);
222 styler.SetLineState(curLine, commentDepth);
223 sc.Forward();
224 transpose = false;
225
226 } else if(commentDepth == 0) {
227 // single line comment
228 if (sc.atLineEnd || sc.ch == '\r' || sc.ch == '\n') {
229 sc.SetState(SCE_MATLAB_DEFAULT);
230 transpose = false;
231 }
232 }
233 }
234
235 // check start of a new state
236 if (sc.state == SCE_MATLAB_DEFAULT) {
237 if (IsCommentChar(sc.ch)) {
238 // ncrement depth if we are a block comment
239 if(sc.chNext == '{' && nonSpaceColumn == column) {
240 if(IsSpaceToEOL(sc.currentPos+2, styler)) {
241 commentDepth ++;
242 }
243 }
244 curLine = styler.GetLine(sc.currentPos);
245 styler.SetLineState(curLine, commentDepth);
246 sc.SetState(SCE_MATLAB_COMMENT);
247 } else if (sc.ch == '!' && sc.chNext != '=' ) {
248 if(ismatlab) {
249 sc.SetState(SCE_MATLAB_COMMAND);
250 } else {
251 sc.SetState(SCE_MATLAB_OPERATOR);
252 }
253 } else if (sc.ch == '\'') {
254 if (transpose) {
255 sc.SetState(SCE_MATLAB_OPERATOR);
256 } else {
257 sc.SetState(SCE_MATLAB_STRING);
258 }
259 } else if (sc.ch == '"') {
260 sc.SetState(SCE_MATLAB_DOUBLEQUOTESTRING);
261 } else if (isdigit(sc.ch) || (sc.ch == '.' && isdigit(sc.chNext))) {
262 sc.SetState(SCE_MATLAB_NUMBER);
263 } else if (isalpha(sc.ch)) {
264 sc.SetState(SCE_MATLAB_KEYWORD);
265 } else if (isoperator(static_cast<char>(sc.ch)) || sc.ch == '@' || sc.ch == '\\') {
266 if (sc.ch == '(' || sc.ch == '[' || sc.ch == '{') {
267 allow_end_op ++;
268 } else if ((sc.ch == ')' || sc.ch == ']' || sc.ch == '}') && (allow_end_op > 0)) {
269 allow_end_op --;
270 }
271
272 if (sc.ch == ')' || sc.ch == ']' || sc.ch == '}') {
273 transpose = true;
274 } else {
275 transpose = false;
276 }
277 sc.SetState(SCE_MATLAB_OPERATOR);
278 } else {
279 transpose = false;
280 }
281 }
282 }
283 sc.Complete();
284}
285
286static void ColouriseMatlabDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
287 WordList *keywordlists[], Accessor &styler) {
288 ColouriseMatlabOctaveDoc(startPos, length, initStyle, keywordlists, styler, IsMatlabCommentChar, true);
289}
290
291static void ColouriseOctaveDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
292 WordList *keywordlists[], Accessor &styler) {
293 ColouriseMatlabOctaveDoc(startPos, length, initStyle, keywordlists, styler, IsOctaveCommentChar, false);
294}
295
296static void FoldMatlabOctaveDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
297 WordList *[], Accessor &styler,
298 bool (*IsComment)(int ch)) {
299
300 if (styler.GetPropertyInt("fold") == 0)
301 return;
302
303 const bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
304 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
305
306 Sci_PositionU endPos = startPos + length;
307 int visibleChars = 0;
308 Sci_Position lineCurrent = styler.GetLine(startPos);
309 int levelCurrent = SC_FOLDLEVELBASE;
310 if (lineCurrent > 0)
311 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
312 int levelNext = levelCurrent;
313 char chNext = styler[startPos];
314 int styleNext = styler.StyleAt(startPos);
315 int style = initStyle;
316 char word[100];
317 int wordlen = 0;
318 for (Sci_PositionU i = startPos; i < endPos; i++) {
319 char ch = chNext;
320 chNext = styler.SafeGetCharAt(i + 1);
321 style = styleNext;
322 styleNext = styler.StyleAt(i + 1);
323 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
324
325 // a line that starts with a comment
326 if (foldComment && style == SCE_MATLAB_COMMENT && IsComment(ch) && visibleChars == 0) {
327 // start/end of block comment
328 if (chNext == '{' && IsSpaceToEOL(i+2, styler))
329 levelNext ++;
330 if (chNext == '}' && IsSpaceToEOL(i+2, styler))
331 levelNext --;
332 }
333 // keyword
334 if(style == SCE_MATLAB_KEYWORD) {
335 word[wordlen++] = static_cast<char>(LowerCase(ch));
336 if (wordlen == 100) { // prevent overflow
337 word[0] = '\0';
338 wordlen = 1;
339 }
340 if (styleNext != SCE_MATLAB_KEYWORD) {
341 word[wordlen] = '\0';
342 wordlen = 0;
343
344 levelNext += CheckKeywordFoldPoint(word);
345 }
346 }
347 if (!IsASpace(ch))
348 visibleChars++;
349 if (atEOL || (i == endPos-1)) {
350 int levelUse = levelCurrent;
351 int lev = levelUse | levelNext << 16;
352 if (visibleChars == 0 && foldCompact)
353 lev |= SC_FOLDLEVELWHITEFLAG;
354 if (levelUse < levelNext)
355 lev |= SC_FOLDLEVELHEADERFLAG;
356 if (lev != styler.LevelAt(lineCurrent)) {
357 styler.SetLevel(lineCurrent, lev);
358 }
359 lineCurrent++;
360 levelCurrent = levelNext;
361 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
362 // There is an empty line at end of file so give it same level and empty
363 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
364 }
365 visibleChars = 0;
366 }
367 }
368}
369
370static void FoldMatlabDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
371 WordList *keywordlists[], Accessor &styler) {
372 FoldMatlabOctaveDoc(startPos, length, initStyle, keywordlists, styler, IsMatlabCommentChar);
373}
374
375static void FoldOctaveDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
376 WordList *keywordlists[], Accessor &styler) {
377 FoldMatlabOctaveDoc(startPos, length, initStyle, keywordlists, styler, IsOctaveCommentChar);
378}
379
// Names of the word lists of the Matlab lexer; the array ends with a null entry.
static const char * const matlabWordListDesc[] = {
	"Keywords",
	nullptr
};
384
// Names of the word lists of the Octave lexer; the array ends with a null entry.
static const char * const octaveWordListDesc[] = {
	"Keywords",
	nullptr
};
389
390LexerModule lmMatlab(SCLEX_MATLAB, ColouriseMatlabDoc, "matlab", FoldMatlabDoc, matlabWordListDesc);
391
392LexerModule lmOctave(SCLEX_OCTAVE, ColouriseOctaveDoc, "octave", FoldOctaveDoc, octaveWordListDesc);
393