1 | // Scintilla source code edit control |
2 | |
3 | // @file LexTeX.cxx - general context conformant tex coloring scheme |
4 | // Author: Hans Hagen - PRAGMA ADE - Hasselt NL - www.pragma-ade.com |
5 | // Version: September 28, 2003 |
6 | |
7 | // Copyright: 1998-2003 by Neil Hodgson <neilh@scintilla.org> |
8 | // The License.txt file describes the conditions under which this software may be distributed. |
9 | |
// This lexer is derived from the one written for the texwork environment (1999++), which in
// turn was inspired by texedit (1991++), which has its roots in wdt (1986).
12 | |
13 | // If you run into strange boundary cases, just tell me and I'll look into it. |
14 | |
15 | |
16 | // TeX Folding code added by instanton (soft_share@126.com) with borrowed code from VisualTeX source by Alex Romanenko. |
17 | // Version: June 22, 2007 |
18 | |
19 | #include <stdlib.h> |
20 | #include <string.h> |
21 | #include <stdio.h> |
22 | #include <stdarg.h> |
23 | #include <assert.h> |
24 | #include <ctype.h> |
25 | |
26 | #include <string> |
27 | #include <string_view> |
28 | |
29 | #include "ILexer.h" |
30 | #include "Scintilla.h" |
31 | #include "SciLexer.h" |
32 | |
33 | #include "WordList.h" |
34 | #include "LexAccessor.h" |
35 | #include "Accessor.h" |
36 | #include "StyleContext.h" |
37 | #include "CharacterSet.h" |
38 | #include "LexerModule.h" |
39 | |
40 | using namespace Lexilla; |
41 | |
42 | // val SCE_TEX_DEFAULT = 0 |
43 | // val SCE_TEX_SPECIAL = 1 |
44 | // val SCE_TEX_GROUP = 2 |
45 | // val SCE_TEX_SYMBOL = 3 |
46 | // val SCE_TEX_COMMAND = 4 |
47 | // val SCE_TEX_TEXT = 5 |
48 | |
49 | // Definitions in SciTEGlobal.properties: |
50 | // |
51 | // TeX Highlighting |
52 | // |
53 | // # Default |
54 | // style.tex.0=fore:#7F7F00 |
55 | // # Special |
56 | // style.tex.1=fore:#007F7F |
57 | // # Group |
58 | // style.tex.2=fore:#880000 |
59 | // # Symbol |
60 | // style.tex.3=fore:#7F7F00 |
61 | // # Command |
62 | // style.tex.4=fore:#008800 |
63 | // # Text |
64 | // style.tex.5=fore:#000000 |
65 | |
66 | // lexer.tex.interface.default=0 |
67 | // lexer.tex.comment.process=0 |
68 | |
69 | // todo: lexer.tex.auto.if |
70 | |
71 | // Auxiliary functions: |
72 | |
73 | static inline bool endOfLine(Accessor &styler, Sci_PositionU i) { |
74 | return |
75 | (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')) ; |
76 | } |
77 | |
// Class zero: the percent sign.
static inline bool isTeXzero(int ch) {
	return '%' == ch ;
}
82 | |
// Class one: brackets, parentheses, angle brackets, '=', '#' and the double quote.
static inline bool isTeXone(int ch) {
	switch (ch) {
	case '[': case ']':
	case '(': case ')':
	case '<': case '>':
	case '=': case '#':
	case '"':
		return true ;
	default:
		return false ;
	}
}
89 | |
// Class two: braces and the dollar sign.
static inline bool isTeXtwo(int ch) {
	switch (ch) {
	case '{':
	case '}':
	case '$':
		return true ;
	default:
		return false ;
	}
}
94 | |
// Class three: the remaining symbol characters. The double quote and the percent
// sign are accepted here as well as by isTeXone / isTeXzero.
static inline bool isTeXthree(int ch) {
	switch (ch) {
	case '~': case '^': case '_': case '&':
	case '-': case '+': case '"': case '`':
	case '/': case '|': case '%':
		return true ;
	default:
		return false ;
	}
}
101 | |
// Class four: the backslash.
static inline bool isTeXfour(int ch) {
	return '\\' == ch ;
}
106 | |
// Class five: ASCII letters plus '@', '!' and '?'.
static inline bool isTeXfive(int ch) {
	const bool lowerCase = (ch >= 'a') && (ch <= 'z') ;
	const bool upperCase = (ch >= 'A') && (ch <= 'Z') ;
	if (lowerCase || upperCase) {
		return true ;
	}
	return (ch == '@') || (ch == '!') || (ch == '?') ;
}
112 | |
// Class six: the space character (tabs and other whitespace are not included).
static inline bool isTeXsix(int ch) {
	return ' ' == ch ;
}
117 | |
// Class seven: the circumflex; callers test two in a row to detect "^^".
static inline bool isTeXseven(int ch) {
	return '^' == ch ;
}
122 | |
123 | // Interface determination |
124 | |
125 | static int CheckTeXInterface( |
126 | Sci_PositionU startPos, |
127 | Sci_Position length, |
128 | Accessor &styler, |
129 | int defaultInterface) { |
130 | |
131 | char lineBuffer[1024] ; |
132 | Sci_PositionU linePos = 0 ; |
133 | |
134 | // some day we can make something lexer.tex.mapping=(all,0)(nl,1)(en,2)... |
135 | |
136 | if (styler.SafeGetCharAt(0) == '%') { |
137 | for (Sci_PositionU i = 0; i < startPos + length; i++) { |
138 | lineBuffer[linePos++] = styler.SafeGetCharAt(i) ; |
139 | if (endOfLine(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) { |
140 | lineBuffer[linePos] = '\0'; |
141 | if (strstr(lineBuffer, "interface=all" )) { |
142 | return 0 ; |
143 | } else if (strstr(lineBuffer, "interface=tex" )) { |
144 | return 1 ; |
145 | } else if (strstr(lineBuffer, "interface=nl" )) { |
146 | return 2 ; |
147 | } else if (strstr(lineBuffer, "interface=en" )) { |
148 | return 3 ; |
149 | } else if (strstr(lineBuffer, "interface=de" )) { |
150 | return 4 ; |
151 | } else if (strstr(lineBuffer, "interface=cz" )) { |
152 | return 5 ; |
153 | } else if (strstr(lineBuffer, "interface=it" )) { |
154 | return 6 ; |
155 | } else if (strstr(lineBuffer, "interface=ro" )) { |
156 | return 7 ; |
157 | } else if (strstr(lineBuffer, "interface=latex" )) { |
158 | // we will move latex cum suis up to 91+ when more keyword lists are supported |
159 | return 8 ; |
160 | } else if (styler.SafeGetCharAt(1) == 'D' && strstr(lineBuffer, "%D \\module" )) { |
161 | // better would be to limit the search to just one line |
162 | return 3 ; |
163 | } else { |
164 | return defaultInterface ; |
165 | } |
166 | } |
167 | } |
168 | } |
169 | |
170 | return defaultInterface ; |
171 | } |
172 | |
173 | static void ColouriseTeXDoc( |
174 | Sci_PositionU startPos, |
175 | Sci_Position length, |
176 | int, |
177 | WordList *keywordlists[], |
178 | Accessor &styler) { |
179 | |
180 | styler.StartAt(startPos) ; |
181 | styler.StartSegment(startPos) ; |
182 | |
183 | bool = styler.GetPropertyInt("lexer.tex.comment.process" , 0) == 1 ; |
184 | bool useKeywords = styler.GetPropertyInt("lexer.tex.use.keywords" , 1) == 1 ; |
185 | bool autoIf = styler.GetPropertyInt("lexer.tex.auto.if" , 1) == 1 ; |
186 | int defaultInterface = styler.GetPropertyInt("lexer.tex.interface.default" , 1) ; |
187 | |
188 | char key[100] ; |
189 | int k ; |
190 | bool newifDone = false ; |
191 | bool = false ; |
192 | |
193 | int currentInterface = CheckTeXInterface(startPos,length,styler,defaultInterface) ; |
194 | |
195 | if (currentInterface == 0) { |
196 | useKeywords = false ; |
197 | currentInterface = 1 ; |
198 | } |
199 | |
200 | WordList &keywords = *keywordlists[currentInterface-1] ; |
201 | |
202 | StyleContext sc(startPos, length, SCE_TEX_TEXT, styler); |
203 | |
204 | bool going = sc.More() ; // needed because of a fuzzy end of file state |
205 | |
206 | for (; going; sc.Forward()) { |
207 | |
208 | if (! sc.More()) { going = false ; } // we need to go one behind the end of text |
209 | |
210 | if (inComment) { |
211 | if (sc.atLineEnd) { |
212 | sc.SetState(SCE_TEX_TEXT) ; |
213 | newifDone = false ; |
214 | inComment = false ; |
215 | } |
216 | } else { |
217 | if (! isTeXfive(sc.ch)) { |
218 | if (sc.state == SCE_TEX_COMMAND) { |
219 | if (sc.LengthCurrent() == 1) { // \<noncstoken> |
220 | if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) { |
221 | sc.Forward(2) ; // \^^ and \^^<token> |
222 | } |
223 | sc.ForwardSetState(SCE_TEX_TEXT) ; |
224 | } else { |
225 | sc.GetCurrent(key, sizeof(key)-1) ; |
226 | k = static_cast<int>(strlen(key)) ; |
227 | memmove(key,key+1,k) ; // shift left over escape token |
228 | key[k] = '\0' ; |
229 | k-- ; |
230 | if (! keywords || ! useKeywords) { |
231 | sc.SetState(SCE_TEX_COMMAND) ; |
232 | newifDone = false ; |
233 | } else if (k == 1) { //\<cstoken> |
234 | sc.SetState(SCE_TEX_COMMAND) ; |
235 | newifDone = false ; |
236 | } else if (keywords.InList(key)) { |
237 | sc.SetState(SCE_TEX_COMMAND) ; |
238 | newifDone = autoIf && (strcmp(key,"newif" ) == 0) ; |
239 | } else if (autoIf && ! newifDone && (key[0] == 'i') && (key[1] == 'f') && keywords.InList("if" )) { |
240 | sc.SetState(SCE_TEX_COMMAND) ; |
241 | } else { |
242 | sc.ChangeState(SCE_TEX_TEXT) ; |
243 | sc.SetState(SCE_TEX_TEXT) ; |
244 | newifDone = false ; |
245 | } |
246 | } |
247 | } |
248 | if (isTeXzero(sc.ch)) { |
249 | sc.SetState(SCE_TEX_SYMBOL); |
250 | |
251 | if (!endOfLine(styler,sc.currentPos + 1)) |
252 | sc.ForwardSetState(SCE_TEX_DEFAULT) ; |
253 | |
254 | inComment = ! processComment ; |
255 | newifDone = false ; |
256 | } else if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) { |
257 | sc.SetState(SCE_TEX_TEXT) ; |
258 | sc.ForwardSetState(SCE_TEX_TEXT) ; |
259 | } else if (isTeXone(sc.ch)) { |
260 | sc.SetState(SCE_TEX_SPECIAL) ; |
261 | newifDone = false ; |
262 | } else if (isTeXtwo(sc.ch)) { |
263 | sc.SetState(SCE_TEX_GROUP) ; |
264 | newifDone = false ; |
265 | } else if (isTeXthree(sc.ch)) { |
266 | sc.SetState(SCE_TEX_SYMBOL) ; |
267 | newifDone = false ; |
268 | } else if (isTeXfour(sc.ch)) { |
269 | sc.SetState(SCE_TEX_COMMAND) ; |
270 | } else if (isTeXsix(sc.ch)) { |
271 | sc.SetState(SCE_TEX_TEXT) ; |
272 | } else if (sc.atLineEnd) { |
273 | sc.SetState(SCE_TEX_TEXT) ; |
274 | newifDone = false ; |
275 | inComment = false ; |
276 | } else { |
277 | sc.SetState(SCE_TEX_TEXT) ; |
278 | } |
279 | } else if (sc.state != SCE_TEX_COMMAND) { |
280 | sc.SetState(SCE_TEX_TEXT) ; |
281 | } |
282 | } |
283 | } |
284 | sc.ChangeState(SCE_TEX_TEXT) ; |
285 | sc.Complete(); |
286 | |
287 | } |
288 | |
289 | |
// True for the ASCII decimal digits '0' through '9'.
static inline bool isNumber(int ch) {
	return (ch >= '0') && (ch <= '9');
}
296 | |
// True for ASCII letters only; digits, '_' and every other character are rejected.
static inline bool isWordChar(int ch) {
	if ((ch >= 'A') && (ch <= 'Z')) {
		return true;
	}
	return (ch >= 'a') && (ch <= 'z');
}
300 | |
301 | static Sci_Position ParseTeXCommand(Sci_PositionU pos, Accessor &styler, char *command) |
302 | { |
303 | Sci_Position length=0; |
304 | char ch=styler.SafeGetCharAt(pos+1); |
305 | |
306 | if(ch==',' || ch==':' || ch==';' || ch=='%'){ |
307 | command[0]=ch; |
308 | command[1]=0; |
309 | return 1; |
310 | } |
311 | |
312 | // find end |
313 | while(isWordChar(ch) && !isNumber(ch) && ch!='_' && ch!='.' && length<100){ |
314 | command[length]=ch; |
315 | length++; |
316 | ch=styler.SafeGetCharAt(pos+length+1); |
317 | } |
318 | |
319 | command[length]='\0'; |
320 | if(!length) return 0; |
321 | return length+1; |
322 | } |
323 | |
// Fold level change caused by a command that comes in open/close pairs.
//
// Returns +1 for an opening command (begin, FoldStart, abstract, unprotect, title,
// documentclass, and anything starting with "start", "Start" or "if"), -1 for a
// closing command (end, FoldStop, maketitle, protect, fi, and anything starting
// with "stop" or "Stop"), and 0 otherwise. Names starting with a digit or '.' are
// never fold points.
static int classifyFoldPointTeXPaired(const char* s) {
	// isdigit() is only defined for EOF and unsigned char values, so a plain char
	// (possibly negative for bytes >= 0x80) must not be passed directly.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.')) {
		return 0;
	}

	int lev=0;
	if (strcmp(s, "begin")==0||strcmp(s,"FoldStart")==0||
		strcmp(s,"abstract")==0||strcmp(s,"unprotect")==0||
		strcmp(s,"title")==0||strncmp(s,"start",5)==0||strncmp(s,"Start",5)==0||
		strcmp(s,"documentclass")==0||strncmp(s,"if",2)==0
		)
		lev=1;
	if (strcmp(s, "end")==0||strcmp(s,"FoldStop")==0||
		strcmp(s,"maketitle")==0||strcmp(s,"protect")==0||
		strncmp(s,"stop",4)==0||strncmp(s,"Stop",4)==0||
		strcmp(s,"fi")==0
		)
		lev=-1;
	return lev;
}
342 | |
// Fold level change caused by a command that opens a fold but has no closing
// counterpart (sectioning commands, macro definitions, slides, ...).
//
// Returns 1 when `s` is exactly one of the names listed below and 0 otherwise.
// Names starting with a digit or '.' are never fold points.
static int classifyFoldPointTeXUnpaired(const char* s) {
	static const char * const unpairedCommands[] = {
		"part",
		"chapter",
		"section",
		"subsection",
		"subsubsection",
		"CJKfamily",
		"appendix",
		"Topic", "topic",
		"subject", "subsubject",
		"def", "gdef", "edef", "xdef",
		"framed",
		"frame",
		"foilhead", "overlays", "slide",
	};

	// isdigit() is only defined for EOF and unsigned char values, so a plain char
	// (possibly negative for bytes >= 0x80) must not be passed directly.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.')) {
		return 0;
	}

	for (const char *name : unpairedCommands) {
		if (strcmp(s, name) == 0) {
			return 1;
		}
	}
	return 0;
}
365 | |
366 | static bool (Sci_Position line, Accessor &styler) { |
367 | Sci_Position pos = styler.LineStart(line); |
368 | Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
369 | |
370 | Sci_Position startpos = pos; |
371 | |
372 | while (startpos<eol_pos){ |
373 | char ch = styler[startpos]; |
374 | if (ch!='%' && ch!=' ') return false; |
375 | else if (ch=='%') return true; |
376 | startpos++; |
377 | } |
378 | |
379 | return false; |
380 | } |
381 | |
382 | // FoldTeXDoc: borrowed from VisualTeX with modifications |
383 | |
384 | static void FoldTexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) |
385 | { |
386 | bool foldCompact = styler.GetPropertyInt("fold.compact" , 1) != 0; |
387 | Sci_PositionU endPos = startPos+length; |
388 | int visibleChars=0; |
389 | Sci_Position lineCurrent=styler.GetLine(startPos); |
390 | int levelPrev=styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; |
391 | int levelCurrent=levelPrev; |
392 | char chNext=styler[startPos]; |
393 | char buffer[100]="" ; |
394 | |
395 | for (Sci_PositionU i=startPos; i < endPos; i++) { |
396 | char ch=chNext; |
397 | chNext=styler.SafeGetCharAt(i+1); |
398 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
399 | |
400 | if(ch=='\\') { |
401 | ParseTeXCommand(i, styler, buffer); |
402 | levelCurrent += classifyFoldPointTeXPaired(buffer)+classifyFoldPointTeXUnpaired(buffer); |
403 | } |
404 | |
405 | if (levelCurrent > SC_FOLDLEVELBASE && ((ch == '\r' || ch=='\n') && (chNext == '\\'))) { |
406 | ParseTeXCommand(i+1, styler, buffer); |
407 | levelCurrent -= classifyFoldPointTeXUnpaired(buffer); |
408 | } |
409 | |
410 | char chNext2; |
411 | char chNext3; |
412 | char chNext4; |
413 | char chNext5; |
414 | chNext2=styler.SafeGetCharAt(i+2); |
415 | chNext3=styler.SafeGetCharAt(i+3); |
416 | chNext4=styler.SafeGetCharAt(i+4); |
417 | chNext5=styler.SafeGetCharAt(i+5); |
418 | |
419 | bool atEOfold = (ch == '%') && |
420 | (chNext == '%') && (chNext2=='}') && |
421 | (chNext3=='}')&& (chNext4=='-')&& (chNext5=='-'); |
422 | |
423 | bool atBOfold = (ch == '%') && |
424 | (chNext == '%') && (chNext2=='-') && |
425 | (chNext3=='-')&& (chNext4=='{')&& (chNext5=='{'); |
426 | |
427 | if(atBOfold){ |
428 | levelCurrent+=1; |
429 | } |
430 | |
431 | if(atEOfold){ |
432 | levelCurrent-=1; |
433 | } |
434 | |
435 | if(ch=='\\' && chNext=='['){ |
436 | levelCurrent+=1; |
437 | } |
438 | |
439 | if(ch=='\\' && chNext==']'){ |
440 | levelCurrent-=1; |
441 | } |
442 | |
443 | bool = styler.GetPropertyInt("fold.comment" ) != 0; |
444 | |
445 | if (foldComment && atEOL && IsTeXCommentLine(lineCurrent, styler)) |
446 | { |
447 | if (lineCurrent==0 && IsTeXCommentLine(lineCurrent + 1, styler) |
448 | ) |
449 | levelCurrent++; |
450 | else if (lineCurrent!=0 && !IsTeXCommentLine(lineCurrent - 1, styler) |
451 | && IsTeXCommentLine(lineCurrent + 1, styler) |
452 | ) |
453 | levelCurrent++; |
454 | else if (lineCurrent!=0 && IsTeXCommentLine(lineCurrent - 1, styler) && |
455 | !IsTeXCommentLine(lineCurrent+1, styler)) |
456 | levelCurrent--; |
457 | } |
458 | |
459 | //--------------------------------------------------------------------------------------------- |
460 | |
461 | if (atEOL) { |
462 | int lev = levelPrev; |
463 | if (visibleChars == 0 && foldCompact) |
464 | lev |= SC_FOLDLEVELWHITEFLAG; |
465 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) |
466 | lev |= SC_FOLDLEVELHEADERFLAG; |
467 | if (lev != styler.LevelAt(lineCurrent)) { |
468 | styler.SetLevel(lineCurrent, lev); |
469 | } |
470 | lineCurrent++; |
471 | levelPrev = levelCurrent; |
472 | visibleChars = 0; |
473 | } |
474 | |
475 | if (!isspacechar(ch)) |
476 | visibleChars++; |
477 | } |
478 | |
479 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
480 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
481 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
482 | } |
483 | |
484 | |
485 | |
486 | |
// Descriptions of the keyword lists, in keyword list order; the array ends with a
// null pointer.
static const char * const texWordListDesc[] = {
	"TeX, eTeX, pdfTeX, Omega",
	"ConTeXt Dutch",
	"ConTeXt English",
	"ConTeXt German",
	"ConTeXt Czech",
	"ConTeXt Italian",
	"ConTeXt Romanian",
	nullptr
};
497 | |
498 | LexerModule lmTeX(SCLEX_TEX, ColouriseTeXDoc, "tex" , FoldTexDoc, texWordListDesc); |
499 | |