1// Scintilla source code edit control
2
3// @file LexTeX.cxx - general context conformant tex coloring scheme
4// Author: Hans Hagen - PRAGMA ADE - Hasselt NL - www.pragma-ade.com
5// Version: September 28, 2003
6
7// Copyright: 1998-2003 by Neil Hodgson <neilh@scintilla.org>
8// The License.txt file describes the conditions under which this software may be distributed.
9
// This lexer is derived from the one written for the texwork environment (1999++), which in
// turn was inspired by texedit (1991++), which has its roots in wdt (1986).
12
13// If you run into strange boundary cases, just tell me and I'll look into it.
14
15
16// TeX Folding code added by instanton (soft_share@126.com) with borrowed code from VisualTeX source by Alex Romanenko.
17// Version: June 22, 2007
18
19#include <stdlib.h>
20#include <string.h>
21#include <stdio.h>
22#include <stdarg.h>
23#include <assert.h>
24#include <ctype.h>
25
26#include <string>
27#include <string_view>
28
29#include "ILexer.h"
30#include "Scintilla.h"
31#include "SciLexer.h"
32
33#include "WordList.h"
34#include "LexAccessor.h"
35#include "Accessor.h"
36#include "StyleContext.h"
37#include "CharacterSet.h"
38#include "LexerModule.h"
39
40using namespace Lexilla;
41
42// val SCE_TEX_DEFAULT = 0
43// val SCE_TEX_SPECIAL = 1
44// val SCE_TEX_GROUP = 2
45// val SCE_TEX_SYMBOL = 3
46// val SCE_TEX_COMMAND = 4
47// val SCE_TEX_TEXT = 5
48
49// Definitions in SciTEGlobal.properties:
50//
51// TeX Highlighting
52//
53// # Default
54// style.tex.0=fore:#7F7F00
55// # Special
56// style.tex.1=fore:#007F7F
57// # Group
58// style.tex.2=fore:#880000
59// # Symbol
60// style.tex.3=fore:#7F7F00
61// # Command
62// style.tex.4=fore:#008800
63// # Text
64// style.tex.5=fore:#000000
65
66// lexer.tex.interface.default=0
67// lexer.tex.comment.process=0
68
69// todo: lexer.tex.auto.if
70
71// Auxiliary functions:
72
73static inline bool endOfLine(Accessor &styler, Sci_PositionU i) {
74 return
75 (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')) ;
76}
77
// Category zero: the comment starter '%'.
static inline bool isTeXzero(int ch) {
	const int commentStarter = '%' ;
	return ch == commentStarter ;
}
82
// Category one: brackets, parentheses, angle brackets, '=', '#' and the
// double quote.
static inline bool isTeXone(int ch) {
	switch (ch) {
	case '[': case ']':
	case '(': case ')':
	case '<': case '>':
	case '=': case '#':
	case '"':
		return true ;
	default:
		return false ;
	}
}
89
// Category two: the grouping characters '{', '}' and '$'.
static inline bool isTeXtwo(int ch) {
	switch (ch) {
	case '{':
	case '}':
	case '$':
		return true ;
	default:
		return false ;
	}
}
94
// Category three: symbol characters ('~', '^', '_', '&', '-', '+', '"',
// '`', '/', '|' and '%').
static inline bool isTeXthree(int ch) {
	switch (ch) {
	case '~': case '^': case '_': case '&':
	case '-': case '+': case '"': case '`':
	case '/': case '|': case '%':
		return true ;
	default:
		return false ;
	}
}
101
// Category four: the escape character '\' that starts a command.
static inline bool isTeXfour(int ch) {
	const int escapeChar = '\\' ;
	return ch == escapeChar ;
}
106
// Category five: characters that may form a command name, i.e. the ASCII
// letters plus '@', '!' and '?'.
static inline bool isTeXfive(int ch) {
	const bool lowerCase = (ch >= 'a') && (ch <= 'z') ;
	const bool upperCase = (ch >= 'A') && (ch <= 'Z') ;
	if (lowerCase || upperCase) {
		return true ;
	}
	switch (ch) {
	case '@':
	case '!':
	case '?':
		return true ;
	default:
		return false ;
	}
}
112
// Category six: the plain space character.
static inline bool isTeXsix(int ch) {
	const int blank = ' ' ;
	return ch == blank ;
}
117
// Category seven: the caret, which the lexer looks for doubled ("^^").
static inline bool isTeXseven(int ch) {
	const int caret = '^' ;
	return ch == caret ;
}
122
123// Interface determination
124
125static int CheckTeXInterface(
126 Sci_PositionU startPos,
127 Sci_Position length,
128 Accessor &styler,
129 int defaultInterface) {
130
131 char lineBuffer[1024] ;
132 Sci_PositionU linePos = 0 ;
133
134 // some day we can make something lexer.tex.mapping=(all,0)(nl,1)(en,2)...
135
136 if (styler.SafeGetCharAt(0) == '%') {
137 for (Sci_PositionU i = 0; i < startPos + length; i++) {
138 lineBuffer[linePos++] = styler.SafeGetCharAt(i) ;
139 if (endOfLine(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
140 lineBuffer[linePos] = '\0';
141 if (strstr(lineBuffer, "interface=all")) {
142 return 0 ;
143 } else if (strstr(lineBuffer, "interface=tex")) {
144 return 1 ;
145 } else if (strstr(lineBuffer, "interface=nl")) {
146 return 2 ;
147 } else if (strstr(lineBuffer, "interface=en")) {
148 return 3 ;
149 } else if (strstr(lineBuffer, "interface=de")) {
150 return 4 ;
151 } else if (strstr(lineBuffer, "interface=cz")) {
152 return 5 ;
153 } else if (strstr(lineBuffer, "interface=it")) {
154 return 6 ;
155 } else if (strstr(lineBuffer, "interface=ro")) {
156 return 7 ;
157 } else if (strstr(lineBuffer, "interface=latex")) {
158 // we will move latex cum suis up to 91+ when more keyword lists are supported
159 return 8 ;
160 } else if (styler.SafeGetCharAt(1) == 'D' && strstr(lineBuffer, "%D \\module")) {
161 // better would be to limit the search to just one line
162 return 3 ;
163 } else {
164 return defaultInterface ;
165 }
166 }
167 }
168 }
169
170 return defaultInterface ;
171}
172
173static void ColouriseTeXDoc(
174 Sci_PositionU startPos,
175 Sci_Position length,
176 int,
177 WordList *keywordlists[],
178 Accessor &styler) {
179
180 styler.StartAt(startPos) ;
181 styler.StartSegment(startPos) ;
182
183 bool processComment = styler.GetPropertyInt("lexer.tex.comment.process", 0) == 1 ;
184 bool useKeywords = styler.GetPropertyInt("lexer.tex.use.keywords", 1) == 1 ;
185 bool autoIf = styler.GetPropertyInt("lexer.tex.auto.if", 1) == 1 ;
186 int defaultInterface = styler.GetPropertyInt("lexer.tex.interface.default", 1) ;
187
188 char key[100] ;
189 int k ;
190 bool newifDone = false ;
191 bool inComment = false ;
192
193 int currentInterface = CheckTeXInterface(startPos,length,styler,defaultInterface) ;
194
195 if (currentInterface == 0) {
196 useKeywords = false ;
197 currentInterface = 1 ;
198 }
199
200 WordList &keywords = *keywordlists[currentInterface-1] ;
201
202 StyleContext sc(startPos, length, SCE_TEX_TEXT, styler);
203
204 bool going = sc.More() ; // needed because of a fuzzy end of file state
205
206 for (; going; sc.Forward()) {
207
208 if (! sc.More()) { going = false ; } // we need to go one behind the end of text
209
210 if (inComment) {
211 if (sc.atLineEnd) {
212 sc.SetState(SCE_TEX_TEXT) ;
213 newifDone = false ;
214 inComment = false ;
215 }
216 } else {
217 if (! isTeXfive(sc.ch)) {
218 if (sc.state == SCE_TEX_COMMAND) {
219 if (sc.LengthCurrent() == 1) { // \<noncstoken>
220 if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
221 sc.Forward(2) ; // \^^ and \^^<token>
222 }
223 sc.ForwardSetState(SCE_TEX_TEXT) ;
224 } else {
225 sc.GetCurrent(key, sizeof(key)-1) ;
226 k = static_cast<int>(strlen(key)) ;
227 memmove(key,key+1,k) ; // shift left over escape token
228 key[k] = '\0' ;
229 k-- ;
230 if (! keywords || ! useKeywords) {
231 sc.SetState(SCE_TEX_COMMAND) ;
232 newifDone = false ;
233 } else if (k == 1) { //\<cstoken>
234 sc.SetState(SCE_TEX_COMMAND) ;
235 newifDone = false ;
236 } else if (keywords.InList(key)) {
237 sc.SetState(SCE_TEX_COMMAND) ;
238 newifDone = autoIf && (strcmp(key,"newif") == 0) ;
239 } else if (autoIf && ! newifDone && (key[0] == 'i') && (key[1] == 'f') && keywords.InList("if")) {
240 sc.SetState(SCE_TEX_COMMAND) ;
241 } else {
242 sc.ChangeState(SCE_TEX_TEXT) ;
243 sc.SetState(SCE_TEX_TEXT) ;
244 newifDone = false ;
245 }
246 }
247 }
248 if (isTeXzero(sc.ch)) {
249 sc.SetState(SCE_TEX_SYMBOL);
250
251 if (!endOfLine(styler,sc.currentPos + 1))
252 sc.ForwardSetState(SCE_TEX_DEFAULT) ;
253
254 inComment = ! processComment ;
255 newifDone = false ;
256 } else if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
257 sc.SetState(SCE_TEX_TEXT) ;
258 sc.ForwardSetState(SCE_TEX_TEXT) ;
259 } else if (isTeXone(sc.ch)) {
260 sc.SetState(SCE_TEX_SPECIAL) ;
261 newifDone = false ;
262 } else if (isTeXtwo(sc.ch)) {
263 sc.SetState(SCE_TEX_GROUP) ;
264 newifDone = false ;
265 } else if (isTeXthree(sc.ch)) {
266 sc.SetState(SCE_TEX_SYMBOL) ;
267 newifDone = false ;
268 } else if (isTeXfour(sc.ch)) {
269 sc.SetState(SCE_TEX_COMMAND) ;
270 } else if (isTeXsix(sc.ch)) {
271 sc.SetState(SCE_TEX_TEXT) ;
272 } else if (sc.atLineEnd) {
273 sc.SetState(SCE_TEX_TEXT) ;
274 newifDone = false ;
275 inComment = false ;
276 } else {
277 sc.SetState(SCE_TEX_TEXT) ;
278 }
279 } else if (sc.state != SCE_TEX_COMMAND) {
280 sc.SetState(SCE_TEX_TEXT) ;
281 }
282 }
283 }
284 sc.ChangeState(SCE_TEX_TEXT) ;
285 sc.Complete();
286
287}
288
289
// True for the ASCII decimal digits '0' through '9'.
static inline bool isNumber(int ch) {
	return (ch >= '0') && (ch <= '9');
}
296
// True for the ASCII letters a-z and A-Z.
static inline bool isWordChar(int ch) {
	if ((ch >= 'a') && (ch <= 'z')) {
		return true;
	}
	return (ch >= 'A') && (ch <= 'Z');
}
300
301static Sci_Position ParseTeXCommand(Sci_PositionU pos, Accessor &styler, char *command)
302{
303 Sci_Position length=0;
304 char ch=styler.SafeGetCharAt(pos+1);
305
306 if(ch==',' || ch==':' || ch==';' || ch=='%'){
307 command[0]=ch;
308 command[1]=0;
309 return 1;
310 }
311
312 // find end
313 while(isWordChar(ch) && !isNumber(ch) && ch!='_' && ch!='.' && length<100){
314 command[length]=ch;
315 length++;
316 ch=styler.SafeGetCharAt(pos+length+1);
317 }
318
319 command[length]='\0';
320 if(!length) return 0;
321 return length+1;
322}
323
// Classifies a command name (without the escape character) that belongs to
// an opening/closing pair.
//
// Returns  1 for openers: "begin", "FoldStart", "abstract", "unprotect",
//            "title", "documentclass" and any name starting with "start",
//            "Start" or "if";
//         -1 for closers: "end", "FoldStop", "maketitle", "protect", "fi"
//            and any name starting with "stop" or "Stop";
//          0 for everything else, including names that start with a digit
//            or '.'.
static int classifyFoldPointTeXPaired(const char* s) {
	static const char * const closerNames[] = {
		"end", "FoldStop", "maketitle", "protect", "fi"
	};
	static const char * const closerPrefixes[] = {
		"stop", "Stop"
	};
	static const char * const openerNames[] = {
		"begin", "FoldStart", "abstract", "unprotect", "title", "documentclass"
	};
	static const char * const openerPrefixes[] = {
		"start", "Start", "if"
	};

	// isdigit is only defined for values representable as unsigned char
	// (or EOF), so a plain char must be converted before the call.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.'))
		return 0;

	// Closers are tested first so that they would win over an opener match.
	for (const char *name : closerNames) {
		if (strcmp(s, name) == 0)
			return -1;
	}
	for (const char *prefix : closerPrefixes) {
		if (strncmp(s, prefix, strlen(prefix)) == 0)
			return -1;
	}
	for (const char *name : openerNames) {
		if (strcmp(s, name) == 0)
			return 1;
	}
	for (const char *prefix : openerPrefixes) {
		if (strncmp(s, prefix, strlen(prefix)) == 0)
			return 1;
	}
	return 0;
}
342
// Classifies a command name (without the escape character) that opens a
// fold without having an explicit closing command, such as sectioning and
// definition commands.
//
// Returns 1 when s is exactly one of the names in the table below, else 0.
// Names that start with a digit or '.' always give 0.
static int classifyFoldPointTeXUnpaired(const char* s) {
	static const char * const foldStarters[] = {
		"part", "chapter", "section", "subsection", "subsubsection",
		"CJKfamily", "appendix",
		"Topic", "topic", "subject", "subsubject",
		"def", "gdef", "edef", "xdef",
		"framed", "frame",
		"foilhead", "overlays", "slide"
	};

	// isdigit is only defined for values representable as unsigned char
	// (or EOF), so a plain char must be converted before the call.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.'))
		return 0;

	for (const char *name : foldStarters) {
		if (strcmp(s, name) == 0)
			return 1;
	}
	return 0;
}
365
366static bool IsTeXCommentLine(Sci_Position line, Accessor &styler) {
367 Sci_Position pos = styler.LineStart(line);
368 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
369
370 Sci_Position startpos = pos;
371
372 while (startpos<eol_pos){
373 char ch = styler[startpos];
374 if (ch!='%' && ch!=' ') return false;
375 else if (ch=='%') return true;
376 startpos++;
377 }
378
379 return false;
380}
381
382// FoldTeXDoc: borrowed from VisualTeX with modifications
383
384static void FoldTexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler)
385{
386 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
387 Sci_PositionU endPos = startPos+length;
388 int visibleChars=0;
389 Sci_Position lineCurrent=styler.GetLine(startPos);
390 int levelPrev=styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
391 int levelCurrent=levelPrev;
392 char chNext=styler[startPos];
393 char buffer[100]="";
394
395 for (Sci_PositionU i=startPos; i < endPos; i++) {
396 char ch=chNext;
397 chNext=styler.SafeGetCharAt(i+1);
398 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
399
400 if(ch=='\\') {
401 ParseTeXCommand(i, styler, buffer);
402 levelCurrent += classifyFoldPointTeXPaired(buffer)+classifyFoldPointTeXUnpaired(buffer);
403 }
404
405 if (levelCurrent > SC_FOLDLEVELBASE && ((ch == '\r' || ch=='\n') && (chNext == '\\'))) {
406 ParseTeXCommand(i+1, styler, buffer);
407 levelCurrent -= classifyFoldPointTeXUnpaired(buffer);
408 }
409
410 char chNext2;
411 char chNext3;
412 char chNext4;
413 char chNext5;
414 chNext2=styler.SafeGetCharAt(i+2);
415 chNext3=styler.SafeGetCharAt(i+3);
416 chNext4=styler.SafeGetCharAt(i+4);
417 chNext5=styler.SafeGetCharAt(i+5);
418
419 bool atEOfold = (ch == '%') &&
420 (chNext == '%') && (chNext2=='}') &&
421 (chNext3=='}')&& (chNext4=='-')&& (chNext5=='-');
422
423 bool atBOfold = (ch == '%') &&
424 (chNext == '%') && (chNext2=='-') &&
425 (chNext3=='-')&& (chNext4=='{')&& (chNext5=='{');
426
427 if(atBOfold){
428 levelCurrent+=1;
429 }
430
431 if(atEOfold){
432 levelCurrent-=1;
433 }
434
435 if(ch=='\\' && chNext=='['){
436 levelCurrent+=1;
437 }
438
439 if(ch=='\\' && chNext==']'){
440 levelCurrent-=1;
441 }
442
443 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
444
445 if (foldComment && atEOL && IsTeXCommentLine(lineCurrent, styler))
446 {
447 if (lineCurrent==0 && IsTeXCommentLine(lineCurrent + 1, styler)
448 )
449 levelCurrent++;
450 else if (lineCurrent!=0 && !IsTeXCommentLine(lineCurrent - 1, styler)
451 && IsTeXCommentLine(lineCurrent + 1, styler)
452 )
453 levelCurrent++;
454 else if (lineCurrent!=0 && IsTeXCommentLine(lineCurrent - 1, styler) &&
455 !IsTeXCommentLine(lineCurrent+1, styler))
456 levelCurrent--;
457 }
458
459//---------------------------------------------------------------------------------------------
460
461 if (atEOL) {
462 int lev = levelPrev;
463 if (visibleChars == 0 && foldCompact)
464 lev |= SC_FOLDLEVELWHITEFLAG;
465 if ((levelCurrent > levelPrev) && (visibleChars > 0))
466 lev |= SC_FOLDLEVELHEADERFLAG;
467 if (lev != styler.LevelAt(lineCurrent)) {
468 styler.SetLevel(lineCurrent, lev);
469 }
470 lineCurrent++;
471 levelPrev = levelCurrent;
472 visibleChars = 0;
473 }
474
475 if (!isspacechar(ch))
476 visibleChars++;
477 }
478
479 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
480 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
481 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
482}
483
484
485
486
487static const char * const texWordListDesc[] = {
488 "TeX, eTeX, pdfTeX, Omega",
489 "ConTeXt Dutch",
490 "ConTeXt English",
491 "ConTeXt German",
492 "ConTeXt Czech",
493 "ConTeXt Italian",
494 "ConTeXt Romanian",
495 0,
496} ;
497
498LexerModule lmTeX(SCLEX_TEX, ColouriseTeXDoc, "tex", FoldTexDoc, texWordListDesc);
499