| 1 | // Scintilla source code edit control |
| 2 | |
| 3 | // @file LexTeX.cxx - general context conformant tex coloring scheme |
| 4 | // Author: Hans Hagen - PRAGMA ADE - Hasselt NL - www.pragma-ade.com |
| 5 | // Version: September 28, 2003 |
| 6 | |
| 7 | // Copyright: 1998-2003 by Neil Hodgson <neilh@scintilla.org> |
| 8 | // The License.txt file describes the conditions under which this software may be distributed. |
| 9 | |
| 10 | // This lexer is derived from the one written for the texwork environment (1999++) which in |
| 11 | // turn is inspired on texedit (1991++) which finds its roots in wdt (1986). |
| 12 | |
| 13 | // If you run into strange boundary cases, just tell me and I'll look into it. |
| 14 | |
| 15 | |
| 16 | // TeX Folding code added by instanton (soft_share@126.com) with borrowed code from VisualTeX source by Alex Romanenko. |
| 17 | // Version: June 22, 2007 |
| 18 | |
| 19 | #include <stdlib.h> |
| 20 | #include <string.h> |
| 21 | #include <stdio.h> |
| 22 | #include <stdarg.h> |
| 23 | #include <assert.h> |
| 24 | #include <ctype.h> |
| 25 | |
| 26 | #include <string> |
| 27 | #include <string_view> |
| 28 | |
| 29 | #include "ILexer.h" |
| 30 | #include "Scintilla.h" |
| 31 | #include "SciLexer.h" |
| 32 | |
| 33 | #include "WordList.h" |
| 34 | #include "LexAccessor.h" |
| 35 | #include "Accessor.h" |
| 36 | #include "StyleContext.h" |
| 37 | #include "CharacterSet.h" |
| 38 | #include "LexerModule.h" |
| 39 | |
| 40 | using namespace Lexilla; |
| 41 | |
| 42 | // val SCE_TEX_DEFAULT = 0 |
| 43 | // val SCE_TEX_SPECIAL = 1 |
| 44 | // val SCE_TEX_GROUP = 2 |
| 45 | // val SCE_TEX_SYMBOL = 3 |
| 46 | // val SCE_TEX_COMMAND = 4 |
| 47 | // val SCE_TEX_TEXT = 5 |
| 48 | |
| 49 | // Definitions in SciTEGlobal.properties: |
| 50 | // |
| 51 | // TeX Highlighting |
| 52 | // |
| 53 | // # Default |
| 54 | // style.tex.0=fore:#7F7F00 |
| 55 | // # Special |
| 56 | // style.tex.1=fore:#007F7F |
| 57 | // # Group |
| 58 | // style.tex.2=fore:#880000 |
| 59 | // # Symbol |
| 60 | // style.tex.3=fore:#7F7F00 |
| 61 | // # Command |
| 62 | // style.tex.4=fore:#008800 |
| 63 | // # Text |
| 64 | // style.tex.5=fore:#000000 |
| 65 | |
| 66 | // lexer.tex.interface.default=0 |
| 67 | // lexer.tex.comment.process=0 |
| 68 | |
| 69 | // todo: lexer.tex.auto.if |
| 70 | |
| 71 | // Auxiliary functions: |
| 72 | |
| 73 | static inline bool endOfLine(Accessor &styler, Sci_PositionU i) { |
| 74 | return |
| 75 | (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')) ; |
| 76 | } |
| 77 | |
// Class zero: the percent sign, which ColouriseTeXDoc treats as the start
// of a comment.
static inline bool isTeXzero(int ch) {
	return '%' == ch ;
}
| 82 | |
// Class one: brackets, parentheses, angle brackets, '=', '#' and the double
// quote. ColouriseTeXDoc styles these as SCE_TEX_SPECIAL.
static inline bool isTeXone(int ch) {
	switch (ch) {
	case '[': case ']':
	case '(': case ')':
	case '<': case '>':
	case '=': case '#':
	case '"':
		return true ;
	default:
		return false ;
	}
}
| 89 | |
// Class two: the grouping characters '{', '}' and '$'. ColouriseTeXDoc
// styles these as SCE_TEX_GROUP.
static inline bool isTeXtwo(int ch) {
	switch (ch) {
	case '{':
	case '}':
	case '$':
		return true ;
	default:
		return false ;
	}
}
| 94 | |
// Class three: symbol characters. ColouriseTeXDoc styles these as
// SCE_TEX_SYMBOL. The set also contains '"' and '%', which ColouriseTeXDoc
// tests for earlier via isTeXone and isTeXzero.
static inline bool isTeXthree(int ch) {
	switch (ch) {
	case '~': case '^': case '_': case '&':
	case '-': case '+': case '"': case '`':
	case '/': case '|': case '%':
		return true ;
	default:
		return false ;
	}
}
| 101 | |
// Class four: the backslash, which opens a command (SCE_TEX_COMMAND) in
// ColouriseTeXDoc.
static inline bool isTeXfour(int ch) {
	return '\\' == ch ;
}
| 106 | |
// Class five: ASCII letters plus '@', '!' and '?'. ColouriseTeXDoc keeps a
// command (or text) run going while it sees these characters.
static inline bool isTeXfive(int ch) {
	if ((ch == '@') || (ch == '!') || (ch == '?')) {
		return true ;
	}
	const bool lowerCase = ('a' <= ch) && (ch <= 'z') ;
	const bool upperCase = ('A' <= ch) && (ch <= 'Z') ;
	return lowerCase || upperCase ;
}
| 112 | |
// Class six: the plain space character (tabs are not included).
static inline bool isTeXsix(int ch) {
	return ' ' == ch ;
}
| 117 | |
// Class seven: the caret. ColouriseTeXDoc checks for two of these in a row
// to recognise the "^^" notation.
static inline bool isTeXseven(int ch) {
	return '^' == ch ;
}
| 122 | |
| 123 | // Interface determination |
| 124 | |
| 125 | static int CheckTeXInterface( |
| 126 | Sci_PositionU startPos, |
| 127 | Sci_Position length, |
| 128 | Accessor &styler, |
| 129 | int defaultInterface) { |
| 130 | |
| 131 | char lineBuffer[1024] ; |
| 132 | Sci_PositionU linePos = 0 ; |
| 133 | |
| 134 | // some day we can make something lexer.tex.mapping=(all,0)(nl,1)(en,2)... |
| 135 | |
| 136 | if (styler.SafeGetCharAt(0) == '%') { |
| 137 | for (Sci_PositionU i = 0; i < startPos + length; i++) { |
| 138 | lineBuffer[linePos++] = styler.SafeGetCharAt(i) ; |
| 139 | if (endOfLine(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) { |
| 140 | lineBuffer[linePos] = '\0'; |
| 141 | if (strstr(lineBuffer, "interface=all" )) { |
| 142 | return 0 ; |
| 143 | } else if (strstr(lineBuffer, "interface=tex" )) { |
| 144 | return 1 ; |
| 145 | } else if (strstr(lineBuffer, "interface=nl" )) { |
| 146 | return 2 ; |
| 147 | } else if (strstr(lineBuffer, "interface=en" )) { |
| 148 | return 3 ; |
| 149 | } else if (strstr(lineBuffer, "interface=de" )) { |
| 150 | return 4 ; |
| 151 | } else if (strstr(lineBuffer, "interface=cz" )) { |
| 152 | return 5 ; |
| 153 | } else if (strstr(lineBuffer, "interface=it" )) { |
| 154 | return 6 ; |
| 155 | } else if (strstr(lineBuffer, "interface=ro" )) { |
| 156 | return 7 ; |
| 157 | } else if (strstr(lineBuffer, "interface=latex" )) { |
| 158 | // we will move latex cum suis up to 91+ when more keyword lists are supported |
| 159 | return 8 ; |
| 160 | } else if (styler.SafeGetCharAt(1) == 'D' && strstr(lineBuffer, "%D \\module" )) { |
| 161 | // better would be to limit the search to just one line |
| 162 | return 3 ; |
| 163 | } else { |
| 164 | return defaultInterface ; |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | return defaultInterface ; |
| 171 | } |
| 172 | |
| 173 | static void ColouriseTeXDoc( |
| 174 | Sci_PositionU startPos, |
| 175 | Sci_Position length, |
| 176 | int, |
| 177 | WordList *keywordlists[], |
| 178 | Accessor &styler) { |
| 179 | |
| 180 | styler.StartAt(startPos) ; |
| 181 | styler.StartSegment(startPos) ; |
| 182 | |
| 183 | bool = styler.GetPropertyInt("lexer.tex.comment.process" , 0) == 1 ; |
| 184 | bool useKeywords = styler.GetPropertyInt("lexer.tex.use.keywords" , 1) == 1 ; |
| 185 | bool autoIf = styler.GetPropertyInt("lexer.tex.auto.if" , 1) == 1 ; |
| 186 | int defaultInterface = styler.GetPropertyInt("lexer.tex.interface.default" , 1) ; |
| 187 | |
| 188 | char key[100] ; |
| 189 | int k ; |
| 190 | bool newifDone = false ; |
| 191 | bool = false ; |
| 192 | |
| 193 | int currentInterface = CheckTeXInterface(startPos,length,styler,defaultInterface) ; |
| 194 | |
| 195 | if (currentInterface == 0) { |
| 196 | useKeywords = false ; |
| 197 | currentInterface = 1 ; |
| 198 | } |
| 199 | |
| 200 | WordList &keywords = *keywordlists[currentInterface-1] ; |
| 201 | |
| 202 | StyleContext sc(startPos, length, SCE_TEX_TEXT, styler); |
| 203 | |
| 204 | bool going = sc.More() ; // needed because of a fuzzy end of file state |
| 205 | |
| 206 | for (; going; sc.Forward()) { |
| 207 | |
| 208 | if (! sc.More()) { going = false ; } // we need to go one behind the end of text |
| 209 | |
| 210 | if (inComment) { |
| 211 | if (sc.atLineEnd) { |
| 212 | sc.SetState(SCE_TEX_TEXT) ; |
| 213 | newifDone = false ; |
| 214 | inComment = false ; |
| 215 | } |
| 216 | } else { |
| 217 | if (! isTeXfive(sc.ch)) { |
| 218 | if (sc.state == SCE_TEX_COMMAND) { |
| 219 | if (sc.LengthCurrent() == 1) { // \<noncstoken> |
| 220 | if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) { |
| 221 | sc.Forward(2) ; // \^^ and \^^<token> |
| 222 | } |
| 223 | sc.ForwardSetState(SCE_TEX_TEXT) ; |
| 224 | } else { |
| 225 | sc.GetCurrent(key, sizeof(key)-1) ; |
| 226 | k = static_cast<int>(strlen(key)) ; |
| 227 | memmove(key,key+1,k) ; // shift left over escape token |
| 228 | key[k] = '\0' ; |
| 229 | k-- ; |
| 230 | if (! keywords || ! useKeywords) { |
| 231 | sc.SetState(SCE_TEX_COMMAND) ; |
| 232 | newifDone = false ; |
| 233 | } else if (k == 1) { //\<cstoken> |
| 234 | sc.SetState(SCE_TEX_COMMAND) ; |
| 235 | newifDone = false ; |
| 236 | } else if (keywords.InList(key)) { |
| 237 | sc.SetState(SCE_TEX_COMMAND) ; |
| 238 | newifDone = autoIf && (strcmp(key,"newif" ) == 0) ; |
| 239 | } else if (autoIf && ! newifDone && (key[0] == 'i') && (key[1] == 'f') && keywords.InList("if" )) { |
| 240 | sc.SetState(SCE_TEX_COMMAND) ; |
| 241 | } else { |
| 242 | sc.ChangeState(SCE_TEX_TEXT) ; |
| 243 | sc.SetState(SCE_TEX_TEXT) ; |
| 244 | newifDone = false ; |
| 245 | } |
| 246 | } |
| 247 | } |
| 248 | if (isTeXzero(sc.ch)) { |
| 249 | sc.SetState(SCE_TEX_SYMBOL); |
| 250 | |
| 251 | if (!endOfLine(styler,sc.currentPos + 1)) |
| 252 | sc.ForwardSetState(SCE_TEX_DEFAULT) ; |
| 253 | |
| 254 | inComment = ! processComment ; |
| 255 | newifDone = false ; |
| 256 | } else if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) { |
| 257 | sc.SetState(SCE_TEX_TEXT) ; |
| 258 | sc.ForwardSetState(SCE_TEX_TEXT) ; |
| 259 | } else if (isTeXone(sc.ch)) { |
| 260 | sc.SetState(SCE_TEX_SPECIAL) ; |
| 261 | newifDone = false ; |
| 262 | } else if (isTeXtwo(sc.ch)) { |
| 263 | sc.SetState(SCE_TEX_GROUP) ; |
| 264 | newifDone = false ; |
| 265 | } else if (isTeXthree(sc.ch)) { |
| 266 | sc.SetState(SCE_TEX_SYMBOL) ; |
| 267 | newifDone = false ; |
| 268 | } else if (isTeXfour(sc.ch)) { |
| 269 | sc.SetState(SCE_TEX_COMMAND) ; |
| 270 | } else if (isTeXsix(sc.ch)) { |
| 271 | sc.SetState(SCE_TEX_TEXT) ; |
| 272 | } else if (sc.atLineEnd) { |
| 273 | sc.SetState(SCE_TEX_TEXT) ; |
| 274 | newifDone = false ; |
| 275 | inComment = false ; |
| 276 | } else { |
| 277 | sc.SetState(SCE_TEX_TEXT) ; |
| 278 | } |
| 279 | } else if (sc.state != SCE_TEX_COMMAND) { |
| 280 | sc.SetState(SCE_TEX_TEXT) ; |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | sc.ChangeState(SCE_TEX_TEXT) ; |
| 285 | sc.Complete(); |
| 286 | |
| 287 | } |
| 288 | |
| 289 | |
// True for the ASCII decimal digits '0' through '9'.
static inline bool isNumber(int ch) {
	return ('0' <= ch) && (ch <= '9');
}
| 296 | |
// True for the ASCII letters a-z and A-Z only.
static inline bool isWordChar(int ch) {
	if (('a' <= ch) && (ch <= 'z')) {
		return true;
	}
	return ('A' <= ch) && (ch <= 'Z');
}
| 300 | |
| 301 | static Sci_Position ParseTeXCommand(Sci_PositionU pos, Accessor &styler, char *command) |
| 302 | { |
| 303 | Sci_Position length=0; |
| 304 | char ch=styler.SafeGetCharAt(pos+1); |
| 305 | |
| 306 | if(ch==',' || ch==':' || ch==';' || ch=='%'){ |
| 307 | command[0]=ch; |
| 308 | command[1]=0; |
| 309 | return 1; |
| 310 | } |
| 311 | |
| 312 | // find end |
| 313 | while(isWordChar(ch) && !isNumber(ch) && ch!='_' && ch!='.' && length<100){ |
| 314 | command[length]=ch; |
| 315 | length++; |
| 316 | ch=styler.SafeGetCharAt(pos+length+1); |
| 317 | } |
| 318 | |
| 319 | command[length]='\0'; |
| 320 | if(!length) return 0; |
| 321 | return length+1; |
| 322 | } |
| 323 | |
// Classifies a command name (without the backslash) as one half of a paired
// fold construct.
//
// Returns  1 for names that open a fold: "begin", "FoldStart", "abstract",
//            "unprotect", "title", "documentclass" and anything starting
//            with "start", "Start" or "if";
//         -1 for names that close a fold: "end", "FoldStop", "maketitle",
//            "protect", "fi" and anything starting with "stop" or "Stop";
//          0 for everything else, including names that start with a digit
//            or a '.'.
static int classifyFoldPointTeXPaired(const char* s) {
	// isdigit() is only defined for unsigned char values (and EOF), so the
	// possibly negative char is converted first.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.')) {
		return 0;
	}

	const bool opens =
		strcmp(s,"begin")==0 || strcmp(s,"FoldStart")==0 ||
		strcmp(s,"abstract")==0 || strcmp(s,"unprotect")==0 ||
		strcmp(s,"title")==0 || strncmp(s,"start",5)==0 || strncmp(s,"Start",5)==0 ||
		strcmp(s,"documentclass")==0 || strncmp(s,"if",2)==0;

	const bool closes =
		strcmp(s,"end")==0 || strcmp(s,"FoldStop")==0 ||
		strcmp(s,"maketitle")==0 || strcmp(s,"protect")==0 ||
		strncmp(s,"stop",4)==0 || strncmp(s,"Stop",4)==0 ||
		strcmp(s,"fi")==0;

	// a closing match takes precedence, as in the original evaluation order
	if (closes) {
		return -1;
	}
	return opens ? 1 : 0;
}
| 342 | |
// Classifies a command name (without the backslash) as an unpaired fold
// starter such as a sectioning or definition command.
//
// Returns 1 when s is exactly one of the names in the table below and 0
// otherwise, including for names that start with a digit or a '.'.
static int classifyFoldPointTeXUnpaired(const char* s) {
	static const char * const foldStarters[] = {
		"part", "chapter", "section", "subsection", "subsubsection",
		"CJKfamily", "appendix",
		"Topic", "topic", "subject", "subsubject",
		"def", "gdef", "edef", "xdef",
		"framed", "frame",
		"foilhead", "overlays", "slide",
	};

	// isdigit() is only defined for unsigned char values (and EOF), so the
	// possibly negative char is converted first.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.')) {
		return 0;
	}

	for (const char *starter : foldStarters) {
		if (strcmp(s, starter) == 0) {
			return 1;
		}
	}
	return 0;
}
| 365 | |
| 366 | static bool (Sci_Position line, Accessor &styler) { |
| 367 | Sci_Position pos = styler.LineStart(line); |
| 368 | Sci_Position eol_pos = styler.LineStart(line + 1) - 1; |
| 369 | |
| 370 | Sci_Position startpos = pos; |
| 371 | |
| 372 | while (startpos<eol_pos){ |
| 373 | char ch = styler[startpos]; |
| 374 | if (ch!='%' && ch!=' ') return false; |
| 375 | else if (ch=='%') return true; |
| 376 | startpos++; |
| 377 | } |
| 378 | |
| 379 | return false; |
| 380 | } |
| 381 | |
| 382 | // FoldTeXDoc: borrowed from VisualTeX with modifications |
| 383 | |
| 384 | static void FoldTexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) |
| 385 | { |
| 386 | bool foldCompact = styler.GetPropertyInt("fold.compact" , 1) != 0; |
| 387 | Sci_PositionU endPos = startPos+length; |
| 388 | int visibleChars=0; |
| 389 | Sci_Position lineCurrent=styler.GetLine(startPos); |
| 390 | int levelPrev=styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; |
| 391 | int levelCurrent=levelPrev; |
| 392 | char chNext=styler[startPos]; |
| 393 | char buffer[100]="" ; |
| 394 | |
| 395 | for (Sci_PositionU i=startPos; i < endPos; i++) { |
| 396 | char ch=chNext; |
| 397 | chNext=styler.SafeGetCharAt(i+1); |
| 398 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
| 399 | |
| 400 | if(ch=='\\') { |
| 401 | ParseTeXCommand(i, styler, buffer); |
| 402 | levelCurrent += classifyFoldPointTeXPaired(buffer)+classifyFoldPointTeXUnpaired(buffer); |
| 403 | } |
| 404 | |
| 405 | if (levelCurrent > SC_FOLDLEVELBASE && ((ch == '\r' || ch=='\n') && (chNext == '\\'))) { |
| 406 | ParseTeXCommand(i+1, styler, buffer); |
| 407 | levelCurrent -= classifyFoldPointTeXUnpaired(buffer); |
| 408 | } |
| 409 | |
| 410 | char chNext2; |
| 411 | char chNext3; |
| 412 | char chNext4; |
| 413 | char chNext5; |
| 414 | chNext2=styler.SafeGetCharAt(i+2); |
| 415 | chNext3=styler.SafeGetCharAt(i+3); |
| 416 | chNext4=styler.SafeGetCharAt(i+4); |
| 417 | chNext5=styler.SafeGetCharAt(i+5); |
| 418 | |
| 419 | bool atEOfold = (ch == '%') && |
| 420 | (chNext == '%') && (chNext2=='}') && |
| 421 | (chNext3=='}')&& (chNext4=='-')&& (chNext5=='-'); |
| 422 | |
| 423 | bool atBOfold = (ch == '%') && |
| 424 | (chNext == '%') && (chNext2=='-') && |
| 425 | (chNext3=='-')&& (chNext4=='{')&& (chNext5=='{'); |
| 426 | |
| 427 | if(atBOfold){ |
| 428 | levelCurrent+=1; |
| 429 | } |
| 430 | |
| 431 | if(atEOfold){ |
| 432 | levelCurrent-=1; |
| 433 | } |
| 434 | |
| 435 | if(ch=='\\' && chNext=='['){ |
| 436 | levelCurrent+=1; |
| 437 | } |
| 438 | |
| 439 | if(ch=='\\' && chNext==']'){ |
| 440 | levelCurrent-=1; |
| 441 | } |
| 442 | |
| 443 | bool = styler.GetPropertyInt("fold.comment" ) != 0; |
| 444 | |
| 445 | if (foldComment && atEOL && IsTeXCommentLine(lineCurrent, styler)) |
| 446 | { |
| 447 | if (lineCurrent==0 && IsTeXCommentLine(lineCurrent + 1, styler) |
| 448 | ) |
| 449 | levelCurrent++; |
| 450 | else if (lineCurrent!=0 && !IsTeXCommentLine(lineCurrent - 1, styler) |
| 451 | && IsTeXCommentLine(lineCurrent + 1, styler) |
| 452 | ) |
| 453 | levelCurrent++; |
| 454 | else if (lineCurrent!=0 && IsTeXCommentLine(lineCurrent - 1, styler) && |
| 455 | !IsTeXCommentLine(lineCurrent+1, styler)) |
| 456 | levelCurrent--; |
| 457 | } |
| 458 | |
| 459 | //--------------------------------------------------------------------------------------------- |
| 460 | |
| 461 | if (atEOL) { |
| 462 | int lev = levelPrev; |
| 463 | if (visibleChars == 0 && foldCompact) |
| 464 | lev |= SC_FOLDLEVELWHITEFLAG; |
| 465 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) |
| 466 | lev |= SC_FOLDLEVELHEADERFLAG; |
| 467 | if (lev != styler.LevelAt(lineCurrent)) { |
| 468 | styler.SetLevel(lineCurrent, lev); |
| 469 | } |
| 470 | lineCurrent++; |
| 471 | levelPrev = levelCurrent; |
| 472 | visibleChars = 0; |
| 473 | } |
| 474 | |
| 475 | if (!isspacechar(ch)) |
| 476 | visibleChars++; |
| 477 | } |
| 478 | |
| 479 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
| 480 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
| 481 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
| 482 | } |
| 483 | |
| 484 | |
| 485 | |
| 486 | |
| 487 | static const char * const texWordListDesc[] = { |
| 488 | "TeX, eTeX, pdfTeX, Omega" , |
| 489 | "ConTeXt Dutch" , |
| 490 | "ConTeXt English" , |
| 491 | "ConTeXt German" , |
| 492 | "ConTeXt Czech" , |
| 493 | "ConTeXt Italian" , |
| 494 | "ConTeXt Romanian" , |
| 495 | 0, |
| 496 | } ; |
| 497 | |
| 498 | LexerModule lmTeX(SCLEX_TEX, ColouriseTeXDoc, "tex" , FoldTexDoc, texWordListDesc); |
| 499 | |