1// Scintilla source code edit control
2/** @file LexJS.cxx
3 ** Lexer for JS.
4 **/
5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6// Copyright 2023 by Uniontech Software Technology Co., Ltd. Port html to pure javascript language.
7// The License.txt file describes the conditions under which this software may be distributed.
8
9#include <stdlib.h>
10#include <string.h>
11#include <stdio.h>
12#include <stdarg.h>
13#include <assert.h>
14#include <ctype.h>
15
16#include <string>
17#include <string_view>
18#include <map>
19#include <set>
20#include <functional>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25#include "WordList.h"
26#include "LexAccessor.h"
27#include "Accessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31#include "OptionSet.h"
32#include "DefaultLexer.h"
33
34using namespace Scintilla;
35using namespace Lexilla;
36
37namespace {
38
// Distance between each script's base style range (SCE_HJ_, SCE_HB_, SCE_HP_) and its
// alternate "A" range (SCE_HJA_, SCE_HBA_, SCE_HPA_). statePrintForState() adds these
// offsets and stateForPrintState() subtracts them again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
// Defined as the value directly after SCLEX_JAVANOCASE.
// NOTE(review): presumably the lexer identifier used when registering this lexer - confirm.
#define SCLEX_JS (SCLEX_JAVANOCASE + 1)
43
// Kind of script (or markup) a run of text belongs to. The numeric values matter: the
// asp.default.language property description refers to VBScript as 2 and Python as 3.
enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
// Context a script is lexed in. statePrintForState() leaves script styles unshifted
// only for eNonHtmlScript.
enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
46
// Return true when ch is an ASCII character that may appear inside a word:
// a letter, a digit, '.' or '_'.
// Values outside 0..0x7F are rejected before isalnum() is consulted. Callers pass
// plain (possibly signed) char values, so bytes >= 0x80 arrive as negative ints and
// isalnum() has undefined behaviour for negative arguments other than EOF.
inline bool IsAWordChar(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
}
50
// Return true when ch is an ASCII character that may begin a word:
// a letter, a digit or '_'.
// Values outside 0..0x7F are rejected before isalnum() is consulted. Callers pass
// plain (possibly signed) char values, so bytes >= 0x80 arrive as negative ints and
// isalnum() has undefined behaviour for negative arguments other than EOF.
inline bool IsAWordStart(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_');
}
54
// Return true when ch is one of the punctuation characters treated as an operator.
// Letters, digits and every other value are not operators. Note that '.' IS part of
// the accepted set.
inline bool IsOperator(int ch) {
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
68
69void GetTextSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end, char *s, size_t len) {
70 Sci_PositionU i = 0;
71 for (; (i < end - start + 1) && (i < len-1); i++) {
72 s[i] = MakeLowerCase(styler[start + i]);
73 }
74 s[i] = '\0';
75}
76
77std::string GetStringSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
78 std::string s;
79 Sci_PositionU i = 0;
80 for (; (i < end - start + 1); i++) {
81 s.push_back(MakeLowerCase(styler[start + i]));
82 }
83 return s;
84}
85
86std::string GetNextWord(Accessor &styler, Sci_PositionU start) {
87 std::string ret;
88 Sci_PositionU i = 0;
89 for (; i < 200; i++) { // Put an upper limit to bound time taken for unexpected text.
90 const char ch = styler.SafeGetCharAt(start + i);
91 if ((i == 0) && !IsAWordStart(ch))
92 break;
93 if ((i > 0) && !IsAWordChar(ch))
94 break;
95 ret.push_back(ch);
96 }
97 return ret;
98}
99
100script_type segIsScriptingIndicator(Accessor &styler, Sci_PositionU start, Sci_PositionU end, script_type prevValue) {
101 char s[100];
102 GetTextSegment(styler, start, end, s, sizeof(s));
103 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
104 if (strstr(s, "src")) // External script
105 return eScriptNone;
106 if (strstr(s, "vbs"))
107 return eScriptVBS;
108 if (strstr(s, "pyth"))
109 return eScriptPython;
110 if (strstr(s, "javas"))
111 return eScriptJS;
112 if (strstr(s, "jscr"))
113 return eScriptJS;
114 if (strstr(s, "php"))
115 return eScriptPHP;
116 if (strstr(s, "xml")) {
117 const char *xml = strstr(s, "xml");
118 for (const char *t=s; t<xml; t++) {
119 if (!IsASpace(*t)) {
120 return prevValue;
121 }
122 }
123 return eScriptXML;
124 }
125
126 return prevValue;
127}
128
129int PrintScriptingIndicatorOffset(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
130 int iResult = 0;
131 std::string s = GetStringSegment(styler, start, end);
132 if (0 == strncmp(s.c_str(), "php", 3)) {
133 iResult = 3;
134 }
135 return iResult;
136}
137
138script_type ScriptOfState(int state) {
139 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
140 return eScriptPython;
141 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
142 return eScriptVBS;
143 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
144 return eScriptJS;
145 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
146 return eScriptPHP;
147 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
148 return eScriptSGML;
149 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
150 return eScriptSGMLblock;
151 } else {
152 return eScriptNone;
153 }
154}
155
156int statePrintForState(int state, script_mode inScriptType) {
157 int StateToPrint = state;
158
159 if (state >= SCE_HJ_START) {
160 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
161 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
162 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
163 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
164 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
165 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
166 }
167 }
168
169 return StateToPrint;
170}
171
172int stateForPrintState(int StateToPrint) {
173 int state;
174
175 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
176 state = StateToPrint - SCE_HA_PYTHON;
177 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
178 state = StateToPrint - SCE_HA_VBS;
179 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
180 state = StateToPrint - SCE_HA_JS;
181 } else {
182 state = StateToPrint;
183 }
184
185 return state;
186}
187
188inline bool IsNumber(Sci_PositionU start, Accessor &styler) {
189 return IsADigit(styler[start]) || (styler[start] == '.') ||
190 (styler[start] == '-') || (styler[start] == '#');
191}
192
193inline bool isStringState(int state) {
194 bool bResult;
195
196 switch (state) {
197 case SCE_HJ_DOUBLESTRING:
198 case SCE_HJ_SINGLESTRING:
199 case SCE_HJA_DOUBLESTRING:
200 case SCE_HJA_SINGLESTRING:
201 case SCE_HB_STRING:
202 case SCE_HBA_STRING:
203 case SCE_HP_STRING:
204 case SCE_HP_CHARACTER:
205 case SCE_HP_TRIPLE:
206 case SCE_HP_TRIPLEDOUBLE:
207 case SCE_HPA_STRING:
208 case SCE_HPA_CHARACTER:
209 case SCE_HPA_TRIPLE:
210 case SCE_HPA_TRIPLEDOUBLE:
211 case SCE_HPHP_HSTRING:
212 case SCE_HPHP_SIMPLESTRING:
213 case SCE_HPHP_HSTRING_VARIABLE:
214 case SCE_HPHP_COMPLEX_VARIABLE:
215 bResult = true;
216 break;
217 default :
218 bResult = false;
219 break;
220 }
221 return bResult;
222}
223
224inline bool stateAllowsTermination(int state) {
225 bool allowTermination = !isStringState(state);
226 if (allowTermination) {
227 switch (state) {
228 case SCE_HB_COMMENTLINE:
229 case SCE_HPHP_COMMENT:
230 case SCE_HP_COMMENTLINE:
231 case SCE_HPA_COMMENTLINE:
232 allowTermination = false;
233 }
234 }
235 return allowTermination;
236}
237
238// not really well done, since it's only comments that should lex the %> and <%
239inline bool isCommentASPState(int state) {
240 bool bResult;
241
242 switch (state) {
243 case SCE_HJ_COMMENT:
244 case SCE_HJ_COMMENTLINE:
245 case SCE_HJ_COMMENTDOC:
246 case SCE_HB_COMMENTLINE:
247 case SCE_HP_COMMENTLINE:
248 case SCE_HPHP_COMMENT:
249 case SCE_HPHP_COMMENTLINE:
250 bResult = true;
251 break;
252 default :
253 bResult = false;
254 break;
255 }
256 return bResult;
257}
258
259void classifyAttribHTML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
260 const bool wordIsNumber = IsNumber(start, styler);
261 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
262 if (wordIsNumber) {
263 chAttr = SCE_H_NUMBER;
264 } else {
265 std::string s = GetStringSegment(styler, start, end);
266 if (keywords.InList(s.c_str()))
267 chAttr = SCE_H_ATTRIBUTE;
268 }
269 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
270 // No keywords -> all are known
271 chAttr = SCE_H_ATTRIBUTE;
272 styler.ColourTo(end, chAttr);
273}
274
275// https://html.spec.whatwg.org/multipage/custom-elements.html#custom-elements-core-concepts
276bool isHTMLCustomElement(const std::string &tag) {
277 // check valid HTML custom element name: starts with an ASCII lower alpha and contains hyphen.
278 // IsUpperOrLowerCase() is used for `html.tags.case.sensitive=1`.
279 if (tag.length() < 2 || !IsUpperOrLowerCase(tag[0])) {
280 return false;
281 }
282 if (tag.find('-') == std::string::npos) {
283 return false;
284 }
285 return true;
286}
287
288int classifyTagHTML(Sci_PositionU start, Sci_PositionU end,
289 const WordList &keywords, Accessor &styler, bool &tagDontFold,
290 bool caseSensitive, bool isXml, bool allowScripts,
291 const std::set<std::string> &nonFoldingTags) {
292 std::string tag;
293 // Copy after the '<' and stop before ' '
294 for (Sci_PositionU cPos = start; cPos <= end; cPos++) {
295 const char ch = styler[cPos];
296 if (IsASpace(ch)) {
297 break;
298 }
299 if ((ch != '<') && (ch != '/')) {
300 tag.push_back(caseSensitive ? ch : MakeLowerCase(ch));
301 }
302 }
303 // if the current language is XML, I can fold any tag
304 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
305 //...to find it in the list of no-container-tags
306 tagDontFold = (!isXml) && (nonFoldingTags.count(tag) > 0);
307 // No keywords -> all are known
308 char chAttr = SCE_H_TAGUNKNOWN;
309 if (!tag.empty() && (tag[0] == '!')) {
310 chAttr = SCE_H_SGML_DEFAULT;
311 } else if (!keywords || keywords.InList(tag.c_str())) {
312 chAttr = SCE_H_TAG;
313 } else if (!isXml && isHTMLCustomElement(tag)) {
314 chAttr = SCE_H_TAG;
315 }
316 if (chAttr != SCE_H_TAGUNKNOWN) {
317 styler.ColourTo(end, chAttr);
318 }
319 if (chAttr == SCE_H_TAG) {
320 if (allowScripts && (tag == "script")) {
321 // check to see if this is a self-closing tag by sniffing ahead
322 bool isSelfClose = false;
323 for (Sci_PositionU cPos = end; cPos <= end + 200; cPos++) {
324 const char ch = styler.SafeGetCharAt(cPos, '\0');
325 if (ch == '\0' || ch == '>')
326 break;
327 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
328 isSelfClose = true;
329 break;
330 }
331 }
332
333 // do not enter a script state if the tag self-closed
334 if (!isSelfClose)
335 chAttr = SCE_H_SCRIPT;
336 } else if (!isXml && (tag == "comment")) {
337 chAttr = SCE_H_COMMENT;
338 }
339 }
340 return chAttr;
341}
342
343void classifyWordHTJS(Sci_PositionU start, Sci_PositionU end,
344 const WordList &keywords, Accessor &styler, script_mode inScriptType) {
345 char s[30 + 1];
346 Sci_PositionU i = 0;
347 for (; i < end - start + 1 && i < 30; i++) {
348 s[i] = styler[start + i];
349 }
350 s[i] = '\0';
351
352 char chAttr = SCE_HJ_WORD;
353 const bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
354 if (wordIsNumber) {
355 chAttr = SCE_HJ_NUMBER;
356 } else if (keywords.InList(s)) {
357 chAttr = SCE_HJ_KEYWORD;
358 }
359 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
360}
361
362int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, script_mode inScriptType) {
363 char chAttr = SCE_HB_IDENTIFIER;
364 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
365 if (wordIsNumber) {
366 chAttr = SCE_HB_NUMBER;
367 } else {
368 std::string s = GetStringSegment(styler, start, end);
369 if (keywords.InList(s.c_str())) {
370 chAttr = SCE_HB_WORD;
371 if (s == "rem")
372 chAttr = SCE_HB_COMMENTLINE;
373 }
374 }
375 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
376 if (chAttr == SCE_HB_COMMENTLINE)
377 return SCE_HB_COMMENTLINE;
378 else
379 return SCE_HB_DEFAULT;
380}
381
382void classifyWordHTPy(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, std::string &prevWord, script_mode inScriptType, bool isMako) {
383 const bool wordIsNumber = IsADigit(styler[start]);
384 std::string s;
385 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
386 s.push_back(styler[start + i]);
387 }
388 char chAttr = SCE_HP_IDENTIFIER;
389 if (prevWord == "class")
390 chAttr = SCE_HP_CLASSNAME;
391 else if (prevWord == "def")
392 chAttr = SCE_HP_DEFNAME;
393 else if (wordIsNumber)
394 chAttr = SCE_HP_NUMBER;
395 else if (keywords.InList(s.c_str()))
396 chAttr = SCE_HP_WORD;
397 else if (isMako && (s == "block"))
398 chAttr = SCE_HP_WORD;
399 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
400 prevWord = s;
401}
402
403// Update the word colour to default or keyword
404// Called when in a PHP word
405void classifyWordHTPHP(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
406 char chAttr = SCE_HPHP_DEFAULT;
407 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
408 if (wordIsNumber) {
409 chAttr = SCE_HPHP_NUMBER;
410 } else {
411 std::string s = GetStringSegment(styler, start, end);
412 if (keywords.InList(s.c_str()))
413 chAttr = SCE_HPHP_WORD;
414 }
415 styler.ColourTo(end, chAttr);
416}
417
418bool isWordHSGML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
419 std::string s;
420 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
421 s.push_back(styler[start + i]);
422 }
423 return keywords.InList(s.c_str());
424}
425
426bool isWordCdata(Sci_PositionU start, Sci_PositionU end, Accessor &styler) {
427 std::string s;
428 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
429 s.push_back(styler[start + i]);
430 }
431 return s == "[CDATA[";
432}
433
434// Return the first state to reach when entering a scripting language
435int StateForScript(script_type scriptLanguage) {
436 int Result;
437 switch (scriptLanguage) {
438 case eScriptVBS:
439 Result = SCE_HB_START;
440 break;
441 case eScriptPython:
442 Result = SCE_HP_START;
443 break;
444 case eScriptPHP:
445 Result = SCE_HPHP_DEFAULT;
446 break;
447 case eScriptXML:
448 Result = SCE_H_TAGUNKNOWN;
449 break;
450 case eScriptSGML:
451 Result = SCE_H_SGML_DEFAULT;
452 break;
453 case eScriptComment:
454 Result = SCE_H_COMMENT;
455 break;
456 default :
457 Result = SCE_HJ_START;
458 break;
459 }
460 return Result;
461}
462
463inline bool issgmlwordchar(int ch) {
464 return !IsASCII(ch) ||
465 (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
466}
467
468inline bool IsPhpWordStart(int ch) {
469 return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
470}
471
472inline bool IsPhpWordChar(int ch) {
473 return IsADigit(ch) || IsPhpWordStart(ch);
474}
475
476bool IsCommentState(const int state) {
477 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
478}
479
480bool IsScriptCommentState(const int state) {
481 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
482 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
483}
484
// Return true when ch is a carriage return or a line feed.
bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
488
489bool isMakoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
490 if (blockType.empty()) {
491 return ((ch == '%') && (chNext == '>'));
492 } else if ((blockType == "inherit") ||
493 (blockType == "namespace") ||
494 (blockType == "include") ||
495 (blockType == "page")) {
496 return ((ch == '/') && (chNext == '>'));
497 } else if (blockType == "%") {
498 if (ch == '/' && isLineEnd(chNext))
499 return true;
500 else
501 return isLineEnd(ch);
502 } else if (blockType == "{") {
503 return ch == '}';
504 } else {
505 return (ch == '>');
506 }
507}
508
// Return true when ch followed by chNext ends a Django block of the given type:
// "%}" for block type "%" and "}}" for block type "{". Any other block type
// (including the empty one) never ends here.
bool isDjangoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
	// Both terminators finish with '}'.
	if (chNext != '}')
		return false;
	if (blockType == "%")
		return ch == '%';
	if (blockType == "{")
		return ch == '}';
	return false;
}
520
521class PhpNumberState {
522 enum NumberBase { BASE_10 = 0, BASE_2, BASE_8, BASE_16 };
523 static constexpr const char *const digitList[] = { "_0123456789", "_01", "_01234567", "_0123456789abcdefABCDEF" };
524
525 NumberBase base = BASE_10;
526 bool decimalPart = false;
527 bool exponentPart = false;
528 bool invalid = false;
529 bool finished = false;
530
531 bool leadingZero = false;
532 bool invalidBase8 = false;
533
534 bool betweenDigits = false;
535 bool decimalChar = false;
536 bool exponentChar = false;
537
538public:
539 inline bool isInvalid() { return invalid; }
540 inline bool isFinished() { return finished; }
541
542 bool init(int ch, int chPlus1, int chPlus2) {
543 base = BASE_10;
544 decimalPart = false;
545 exponentPart = false;
546 invalid = false;
547 finished = false;
548
549 leadingZero = false;
550 invalidBase8 = false;
551
552 betweenDigits = false;
553 decimalChar = false;
554 exponentChar = false;
555
556 if (ch == '.' && strchr(digitList[BASE_10] + !betweenDigits, chPlus1) != nullptr) {
557 decimalPart = true;
558 betweenDigits = true;
559 } else if (ch == '0' && (chPlus1 == 'b' || chPlus1 == 'B')) {
560 base = BASE_2;
561 } else if (ch == '0' && (chPlus1 == 'o' || chPlus1 == 'O')) {
562 base = BASE_8;
563 } else if (ch == '0' && (chPlus1 == 'x' || chPlus1 == 'X')) {
564 base = BASE_16;
565 } else if (strchr(digitList[BASE_10] + !betweenDigits, ch) != nullptr) {
566 leadingZero = ch == '0';
567 betweenDigits = true;
568 check(chPlus1, chPlus2);
569 if (finished && leadingZero) {
570 // single zero should be base 10
571 base = BASE_10;
572 }
573 } else {
574 return false;
575 }
576 return true;
577 }
578
579 bool check(int ch, int chPlus1) {
580 if (strchr(digitList[base] + !betweenDigits, ch) != nullptr) {
581 if (leadingZero) {
582 invalidBase8 = invalidBase8 || strchr(digitList[BASE_8] + !betweenDigits, ch) == nullptr;
583 }
584
585 betweenDigits = ch != '_';
586 decimalChar = false;
587 exponentChar = false;
588 } else if (ch == '_') {
589 invalid = true;
590
591 betweenDigits = false;
592 decimalChar = false;
593 // exponentChar is unchanged
594 } else if (base == BASE_10 && ch == '.' && (
595 !(decimalPart || exponentPart) || strchr(digitList[BASE_10] + !betweenDigits, chPlus1) != nullptr)
596 ) {
597 invalid = invalid || !betweenDigits || decimalPart || exponentPart;
598 decimalPart = true;
599
600 betweenDigits = false;
601 decimalChar = true;
602 exponentChar = false;
603 } else if (base == BASE_10 && (ch == 'e' || ch == 'E')) {
604 invalid = invalid || !(betweenDigits || decimalChar) || exponentPart;
605 exponentPart = true;
606
607 betweenDigits = false;
608 decimalChar = false;
609 exponentChar = true;
610 } else if (base == BASE_10 && (ch == '-' || ch == '+') && exponentChar) {
611 invalid = invalid || strchr(digitList[BASE_10] + !betweenDigits, chPlus1) == nullptr;
612
613 betweenDigits = false;
614 decimalChar = false;
615 // exponentChar is unchanged
616 } else if (IsPhpWordChar(ch)) {
617 invalid = true;
618
619 betweenDigits = false;
620 decimalChar = false;
621 exponentChar = false;
622 } else {
623 invalid = invalid || !(betweenDigits || decimalChar);
624 finished = true;
625 if (base == BASE_10 && leadingZero && !decimalPart && !exponentPart) {
626 base = BASE_8;
627 invalid = invalid || invalidBase8;
628 }
629 }
630 return finished;
631 }
632};
633
634bool isPHPStringState(int state) {
635 return
636 (state == SCE_HPHP_HSTRING) ||
637 (state == SCE_HPHP_SIMPLESTRING) ||
638 (state == SCE_HPHP_HSTRING_VARIABLE) ||
639 (state == SCE_HPHP_COMPLEX_VARIABLE);
640}
641
// Read a PHP string delimiter identifier starting at position i and store it in
// phpStringDelimiter.
// NOTE(review): presumably the label that follows "<<<" of a heredoc/nowdoc - confirm against the caller.
//   phpStringDelimiter - receives the identifier; left empty on failure
//   i                  - position to start scanning at; leading spaces and tabs are skipped
//   lengthDoc          - scanning never goes beyond this position
//   isSimpleString     - set only when the identifier is quoted: true for '...', false for "..."
// Returns i - 1 (as passed in) when no valid delimiter is found, otherwise the position
// of the last character consumed on the line.
Sci_Position FindPhpStringDelimiter(std::string &phpStringDelimiter, Sci_Position i, const Sci_Position lengthDoc, Accessor &styler, bool &isSimpleString) {
	Sci_Position j;
	// Value returned on every failure path.
	const Sci_Position beginning = i - 1;
	bool isQuoted = false;

	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
		i++;
	char ch = styler.SafeGetCharAt(i);
	const char chNext = styler.SafeGetCharAt(i + 1);
	phpStringDelimiter.clear();
	if (!IsPhpWordStart(ch)) {
		// The only other accepted start is a quote directly followed by a word start.
		if ((ch == '\'' || ch == '\"') && IsPhpWordStart(chNext)) {
			isSimpleString = ch == '\'';
			isQuoted = true;
			i++;
			ch = chNext;
		} else {
			return beginning;
		}
	}
	phpStringDelimiter.push_back(ch);
	i++;
	// Collect the rest of the line. Characters are only validated for quoted identifiers;
	// for an unquoted one everything up to the line end is appended.
	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
		if (!IsPhpWordChar(styler[j]) && isQuoted) {
			// The matching closing quote must be the last character on the line.
			if (((isSimpleString && styler[j] == '\'') || (!isSimpleString && styler[j] == '\"')) && isLineEnd(styler.SafeGetCharAt(j + 1))) {
				isQuoted = false;
				j++;
				break;
			} else {
				phpStringDelimiter.clear();
				return beginning;
			}
		}
		phpStringDelimiter.push_back(styler[j]);
	}
	// Still quoted here means the closing quote was never found.
	if (isQuoted) {
		phpStringDelimiter.clear();
		return beginning;
	}
	return j - 1;
}
683
// Options used for LexerHTML
// Each member is bound to a lexer property by OptionSetHTML; the property name is
// given next to the member.
struct OptionsHTML {
	int aspDefaultLanguage = eScriptJS;	// asp.default.language (a script_type value)
	bool caseSensitive = false;		// html.tags.case.sensitive
	bool allowScripts = true;		// lexer.xml.allow.scripts
	bool isMako = false;			// lexer.html.mako
	bool isDjango = false;			// lexer.html.django
	bool fold = false;			// fold
	bool foldHTML = false;			// fold.html
	bool foldHTMLPreprocessor = true;	// fold.html.preprocessor
	bool foldCompact = true;		// fold.compact
	bool foldComment = false;		// fold.hypertext.comment
	bool foldHeredoc = false;		// fold.hypertext.heredoc
	bool foldXmlAtTagOpen = false;		// fold.xml.at.tag.open
	OptionsHTML() noexcept {
	}
};
701
// Descriptions of the keyword lists used when lexing JavaScript: only list 1 is named,
// the other five slots are blank. The array is terminated by a null pointer.
const char * const jsWordListDesc[] = {
	"",			// list 0: no description
	"JavaScript keywords",	// list 1
	"",			// list 2: no description
	"",			// list 3: no description
	"",			// list 4: no description
	"",			// list 5: no description
	nullptr,
};
711
// Descriptions of the keyword lists used when lexing a PHP script: only list 4 is named,
// the other five slots are blank. The array is terminated by a null pointer.
const char * const phpscriptWordListDesc[] = {
	"",		// list 0: unused
	"",		// list 1: unused
	"",		// list 2: unused
	"",		// list 3: unused
	"PHP keywords",	// list 4
	"",		// list 5: unused
	nullptr,
};
721
// Binds each lexer property name to the OptionsHTML member that stores its value and
// selects the keyword list descriptions.
struct OptionSetHTML : public OptionSet<OptionsHTML> {
	// isPHPScript_ chooses phpscriptWordListDesc over jsWordListDesc for the word list names.
	OptionSetHTML(bool isPHPScript_) {

		DefineProperty("asp.default.language", &OptionsHTML::aspDefaultLanguage,
			"Script in ASP code is initially assumed to be in JavaScript. "
			"To change this to VBScript set asp.default.language to 2. Python is 3.");

		DefineProperty("html.tags.case.sensitive", &OptionsHTML::caseSensitive,
			"For XML and HTML, setting this property to 1 will make tags match in a case "
			"sensitive way which is the expected behaviour for XML and XHTML.");

		DefineProperty("lexer.xml.allow.scripts", &OptionsHTML::allowScripts,
			"Set to 0 to disable scripts in XML.");

		DefineProperty("lexer.html.mako", &OptionsHTML::isMako,
			"Set to 1 to enable the mako template language.");

		DefineProperty("lexer.html.django", &OptionsHTML::isDjango,
			"Set to 1 to enable the django template language.");

		// Registered without a description.
		DefineProperty("fold", &OptionsHTML::fold);

		DefineProperty("fold.html", &OptionsHTML::foldHTML,
			"Folding is turned on or off for HTML and XML files with this option. "
			"The fold option must also be on for folding to occur.");

		DefineProperty("fold.html.preprocessor", &OptionsHTML::foldHTMLPreprocessor,
			"Folding is turned on or off for scripts embedded in HTML files with this option. "
			"The default is on.");

		// Registered without a description.
		DefineProperty("fold.compact", &OptionsHTML::foldCompact);

		DefineProperty("fold.hypertext.comment", &OptionsHTML::foldComment,
			"Allow folding for comments in scripts embedded in HTML. "
			"The default is off.");

		DefineProperty("fold.hypertext.heredoc", &OptionsHTML::foldHeredoc,
			"Allow folding for heredocs in scripts embedded in HTML. "
			"The default is off.");

		DefineProperty("fold.xml.at.tag.open", &OptionsHTML::foldXmlAtTagOpen,
			"Enable folding for XML at the start of open tag. "
			"The default is off.");

		DefineWordListSets(isPHPScript_ ? phpscriptWordListDesc : jsWordListDesc);
	}
};
769
770LexicalClass lexicalClassesHTML[] = {
771 // Lexer HTML SCLEX_HTML SCE_H_ SCE_HJ_ SCE_HJA_ SCE_HB_ SCE_HBA_ SCE_HP_ SCE_HPHP_ SCE_HPA_:
772 0, "SCE_H_DEFAULT", "default", "Text",
773 1, "SCE_H_TAG", "tag", "Tags",
774 2, "SCE_H_ERRORTAGUNKNOWN", "error tag", "Unknown Tags",
775 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
776 4, "SCE_H_ATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
777 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
778 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
779 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
780 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
781 9, "SCE_H_COMMENT", "comment", "Comment",
782 10, "SCE_H_ENTITY", "literal", "Entities",
783 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
784 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
785 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
786 14, "SCE_H_SCRIPT", "error", "Internal state which should never be visible",
787 15, "SCE_H_ASP", "preprocessor", "ASP <% ... %>",
788 16, "SCE_H_ASPAT", "preprocessor", "ASP <% ... %>",
789 17, "SCE_H_CDATA", "literal", "CDATA",
790 18, "SCE_H_QUESTION", "preprocessor", "PHP",
791 19, "SCE_H_VALUE", "literal string", "Unquoted values",
792 20, "SCE_H_XCCOMMENT", "comment", "JSP Comment <%-- ... --%>",
793 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
794 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
795 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
796 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
797 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
798 26, "SCE_H_SGML_ERROR", "error", "SGML error",
799 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
800 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
801 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
802 30, "SCE_H_SGML_1ST_PARAM_COMMENT", "error comment", "SGML first parameter - lexer internal. It is an error if any text is in this style.",
803 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
804 32, "", "predefined", "",
805 33, "", "predefined", "",
806 34, "", "predefined", "",
807 35, "", "predefined", "",
808 36, "", "predefined", "",
809 37, "", "predefined", "",
810 38, "", "predefined", "",
811 39, "", "predefined", "",
812 40, "SCE_HJ_START", "client javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
813 41, "SCE_HJ_DEFAULT", "client javascript default", "JS Default",
814 42, "SCE_HJ_COMMENT", "client javascript comment", "JS Comment",
815 43, "SCE_HJ_COMMENTLINE", "client javascript comment line", "JS Line Comment",
816 44, "SCE_HJ_COMMENTDOC", "client javascript comment documentation", "JS Doc comment",
817 45, "SCE_HJ_NUMBER", "client javascript literal numeric", "JS Number",
818 46, "SCE_HJ_WORD", "client javascript identifier", "JS Word",
819 47, "SCE_HJ_KEYWORD", "client javascript keyword", "JS Keyword",
820 48, "SCE_HJ_DOUBLESTRING", "client javascript literal string", "JS Double quoted string",
821 49, "SCE_HJ_SINGLESTRING", "client javascript literal string", "JS Single quoted string",
822 50, "SCE_HJ_SYMBOLS", "client javascript operator", "JS Symbols",
823 51, "SCE_HJ_STRINGEOL", "client javascript error literal string", "JavaScript EOL",
824 52, "SCE_HJ_REGEX", "client javascript literal regex", "JavaScript RegEx",
825 53, "", "unused", "",
826 54, "", "unused", "",
827 55, "SCE_HJA_START", "server javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
828 56, "SCE_HJA_DEFAULT", "server javascript default", "JS Default",
829 57, "SCE_HJA_COMMENT", "server javascript comment", "JS Comment",
830 58, "SCE_HJA_COMMENTLINE", "server javascript comment line", "JS Line Comment",
831 59, "SCE_HJA_COMMENTDOC", "server javascript comment documentation", "JS Doc comment",
832 60, "SCE_HJA_NUMBER", "server javascript literal numeric", "JS Number",
833 61, "SCE_HJA_WORD", "server javascript identifier", "JS Word",
834 62, "SCE_HJA_KEYWORD", "server javascript keyword", "JS Keyword",
835 63, "SCE_HJA_DOUBLESTRING", "server javascript literal string", "JS Double quoted string",
836 64, "SCE_HJA_SINGLESTRING", "server javascript literal string", "JS Single quoted string",
837 65, "SCE_HJA_SYMBOLS", "server javascript operator", "JS Symbols",
838 66, "SCE_HJA_STRINGEOL", "server javascript error literal string", "JavaScript EOL",
839 67, "SCE_HJA_REGEX", "server javascript literal regex", "JavaScript RegEx",
840 68, "", "unused", "",
841 69, "", "unused", "",
842 70, "SCE_HB_START", "client basic default", "Start",
843 71, "SCE_HB_DEFAULT", "client basic default", "Default",
844 72, "SCE_HB_COMMENTLINE", "client basic comment line", "Comment",
845 73, "SCE_HB_NUMBER", "client basic literal numeric", "Number",
846 74, "SCE_HB_WORD", "client basic keyword", "KeyWord",
847 75, "SCE_HB_STRING", "client basic literal string", "String",
848 76, "SCE_HB_IDENTIFIER", "client basic identifier", "Identifier",
849 77, "SCE_HB_STRINGEOL", "client basic literal string", "Unterminated string",
850 78, "", "unused", "",
851 79, "", "unused", "",
852 80, "SCE_HBA_START", "server basic default", "Start",
853 81, "SCE_HBA_DEFAULT", "server basic default", "Default",
854 82, "SCE_HBA_COMMENTLINE", "server basic comment line", "Comment",
855 83, "SCE_HBA_NUMBER", "server basic literal numeric", "Number",
856 84, "SCE_HBA_WORD", "server basic keyword", "KeyWord",
857 85, "SCE_HBA_STRING", "server basic literal string", "String",
858 86, "SCE_HBA_IDENTIFIER", "server basic identifier", "Identifier",
859 87, "SCE_HBA_STRINGEOL", "server basic literal string", "Unterminated string",
860 88, "", "unused", "",
861 89, "", "unused", "",
862 90, "SCE_HP_START", "client python default", "Embedded Python",
863 91, "SCE_HP_DEFAULT", "client python default", "Embedded Python",
864 92, "SCE_HP_COMMENTLINE", "client python comment line", "Comment",
865 93, "SCE_HP_NUMBER", "client python literal numeric", "Number",
866 94, "SCE_HP_STRING", "client python literal string", "String",
867 95, "SCE_HP_CHARACTER", "client python literal string character", "Single quoted string",
868 96, "SCE_HP_WORD", "client python keyword", "Keyword",
869 97, "SCE_HP_TRIPLE", "client python literal string", "Triple quotes",
870 98, "SCE_HP_TRIPLEDOUBLE", "client python literal string", "Triple double quotes",
871 99, "SCE_HP_CLASSNAME", "client python identifier", "Class name definition",
872 100, "SCE_HP_DEFNAME", "client python identifier", "Function or method name definition",
873 101, "SCE_HP_OPERATOR", "client python operator", "Operators",
874 102, "SCE_HP_IDENTIFIER", "client python identifier", "Identifiers",
875 103, "", "unused", "",
876 104, "SCE_HPHP_COMPLEX_VARIABLE", "server php identifier", "PHP complex variable",
877 105, "SCE_HPA_START", "server python default", "ASP Python",
878 106, "SCE_HPA_DEFAULT", "server python default", "ASP Python",
879 107, "SCE_HPA_COMMENTLINE", "server python comment line", "Comment",
880 108, "SCE_HPA_NUMBER", "server python literal numeric", "Number",
881 109, "SCE_HPA_STRING", "server python literal string", "String",
882 110, "SCE_HPA_CHARACTER", "server python literal string character", "Single quoted string",
883 111, "SCE_HPA_WORD", "server python keyword", "Keyword",
884 112, "SCE_HPA_TRIPLE", "server python literal string", "Triple quotes",
885 113, "SCE_HPA_TRIPLEDOUBLE", "server python literal string", "Triple double quotes",
886 114, "SCE_HPA_CLASSNAME", "server python identifier", "Class name definition",
887 115, "SCE_HPA_DEFNAME", "server python identifier", "Function or method name definition",
888 116, "SCE_HPA_OPERATOR", "server python operator", "Operators",
889 117, "SCE_HPA_IDENTIFIER", "server python identifier", "Identifiers",
890 118, "SCE_HPHP_DEFAULT", "server php default", "Default",
891 119, "SCE_HPHP_HSTRING", "server php literal string", "Double quoted String",
892 120, "SCE_HPHP_SIMPLESTRING", "server php literal string", "Single quoted string",
893 121, "SCE_HPHP_WORD", "server php keyword", "Keyword",
894 122, "SCE_HPHP_NUMBER", "server php literal numeric", "Number",
895 123, "SCE_HPHP_VARIABLE", "server php identifier", "Variable",
896 124, "SCE_HPHP_COMMENT", "server php comment", "Comment",
897 125, "SCE_HPHP_COMMENTLINE", "server php comment line", "One line comment",
898 126, "SCE_HPHP_HSTRING_VARIABLE", "server php literal string identifier", "PHP variable in double quoted string",
899 127, "SCE_HPHP_OPERATOR", "server php operator", "PHP operator",
900};
901
902LexicalClass lexicalClassesXML[] = {
903 // Lexer.Secondary XML SCLEX_XML SCE_H_:
904 0, "SCE_H_DEFAULT", "default", "Default",
905 1, "SCE_H_TAG", "tag", "Tags",
906 2, "SCE_H_TAGUNKNOWN", "error tag", "Unknown Tags",
907 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
908 4, "SCE_H_ERRORATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
909 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
910 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
911 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
912 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
913 9, "SCE_H_COMMENT", "comment", "Comment",
914 10, "SCE_H_ENTITY", "literal", "Entities",
915 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
916 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
917 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
918 14, "", "unused", "",
919 15, "", "unused", "",
920 16, "", "unused", "",
921 17, "SCE_H_CDATA", "literal", "CDATA",
922 18, "SCE_H_QUESTION", "preprocessor", "Question",
923 19, "SCE_H_VALUE", "literal string", "Unquoted Value",
924 20, "", "unused", "",
925 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
926 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
927 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
928 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
929 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
930 26, "SCE_H_SGML_ERROR", "error", "SGML error",
931 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
932 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
933 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
934 30, "", "unused", "",
935 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
936};
937
// Names of HTML tags that should not be folded.
// LexerJS copies this list into its nonFoldingTags set at construction.
const char *tagsThatDoNotFold[] = {
	"area", "base", "basefont", "br", "col",
	"command", "embed", "frame", "hr", "img",
	"input", "isindex", "keygen", "link", "meta",
	"param", "source", "track", "wbr",
};
959
960}
961
962class LexerJS : public DefaultLexer {
963 bool isXml;
964 bool isPHPScript;
965 WordList keywords;
966 WordList keywords2;
967 WordList keywords3;
968 WordList keywords4;
969 WordList keywords5;
970 WordList keywords6; // SGML (DTD) keywords
971 OptionsHTML options;
972 OptionSetHTML osHTML;
973 std::set<std::string> nonFoldingTags;
974public:
975 explicit LexerJS(bool isXml_, bool isPHPScript_) :
976 DefaultLexer(
977 isXml_ ? "xml" : (isPHPScript_ ? "phpscript" : "hypertext"),
978 isXml_ ? SCLEX_XML : (isPHPScript_ ? SCLEX_PHPSCRIPT : SCLEX_JS),
979 isXml_ ? lexicalClassesXML : lexicalClassesHTML,
980 isXml_ ? std::size(lexicalClassesXML) : std::size(lexicalClassesHTML)),
981 isXml(isXml_),
982 isPHPScript(isPHPScript_),
983 osHTML(isPHPScript_),
984 nonFoldingTags(std::begin(tagsThatDoNotFold), std::end(tagsThatDoNotFold)) {
985 }
986 ~LexerJS() override {
987 }
988 void SCI_METHOD Release() override {
989 delete this;
990 }
991 const char *SCI_METHOD PropertyNames() override {
992 return osHTML.PropertyNames();
993 }
994 int SCI_METHOD PropertyType(const char *name) override {
995 return osHTML.PropertyType(name);
996 }
997 const char *SCI_METHOD DescribeProperty(const char *name) override {
998 return osHTML.DescribeProperty(name);
999 }
1000 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
1001 const char * SCI_METHOD PropertyGet(const char *key) override {
1002 return osHTML.PropertyGet(key);
1003 }
1004 const char *SCI_METHOD DescribeWordListSets() override {
1005 return osHTML.DescribeWordListSets();
1006 }
1007 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
1008 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
1009 // No Fold as all folding performs in Lex.
1010
1011 static ILexer5 *LexerFactoryHTML() {
1012 return new LexerJS(false, false);
1013 }
1014 static ILexer5 *LexerFactoryJS() {
1015 return new LexerJS(false, false);
1016 }
1017 static ILexer5 *LexerFactoryXML() {
1018 return new LexerJS(true, false);
1019 }
1020 static ILexer5 *LexerFactoryPHPScript() {
1021 return new LexerJS(false, true);
1022 }
1023};
1024
1025Sci_Position SCI_METHOD LexerJS::PropertySet(const char *key, const char *val) {
1026 if (osHTML.PropertySet(&options, key, val)) {
1027 return 0;
1028 }
1029 return -1;
1030}
1031
1032Sci_Position SCI_METHOD LexerJS::WordListSet(int n, const char *wl) {
1033 WordList *wordListN = 0;
1034 switch (n) {
1035 case 0:
1036 wordListN = &keywords;
1037 break;
1038 case 1:
1039 wordListN = &keywords2;
1040 break;
1041 case 2:
1042 wordListN = &keywords3;
1043 break;
1044 case 3:
1045 wordListN = &keywords4;
1046 break;
1047 case 4:
1048 wordListN = &keywords5;
1049 break;
1050 case 5:
1051 wordListN = &keywords6;
1052 break;
1053 }
1054 Sci_Position firstModification = -1;
1055 if (wordListN) {
1056 WordList wlNew;
1057 wlNew.Set(wl);
1058 if (*wordListN != wlNew) {
1059 wordListN->Set(wl);
1060 firstModification = 0;
1061 }
1062 }
1063 return firstModification;
1064}
1065
1066void SCI_METHOD LexerJS::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1067 Accessor styler(pAccess, nullptr);
1068 if (isPHPScript && (startPos == 0)) {
1069 initStyle = SCE_HPHP_DEFAULT;
1070 }
1071 styler.StartAt(startPos);
1072 std::string prevWord;
1073 PhpNumberState phpNumber;
1074 std::string phpStringDelimiter;
1075 int StateToPrint = initStyle;
1076 int state = stateForPrintState(StateToPrint);
1077 std::string makoBlockType;
1078 int makoComment = 0;
1079 std::string djangoBlockType;
1080 styler.StartAt(startPos);
1081
1082 Sci_Position lineCurrent = styler.GetLine(startPos);
1083 int lineState;
1084 if (lineCurrent > 0) {
1085 lineState = styler.GetLineState(lineCurrent-1);
1086 } else {
1087 // Default client and ASP scripting language is JavaScript
1088 lineState = eScriptJS << 8;
1089 lineState |= options.aspDefaultLanguage << 4;
1090 }
1091 script_mode inScriptType = static_cast<script_mode>((lineState >> 0) & 0x03); // 2 bits of scripting mode
1092
1093 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
1094 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
1095 bool tagDontFold = false; //some HTML tags should not be folded
1096 script_type aspScript = static_cast<script_type>((lineState >> 4) & 0x0F); // 4 bits of script name
1097 script_type clientScript = static_cast<script_type>((lineState >> 8) & 0x0F); // 4 bits of script name
1098 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
1099
1100 script_type scriptLanguage = ScriptOfState(state);
1101 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
1102 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
1103 scriptLanguage = eScriptComment;
1104 }
1105 script_type beforeLanguage = ScriptOfState(beforePreProc);
1106 const bool foldHTML = options.foldHTML;
1107 const bool fold = foldHTML && options.fold;
1108 const bool foldHTMLPreprocessor = foldHTML && options.foldHTMLPreprocessor;
1109 const bool foldCompact = options.foldCompact;
1110 const bool foldComment = fold && options.foldComment;
1111 const bool foldHeredoc = fold && options.foldHeredoc;
1112 const bool foldXmlAtTagOpen = isXml && fold && options.foldXmlAtTagOpen;
1113 const bool caseSensitive = options.caseSensitive;
1114 const bool allowScripts = options.allowScripts;
1115 const bool isMako = options.isMako;
1116 const bool isDjango = options.isDjango;
1117 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", true);
1118 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", true);
1119 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", true);
1120 // TODO: also handle + and - (except if they're part of ++ or --) and return keywords
1121 const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
1122
1123 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1124 int levelCurrent = levelPrev;
1125 int visibleChars = 0;
1126 int lineStartVisibleChars = 0;
1127
1128 int chPrev = ' ';
1129 int ch = ' ';
1130 int chPrevNonWhite = ' ';
1131 // look back to set chPrevNonWhite properly for better regex colouring
1132 if (scriptLanguage == eScriptJS && startPos > 0) {
1133 Sci_Position back = startPos;
1134 int style = 0;
1135 while (--back) {
1136 style = styler.StyleAt(back);
1137 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
1138 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
1139 break;
1140 }
1141 if (style == SCE_HJ_SYMBOLS) {
1142 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
1143 }
1144 }
1145
1146 styler.StartSegment(startPos);
1147 const Sci_Position lengthDoc = startPos + length;
1148 bool bFirst = true;
1149 int chNext = 0;
1150 int chNext2 = 0;
1151 int chPrev2 = 0;
1152 for (Sci_Position i = startPos; i < lengthDoc; i++) {
1153 if (bFirst) {
1154 StateToPrint = 2;
1155 aspScript=eScriptJS;
1156 beforeLanguage=eScriptNone;
1157 ch = 10;
1158 chNext = 102;
1159 chNext2 = 117;
1160 chPrev = 62;
1161 chPrev2 = 116;
1162 chPrevNonWhite = 62;
1163 clientScript=eScriptJS;
1164 inScriptType= eNonHtmlScript;
1165 initStyle =0;
1166 scriptLanguage = clientScript;
1167 state=SCE_HJ_START;
1168 tagOpened =false;
1169 tagClosing=false;
1170 tagDontFold =false;
1171 StateToPrint = 2;
1172 bFirst = false;
1173 }
1174 chPrev2 = chPrev;
1175 chPrev = ch;
1176 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
1177 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
1178 chPrevNonWhite = ch;
1179 ch = static_cast<unsigned char>(styler[i]);
1180 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1181 chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
1182 printf("---%d:%c %c %c", static_cast<int>(i), ch, chNext, chNext2);
1183
1184 // Handle DBCS codepages
1185 if (styler.IsLeadByte(static_cast<char>(ch))) {
1186 chPrev = ' ';
1187 i += 1;
1188 continue;
1189 }
1190
1191 if ((!IsASpace(ch) || !foldCompact) && fold)
1192 visibleChars++;
1193 if (!IsASpace(ch))
1194 lineStartVisibleChars++;
1195
1196 // decide what is the current state to print (depending of the script tag)
1197 StateToPrint = statePrintForState(state, inScriptType);
1198
1199 // handle script folding
1200 if (fold) {
1201 switch (scriptLanguage) {
1202 case eScriptJS:
1203 case eScriptPHP:
1204 //not currently supported case eScriptVBS:
1205
1206 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
1207 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
1208 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
1209 if (ch == '#') {
1210 Sci_Position j = i + 1;
1211 while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1212 j++;
1213 }
1214 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1215 levelCurrent++;
1216 } else if (styler.Match(j, "end")) {
1217 levelCurrent--;
1218 }
1219 } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
1220 levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
1221 }
1222 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
1223 levelCurrent--;
1224 }
1225 break;
1226 case eScriptPython:
1227 if (state != SCE_HP_COMMENTLINE && !isMako) {
1228 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
1229 levelCurrent++;
1230 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
1231 // check if the number of tabs is lower than the level
1232 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
1233 for (Sci_Position j = 0; Findlevel > 0; j++) {
1234 const char chTmp = styler.SafeGetCharAt(i + j + 1);
1235 if (chTmp == '\t') {
1236 Findlevel -= 8;
1237 } else if (chTmp == ' ') {
1238 Findlevel--;
1239 } else {
1240 break;
1241 }
1242 }
1243
1244 if (Findlevel > 0) {
1245 levelCurrent -= Findlevel / 8;
1246 if (Findlevel % 8)
1247 levelCurrent--;
1248 }
1249 }
1250 }
1251 break;
1252 default:
1253 break;
1254 }
1255 }
1256
1257 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
1258 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
1259 // Avoid triggering two times on Dos/Win
1260 // New line -> record any line state onto /next/ line
1261 if (fold) {
1262 int lev = levelPrev;
1263 if (visibleChars == 0)
1264 lev |= SC_FOLDLEVELWHITEFLAG;
1265 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1266 lev |= SC_FOLDLEVELHEADERFLAG;
1267
1268 styler.SetLevel(lineCurrent, lev);
1269 visibleChars = 0;
1270 levelPrev = levelCurrent;
1271 }
1272 styler.SetLineState(lineCurrent,
1273 ((inScriptType & 0x03) << 0) |
1274 ((tagOpened ? 1 : 0) << 2) |
1275 ((tagClosing ? 1 : 0) << 3) |
1276 ((aspScript & 0x0F) << 4) |
1277 ((clientScript & 0x0F) << 8) |
1278 ((beforePreProc & 0xFF) << 12));
1279 lineCurrent++;
1280 lineStartVisibleChars = 0;
1281 }
1282
1283 // handle start of Mako comment line
1284 if (isMako && ch == '#' && chNext == '#') {
1285 makoComment = 1;
1286 state = SCE_HP_COMMENTLINE;
1287 }
1288
1289 // handle end of Mako comment line
1290 else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
1291 makoComment = 0;
1292 styler.ColourTo(i - 1, StateToPrint);
1293 if (scriptLanguage == eScriptPython) {
1294 state = SCE_HP_DEFAULT;
1295 } else {
1296 state = SCE_H_DEFAULT;
1297 }
1298 }
1299 // Allow falling through to mako handling code if newline is going to end a block
1300 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
1301 (!isMako || (makoBlockType != "%"))) {
1302 }
1303 // Ignore everything in mako comment until the line ends
1304 else if (isMako && makoComment) {
1305 }
1306
1307 // generic end of script processing
1308 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
1309 // Check if it's the end of the script tag (or any other HTML tag)
1310 switch (state) {
1311 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
1312 case SCE_H_DOUBLESTRING:
1313 case SCE_H_SINGLESTRING:
1314 case SCE_HJ_COMMENT:
1315 case SCE_HJ_COMMENTDOC:
1316 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
1317 // the end of script marker from some JS interpreters.
1318 case SCE_HB_COMMENTLINE:
1319 case SCE_HBA_COMMENTLINE:
1320 case SCE_HJ_DOUBLESTRING:
1321 case SCE_HJ_SINGLESTRING:
1322 case SCE_HJ_REGEX:
1323 case SCE_HB_STRING:
1324 case SCE_HBA_STRING:
1325 case SCE_HP_STRING:
1326 case SCE_HP_TRIPLE:
1327 case SCE_HP_TRIPLEDOUBLE:
1328 case SCE_HPHP_HSTRING:
1329 case SCE_HPHP_SIMPLESTRING:
1330 case SCE_HPHP_COMMENT:
1331 case SCE_HPHP_COMMENTLINE:
1332 break;
1333 default :
1334 // check if the closing tag is a script tag
1335 if (const char *tag =
1336 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
1337 state == SCE_H_COMMENT ? "comment" : 0) {
1338 Sci_Position j = i + 2;
1339 int chr;
1340 do {
1341 chr = static_cast<int>(*tag++);
1342 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
1343 if (chr != 0) break;
1344 }
1345 // closing tag of the script (it's a closing HTML tag anyway)
1346 styler.ColourTo(i - 1, StateToPrint);
1347 state = SCE_H_TAGUNKNOWN;
1348 inScriptType = eHtml;
1349 scriptLanguage = eScriptNone;
1350 clientScript = eScriptJS;
1351 i += 2;
1352 visibleChars += 2;
1353 tagClosing = true;
1354 if (foldXmlAtTagOpen) {
1355 levelCurrent--;
1356 }
1357 continue;
1358 }
1359 }
1360
1361 /////////////////////////////////////
1362 // handle the start of PHP pre-processor = Non-HTML
1363 else if ((state != SCE_H_ASPAT) &&
1364 !isPHPStringState(state) &&
1365 (state != SCE_HPHP_COMMENT) &&
1366 (state != SCE_HPHP_COMMENTLINE) &&
1367 (ch == '<') &&
1368 (chNext == '?') &&
1369 !IsScriptCommentState(state)) {
1370 beforeLanguage = scriptLanguage;
1371 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
1372 if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
1373 styler.ColourTo(i - 1, StateToPrint);
1374 beforePreProc = state;
1375 i++;
1376 visibleChars++;
1377 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
1378 if (scriptLanguage == eScriptXML)
1379 styler.ColourTo(i, SCE_H_XMLSTART);
1380 else
1381 styler.ColourTo(i, SCE_H_QUESTION);
1382 state = StateForScript(scriptLanguage);
1383 if (inScriptType == eNonHtmlScript)
1384 inScriptType = eNonHtmlScriptPreProc;
1385 else
1386 inScriptType = eNonHtmlPreProc;
1387 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
1388 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1389 levelCurrent++;
1390 }
1391 // should be better
1392 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1393 continue;
1394 }
1395
1396 // handle the start Mako template Python code
1397 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
1398 (lineStartVisibleChars == 1 && ch == '%') ||
1399 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
1400 (ch == '$' && chNext == '{') ||
1401 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
1402 if (ch == '%' || ch == '/')
1403 makoBlockType = "%";
1404 else if (ch == '$')
1405 makoBlockType = "{";
1406 else if (chNext == '/')
1407 makoBlockType = GetNextWord(styler, i+3);
1408 else
1409 makoBlockType = GetNextWord(styler, i+2);
1410 styler.ColourTo(i - 1, StateToPrint);
1411 beforePreProc = state;
1412 if (inScriptType == eNonHtmlScript)
1413 inScriptType = eNonHtmlScriptPreProc;
1414 else
1415 inScriptType = eNonHtmlPreProc;
1416
1417 if (chNext == '/') {
1418 i += 2;
1419 visibleChars += 2;
1420 } else if (ch != '%') {
1421 i++;
1422 visibleChars++;
1423 }
1424 state = SCE_HP_START;
1425 scriptLanguage = eScriptPython;
1426 styler.ColourTo(i, SCE_H_ASP);
1427 if (ch != '%' && ch != '$' && ch != '/') {
1428 i += makoBlockType.length();
1429 visibleChars += static_cast<int>(makoBlockType.length());
1430 if (keywords4.InList(makoBlockType.c_str()))
1431 styler.ColourTo(i, SCE_HP_WORD);
1432 else
1433 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1434 }
1435
1436 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1437 continue;
1438 }
1439
1440 // handle the start/end of Django comment
1441 else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
1442 styler.ColourTo(i - 1, StateToPrint);
1443 beforePreProc = state;
1444 beforeLanguage = scriptLanguage;
1445 if (inScriptType == eNonHtmlScript)
1446 inScriptType = eNonHtmlScriptPreProc;
1447 else
1448 inScriptType = eNonHtmlPreProc;
1449 i += 1;
1450 visibleChars += 1;
1451 scriptLanguage = eScriptComment;
1452 state = SCE_H_COMMENT;
1453 styler.ColourTo(i, SCE_H_ASP);
1454 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1455 continue;
1456 } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
1457 styler.ColourTo(i - 1, StateToPrint);
1458 i += 1;
1459 visibleChars += 1;
1460 styler.ColourTo(i, SCE_H_ASP);
1461 state = beforePreProc;
1462 if (inScriptType == eNonHtmlScriptPreProc)
1463 inScriptType = eNonHtmlScript;
1464 else
1465 inScriptType = eHtml;
1466 scriptLanguage = beforeLanguage;
1467 continue;
1468 }
1469
1470 // handle the start Django template code
1471 else if (isDjango && scriptLanguage != eScriptPython && scriptLanguage != eScriptComment && (ch == '{' && (chNext == '%' || chNext == '{'))) {
1472 if (chNext == '%')
1473 djangoBlockType = "%";
1474 else
1475 djangoBlockType = "{";
1476 styler.ColourTo(i - 1, StateToPrint);
1477 beforePreProc = state;
1478 if (inScriptType == eNonHtmlScript)
1479 inScriptType = eNonHtmlScriptPreProc;
1480 else
1481 inScriptType = eNonHtmlPreProc;
1482
1483 i += 1;
1484 visibleChars += 1;
1485 state = SCE_HP_START;
1486 beforeLanguage = scriptLanguage;
1487 scriptLanguage = eScriptPython;
1488 styler.ColourTo(i, SCE_H_ASP);
1489
1490 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1491 continue;
1492 }
1493
1494 // handle the start of ASP pre-processor = Non-HTML
1495 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1496 styler.ColourTo(i - 1, StateToPrint);
1497 beforePreProc = state;
1498 if (inScriptType == eNonHtmlScript)
1499 inScriptType = eNonHtmlScriptPreProc;
1500 else
1501 inScriptType = eNonHtmlPreProc;
1502
1503 if (chNext2 == '@') {
1504 i += 2; // place as if it was the second next char treated
1505 visibleChars += 2;
1506 state = SCE_H_ASPAT;
1507 scriptLanguage = eScriptVBS;
1508 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1509 styler.ColourTo(i + 3, SCE_H_ASP);
1510 state = SCE_H_XCCOMMENT;
1511 scriptLanguage = eScriptVBS;
1512 continue;
1513 } else {
1514 if (chNext2 == '=') {
1515 i += 2; // place as if it was the second next char treated
1516 visibleChars += 2;
1517 } else {
1518 i++; // place as if it was the next char treated
1519 visibleChars++;
1520 }
1521
1522 state = StateForScript(aspScript);
1523 scriptLanguage = aspScript;
1524 }
1525 styler.ColourTo(i, SCE_H_ASP);
1526 // fold whole script
1527 if (foldHTMLPreprocessor)
1528 levelCurrent++;
1529 // should be better
1530 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1531 continue;
1532 }
1533
1534 /////////////////////////////////////
1535 // handle the start of SGML language (DTD)
1536 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1537 (chPrev == '<') &&
1538 (ch == '!') &&
1539 (StateToPrint != SCE_H_CDATA) &&
1540 (!isStringState(StateToPrint)) &&
1541 (!IsCommentState(StateToPrint)) &&
1542 (!IsScriptCommentState(StateToPrint))) {
1543 beforePreProc = state;
1544 styler.ColourTo(i - 2, StateToPrint);
1545 if ((chNext == '-') && (chNext2 == '-')) {
1546 state = SCE_H_COMMENT; // wait for a pending command
1547 styler.ColourTo(i + 2, SCE_H_COMMENT);
1548 i += 2; // follow styling after the --
1549 } else if (isWordCdata(i + 1, i + 7, styler)) {
1550 state = SCE_H_CDATA;
1551 } else {
1552 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1553 scriptLanguage = eScriptSGML;
1554 state = SCE_H_SGML_COMMAND; // wait for a pending command
1555 }
1556 // fold whole tag (-- when closing the tag)
1557 if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1558 levelCurrent++;
1559 continue;
1560 }
1561
1562 // handle the end of Mako Python code
1563 else if (isMako &&
1564 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1565 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1566 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1567 if (state == SCE_H_ASPAT) {
1568 aspScript = segIsScriptingIndicator(styler,
1569 styler.GetStartSegment(), i - 1, aspScript);
1570 }
1571 if (state == SCE_HP_WORD) {
1572 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1573 } else {
1574 styler.ColourTo(i - 1, StateToPrint);
1575 }
1576 if ((makoBlockType != "%") && (makoBlockType != "{") && ch != '>') {
1577 i++;
1578 visibleChars++;
1579 }
1580 else if ((makoBlockType == "%") && ch == '/') {
1581 i++;
1582 visibleChars++;
1583 }
1584 if ((makoBlockType != "%") || ch == '/') {
1585 styler.ColourTo(i, SCE_H_ASP);
1586 }
1587 state = beforePreProc;
1588 if (inScriptType == eNonHtmlScriptPreProc)
1589 inScriptType = eNonHtmlScript;
1590 else
1591 inScriptType = eHtml;
1592 scriptLanguage = eScriptNone;
1593 continue;
1594 }
1595
1596 // handle the end of Django template code
1597 else if (isDjango &&
1598 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1599 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1600 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1601 if (state == SCE_H_ASPAT) {
1602 aspScript = segIsScriptingIndicator(styler,
1603 styler.GetStartSegment(), i - 1, aspScript);
1604 }
1605 if (state == SCE_HP_WORD) {
1606 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1607 } else {
1608 styler.ColourTo(i - 1, StateToPrint);
1609 }
1610 i += 1;
1611 visibleChars += 1;
1612 styler.ColourTo(i, SCE_H_ASP);
1613 state = beforePreProc;
1614 if (inScriptType == eNonHtmlScriptPreProc)
1615 inScriptType = eNonHtmlScript;
1616 else
1617 inScriptType = eHtml;
1618 scriptLanguage = beforeLanguage;
1619 continue;
1620 }
1621
1622 // handle the end of a pre-processor = Non-HTML
1623 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1624 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1625 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1626 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1627 if (state == SCE_H_ASPAT) {
1628 aspScript = segIsScriptingIndicator(styler,
1629 styler.GetStartSegment(), i - 1, aspScript);
1630 }
1631 // Bounce out of any ASP mode
1632 switch (state) {
1633 case SCE_HJ_WORD:
1634 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1635 break;
1636 case SCE_HB_WORD:
1637 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1638 break;
1639 case SCE_HP_WORD:
1640 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1641 break;
1642 case SCE_HPHP_WORD:
1643 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1644 break;
1645 case SCE_H_XCCOMMENT:
1646 styler.ColourTo(i - 1, state);
1647 break;
1648 default :
1649 styler.ColourTo(i - 1, StateToPrint);
1650 break;
1651 }
1652 if (scriptLanguage != eScriptSGML) {
1653 i++;
1654 visibleChars++;
1655 }
1656 if (ch == '%')
1657 styler.ColourTo(i, SCE_H_ASP);
1658 else if (scriptLanguage == eScriptXML)
1659 styler.ColourTo(i, SCE_H_XMLEND);
1660 else if (scriptLanguage == eScriptSGML)
1661 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1662 else
1663 styler.ColourTo(i, SCE_H_QUESTION);
1664 state = beforePreProc;
1665 if (inScriptType == eNonHtmlScriptPreProc)
1666 inScriptType = eNonHtmlScript;
1667 else
1668 inScriptType = eHtml;
1669 // Unfold all scripting languages, except for XML tag
1670 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1671 levelCurrent--;
1672 }
1673 scriptLanguage = beforeLanguage;
1674 continue;
1675 }
1676 /////////////////////////////////////
1677
1678 switch (state) {
1679 case SCE_H_DEFAULT:
1680 if (ch == '<') {
1681 // in HTML, fold on tag open and unfold on tag close
1682 tagOpened = true;
1683 tagClosing = (chNext == '/');
1684 if (foldXmlAtTagOpen && !(chNext == '/' || chNext == '?' || chNext == '!' || chNext == '-' || chNext == '%')) {
1685 levelCurrent++;
1686 }
1687 if (foldXmlAtTagOpen && chNext == '/') {
1688 levelCurrent--;
1689 }
1690 styler.ColourTo(i - 1, StateToPrint);
1691 if (chNext != '!')
1692 state = SCE_H_TAGUNKNOWN;
1693 } else if (ch == '&') {
1694 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1695 state = SCE_H_ENTITY;
1696 }
1697 break;
1698 case SCE_H_SGML_DEFAULT:
1699 case SCE_H_SGML_BLOCK_DEFAULT:
1700// if (scriptLanguage == eScriptSGMLblock)
1701// StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1702
1703 if (ch == '\"') {
1704 styler.ColourTo(i - 1, StateToPrint);
1705 state = SCE_H_SGML_DOUBLESTRING;
1706 } else if (ch == '\'') {
1707 styler.ColourTo(i - 1, StateToPrint);
1708 state = SCE_H_SGML_SIMPLESTRING;
1709 } else if ((ch == '-') && (chPrev == '-')) {
1710 if (static_cast<Sci_Position>(styler.GetStartSegment()) <= (i - 2)) {
1711 styler.ColourTo(i - 2, StateToPrint);
1712 }
1713 state = SCE_H_SGML_COMMENT;
1714 } else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
1715 styler.ColourTo(i - 2, StateToPrint);
1716 state = SCE_H_SGML_ENTITY;
1717 } else if (ch == '#') {
1718 styler.ColourTo(i - 1, StateToPrint);
1719 state = SCE_H_SGML_SPECIAL;
1720 } else if (ch == '[') {
1721 styler.ColourTo(i - 1, StateToPrint);
1722 scriptLanguage = eScriptSGMLblock;
1723 state = SCE_H_SGML_BLOCK_DEFAULT;
1724 } else if (ch == ']') {
1725 if (scriptLanguage == eScriptSGMLblock) {
1726 styler.ColourTo(i, StateToPrint);
1727 scriptLanguage = eScriptSGML;
1728 } else {
1729 styler.ColourTo(i - 1, StateToPrint);
1730 styler.ColourTo(i, SCE_H_SGML_ERROR);
1731 }
1732 state = SCE_H_SGML_DEFAULT;
1733 } else if (scriptLanguage == eScriptSGMLblock) {
1734 if ((ch == '!') && (chPrev == '<')) {
1735 styler.ColourTo(i - 2, StateToPrint);
1736 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1737 state = SCE_H_SGML_COMMAND;
1738 } else if (ch == '>') {
1739 styler.ColourTo(i - 1, StateToPrint);
1740 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1741 }
1742 }
1743 break;
1744 case SCE_H_SGML_COMMAND:
1745 if ((ch == '-') && (chPrev == '-')) {
1746 styler.ColourTo(i - 2, StateToPrint);
1747 state = SCE_H_SGML_COMMENT;
1748 } else if (!issgmlwordchar(ch)) {
1749 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1750 styler.ColourTo(i - 1, StateToPrint);
1751 state = SCE_H_SGML_1ST_PARAM;
1752 } else {
1753 state = SCE_H_SGML_ERROR;
1754 }
1755 }
1756 break;
1757 case SCE_H_SGML_1ST_PARAM:
1758 // wait for the beginning of the word
1759 if ((ch == '-') && (chPrev == '-')) {
1760 if (scriptLanguage == eScriptSGMLblock) {
1761 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1762 } else {
1763 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1764 }
1765 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1766 } else if (issgmlwordchar(ch)) {
1767 if (scriptLanguage == eScriptSGMLblock) {
1768 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1769 } else {
1770 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1771 }
1772 // find the length of the word
1773 int size = 1;
1774 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1775 size++;
1776 styler.ColourTo(i + size - 1, StateToPrint);
1777 i += size - 1;
1778 visibleChars += size - 1;
1779 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1780 if (scriptLanguage == eScriptSGMLblock) {
1781 state = SCE_H_SGML_BLOCK_DEFAULT;
1782 } else {
1783 state = SCE_H_SGML_DEFAULT;
1784 }
1785 continue;
1786 }
1787 break;
1788 case SCE_H_SGML_ERROR:
1789 if ((ch == '-') && (chPrev == '-')) {
1790 styler.ColourTo(i - 2, StateToPrint);
1791 state = SCE_H_SGML_COMMENT;
1792 }
1793 break;
1794 case SCE_H_SGML_DOUBLESTRING:
1795 if (ch == '\"') {
1796 styler.ColourTo(i, StateToPrint);
1797 state = SCE_H_SGML_DEFAULT;
1798 }
1799 break;
1800 case SCE_H_SGML_SIMPLESTRING:
1801 if (ch == '\'') {
1802 styler.ColourTo(i, StateToPrint);
1803 state = SCE_H_SGML_DEFAULT;
1804 }
1805 break;
1806 case SCE_H_SGML_COMMENT:
1807 if ((ch == '-') && (chPrev == '-')) {
1808 styler.ColourTo(i, StateToPrint);
1809 state = SCE_H_SGML_DEFAULT;
1810 }
1811 break;
1812 case SCE_H_CDATA:
1813 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1814 styler.ColourTo(i, StateToPrint);
1815 state = SCE_H_DEFAULT;
1816 levelCurrent--;
1817 }
1818 break;
1819 case SCE_H_COMMENT:
1820 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1821 styler.ColourTo(i, StateToPrint);
1822 state = SCE_H_DEFAULT;
1823 levelCurrent--;
1824 }
1825 break;
1826 case SCE_H_SGML_1ST_PARAM_COMMENT:
1827 if ((ch == '-') && (chPrev == '-')) {
1828 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1829 state = SCE_H_SGML_1ST_PARAM;
1830 }
1831 break;
1832 case SCE_H_SGML_SPECIAL:
1833 if (!(IsASCII(ch) && isupper(ch))) {
1834 styler.ColourTo(i - 1, StateToPrint);
1835 if (isalnum(ch)) {
1836 state = SCE_H_SGML_ERROR;
1837 } else {
1838 state = SCE_H_SGML_DEFAULT;
1839 }
1840 }
1841 break;
1842 case SCE_H_SGML_ENTITY:
1843 if (ch == ';') {
1844 styler.ColourTo(i, StateToPrint);
1845 state = SCE_H_SGML_DEFAULT;
1846 } else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1847 styler.ColourTo(i, SCE_H_SGML_ERROR);
1848 state = SCE_H_SGML_DEFAULT;
1849 }
1850 break;
1851 case SCE_H_ENTITY:
1852 if (ch == ';') {
1853 styler.ColourTo(i, StateToPrint);
1854 state = SCE_H_DEFAULT;
1855 }
1856 if (ch != '#' && !(IsASCII(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1857 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1858 if (!IsASCII(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1859 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1860 else
1861 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1862 state = SCE_H_DEFAULT;
1863 }
1864 break;
1865 case SCE_H_TAGUNKNOWN:
1866 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1867 int eClass = classifyTagHTML(styler.GetStartSegment(),
1868 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts, nonFoldingTags);
1869 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1870 if (!tagClosing) {
1871 inScriptType = eNonHtmlScript;
1872 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1873 } else {
1874 scriptLanguage = eScriptNone;
1875 }
1876 eClass = SCE_H_TAG;
1877 }
1878 if (ch == '>') {
1879 styler.ColourTo(i, eClass);
1880 if (inScriptType == eNonHtmlScript) {
1881 state = StateForScript(scriptLanguage);
1882 } else {
1883 state = SCE_H_DEFAULT;
1884 }
1885 tagOpened = false;
1886 if (!(foldXmlAtTagOpen || tagDontFold)) {
1887 if (tagClosing) {
1888 levelCurrent--;
1889 } else {
1890 levelCurrent++;
1891 }
1892 }
1893 tagClosing = false;
1894 } else if (ch == '/' && chNext == '>') {
1895 if (eClass == SCE_H_TAGUNKNOWN) {
1896 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1897 } else {
1898 styler.ColourTo(i - 1, StateToPrint);
1899 styler.ColourTo(i + 1, SCE_H_TAGEND);
1900 }
1901 i++;
1902 ch = chNext;
1903 state = SCE_H_DEFAULT;
1904 tagOpened = false;
1905 if (foldXmlAtTagOpen) {
1906 levelCurrent--;
1907 }
1908 } else {
1909 if (eClass != SCE_H_TAGUNKNOWN) {
1910 if (eClass == SCE_H_SGML_DEFAULT) {
1911 state = SCE_H_SGML_DEFAULT;
1912 } else {
1913 state = SCE_H_OTHER;
1914 }
1915 }
1916 }
1917 }
1918 break;
1919 case SCE_H_ATTRIBUTE:
1920 if (!setAttributeContinue.Contains(ch)) {
1921 if (inScriptType == eNonHtmlScript) {
1922 const int scriptLanguagePrev = scriptLanguage;
1923 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1924 scriptLanguage = clientScript;
1925 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1926 inScriptType = eHtml;
1927 }
1928 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1929 if (ch == '>') {
1930 styler.ColourTo(i, SCE_H_TAG);
1931 if (inScriptType == eNonHtmlScript) {
1932 state = StateForScript(scriptLanguage);
1933 } else {
1934 state = SCE_H_DEFAULT;
1935 }
1936 tagOpened = false;
1937 if (!(foldXmlAtTagOpen || tagDontFold)) {
1938 if (tagClosing) {
1939 levelCurrent--;
1940 } else {
1941 levelCurrent++;
1942 }
1943 }
1944 tagClosing = false;
1945 } else if (ch == '=') {
1946 styler.ColourTo(i, SCE_H_OTHER);
1947 state = SCE_H_VALUE;
1948 } else {
1949 state = SCE_H_OTHER;
1950 }
1951 }
1952 break;
1953 case SCE_H_OTHER:
1954 if (ch == '>') {
1955 styler.ColourTo(i - 1, StateToPrint);
1956 styler.ColourTo(i, SCE_H_TAG);
1957 if (inScriptType == eNonHtmlScript) {
1958 state = StateForScript(scriptLanguage);
1959 } else {
1960 state = SCE_H_DEFAULT;
1961 }
1962 tagOpened = false;
1963 if (!(foldXmlAtTagOpen || tagDontFold)) {
1964 if (tagClosing) {
1965 levelCurrent--;
1966 } else {
1967 levelCurrent++;
1968 }
1969 }
1970 tagClosing = false;
1971 } else if (ch == '\"') {
1972 styler.ColourTo(i - 1, StateToPrint);
1973 state = SCE_H_DOUBLESTRING;
1974 } else if (ch == '\'') {
1975 styler.ColourTo(i - 1, StateToPrint);
1976 state = SCE_H_SINGLESTRING;
1977 } else if (ch == '=') {
1978 styler.ColourTo(i, StateToPrint);
1979 state = SCE_H_VALUE;
1980 } else if (ch == '/' && chNext == '>') {
1981 styler.ColourTo(i - 1, StateToPrint);
1982 styler.ColourTo(i + 1, SCE_H_TAGEND);
1983 i++;
1984 ch = chNext;
1985 state = SCE_H_DEFAULT;
1986 tagOpened = false;
1987 if (foldXmlAtTagOpen) {
1988 levelCurrent--;
1989 }
1990 } else if (ch == '?' && chNext == '>') {
1991 styler.ColourTo(i - 1, StateToPrint);
1992 styler.ColourTo(i + 1, SCE_H_XMLEND);
1993 i++;
1994 ch = chNext;
1995 state = SCE_H_DEFAULT;
1996 } else if (setHTMLWord.Contains(ch)) {
1997 styler.ColourTo(i - 1, StateToPrint);
1998 state = SCE_H_ATTRIBUTE;
1999 }
2000 break;
2001 case SCE_H_DOUBLESTRING:
2002 if (ch == '\"') {
2003 if (inScriptType == eNonHtmlScript) {
2004 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
2005 }
2006 styler.ColourTo(i, SCE_H_DOUBLESTRING);
2007 state = SCE_H_OTHER;
2008 }
2009 break;
2010 case SCE_H_SINGLESTRING:
2011 if (ch == '\'') {
2012 if (inScriptType == eNonHtmlScript) {
2013 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
2014 }
2015 styler.ColourTo(i, SCE_H_SINGLESTRING);
2016 state = SCE_H_OTHER;
2017 }
2018 break;
2019 case SCE_H_VALUE:
2020 if (!setHTMLWord.Contains(ch)) {
2021 if (ch == '\"' && chPrev == '=') {
2022 // Should really test for being first character
2023 state = SCE_H_DOUBLESTRING;
2024 } else if (ch == '\'' && chPrev == '=') {
2025 state = SCE_H_SINGLESTRING;
2026 } else {
2027 if (IsNumber(styler.GetStartSegment(), styler)) {
2028 styler.ColourTo(i - 1, SCE_H_NUMBER);
2029 } else {
2030 styler.ColourTo(i - 1, StateToPrint);
2031 }
2032 if (ch == '>') {
2033 styler.ColourTo(i, SCE_H_TAG);
2034 if (inScriptType == eNonHtmlScript) {
2035 state = StateForScript(scriptLanguage);
2036 } else {
2037 state = SCE_H_DEFAULT;
2038 }
2039 tagOpened = false;
2040 if (!tagDontFold) {
2041 if (tagClosing) {
2042 levelCurrent--;
2043 } else {
2044 levelCurrent++;
2045 }
2046 }
2047 tagClosing = false;
2048 } else {
2049 state = SCE_H_OTHER;
2050 }
2051 }
2052 }
2053 break;
2054 case SCE_HJ_DEFAULT:
2055 case SCE_HJ_START:
2056 case SCE_HJ_SYMBOLS:
2057 if (IsAWordStart(ch)) {
2058 styler.ColourTo(i - 1, StateToPrint);
2059 state = SCE_HJ_WORD;
2060 } else if (ch == '/' && chNext == '*') {
2061 styler.ColourTo(i - 1, StateToPrint);
2062 if (chNext2 == '*')
2063 state = SCE_HJ_COMMENTDOC;
2064 else
2065 state = SCE_HJ_COMMENT;
2066 if (chNext2 == '/') {
2067 // Eat the * so it isn't used for the end of the comment
2068 i++;
2069 }
2070 } else if (ch == '/' && chNext == '/') {
2071 styler.ColourTo(i - 1, StateToPrint);
2072 state = SCE_HJ_COMMENTLINE;
2073 } else if (ch == '/' && setOKBeforeJSRE.Contains(chPrevNonWhite)) {
2074 styler.ColourTo(i - 1, StateToPrint);
2075 state = SCE_HJ_REGEX;
2076 } else if (ch == '\"') {
2077 styler.ColourTo(i - 1, StateToPrint);
2078 state = SCE_HJ_DOUBLESTRING;
2079 } else if (ch == '\'') {
2080 styler.ColourTo(i - 1, StateToPrint);
2081 state = SCE_HJ_SINGLESTRING;
2082 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2083 styler.SafeGetCharAt(i + 3) == '-') {
2084 styler.ColourTo(i - 1, StateToPrint);
2085 state = SCE_HJ_COMMENTLINE;
2086 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2087 styler.ColourTo(i - 1, StateToPrint);
2088 state = SCE_HJ_COMMENTLINE;
2089 i += 2;
2090 } else if (IsOperator(ch)) {
2091 styler.ColourTo(i - 1, StateToPrint);
2092 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2093 state = SCE_HJ_DEFAULT;
2094 } else if ((ch == ' ') || (ch == '\t')) {
2095 if (state == SCE_HJ_START) {
2096 styler.ColourTo(i - 1, StateToPrint);
2097 state = SCE_HJ_DEFAULT;
2098 }
2099 }
2100 break;
2101 case SCE_HJ_WORD:
2102 if (!IsAWordChar(ch)) {
2103 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
2104 //styler.ColourTo(i - 1, eHTJSKeyword);
2105 state = SCE_HJ_DEFAULT;
2106 if (ch == '/' && chNext == '*') {
2107 if (chNext2 == '*')
2108 state = SCE_HJ_COMMENTDOC;
2109 else
2110 state = SCE_HJ_COMMENT;
2111 } else if (ch == '/' && chNext == '/') {
2112 state = SCE_HJ_COMMENTLINE;
2113 } else if (ch == '\"') {
2114 state = SCE_HJ_DOUBLESTRING;
2115 } else if (ch == '\'') {
2116 state = SCE_HJ_SINGLESTRING;
2117 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2118 styler.ColourTo(i - 1, StateToPrint);
2119 state = SCE_HJ_COMMENTLINE;
2120 i += 2;
2121 } else if (IsOperator(ch)) {
2122 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2123 state = SCE_HJ_DEFAULT;
2124 }
2125 }
2126 break;
2127 case SCE_HJ_COMMENT:
2128 case SCE_HJ_COMMENTDOC:
2129 if (ch == '/' && chPrev == '*') {
2130 styler.ColourTo(i, StateToPrint);
2131 state = SCE_HJ_DEFAULT;
2132 ch = ' ';
2133 }
2134 break;
2135 case SCE_HJ_COMMENTLINE:
2136 if (ch == '\r' || ch == '\n') {
2137 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
2138 state = SCE_HJ_DEFAULT;
2139 ch = ' ';
2140 }
2141 break;
2142 case SCE_HJ_DOUBLESTRING:
2143 if (ch == '\\') {
2144 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2145 i++;
2146 }
2147 } else if (ch == '\"') {
2148 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
2149 state = SCE_HJ_DEFAULT;
2150 } else if (isLineEnd(ch)) {
2151 styler.ColourTo(i - 1, StateToPrint);
2152 state = SCE_HJ_STRINGEOL;
2153 }
2154 break;
2155 case SCE_HJ_SINGLESTRING:
2156 if (ch == '\\') {
2157 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2158 i++;
2159 }
2160 } else if (ch == '\'') {
2161 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
2162 state = SCE_HJ_DEFAULT;
2163 } else if (isLineEnd(ch)) {
2164 styler.ColourTo(i - 1, StateToPrint);
2165 if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
2166 state = SCE_HJ_STRINGEOL;
2167 }
2168 }
2169 break;
2170 case SCE_HJ_STRINGEOL:
2171 if (!isLineEnd(ch)) {
2172 styler.ColourTo(i - 1, StateToPrint);
2173 state = SCE_HJ_DEFAULT;
2174 } else if (!isLineEnd(chNext)) {
2175 styler.ColourTo(i, StateToPrint);
2176 state = SCE_HJ_DEFAULT;
2177 }
2178 break;
2179 case SCE_HJ_REGEX:
2180 if (ch == '\r' || ch == '\n' || ch == '/') {
2181 if (ch == '/') {
2182 while (IsASCII(chNext) && islower(chNext)) { // gobble regex flags
2183 i++;
2184 ch = chNext;
2185 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2186 }
2187 }
2188 styler.ColourTo(i, StateToPrint);
2189 state = SCE_HJ_DEFAULT;
2190 } else if (ch == '\\') {
2191 // Gobble up the quoted character
2192 if (chNext == '\\' || chNext == '/') {
2193 i++;
2194 ch = chNext;
2195 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2196 }
2197 }
2198 break;
2199 case SCE_HB_DEFAULT:
2200 case SCE_HB_START:
2201 if (IsAWordStart(ch)) {
2202 styler.ColourTo(i - 1, StateToPrint);
2203 state = SCE_HB_WORD;
2204 } else if (ch == '\'') {
2205 styler.ColourTo(i - 1, StateToPrint);
2206 state = SCE_HB_COMMENTLINE;
2207 } else if (ch == '\"') {
2208 styler.ColourTo(i - 1, StateToPrint);
2209 state = SCE_HB_STRING;
2210 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2211 styler.SafeGetCharAt(i + 3) == '-') {
2212 styler.ColourTo(i - 1, StateToPrint);
2213 state = SCE_HB_COMMENTLINE;
2214 } else if (IsOperator(ch)) {
2215 styler.ColourTo(i - 1, StateToPrint);
2216 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2217 state = SCE_HB_DEFAULT;
2218 } else if ((ch == ' ') || (ch == '\t')) {
2219 if (state == SCE_HB_START) {
2220 styler.ColourTo(i - 1, StateToPrint);
2221 state = SCE_HB_DEFAULT;
2222 }
2223 }
2224 break;
2225 case SCE_HB_WORD:
2226 if (!IsAWordChar(ch)) {
2227 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
2228 if (state == SCE_HB_DEFAULT) {
2229 if (ch == '\"') {
2230 state = SCE_HB_STRING;
2231 } else if (ch == '\'') {
2232 state = SCE_HB_COMMENTLINE;
2233 } else if (IsOperator(ch)) {
2234 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2235 state = SCE_HB_DEFAULT;
2236 }
2237 }
2238 }
2239 break;
2240 case SCE_HB_STRING:
2241 if (ch == '\"') {
2242 styler.ColourTo(i, StateToPrint);
2243 state = SCE_HB_DEFAULT;
2244 } else if (ch == '\r' || ch == '\n') {
2245 styler.ColourTo(i - 1, StateToPrint);
2246 state = SCE_HB_STRINGEOL;
2247 }
2248 break;
2249 case SCE_HB_COMMENTLINE:
2250 if (ch == '\r' || ch == '\n') {
2251 styler.ColourTo(i - 1, StateToPrint);
2252 state = SCE_HB_DEFAULT;
2253 }
2254 break;
2255 case SCE_HB_STRINGEOL:
2256 if (!isLineEnd(ch)) {
2257 styler.ColourTo(i - 1, StateToPrint);
2258 state = SCE_HB_DEFAULT;
2259 } else if (!isLineEnd(chNext)) {
2260 styler.ColourTo(i, StateToPrint);
2261 state = SCE_HB_DEFAULT;
2262 }
2263 break;
2264 case SCE_HP_DEFAULT:
2265 case SCE_HP_START:
2266 if (IsAWordStart(ch)) {
2267 styler.ColourTo(i - 1, StateToPrint);
2268 state = SCE_HP_WORD;
2269 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2270 styler.SafeGetCharAt(i + 3) == '-') {
2271 styler.ColourTo(i - 1, StateToPrint);
2272 state = SCE_HP_COMMENTLINE;
2273 } else if (ch == '#') {
2274 styler.ColourTo(i - 1, StateToPrint);
2275 state = SCE_HP_COMMENTLINE;
2276 } else if (ch == '\"') {
2277 styler.ColourTo(i - 1, StateToPrint);
2278 if (chNext == '\"' && chNext2 == '\"') {
2279 i += 2;
2280 state = SCE_HP_TRIPLEDOUBLE;
2281 ch = ' ';
2282 chPrev = ' ';
2283 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2284 } else {
2285 // state = statePrintForState(SCE_HP_STRING,inScriptType);
2286 state = SCE_HP_STRING;
2287 }
2288 } else if (ch == '\'') {
2289 styler.ColourTo(i - 1, StateToPrint);
2290 if (chNext == '\'' && chNext2 == '\'') {
2291 i += 2;
2292 state = SCE_HP_TRIPLE;
2293 ch = ' ';
2294 chPrev = ' ';
2295 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2296 } else {
2297 state = SCE_HP_CHARACTER;
2298 }
2299 } else if (IsOperator(ch)) {
2300 styler.ColourTo(i - 1, StateToPrint);
2301 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2302 } else if ((ch == ' ') || (ch == '\t')) {
2303 if (state == SCE_HP_START) {
2304 styler.ColourTo(i - 1, StateToPrint);
2305 state = SCE_HP_DEFAULT;
2306 }
2307 }
2308 break;
2309 case SCE_HP_WORD:
2310 if (!IsAWordChar(ch)) {
2311 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
2312 state = SCE_HP_DEFAULT;
2313 if (ch == '#') {
2314 state = SCE_HP_COMMENTLINE;
2315 } else if (ch == '\"') {
2316 if (chNext == '\"' && chNext2 == '\"') {
2317 i += 2;
2318 state = SCE_HP_TRIPLEDOUBLE;
2319 ch = ' ';
2320 chPrev = ' ';
2321 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2322 } else {
2323 state = SCE_HP_STRING;
2324 }
2325 } else if (ch == '\'') {
2326 if (chNext == '\'' && chNext2 == '\'') {
2327 i += 2;
2328 state = SCE_HP_TRIPLE;
2329 ch = ' ';
2330 chPrev = ' ';
2331 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2332 } else {
2333 state = SCE_HP_CHARACTER;
2334 }
2335 } else if (IsOperator(ch)) {
2336 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2337 }
2338 }
2339 break;
2340 case SCE_HP_COMMENTLINE:
2341 if (ch == '\r' || ch == '\n') {
2342 styler.ColourTo(i - 1, StateToPrint);
2343 state = SCE_HP_DEFAULT;
2344 }
2345 break;
2346 case SCE_HP_STRING:
2347 if (ch == '\\') {
2348 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2349 i++;
2350 ch = chNext;
2351 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2352 }
2353 } else if (ch == '\"') {
2354 styler.ColourTo(i, StateToPrint);
2355 state = SCE_HP_DEFAULT;
2356 }
2357 break;
2358 case SCE_HP_CHARACTER:
2359 if (ch == '\\') {
2360 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2361 i++;
2362 ch = chNext;
2363 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2364 }
2365 } else if (ch == '\'') {
2366 styler.ColourTo(i, StateToPrint);
2367 state = SCE_HP_DEFAULT;
2368 }
2369 break;
2370 case SCE_HP_TRIPLE:
2371 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
2372 styler.ColourTo(i, StateToPrint);
2373 state = SCE_HP_DEFAULT;
2374 }
2375 break;
2376 case SCE_HP_TRIPLEDOUBLE:
2377 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
2378 styler.ColourTo(i, StateToPrint);
2379 state = SCE_HP_DEFAULT;
2380 }
2381 break;
2382 ///////////// start - PHP state handling
2383 case SCE_HPHP_WORD:
2384 if (!IsPhpWordChar(ch)) {
2385 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
2386 if (ch == '/' && chNext == '*') {
2387 i++;
2388 state = SCE_HPHP_COMMENT;
2389 } else if (ch == '/' && chNext == '/') {
2390 i++;
2391 state = SCE_HPHP_COMMENTLINE;
2392 } else if (ch == '#' && chNext != '[') {
2393 state = SCE_HPHP_COMMENTLINE;
2394 } else if (ch == '\"') {
2395 state = SCE_HPHP_HSTRING;
2396 phpStringDelimiter = "\"";
2397 } else if (styler.Match(i, "<<<")) {
2398 bool isSimpleString = false;
2399 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2400 if (!phpStringDelimiter.empty()) {
2401 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2402 if (foldHeredoc) levelCurrent++;
2403 }
2404 } else if (ch == '\'') {
2405 state = SCE_HPHP_SIMPLESTRING;
2406 phpStringDelimiter = "\'";
2407 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2408 state = SCE_HPHP_VARIABLE;
2409 } else if (IsOperator(ch)) {
2410 state = SCE_HPHP_OPERATOR;
2411 } else {
2412 state = SCE_HPHP_DEFAULT;
2413 }
2414 }
2415 break;
2416 case SCE_HPHP_NUMBER:
2417 if (phpNumber.check(chNext, chNext2)) {
2418 styler.ColourTo(i, phpNumber.isInvalid() ? SCE_HPHP_DEFAULT : SCE_HPHP_NUMBER);
2419 state = SCE_HPHP_DEFAULT;
2420 }
2421 break;
2422 case SCE_HPHP_VARIABLE:
2423 if (!IsPhpWordChar(chNext)) {
2424 styler.ColourTo(i, SCE_HPHP_VARIABLE);
2425 state = SCE_HPHP_DEFAULT;
2426 }
2427 break;
2428 case SCE_HPHP_COMMENT:
2429 if (ch == '/' && chPrev == '*') {
2430 styler.ColourTo(i, StateToPrint);
2431 state = SCE_HPHP_DEFAULT;
2432 }
2433 break;
2434 case SCE_HPHP_COMMENTLINE:
2435 if (ch == '\r' || ch == '\n') {
2436 styler.ColourTo(i - 1, StateToPrint);
2437 state = SCE_HPHP_DEFAULT;
2438 }
2439 break;
2440 case SCE_HPHP_HSTRING:
2441 if (ch == '\\' && ((phpStringDelimiter == "\"") || chNext == '$' || chNext == '{')) {
2442 // skip the next char
2443 i++;
2444 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
2445 && IsPhpWordStart(chNext2)) {
2446 styler.ColourTo(i - 1, StateToPrint);
2447 state = SCE_HPHP_COMPLEX_VARIABLE;
2448 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2449 styler.ColourTo(i - 1, StateToPrint);
2450 state = SCE_HPHP_HSTRING_VARIABLE;
2451 } else if (styler.Match(i, phpStringDelimiter.c_str())) {
2452 if (phpStringDelimiter == "\"") {
2453 styler.ColourTo(i, StateToPrint);
2454 state = SCE_HPHP_DEFAULT;
2455 } else if (lineStartVisibleChars == 1) {
2456 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2457 if (!IsPhpWordChar(styler.SafeGetCharAt(i + psdLength))) {
2458 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2459 styler.ColourTo(i, StateToPrint);
2460 state = SCE_HPHP_DEFAULT;
2461 if (foldHeredoc) levelCurrent--;
2462 }
2463 }
2464 }
2465 break;
2466 case SCE_HPHP_SIMPLESTRING:
2467 if (phpStringDelimiter == "\'") {
2468 if (ch == '\\') {
2469 // skip the next char
2470 i++;
2471 } else if (ch == '\'') {
2472 styler.ColourTo(i, StateToPrint);
2473 state = SCE_HPHP_DEFAULT;
2474 }
2475 } else if (lineStartVisibleChars == 1 && styler.Match(i, phpStringDelimiter.c_str())) {
2476 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2477 if (!IsPhpWordChar(styler.SafeGetCharAt(i + psdLength))) {
2478 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2479 styler.ColourTo(i, StateToPrint);
2480 state = SCE_HPHP_DEFAULT;
2481 if (foldHeredoc) levelCurrent--;
2482 }
2483 }
2484 break;
2485 case SCE_HPHP_HSTRING_VARIABLE:
2486 if (!IsPhpWordChar(chNext)) {
2487 styler.ColourTo(i, StateToPrint);
2488 state = SCE_HPHP_HSTRING;
2489 }
2490 break;
2491 case SCE_HPHP_COMPLEX_VARIABLE:
2492 if (ch == '}') {
2493 styler.ColourTo(i, StateToPrint);
2494 state = SCE_HPHP_HSTRING;
2495 }
2496 break;
2497 case SCE_HPHP_OPERATOR:
2498 case SCE_HPHP_DEFAULT:
2499 styler.ColourTo(i - 1, StateToPrint);
2500 if (phpNumber.init(ch, chNext, chNext2)) {
2501 if (phpNumber.isFinished()) {
2502 styler.ColourTo(i, phpNumber.isInvalid() ? SCE_HPHP_DEFAULT : SCE_HPHP_NUMBER);
2503 state = SCE_HPHP_DEFAULT;
2504 } else {
2505 state = SCE_HPHP_NUMBER;
2506 }
2507 } else if (IsAWordStart(ch)) {
2508 state = SCE_HPHP_WORD;
2509 } else if (ch == '/' && chNext == '*') {
2510 i++;
2511 state = SCE_HPHP_COMMENT;
2512 } else if (ch == '/' && chNext == '/') {
2513 i++;
2514 state = SCE_HPHP_COMMENTLINE;
2515 } else if (ch == '#' && chNext != '[') {
2516 state = SCE_HPHP_COMMENTLINE;
2517 } else if (ch == '\"') {
2518 state = SCE_HPHP_HSTRING;
2519 phpStringDelimiter = "\"";
2520 } else if (styler.Match(i, "<<<")) {
2521 bool isSimpleString = false;
2522 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2523 if (!phpStringDelimiter.empty()) {
2524 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2525 if (foldHeredoc) levelCurrent++;
2526 }
2527 } else if (ch == '\'') {
2528 state = SCE_HPHP_SIMPLESTRING;
2529 phpStringDelimiter = "\'";
2530 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2531 state = SCE_HPHP_VARIABLE;
2532 } else if (IsOperator(ch)) {
2533 state = SCE_HPHP_OPERATOR;
2534 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2535 state = SCE_HPHP_DEFAULT;
2536 }
2537 break;
2538 ///////////// end - PHP state handling
2539 }
2540
2541 // Some of the above terminated their lexeme but since the same character starts
2542 // the same class again, only reenter if non empty segment.
2543
2544 const bool nonEmptySegment = i >= static_cast<Sci_Position>(styler.GetStartSegment());
2545 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2546 if ((ch == '\"') && (nonEmptySegment)) {
2547 state = SCE_HB_STRING;
2548 } else if (ch == '\'') {
2549 state = SCE_HB_COMMENTLINE;
2550 } else if (IsAWordStart(ch)) {
2551 state = SCE_HB_WORD;
2552 } else if (IsOperator(ch)) {
2553 styler.ColourTo(i, SCE_HB_DEFAULT);
2554 }
2555 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2556 if ((ch == '\"') && (nonEmptySegment)) {
2557 state = SCE_HBA_STRING;
2558 } else if (ch == '\'') {
2559 state = SCE_HBA_COMMENTLINE;
2560 } else if (IsAWordStart(ch)) {
2561 state = SCE_HBA_WORD;
2562 } else if (IsOperator(ch)) {
2563 styler.ColourTo(i, SCE_HBA_DEFAULT);
2564 }
2565 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2566 if (ch == '/' && chNext == '*') {
2567 if (styler.SafeGetCharAt(i + 2) == '*')
2568 state = SCE_HJ_COMMENTDOC;
2569 else
2570 state = SCE_HJ_COMMENT;
2571 } else if (ch == '/' && chNext == '/') {
2572 state = SCE_HJ_COMMENTLINE;
2573 } else if ((ch == '\"') && (nonEmptySegment)) {
2574 state = SCE_HJ_DOUBLESTRING;
2575 } else if ((ch == '\'') && (nonEmptySegment)) {
2576 state = SCE_HJ_SINGLESTRING;
2577 } else if (IsAWordStart(ch)) {
2578 state = SCE_HJ_WORD;
2579 } else if (IsOperator(ch)) {
2580 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2581 }
2582 }
2583 }
2584
2585 switch (state) {
2586 case SCE_HJ_WORD:
2587 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2588 break;
2589 case SCE_HB_WORD:
2590 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2591 break;
2592 case SCE_HP_WORD:
2593 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType, isMako);
2594 break;
2595 case SCE_HPHP_WORD:
2596 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2597 break;
2598 default:
2599 StateToPrint = statePrintForState(state, inScriptType);
2600 if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc)
2601 styler.ColourTo(lengthDoc - 1, StateToPrint);
2602 break;
2603 }
2604
2605 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2606 if (fold) {
2607 const int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2608 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2609 }
2610 styler.Flush();
2611}
2612
// Module-level LexerModule object for this lexer. It is constructed with the
// SCLEX_JS identifier, the LexerJS::LexerFactoryJS factory function, the
// language name "js" and the jsWordListDesc keyword-list descriptions.
// SCLEX_JS is not taken from SciLexer.h: it is #defined near the top of this
// file as (SCLEX_JAVANOCASE + 1).
// NOTE(review): presumably this object is what exposes the lexer to the host's
// lexer catalogue, and the locally derived ID must not collide with any other
// SCLEX_* value - confirm both against the build's SciLexer.h / catalogue.
2613LexerModule lmJS(SCLEX_JS, LexerJS::LexerFactoryJS, "js", jsWordListDesc);
2614