1// Scintilla source code edit control
2/** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#include <stdlib.h>
9#include <string.h>
10#include <stdio.h>
11#include <stdarg.h>
12#include <assert.h>
13#include <ctype.h>
14
15#include <string>
16#include <string_view>
17#include <map>
18#include <set>
19#include <functional>
20
21#include "ILexer.h"
22#include "Scintilla.h"
23#include "SciLexer.h"
24#include "WordList.h"
25#include "LexAccessor.h"
26#include "Accessor.h"
27#include "StyleContext.h"
28#include "CharacterSet.h"
29#include "LexerModule.h"
30#include "OptionSet.h"
31#include "DefaultLexer.h"
32
33using namespace Scintilla;
34using namespace Lexilla;
35
36namespace {
37
// Distance between each client-side script style block (SCE_HJ_*, SCE_HB_*, SCE_HP_*)
// and its server-side (ASP) counterpart (SCE_HJA_*, SCE_HBA_*, SCE_HPA_*).
// statePrintForState() adds these offsets and stateForPrintState() subtracts them.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Kinds of embedded language / block that the lexer distinguishes.
enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
// Context in which script text is found. statePrintForState() leaves script styles
// unshifted only for eNonHtmlScript; every other mode gets the SCE_HA_* offset applied.
enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
44
// Returns true when ch is an ASCII letter, an ASCII digit, '.' or '_'.
// Callers in this file pass plain char values, which may be negative for
// non-ASCII bytes. Anything outside 0..0x7F is rejected before isalnum() is
// reached, because isalnum() is undefined for values that are neither EOF nor
// representable as unsigned char.
inline bool IsAWordChar(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
}
48
// Returns true when ch may begin a word: an ASCII letter, an ASCII digit or '_'.
// Values outside 0..0x7F (including negative plain-char values for non-ASCII
// bytes) are rejected up front so that isalnum() is never called with an
// argument for which its behaviour is undefined.
inline bool IsAWordStart(const int ch) {
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_');
}
52
53inline bool IsOperator(int ch) {
54 if (IsASCII(ch) && isalnum(ch))
55 return false;
56 // '.' left out as it is used to make up numbers
57 if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
58 ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
59 ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
60 ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
61 ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
62 ch == '?' || ch == '!' || ch == '.' || ch == '~')
63 return true;
64 return false;
65}
66
67void GetTextSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end, char *s, size_t len) {
68 Sci_PositionU i = 0;
69 for (; (i < end - start + 1) && (i < len-1); i++) {
70 s[i] = MakeLowerCase(styler[start + i]);
71 }
72 s[i] = '\0';
73}
74
75std::string GetStringSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
76 std::string s;
77 Sci_PositionU i = 0;
78 for (; (i < end - start + 1); i++) {
79 s.push_back(MakeLowerCase(styler[start + i]));
80 }
81 return s;
82}
83
84std::string GetNextWord(Accessor &styler, Sci_PositionU start) {
85 std::string ret;
86 Sci_PositionU i = 0;
87 for (; i < 200; i++) { // Put an upper limit to bound time taken for unexpected text.
88 const char ch = styler.SafeGetCharAt(start + i);
89 if ((i == 0) && !IsAWordStart(ch))
90 break;
91 if ((i > 0) && !IsAWordChar(ch))
92 break;
93 ret.push_back(ch);
94 }
95 return ret;
96}
97
98script_type segIsScriptingIndicator(Accessor &styler, Sci_PositionU start, Sci_PositionU end, script_type prevValue) {
99 char s[100];
100 GetTextSegment(styler, start, end, s, sizeof(s));
101 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
102 if (strstr(s, "src")) // External script
103 return eScriptNone;
104 if (strstr(s, "vbs"))
105 return eScriptVBS;
106 if (strstr(s, "pyth"))
107 return eScriptPython;
108 if (strstr(s, "javas"))
109 return eScriptJS;
110 if (strstr(s, "jscr"))
111 return eScriptJS;
112 if (strstr(s, "php"))
113 return eScriptPHP;
114 if (strstr(s, "xml")) {
115 const char *xml = strstr(s, "xml");
116 for (const char *t=s; t<xml; t++) {
117 if (!IsASpace(*t)) {
118 return prevValue;
119 }
120 }
121 return eScriptXML;
122 }
123
124 return prevValue;
125}
126
127int PrintScriptingIndicatorOffset(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
128 int iResult = 0;
129 std::string s = GetStringSegment(styler, start, end);
130 if (0 == strncmp(s.c_str(), "php", 3)) {
131 iResult = 3;
132 }
133 return iResult;
134}
135
136script_type ScriptOfState(int state) {
137 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
138 return eScriptPython;
139 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
140 return eScriptVBS;
141 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
142 return eScriptJS;
143 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
144 return eScriptPHP;
145 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
146 return eScriptSGML;
147 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
148 return eScriptSGMLblock;
149 } else {
150 return eScriptNone;
151 }
152}
153
154int statePrintForState(int state, script_mode inScriptType) {
155 int StateToPrint = state;
156
157 if (state >= SCE_HJ_START) {
158 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
159 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
160 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
161 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
162 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
163 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
164 }
165 }
166
167 return StateToPrint;
168}
169
170int stateForPrintState(int StateToPrint) {
171 int state;
172
173 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
174 state = StateToPrint - SCE_HA_PYTHON;
175 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
176 state = StateToPrint - SCE_HA_VBS;
177 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
178 state = StateToPrint - SCE_HA_JS;
179 } else {
180 state = StateToPrint;
181 }
182
183 return state;
184}
185
186inline bool IsNumber(Sci_PositionU start, Accessor &styler) {
187 return IsADigit(styler[start]) || (styler[start] == '.') ||
188 (styler[start] == '-') || (styler[start] == '#');
189}
190
191inline bool isStringState(int state) {
192 bool bResult;
193
194 switch (state) {
195 case SCE_HJ_DOUBLESTRING:
196 case SCE_HJ_SINGLESTRING:
197 case SCE_HJA_DOUBLESTRING:
198 case SCE_HJA_SINGLESTRING:
199 case SCE_HB_STRING:
200 case SCE_HBA_STRING:
201 case SCE_HP_STRING:
202 case SCE_HP_CHARACTER:
203 case SCE_HP_TRIPLE:
204 case SCE_HP_TRIPLEDOUBLE:
205 case SCE_HPA_STRING:
206 case SCE_HPA_CHARACTER:
207 case SCE_HPA_TRIPLE:
208 case SCE_HPA_TRIPLEDOUBLE:
209 case SCE_HPHP_HSTRING:
210 case SCE_HPHP_SIMPLESTRING:
211 case SCE_HPHP_HSTRING_VARIABLE:
212 case SCE_HPHP_COMPLEX_VARIABLE:
213 bResult = true;
214 break;
215 default :
216 bResult = false;
217 break;
218 }
219 return bResult;
220}
221
222inline bool stateAllowsTermination(int state) {
223 bool allowTermination = !isStringState(state);
224 if (allowTermination) {
225 switch (state) {
226 case SCE_HB_COMMENTLINE:
227 case SCE_HPHP_COMMENT:
228 case SCE_HP_COMMENTLINE:
229 case SCE_HPA_COMMENTLINE:
230 allowTermination = false;
231 }
232 }
233 return allowTermination;
234}
235
236// not really well done, since it's only comments that should lex the %> and <%
237inline bool isCommentASPState(int state) {
238 bool bResult;
239
240 switch (state) {
241 case SCE_HJ_COMMENT:
242 case SCE_HJ_COMMENTLINE:
243 case SCE_HJ_COMMENTDOC:
244 case SCE_HB_COMMENTLINE:
245 case SCE_HP_COMMENTLINE:
246 case SCE_HPHP_COMMENT:
247 case SCE_HPHP_COMMENTLINE:
248 bResult = true;
249 break;
250 default :
251 bResult = false;
252 break;
253 }
254 return bResult;
255}
256
257void classifyAttribHTML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
258 const bool wordIsNumber = IsNumber(start, styler);
259 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
260 if (wordIsNumber) {
261 chAttr = SCE_H_NUMBER;
262 } else {
263 std::string s = GetStringSegment(styler, start, end);
264 if (keywords.InList(s.c_str()))
265 chAttr = SCE_H_ATTRIBUTE;
266 }
267 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
268 // No keywords -> all are known
269 chAttr = SCE_H_ATTRIBUTE;
270 styler.ColourTo(end, chAttr);
271}
272
273// https://html.spec.whatwg.org/multipage/custom-elements.html#custom-elements-core-concepts
274bool isHTMLCustomElement(const std::string &tag) {
275 // check valid HTML custom element name: starts with an ASCII lower alpha and contains hyphen.
276 // IsUpperOrLowerCase() is used for `html.tags.case.sensitive=1`.
277 if (tag.length() < 2 || !IsUpperOrLowerCase(tag[0])) {
278 return false;
279 }
280 if (tag.find('-') == std::string::npos) {
281 return false;
282 }
283 return true;
284}
285
286int classifyTagHTML(Sci_PositionU start, Sci_PositionU end,
287 const WordList &keywords, Accessor &styler, bool &tagDontFold,
288 bool caseSensitive, bool isXml, bool allowScripts,
289 const std::set<std::string> &nonFoldingTags) {
290 std::string tag;
291 // Copy after the '<' and stop before ' '
292 for (Sci_PositionU cPos = start; cPos <= end; cPos++) {
293 const char ch = styler[cPos];
294 if (IsASpace(ch)) {
295 break;
296 }
297 if ((ch != '<') && (ch != '/')) {
298 tag.push_back(caseSensitive ? ch : MakeLowerCase(ch));
299 }
300 }
301 // if the current language is XML, I can fold any tag
302 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
303 //...to find it in the list of no-container-tags
304 tagDontFold = (!isXml) && (nonFoldingTags.count(tag) > 0);
305 // No keywords -> all are known
306 char chAttr = SCE_H_TAGUNKNOWN;
307 if (!tag.empty() && (tag[0] == '!')) {
308 chAttr = SCE_H_SGML_DEFAULT;
309 } else if (!keywords || keywords.InList(tag.c_str())) {
310 chAttr = SCE_H_TAG;
311 } else if (!isXml && isHTMLCustomElement(tag)) {
312 chAttr = SCE_H_TAG;
313 }
314 if (chAttr != SCE_H_TAGUNKNOWN) {
315 styler.ColourTo(end, chAttr);
316 }
317 if (chAttr == SCE_H_TAG) {
318 if (allowScripts && (tag == "script")) {
319 // check to see if this is a self-closing tag by sniffing ahead
320 bool isSelfClose = false;
321 for (Sci_PositionU cPos = end; cPos <= end + 200; cPos++) {
322 const char ch = styler.SafeGetCharAt(cPos, '\0');
323 if (ch == '\0' || ch == '>')
324 break;
325 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
326 isSelfClose = true;
327 break;
328 }
329 }
330
331 // do not enter a script state if the tag self-closed
332 if (!isSelfClose)
333 chAttr = SCE_H_SCRIPT;
334 } else if (!isXml && (tag == "comment")) {
335 chAttr = SCE_H_COMMENT;
336 }
337 }
338 return chAttr;
339}
340
341void classifyWordHTJS(Sci_PositionU start, Sci_PositionU end,
342 const WordList &keywords, Accessor &styler, script_mode inScriptType) {
343 char s[30 + 1];
344 Sci_PositionU i = 0;
345 for (; i < end - start + 1 && i < 30; i++) {
346 s[i] = styler[start + i];
347 }
348 s[i] = '\0';
349
350 char chAttr = SCE_HJ_WORD;
351 const bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
352 if (wordIsNumber) {
353 chAttr = SCE_HJ_NUMBER;
354 } else if (keywords.InList(s)) {
355 chAttr = SCE_HJ_KEYWORD;
356 }
357 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
358}
359
360int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, script_mode inScriptType) {
361 char chAttr = SCE_HB_IDENTIFIER;
362 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
363 if (wordIsNumber) {
364 chAttr = SCE_HB_NUMBER;
365 } else {
366 std::string s = GetStringSegment(styler, start, end);
367 if (keywords.InList(s.c_str())) {
368 chAttr = SCE_HB_WORD;
369 if (s == "rem")
370 chAttr = SCE_HB_COMMENTLINE;
371 }
372 }
373 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
374 if (chAttr == SCE_HB_COMMENTLINE)
375 return SCE_HB_COMMENTLINE;
376 else
377 return SCE_HB_DEFAULT;
378}
379
380void classifyWordHTPy(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, std::string &prevWord, script_mode inScriptType, bool isMako) {
381 const bool wordIsNumber = IsADigit(styler[start]);
382 std::string s;
383 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
384 s.push_back(styler[start + i]);
385 }
386 char chAttr = SCE_HP_IDENTIFIER;
387 if (prevWord == "class")
388 chAttr = SCE_HP_CLASSNAME;
389 else if (prevWord == "def")
390 chAttr = SCE_HP_DEFNAME;
391 else if (wordIsNumber)
392 chAttr = SCE_HP_NUMBER;
393 else if (keywords.InList(s.c_str()))
394 chAttr = SCE_HP_WORD;
395 else if (isMako && (s == "block"))
396 chAttr = SCE_HP_WORD;
397 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
398 prevWord = s;
399}
400
401// Update the word colour to default or keyword
402// Called when in a PHP word
403void classifyWordHTPHP(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
404 char chAttr = SCE_HPHP_DEFAULT;
405 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
406 if (wordIsNumber) {
407 chAttr = SCE_HPHP_NUMBER;
408 } else {
409 std::string s = GetStringSegment(styler, start, end);
410 if (keywords.InList(s.c_str()))
411 chAttr = SCE_HPHP_WORD;
412 }
413 styler.ColourTo(end, chAttr);
414}
415
416bool isWordHSGML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
417 std::string s;
418 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
419 s.push_back(styler[start + i]);
420 }
421 return keywords.InList(s.c_str());
422}
423
424bool isWordCdata(Sci_PositionU start, Sci_PositionU end, Accessor &styler) {
425 std::string s;
426 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
427 s.push_back(styler[start + i]);
428 }
429 return s == "[CDATA[";
430}
431
432// Return the first state to reach when entering a scripting language
433int StateForScript(script_type scriptLanguage) {
434 int Result;
435 switch (scriptLanguage) {
436 case eScriptVBS:
437 Result = SCE_HB_START;
438 break;
439 case eScriptPython:
440 Result = SCE_HP_START;
441 break;
442 case eScriptPHP:
443 Result = SCE_HPHP_DEFAULT;
444 break;
445 case eScriptXML:
446 Result = SCE_H_TAGUNKNOWN;
447 break;
448 case eScriptSGML:
449 Result = SCE_H_SGML_DEFAULT;
450 break;
451 case eScriptComment:
452 Result = SCE_H_COMMENT;
453 break;
454 default :
455 Result = SCE_HJ_START;
456 break;
457 }
458 return Result;
459}
460
461inline bool issgmlwordchar(int ch) {
462 return !IsASCII(ch) ||
463 (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
464}
465
466inline bool IsPhpWordStart(int ch) {
467 return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
468}
469
470inline bool IsPhpWordChar(int ch) {
471 return IsADigit(ch) || IsPhpWordStart(ch);
472}
473
474bool InTagState(int state) {
475 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
476 state == SCE_H_SCRIPT ||
477 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
478 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
479 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
480}
481
482bool IsCommentState(const int state) {
483 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
484}
485
486bool IsScriptCommentState(const int state) {
487 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
488 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
489}
490
// True when ch is a carriage return or a line feed.
bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
494
495bool isMakoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
496 if (blockType.empty()) {
497 return ((ch == '%') && (chNext == '>'));
498 } else if ((blockType == "inherit") ||
499 (blockType == "namespace") ||
500 (blockType == "include") ||
501 (blockType == "page")) {
502 return ((ch == '/') && (chNext == '>'));
503 } else if (blockType == "%") {
504 if (ch == '/' && isLineEnd(chNext))
505 return true;
506 else
507 return isLineEnd(ch);
508 } else if (blockType == "{") {
509 return ch == '}';
510 } else {
511 return (ch == '>');
512 }
513}
514
// Decides whether ch / chNext terminate the current Django block:
// a "%" block ends with "%}" and a "{" block ends with "}}".
// Any other blockType (including the empty string) never ends here.
bool isDjangoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
	// Both recognised terminators finish with '}'.
	if (chNext != '}') {
		return false;
	}
	if (blockType == "%") {
		return ch == '%';
	}
	if (blockType == "{") {
		return ch == '}';
	}
	return false;
}
526
// Character-by-character recogniser for PHP numeric literals.
// init() inspects the first character (with two characters of look-ahead) and
// check() is then fed each following character until it reports that the
// number has finished. isInvalid() tells whether the text seen was malformed.
class PhpNumberState {
	enum NumberBase { BASE_10 = 0, BASE_2, BASE_8, BASE_16 };
	// Digit sets indexed by NumberBase. Each begins with '_' so that the
	// expression `digitList[b] + !betweenDigits` skips the underscore (making it
	// unacceptable) unless the previous character was a digit.
	// NOTE(review): strchr() also matches the string's terminating NUL, so a
	// character value of 0 is accepted as a "digit" by these lookups - confirm
	// that callers never pass 0.
	static constexpr const char *const digitList[] = { "_0123456789", "_01", "_01234567", "_0123456789abcdefABCDEF" };

	NumberBase base = BASE_10;
	bool decimalPart = false;	// a '.' has been accepted
	bool exponentPart = false;	// an 'e'/'E' has been accepted
	bool invalid = false;		// the text seen so far is not a well-formed number
	bool finished = false;		// a character that ends the number has been seen

	bool leadingZero = false;	// the number started with '0' (without a b/o/x prefix)
	bool invalidBase8 = false;	// a digit outside 0-7 (or a misplaced '_') followed a leading zero

	bool betweenDigits = false;	// the previous character was a digit
	bool decimalChar = false;	// the previous character was the '.'
	bool exponentChar = false;	// the previous character was 'e'/'E' (kept across a following sign or '_')

public:
	inline bool isInvalid() { return invalid; }
	inline bool isFinished() { return finished; }

	// Resets all state and examines the first character of a candidate number.
	// ch is the first character, chPlus1 and chPlus2 the two that follow.
	// Returns false when ch cannot start a number, true otherwise.
	bool init(int ch, int chPlus1, int chPlus2) {
		base = BASE_10;
		decimalPart = false;
		exponentPart = false;
		invalid = false;
		finished = false;

		leadingZero = false;
		invalidBase8 = false;

		betweenDigits = false;
		decimalChar = false;
		exponentChar = false;

		// betweenDigits is false here, so the lookups below exclude '_'.
		if (ch == '.' && strchr(digitList[BASE_10] + !betweenDigits, chPlus1) != nullptr) {
			// ".5" style: a leading decimal point followed by a digit.
			decimalPart = true;
			betweenDigits = true;
		} else if (ch == '0' && (chPlus1 == 'b' || chPlus1 == 'B')) {
			base = BASE_2;
		} else if (ch == '0' && (chPlus1 == 'o' || chPlus1 == 'O')) {
			base = BASE_8;
		} else if (ch == '0' && (chPlus1 == 'x' || chPlus1 == 'X')) {
			base = BASE_16;
		} else if (strchr(digitList[BASE_10] + !betweenDigits, ch) != nullptr) {
			leadingZero = ch == '0';
			betweenDigits = true;
			// Look at the next character straight away so that a one-digit
			// number is recognised as finished.
			check(chPlus1, chPlus2);
			if (finished && leadingZero) {
				// single zero should be base 10
				base = BASE_10;
			}
		} else {
			return false;
		}
		return true;
	}

	// Processes the next character ch (chPlus1 is the one after it), updating
	// the validity flags. Returns true once a character that terminates the
	// number has been seen.
	bool check(int ch, int chPlus1) {
		if (strchr(digitList[base] + !betweenDigits, ch) != nullptr) {
			// A digit of the current base, or '_' directly after a digit.
			if (leadingZero) {
				invalidBase8 = invalidBase8 || strchr(digitList[BASE_8] + !betweenDigits, ch) == nullptr;
			}

			betweenDigits = ch != '_';
			decimalChar = false;
			exponentChar = false;
		} else if (ch == '_') {
			// '_' that does not directly follow a digit.
			invalid = true;

			betweenDigits = false;
			decimalChar = false;
			// exponentChar is unchanged
		} else if (base == BASE_10 && ch == '.' && (
					!(decimalPart || exponentPart) || strchr(digitList[BASE_10] + !betweenDigits, chPlus1) != nullptr)
			) {
			// '.' is taken as part of the number when it is the first one, or
			// when a digit follows it; it is invalid unless it directly follows
			// a digit and no decimal or exponent part has been seen yet.
			invalid = invalid || !betweenDigits || decimalPart || exponentPart;
			decimalPart = true;

			betweenDigits = false;
			decimalChar = true;
			exponentChar = false;
		} else if (base == BASE_10 && (ch == 'e' || ch == 'E')) {
			// Exponent marker: must follow a digit or the '.', and may appear only once.
			invalid = invalid || !(betweenDigits || decimalChar) || exponentPart;
			exponentPart = true;

			betweenDigits = false;
			decimalChar = false;
			exponentChar = true;
		} else if (base == BASE_10 && (ch == '-' || ch == '+') && exponentChar) {
			// Sign of the exponent: must be followed by a digit.
			invalid = invalid || strchr(digitList[BASE_10] + !betweenDigits, chPlus1) == nullptr;

			betweenDigits = false;
			decimalChar = false;
			// exponentChar is unchanged
		} else if (IsPhpWordChar(ch)) {
			// Any other word character glued to the number makes it invalid.
			invalid = true;

			betweenDigits = false;
			decimalChar = false;
			exponentChar = false;
		} else {
			// Any other character ends the number; it must end on a digit or on the '.'.
			invalid = invalid || !(betweenDigits || decimalChar);
			finished = true;
			if (base == BASE_10 && leadingZero && !decimalPart && !exponentPart) {
				// An integer with a leading zero is octal, so non-octal digits make it invalid.
				base = BASE_8;
				invalid = invalid || invalidBase8;
			}
		}
		return finished;
	}
};
639
640bool isPHPStringState(int state) {
641 return
642 (state == SCE_HPHP_HSTRING) ||
643 (state == SCE_HPHP_SIMPLESTRING) ||
644 (state == SCE_HPHP_HSTRING_VARIABLE) ||
645 (state == SCE_HPHP_COMPLEX_VARIABLE);
646}
647
// Reads a string delimiter word starting at position i and stores it in
// phpStringDelimiter (presumably the identifier that follows a heredoc/nowdoc
// opener - confirm against the caller).
//   phpStringDelimiter - receives the delimiter; left empty on failure
//   i                  - position at which to start looking
//   lengthDoc          - end of the range that may be examined
//   isSimpleString     - set only when the delimiter is quoted: true for '...',
//                        false for "..."; otherwise left as the caller passed it
// Leading spaces and tabs are skipped. The delimiter may be bare, or enclosed in
// single or double quotes; a quoted delimiter must consist of word characters
// only and its closing quote must be immediately followed by a line end.
// Returns the position of the last character consumed on success, or i - 1
// (using the value of i on entry) when no valid delimiter is found.
Sci_Position FindPhpStringDelimiter(std::string &phpStringDelimiter, Sci_Position i, const Sci_Position lengthDoc, Accessor &styler, bool &isSimpleString) {
	Sci_Position j;
	const Sci_Position beginning = i - 1;
	bool isQuoted = false;

	// Skip horizontal white space before the delimiter.
	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
		i++;
	char ch = styler.SafeGetCharAt(i);
	const char chNext = styler.SafeGetCharAt(i + 1);
	phpStringDelimiter.clear();
	if (!IsPhpWordStart(ch)) {
		// Not a bare word: accept only an opening quote directly followed by a word start.
		if ((ch == '\'' || ch == '\"') && IsPhpWordStart(chNext)) {
			isSimpleString = ch == '\'';
			isQuoted = true;
			i++;
			ch = chNext;
		} else {
			return beginning;
		}
	}
	phpStringDelimiter.push_back(ch);
	i++;
	// Collect the rest of the line into the delimiter.
	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
		if (!IsPhpWordChar(styler[j]) && isQuoted) {
			// In a quoted delimiter the only permitted non-word character is the
			// matching closing quote at the very end of the line.
			if (((isSimpleString && styler[j] == '\'') || (!isSimpleString && styler[j] == '\"')) && isLineEnd(styler.SafeGetCharAt(j + 1))) {
				isQuoted = false;
				j++;
				break;
			} else {
				phpStringDelimiter.clear();
				return beginning;
			}
		}
		// Note: for an unquoted delimiter every character up to the line end is appended.
		phpStringDelimiter.push_back(styler[j]);
	}
	if (isQuoted) {
		// The line (or the range) ended before the closing quote was found.
		phpStringDelimiter.clear();
		return beginning;
	}
	return j - 1;
}
689
// Options used for LexerHTML
// Each member is bound to a named property by OptionSetHTML.
struct OptionsHTML {
	int aspDefaultLanguage = eScriptJS;	// "asp.default.language": initial language assumed for ASP script
	bool caseSensitive = false;		// "html.tags.case.sensitive": match tags case sensitively
	bool allowScripts = true;		// "lexer.xml.allow.scripts": 0 disables scripts in XML
	bool isMako = false;			// "lexer.html.mako": enable the mako template language
	bool isDjango = false;			// "lexer.html.django": enable the django template language
	bool fold = false;			// "fold"
	bool foldHTML = false;			// "fold.html": folding for HTML and XML (fold must also be on)
	bool foldHTMLPreprocessor = true;	// "fold.html.preprocessor": folding for scripts embedded in HTML
	bool foldCompact = true;		// "fold.compact"
	bool foldComment = false;		// "fold.hypertext.comment": folding for comments in embedded scripts
	bool foldHeredoc = false;		// "fold.hypertext.heredoc": folding for heredocs in embedded scripts
	bool foldXmlAtTagOpen = false;		// "fold.xml.at.tag.open": fold XML at the start of the open tag
	OptionsHTML() noexcept {
	}
};
707
// Descriptions of the keyword lists used by the HTML lexer, one entry per
// word list index, terminated by a null pointer.
const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	nullptr,
};
717
// Descriptions of the keyword lists used when lexing a PHP script. The layout
// mirrors htmlWordListDesc so that "PHP keywords" keeps the same index (4);
// the other slots are unused. Terminated by a null pointer.
const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	nullptr,
};
727
// Binds each named lexer property to the OptionsHTML member that stores it and
// registers the descriptions of the keyword lists.
struct OptionSetHTML : public OptionSet<OptionsHTML> {
	// isPHPScript_ selects which word list descriptions are registered:
	// phpscriptWordListDesc when true, htmlWordListDesc otherwise.
	OptionSetHTML(bool isPHPScript_) {

		DefineProperty("asp.default.language", &OptionsHTML::aspDefaultLanguage,
			"Script in ASP code is initially assumed to be in JavaScript. "
			"To change this to VBScript set asp.default.language to 2. Python is 3.");

		DefineProperty("html.tags.case.sensitive", &OptionsHTML::caseSensitive,
			"For XML and HTML, setting this property to 1 will make tags match in a case "
			"sensitive way which is the expected behaviour for XML and XHTML.");

		DefineProperty("lexer.xml.allow.scripts", &OptionsHTML::allowScripts,
			"Set to 0 to disable scripts in XML.");

		DefineProperty("lexer.html.mako", &OptionsHTML::isMako,
			"Set to 1 to enable the mako template language.");

		DefineProperty("lexer.html.django", &OptionsHTML::isDjango,
			"Set to 1 to enable the django template language.");

		// Registered without a description.
		DefineProperty("fold", &OptionsHTML::fold);

		DefineProperty("fold.html", &OptionsHTML::foldHTML,
			"Folding is turned on or off for HTML and XML files with this option. "
			"The fold option must also be on for folding to occur.");

		DefineProperty("fold.html.preprocessor", &OptionsHTML::foldHTMLPreprocessor,
			"Folding is turned on or off for scripts embedded in HTML files with this option. "
			"The default is on.");

		// Registered without a description.
		DefineProperty("fold.compact", &OptionsHTML::foldCompact);

		DefineProperty("fold.hypertext.comment", &OptionsHTML::foldComment,
			"Allow folding for comments in scripts embedded in HTML. "
			"The default is off.");

		DefineProperty("fold.hypertext.heredoc", &OptionsHTML::foldHeredoc,
			"Allow folding for heredocs in scripts embedded in HTML. "
			"The default is off.");

		DefineProperty("fold.xml.at.tag.open", &OptionsHTML::foldXmlAtTagOpen,
			"Enable folding for XML at the start of open tag. "
			"The default is off.");

		DefineWordListSets(isPHPScript_ ? phpscriptWordListDesc : htmlWordListDesc);
	}
};
775
776LexicalClass lexicalClassesHTML[] = {
777 // Lexer HTML SCLEX_HTML SCE_H_ SCE_HJ_ SCE_HJA_ SCE_HB_ SCE_HBA_ SCE_HP_ SCE_HPHP_ SCE_HPA_:
778 0, "SCE_H_DEFAULT", "default", "Text",
779 1, "SCE_H_TAG", "tag", "Tags",
780 2, "SCE_H_ERRORTAGUNKNOWN", "error tag", "Unknown Tags",
781 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
782 4, "SCE_H_ATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
783 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
784 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
785 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
786 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
787 9, "SCE_H_COMMENT", "comment", "Comment",
788 10, "SCE_H_ENTITY", "literal", "Entities",
789 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
790 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
791 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
792 14, "SCE_H_SCRIPT", "error", "Internal state which should never be visible",
793 15, "SCE_H_ASP", "preprocessor", "ASP <% ... %>",
794 16, "SCE_H_ASPAT", "preprocessor", "ASP <% ... %>",
795 17, "SCE_H_CDATA", "literal", "CDATA",
796 18, "SCE_H_QUESTION", "preprocessor", "PHP",
797 19, "SCE_H_VALUE", "literal string", "Unquoted values",
798 20, "SCE_H_XCCOMMENT", "comment", "JSP Comment <%-- ... --%>",
799 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
800 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
801 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
802 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
803 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
804 26, "SCE_H_SGML_ERROR", "error", "SGML error",
805 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
806 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
807 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
808 30, "SCE_H_SGML_1ST_PARAM_COMMENT", "error comment", "SGML first parameter - lexer internal. It is an error if any text is in this style.",
809 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
810 32, "", "predefined", "",
811 33, "", "predefined", "",
812 34, "", "predefined", "",
813 35, "", "predefined", "",
814 36, "", "predefined", "",
815 37, "", "predefined", "",
816 38, "", "predefined", "",
817 39, "", "predefined", "",
818 40, "SCE_HJ_START", "client javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
819 41, "SCE_HJ_DEFAULT", "client javascript default", "JS Default",
820 42, "SCE_HJ_COMMENT", "client javascript comment", "JS Comment",
821 43, "SCE_HJ_COMMENTLINE", "client javascript comment line", "JS Line Comment",
822 44, "SCE_HJ_COMMENTDOC", "client javascript comment documentation", "JS Doc comment",
823 45, "SCE_HJ_NUMBER", "client javascript literal numeric", "JS Number",
824 46, "SCE_HJ_WORD", "client javascript identifier", "JS Word",
825 47, "SCE_HJ_KEYWORD", "client javascript keyword", "JS Keyword",
826 48, "SCE_HJ_DOUBLESTRING", "client javascript literal string", "JS Double quoted string",
827 49, "SCE_HJ_SINGLESTRING", "client javascript literal string", "JS Single quoted string",
828 50, "SCE_HJ_SYMBOLS", "client javascript operator", "JS Symbols",
829 51, "SCE_HJ_STRINGEOL", "client javascript error literal string", "JavaScript EOL",
830 52, "SCE_HJ_REGEX", "client javascript literal regex", "JavaScript RegEx",
831 53, "", "unused", "",
832 54, "", "unused", "",
833 55, "SCE_HJA_START", "server javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
834 56, "SCE_HJA_DEFAULT", "server javascript default", "JS Default",
835 57, "SCE_HJA_COMMENT", "server javascript comment", "JS Comment",
836 58, "SCE_HJA_COMMENTLINE", "server javascript comment line", "JS Line Comment",
837 59, "SCE_HJA_COMMENTDOC", "server javascript comment documentation", "JS Doc comment",
838 60, "SCE_HJA_NUMBER", "server javascript literal numeric", "JS Number",
839 61, "SCE_HJA_WORD", "server javascript identifier", "JS Word",
840 62, "SCE_HJA_KEYWORD", "server javascript keyword", "JS Keyword",
841 63, "SCE_HJA_DOUBLESTRING", "server javascript literal string", "JS Double quoted string",
842 64, "SCE_HJA_SINGLESTRING", "server javascript literal string", "JS Single quoted string",
843 65, "SCE_HJA_SYMBOLS", "server javascript operator", "JS Symbols",
844 66, "SCE_HJA_STRINGEOL", "server javascript error literal string", "JavaScript EOL",
845 67, "SCE_HJA_REGEX", "server javascript literal regex", "JavaScript RegEx",
846 68, "", "unused", "",
847 69, "", "unused", "",
848 70, "SCE_HB_START", "client basic default", "Start",
849 71, "SCE_HB_DEFAULT", "client basic default", "Default",
850 72, "SCE_HB_COMMENTLINE", "client basic comment line", "Comment",
851 73, "SCE_HB_NUMBER", "client basic literal numeric", "Number",
852 74, "SCE_HB_WORD", "client basic keyword", "KeyWord",
853 75, "SCE_HB_STRING", "client basic literal string", "String",
854 76, "SCE_HB_IDENTIFIER", "client basic identifier", "Identifier",
855 77, "SCE_HB_STRINGEOL", "client basic literal string", "Unterminated string",
856 78, "", "unused", "",
857 79, "", "unused", "",
858 80, "SCE_HBA_START", "server basic default", "Start",
859 81, "SCE_HBA_DEFAULT", "server basic default", "Default",
860 82, "SCE_HBA_COMMENTLINE", "server basic comment line", "Comment",
861 83, "SCE_HBA_NUMBER", "server basic literal numeric", "Number",
862 84, "SCE_HBA_WORD", "server basic keyword", "KeyWord",
863 85, "SCE_HBA_STRING", "server basic literal string", "String",
864 86, "SCE_HBA_IDENTIFIER", "server basic identifier", "Identifier",
865 87, "SCE_HBA_STRINGEOL", "server basic literal string", "Unterminated string",
866 88, "", "unused", "",
867 89, "", "unused", "",
868 90, "SCE_HP_START", "client python default", "Embedded Python",
869 91, "SCE_HP_DEFAULT", "client python default", "Embedded Python",
870 92, "SCE_HP_COMMENTLINE", "client python comment line", "Comment",
871 93, "SCE_HP_NUMBER", "client python literal numeric", "Number",
872 94, "SCE_HP_STRING", "client python literal string", "String",
873 95, "SCE_HP_CHARACTER", "client python literal string character", "Single quoted string",
874 96, "SCE_HP_WORD", "client python keyword", "Keyword",
875 97, "SCE_HP_TRIPLE", "client python literal string", "Triple quotes",
876 98, "SCE_HP_TRIPLEDOUBLE", "client python literal string", "Triple double quotes",
877 99, "SCE_HP_CLASSNAME", "client python identifier", "Class name definition",
878 100, "SCE_HP_DEFNAME", "client python identifier", "Function or method name definition",
879 101, "SCE_HP_OPERATOR", "client python operator", "Operators",
880 102, "SCE_HP_IDENTIFIER", "client python identifier", "Identifiers",
881 103, "", "unused", "",
882 104, "SCE_HPHP_COMPLEX_VARIABLE", "server php identifier", "PHP complex variable",
883 105, "SCE_HPA_START", "server python default", "ASP Python",
884 106, "SCE_HPA_DEFAULT", "server python default", "ASP Python",
885 107, "SCE_HPA_COMMENTLINE", "server python comment line", "Comment",
886 108, "SCE_HPA_NUMBER", "server python literal numeric", "Number",
887 109, "SCE_HPA_STRING", "server python literal string", "String",
888 110, "SCE_HPA_CHARACTER", "server python literal string character", "Single quoted string",
889 111, "SCE_HPA_WORD", "server python keyword", "Keyword",
890 112, "SCE_HPA_TRIPLE", "server python literal string", "Triple quotes",
891 113, "SCE_HPA_TRIPLEDOUBLE", "server python literal string", "Triple double quotes",
892 114, "SCE_HPA_CLASSNAME", "server python identifier", "Class name definition",
893 115, "SCE_HPA_DEFNAME", "server python identifier", "Function or method name definition",
894 116, "SCE_HPA_OPERATOR", "server python operator", "Operators",
895 117, "SCE_HPA_IDENTIFIER", "server python identifier", "Identifiers",
896 118, "SCE_HPHP_DEFAULT", "server php default", "Default",
897 119, "SCE_HPHP_HSTRING", "server php literal string", "Double quoted String",
898 120, "SCE_HPHP_SIMPLESTRING", "server php literal string", "Single quoted string",
899 121, "SCE_HPHP_WORD", "server php keyword", "Keyword",
900 122, "SCE_HPHP_NUMBER", "server php literal numeric", "Number",
901 123, "SCE_HPHP_VARIABLE", "server php identifier", "Variable",
902 124, "SCE_HPHP_COMMENT", "server php comment", "Comment",
903 125, "SCE_HPHP_COMMENTLINE", "server php comment line", "One line comment",
904 126, "SCE_HPHP_HSTRING_VARIABLE", "server php literal string identifier", "PHP variable in double quoted string",
905 127, "SCE_HPHP_OPERATOR", "server php operator", "PHP operator",
906};
907
908LexicalClass lexicalClassesXML[] = {
909 // Lexer.Secondary XML SCLEX_XML SCE_H_:
910 0, "SCE_H_DEFAULT", "default", "Default",
911 1, "SCE_H_TAG", "tag", "Tags",
912 2, "SCE_H_TAGUNKNOWN", "error tag", "Unknown Tags",
913 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
914 4, "SCE_H_ERRORATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
915 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
916 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
917 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
918 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
919 9, "SCE_H_COMMENT", "comment", "Comment",
920 10, "SCE_H_ENTITY", "literal", "Entities",
921 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
922 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
923 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
924 14, "", "unused", "",
925 15, "", "unused", "",
926 16, "", "unused", "",
927 17, "SCE_H_CDATA", "literal", "CDATA",
928 18, "SCE_H_QUESTION", "preprocessor", "Question",
929 19, "SCE_H_VALUE", "literal string", "Unquoted Value",
930 20, "", "unused", "",
931 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
932 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
933 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
934 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
935 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
936 26, "SCE_H_SGML_ERROR", "error", "SGML error",
937 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
938 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
939 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
940 30, "", "unused", "",
941 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
942};
943
// Lower-case tag names that seed LexerHTML::nonFoldingTags when a lexer is
// constructed (presumably elements that never take a closing tag, so opening
// one must not start a fold - confirm against the folding code in Lex).
// Both the characters and the pointers are const: the table is read-only
// data and must not be re-targeted at run time.
const char *const tagsThatDoNotFold[] = {
	"area",
	"base",
	"basefont",
	"br",
	"col",
	"command",
	"embed",
	"frame",
	"hr",
	"img",
	"input",
	"isindex",
	"keygen",
	"link",
	"meta",
	"param",
	"source",
	"track",
	"wbr"
};
965
966}
967
968class LexerHTML : public DefaultLexer {
969 bool isXml;
970 bool isPHPScript;
971 WordList keywords;
972 WordList keywords2;
973 WordList keywords3;
974 WordList keywords4;
975 WordList keywords5;
976 WordList keywords6; // SGML (DTD) keywords
977 OptionsHTML options;
978 OptionSetHTML osHTML;
979 std::set<std::string> nonFoldingTags;
980public:
981 explicit LexerHTML(bool isXml_, bool isPHPScript_) :
982 DefaultLexer(
983 isXml_ ? "xml" : (isPHPScript_ ? "phpscript" : "hypertext"),
984 isXml_ ? SCLEX_XML : (isPHPScript_ ? SCLEX_PHPSCRIPT : SCLEX_HTML),
985 isXml_ ? lexicalClassesXML : lexicalClassesHTML,
986 isXml_ ? std::size(lexicalClassesXML) : std::size(lexicalClassesHTML)),
987 isXml(isXml_),
988 isPHPScript(isPHPScript_),
989 osHTML(isPHPScript_),
990 nonFoldingTags(std::begin(tagsThatDoNotFold), std::end(tagsThatDoNotFold)) {
991 }
992 ~LexerHTML() override {
993 }
994 void SCI_METHOD Release() override {
995 delete this;
996 }
997 const char *SCI_METHOD PropertyNames() override {
998 return osHTML.PropertyNames();
999 }
1000 int SCI_METHOD PropertyType(const char *name) override {
1001 return osHTML.PropertyType(name);
1002 }
1003 const char *SCI_METHOD DescribeProperty(const char *name) override {
1004 return osHTML.DescribeProperty(name);
1005 }
1006 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
1007 const char * SCI_METHOD PropertyGet(const char *key) override {
1008 return osHTML.PropertyGet(key);
1009 }
1010 const char *SCI_METHOD DescribeWordListSets() override {
1011 return osHTML.DescribeWordListSets();
1012 }
1013 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
1014 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
1015 // No Fold as all folding performs in Lex.
1016
1017 static ILexer5 *LexerFactoryHTML() {
1018 return new LexerHTML(false, false);
1019 }
1020 static ILexer5 *LexerFactoryXML() {
1021 return new LexerHTML(true, false);
1022 }
1023 static ILexer5 *LexerFactoryPHPScript() {
1024 return new LexerHTML(false, true);
1025 }
1026};
1027
1028Sci_Position SCI_METHOD LexerHTML::PropertySet(const char *key, const char *val) {
1029 if (osHTML.PropertySet(&options, key, val)) {
1030 return 0;
1031 }
1032 return -1;
1033}
1034
1035Sci_Position SCI_METHOD LexerHTML::WordListSet(int n, const char *wl) {
1036 WordList *wordListN = 0;
1037 switch (n) {
1038 case 0:
1039 wordListN = &keywords;
1040 break;
1041 case 1:
1042 wordListN = &keywords2;
1043 break;
1044 case 2:
1045 wordListN = &keywords3;
1046 break;
1047 case 3:
1048 wordListN = &keywords4;
1049 break;
1050 case 4:
1051 wordListN = &keywords5;
1052 break;
1053 case 5:
1054 wordListN = &keywords6;
1055 break;
1056 }
1057 Sci_Position firstModification = -1;
1058 if (wordListN) {
1059 WordList wlNew;
1060 wlNew.Set(wl);
1061 if (*wordListN != wlNew) {
1062 wordListN->Set(wl);
1063 firstModification = 0;
1064 }
1065 }
1066 return firstModification;
1067}
1068
1069void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1070 Accessor styler(pAccess, nullptr);
1071 if (isPHPScript && (startPos == 0)) {
1072 initStyle = SCE_HPHP_DEFAULT;
1073 }
1074 styler.StartAt(startPos);
1075 std::string prevWord;
1076 PhpNumberState phpNumber;
1077 std::string phpStringDelimiter;
1078 int StateToPrint = initStyle;
1079 int state = stateForPrintState(StateToPrint);
1080 std::string makoBlockType;
1081 int makoComment = 0;
1082 std::string djangoBlockType;
1083 // If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
1084 if (InTagState(state)) {
1085 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
1086 const Sci_Position backLineStart = styler.LineStart(styler.GetLine(startPos-1));
1087 length += startPos - backLineStart;
1088 startPos = backLineStart;
1089 }
1090 state = SCE_H_DEFAULT;
1091 }
1092 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
1093 if (isPHPStringState(state)) {
1094 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
1095 startPos--;
1096 length++;
1097 state = styler.StyleAt(startPos);
1098 }
1099 if (startPos == 0)
1100 state = SCE_H_DEFAULT;
1101 }
1102 styler.StartAt(startPos);
1103
1104 /* Nothing handles getting out of these, so we need not start in any of them.
1105 * As we're at line start and they can't span lines, we'll re-detect them anyway */
1106 switch (state) {
1107 case SCE_H_QUESTION:
1108 case SCE_H_XMLSTART:
1109 case SCE_H_XMLEND:
1110 case SCE_H_ASP:
1111 state = SCE_H_DEFAULT;
1112 break;
1113 }
1114
1115 Sci_Position lineCurrent = styler.GetLine(startPos);
1116 int lineState;
1117 if (lineCurrent > 0) {
1118 lineState = styler.GetLineState(lineCurrent-1);
1119 } else {
1120 // Default client and ASP scripting language is JavaScript
1121 lineState = eScriptJS << 8;
1122 lineState |= options.aspDefaultLanguage << 4;
1123 }
1124 script_mode inScriptType = static_cast<script_mode>((lineState >> 0) & 0x03); // 2 bits of scripting mode
1125
1126 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
1127 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
1128 bool tagDontFold = false; //some HTML tags should not be folded
1129 script_type aspScript = static_cast<script_type>((lineState >> 4) & 0x0F); // 4 bits of script name
1130 script_type clientScript = static_cast<script_type>((lineState >> 8) & 0x0F); // 4 bits of script name
1131 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
1132
1133 script_type scriptLanguage = ScriptOfState(state);
1134 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
1135 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
1136 scriptLanguage = eScriptComment;
1137 }
1138 script_type beforeLanguage = ScriptOfState(beforePreProc);
1139 const bool foldHTML = options.foldHTML;
1140 const bool fold = foldHTML && options.fold;
1141 const bool foldHTMLPreprocessor = foldHTML && options.foldHTMLPreprocessor;
1142 const bool foldCompact = options.foldCompact;
1143 const bool foldComment = fold && options.foldComment;
1144 const bool foldHeredoc = fold && options.foldHeredoc;
1145 const bool foldXmlAtTagOpen = isXml && fold && options.foldXmlAtTagOpen;
1146 const bool caseSensitive = options.caseSensitive;
1147 const bool allowScripts = options.allowScripts;
1148 const bool isMako = options.isMako;
1149 const bool isDjango = options.isDjango;
1150 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", true);
1151 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", true);
1152 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", true);
1153 // TODO: also handle + and - (except if they're part of ++ or --) and return keywords
1154 const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
1155
1156 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1157 int levelCurrent = levelPrev;
1158 int visibleChars = 0;
1159 int lineStartVisibleChars = 0;
1160
1161 int chPrev = ' ';
1162 int ch = ' ';
1163 int chPrevNonWhite = ' ';
1164 // look back to set chPrevNonWhite properly for better regex colouring
1165 if (scriptLanguage == eScriptJS && startPos > 0) {
1166 Sci_Position back = startPos;
1167 int style = 0;
1168 while (--back) {
1169 style = styler.StyleAt(back);
1170 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
1171 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
1172 break;
1173 }
1174 if (style == SCE_HJ_SYMBOLS) {
1175 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
1176 }
1177 }
1178
1179 styler.StartSegment(startPos);
1180 const Sci_Position lengthDoc = startPos + length;
1181 for (Sci_Position i = startPos; i < lengthDoc; i++) {
1182 const int chPrev2 = chPrev;
1183 chPrev = ch;
1184 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
1185 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
1186 chPrevNonWhite = ch;
1187 ch = static_cast<unsigned char>(styler[i]);
1188 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1189 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
1190
1191 // Handle DBCS codepages
1192 if (styler.IsLeadByte(static_cast<char>(ch))) {
1193 chPrev = ' ';
1194 i += 1;
1195 continue;
1196 }
1197
1198 if ((!IsASpace(ch) || !foldCompact) && fold)
1199 visibleChars++;
1200 if (!IsASpace(ch))
1201 lineStartVisibleChars++;
1202
1203 // decide what is the current state to print (depending of the script tag)
1204 StateToPrint = statePrintForState(state, inScriptType);
1205
1206 // handle script folding
1207 if (fold) {
1208 switch (scriptLanguage) {
1209 case eScriptJS:
1210 case eScriptPHP:
1211 //not currently supported case eScriptVBS:
1212
1213 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
1214 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
1215 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
1216 if (ch == '#') {
1217 Sci_Position j = i + 1;
1218 while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1219 j++;
1220 }
1221 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1222 levelCurrent++;
1223 } else if (styler.Match(j, "end")) {
1224 levelCurrent--;
1225 }
1226 } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
1227 levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
1228 }
1229 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
1230 levelCurrent--;
1231 }
1232 break;
1233 case eScriptPython:
1234 if (state != SCE_HP_COMMENTLINE && !isMako) {
1235 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
1236 levelCurrent++;
1237 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
1238 // check if the number of tabs is lower than the level
1239 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
1240 for (Sci_Position j = 0; Findlevel > 0; j++) {
1241 const char chTmp = styler.SafeGetCharAt(i + j + 1);
1242 if (chTmp == '\t') {
1243 Findlevel -= 8;
1244 } else if (chTmp == ' ') {
1245 Findlevel--;
1246 } else {
1247 break;
1248 }
1249 }
1250
1251 if (Findlevel > 0) {
1252 levelCurrent -= Findlevel / 8;
1253 if (Findlevel % 8)
1254 levelCurrent--;
1255 }
1256 }
1257 }
1258 break;
1259 default:
1260 break;
1261 }
1262 }
1263
1264 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
1265 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
1266 // Avoid triggering two times on Dos/Win
1267 // New line -> record any line state onto /next/ line
1268 if (fold) {
1269 int lev = levelPrev;
1270 if (visibleChars == 0)
1271 lev |= SC_FOLDLEVELWHITEFLAG;
1272 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1273 lev |= SC_FOLDLEVELHEADERFLAG;
1274
1275 styler.SetLevel(lineCurrent, lev);
1276 visibleChars = 0;
1277 levelPrev = levelCurrent;
1278 }
1279 styler.SetLineState(lineCurrent,
1280 ((inScriptType & 0x03) << 0) |
1281 ((tagOpened ? 1 : 0) << 2) |
1282 ((tagClosing ? 1 : 0) << 3) |
1283 ((aspScript & 0x0F) << 4) |
1284 ((clientScript & 0x0F) << 8) |
1285 ((beforePreProc & 0xFF) << 12));
1286 lineCurrent++;
1287 lineStartVisibleChars = 0;
1288 }
1289
1290 // handle start of Mako comment line
1291 if (isMako && ch == '#' && chNext == '#') {
1292 makoComment = 1;
1293 state = SCE_HP_COMMENTLINE;
1294 }
1295
1296 // handle end of Mako comment line
1297 else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
1298 makoComment = 0;
1299 styler.ColourTo(i - 1, StateToPrint);
1300 if (scriptLanguage == eScriptPython) {
1301 state = SCE_HP_DEFAULT;
1302 } else {
1303 state = SCE_H_DEFAULT;
1304 }
1305 }
1306 // Allow falling through to mako handling code if newline is going to end a block
1307 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
1308 (!isMako || (makoBlockType != "%"))) {
1309 }
1310 // Ignore everything in mako comment until the line ends
1311 else if (isMako && makoComment) {
1312 }
1313
1314 // generic end of script processing
1315 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
1316 // Check if it's the end of the script tag (or any other HTML tag)
1317 switch (state) {
1318 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
1319 case SCE_H_DOUBLESTRING:
1320 case SCE_H_SINGLESTRING:
1321 case SCE_HJ_COMMENT:
1322 case SCE_HJ_COMMENTDOC:
1323 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
1324 // the end of script marker from some JS interpreters.
1325 case SCE_HB_COMMENTLINE:
1326 case SCE_HBA_COMMENTLINE:
1327 case SCE_HJ_DOUBLESTRING:
1328 case SCE_HJ_SINGLESTRING:
1329 case SCE_HJ_REGEX:
1330 case SCE_HB_STRING:
1331 case SCE_HBA_STRING:
1332 case SCE_HP_STRING:
1333 case SCE_HP_TRIPLE:
1334 case SCE_HP_TRIPLEDOUBLE:
1335 case SCE_HPHP_HSTRING:
1336 case SCE_HPHP_SIMPLESTRING:
1337 case SCE_HPHP_COMMENT:
1338 case SCE_HPHP_COMMENTLINE:
1339 break;
1340 default :
1341 // check if the closing tag is a script tag
1342 if (const char *tag =
1343 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
1344 state == SCE_H_COMMENT ? "comment" : 0) {
1345 Sci_Position j = i + 2;
1346 int chr;
1347 do {
1348 chr = static_cast<int>(*tag++);
1349 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
1350 if (chr != 0) break;
1351 }
1352 // closing tag of the script (it's a closing HTML tag anyway)
1353 styler.ColourTo(i - 1, StateToPrint);
1354 state = SCE_H_TAGUNKNOWN;
1355 inScriptType = eHtml;
1356 scriptLanguage = eScriptNone;
1357 clientScript = eScriptJS;
1358 i += 2;
1359 visibleChars += 2;
1360 tagClosing = true;
1361 if (foldXmlAtTagOpen) {
1362 levelCurrent--;
1363 }
1364 continue;
1365 }
1366 }
1367
1368 /////////////////////////////////////
1369 // handle the start of PHP pre-processor = Non-HTML
1370 else if ((state != SCE_H_ASPAT) &&
1371 !isPHPStringState(state) &&
1372 (state != SCE_HPHP_COMMENT) &&
1373 (state != SCE_HPHP_COMMENTLINE) &&
1374 (ch == '<') &&
1375 (chNext == '?') &&
1376 !IsScriptCommentState(state)) {
1377 beforeLanguage = scriptLanguage;
1378 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
1379 if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
1380 styler.ColourTo(i - 1, StateToPrint);
1381 beforePreProc = state;
1382 i++;
1383 visibleChars++;
1384 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
1385 if (scriptLanguage == eScriptXML)
1386 styler.ColourTo(i, SCE_H_XMLSTART);
1387 else
1388 styler.ColourTo(i, SCE_H_QUESTION);
1389 state = StateForScript(scriptLanguage);
1390 if (inScriptType == eNonHtmlScript)
1391 inScriptType = eNonHtmlScriptPreProc;
1392 else
1393 inScriptType = eNonHtmlPreProc;
1394 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
1395 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1396 levelCurrent++;
1397 }
1398 // should be better
1399 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1400 continue;
1401 }
1402
1403 // handle the start Mako template Python code
1404 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
1405 (lineStartVisibleChars == 1 && ch == '%') ||
1406 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
1407 (ch == '$' && chNext == '{') ||
1408 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
1409 if (ch == '%' || ch == '/')
1410 makoBlockType = "%";
1411 else if (ch == '$')
1412 makoBlockType = "{";
1413 else if (chNext == '/')
1414 makoBlockType = GetNextWord(styler, i+3);
1415 else
1416 makoBlockType = GetNextWord(styler, i+2);
1417 styler.ColourTo(i - 1, StateToPrint);
1418 beforePreProc = state;
1419 if (inScriptType == eNonHtmlScript)
1420 inScriptType = eNonHtmlScriptPreProc;
1421 else
1422 inScriptType = eNonHtmlPreProc;
1423
1424 if (chNext == '/') {
1425 i += 2;
1426 visibleChars += 2;
1427 } else if (ch != '%') {
1428 i++;
1429 visibleChars++;
1430 }
1431 state = SCE_HP_START;
1432 scriptLanguage = eScriptPython;
1433 styler.ColourTo(i, SCE_H_ASP);
1434 if (ch != '%' && ch != '$' && ch != '/') {
1435 i += makoBlockType.length();
1436 visibleChars += static_cast<int>(makoBlockType.length());
1437 if (keywords4.InList(makoBlockType.c_str()))
1438 styler.ColourTo(i, SCE_HP_WORD);
1439 else
1440 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1441 }
1442
1443 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1444 continue;
1445 }
1446
1447 // handle the start/end of Django comment
1448 else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
1449 styler.ColourTo(i - 1, StateToPrint);
1450 beforePreProc = state;
1451 beforeLanguage = scriptLanguage;
1452 if (inScriptType == eNonHtmlScript)
1453 inScriptType = eNonHtmlScriptPreProc;
1454 else
1455 inScriptType = eNonHtmlPreProc;
1456 i += 1;
1457 visibleChars += 1;
1458 scriptLanguage = eScriptComment;
1459 state = SCE_H_COMMENT;
1460 styler.ColourTo(i, SCE_H_ASP);
1461 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1462 continue;
1463 } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
1464 styler.ColourTo(i - 1, StateToPrint);
1465 i += 1;
1466 visibleChars += 1;
1467 styler.ColourTo(i, SCE_H_ASP);
1468 state = beforePreProc;
1469 if (inScriptType == eNonHtmlScriptPreProc)
1470 inScriptType = eNonHtmlScript;
1471 else
1472 inScriptType = eHtml;
1473 scriptLanguage = beforeLanguage;
1474 continue;
1475 }
1476
1477 // handle the start Django template code
1478 else if (isDjango && scriptLanguage != eScriptPython && scriptLanguage != eScriptComment && (ch == '{' && (chNext == '%' || chNext == '{'))) {
1479 if (chNext == '%')
1480 djangoBlockType = "%";
1481 else
1482 djangoBlockType = "{";
1483 styler.ColourTo(i - 1, StateToPrint);
1484 beforePreProc = state;
1485 if (inScriptType == eNonHtmlScript)
1486 inScriptType = eNonHtmlScriptPreProc;
1487 else
1488 inScriptType = eNonHtmlPreProc;
1489
1490 i += 1;
1491 visibleChars += 1;
1492 state = SCE_HP_START;
1493 beforeLanguage = scriptLanguage;
1494 scriptLanguage = eScriptPython;
1495 styler.ColourTo(i, SCE_H_ASP);
1496
1497 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1498 continue;
1499 }
1500
1501 // handle the start of ASP pre-processor = Non-HTML
1502 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1503 styler.ColourTo(i - 1, StateToPrint);
1504 beforePreProc = state;
1505 if (inScriptType == eNonHtmlScript)
1506 inScriptType = eNonHtmlScriptPreProc;
1507 else
1508 inScriptType = eNonHtmlPreProc;
1509
1510 if (chNext2 == '@') {
1511 i += 2; // place as if it was the second next char treated
1512 visibleChars += 2;
1513 state = SCE_H_ASPAT;
1514 scriptLanguage = eScriptVBS;
1515 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1516 styler.ColourTo(i + 3, SCE_H_ASP);
1517 state = SCE_H_XCCOMMENT;
1518 scriptLanguage = eScriptVBS;
1519 continue;
1520 } else {
1521 if (chNext2 == '=') {
1522 i += 2; // place as if it was the second next char treated
1523 visibleChars += 2;
1524 } else {
1525 i++; // place as if it was the next char treated
1526 visibleChars++;
1527 }
1528
1529 state = StateForScript(aspScript);
1530 scriptLanguage = aspScript;
1531 }
1532 styler.ColourTo(i, SCE_H_ASP);
1533 // fold whole script
1534 if (foldHTMLPreprocessor)
1535 levelCurrent++;
1536 // should be better
1537 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1538 continue;
1539 }
1540
1541 /////////////////////////////////////
1542 // handle the start of SGML language (DTD)
1543 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1544 (chPrev == '<') &&
1545 (ch == '!') &&
1546 (StateToPrint != SCE_H_CDATA) &&
1547 (!isStringState(StateToPrint)) &&
1548 (!IsCommentState(StateToPrint)) &&
1549 (!IsScriptCommentState(StateToPrint))) {
1550 beforePreProc = state;
1551 styler.ColourTo(i - 2, StateToPrint);
1552 if ((chNext == '-') && (chNext2 == '-')) {
1553 state = SCE_H_COMMENT; // wait for a pending command
1554 styler.ColourTo(i + 2, SCE_H_COMMENT);
1555 i += 2; // follow styling after the --
1556 } else if (isWordCdata(i + 1, i + 7, styler)) {
1557 state = SCE_H_CDATA;
1558 } else {
1559 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1560 scriptLanguage = eScriptSGML;
1561 state = SCE_H_SGML_COMMAND; // wait for a pending command
1562 }
1563 // fold whole tag (-- when closing the tag)
1564 if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1565 levelCurrent++;
1566 continue;
1567 }
1568
1569 // handle the end of Mako Python code
1570 else if (isMako &&
1571 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1572 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1573 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1574 if (state == SCE_H_ASPAT) {
1575 aspScript = segIsScriptingIndicator(styler,
1576 styler.GetStartSegment(), i - 1, aspScript);
1577 }
1578 if (state == SCE_HP_WORD) {
1579 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1580 } else {
1581 styler.ColourTo(i - 1, StateToPrint);
1582 }
1583 if ((makoBlockType != "%") && (makoBlockType != "{") && ch != '>') {
1584 i++;
1585 visibleChars++;
1586 }
1587 else if ((makoBlockType == "%") && ch == '/') {
1588 i++;
1589 visibleChars++;
1590 }
1591 if ((makoBlockType != "%") || ch == '/') {
1592 styler.ColourTo(i, SCE_H_ASP);
1593 }
1594 state = beforePreProc;
1595 if (inScriptType == eNonHtmlScriptPreProc)
1596 inScriptType = eNonHtmlScript;
1597 else
1598 inScriptType = eHtml;
1599 scriptLanguage = eScriptNone;
1600 continue;
1601 }
1602
1603 // handle the end of Django template code
1604 else if (isDjango &&
1605 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1606 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1607 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1608 if (state == SCE_H_ASPAT) {
1609 aspScript = segIsScriptingIndicator(styler,
1610 styler.GetStartSegment(), i - 1, aspScript);
1611 }
1612 if (state == SCE_HP_WORD) {
1613 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1614 } else {
1615 styler.ColourTo(i - 1, StateToPrint);
1616 }
1617 i += 1;
1618 visibleChars += 1;
1619 styler.ColourTo(i, SCE_H_ASP);
1620 state = beforePreProc;
1621 if (inScriptType == eNonHtmlScriptPreProc)
1622 inScriptType = eNonHtmlScript;
1623 else
1624 inScriptType = eHtml;
1625 scriptLanguage = beforeLanguage;
1626 continue;
1627 }
1628
1629 // handle the end of a pre-processor = Non-HTML
1630 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1631 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1632 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1633 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1634 if (state == SCE_H_ASPAT) {
1635 aspScript = segIsScriptingIndicator(styler,
1636 styler.GetStartSegment(), i - 1, aspScript);
1637 }
1638 // Bounce out of any ASP mode
1639 switch (state) {
1640 case SCE_HJ_WORD:
1641 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1642 break;
1643 case SCE_HB_WORD:
1644 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1645 break;
1646 case SCE_HP_WORD:
1647 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1648 break;
1649 case SCE_HPHP_WORD:
1650 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1651 break;
1652 case SCE_H_XCCOMMENT:
1653 styler.ColourTo(i - 1, state);
1654 break;
1655 default :
1656 styler.ColourTo(i - 1, StateToPrint);
1657 break;
1658 }
1659 if (scriptLanguage != eScriptSGML) {
1660 i++;
1661 visibleChars++;
1662 }
1663 if (ch == '%')
1664 styler.ColourTo(i, SCE_H_ASP);
1665 else if (scriptLanguage == eScriptXML)
1666 styler.ColourTo(i, SCE_H_XMLEND);
1667 else if (scriptLanguage == eScriptSGML)
1668 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1669 else
1670 styler.ColourTo(i, SCE_H_QUESTION);
1671 state = beforePreProc;
1672 if (inScriptType == eNonHtmlScriptPreProc)
1673 inScriptType = eNonHtmlScript;
1674 else
1675 inScriptType = eHtml;
1676 // Unfold all scripting languages, except for XML tag
1677 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1678 levelCurrent--;
1679 }
1680 scriptLanguage = beforeLanguage;
1681 continue;
1682 }
1683 /////////////////////////////////////
1684
1685 switch (state) {
1686 case SCE_H_DEFAULT:
1687 if (ch == '<') {
1688 // in HTML, fold on tag open and unfold on tag close
1689 tagOpened = true;
1690 tagClosing = (chNext == '/');
1691 if (foldXmlAtTagOpen && !(chNext == '/' || chNext == '?' || chNext == '!' || chNext == '-' || chNext == '%')) {
1692 levelCurrent++;
1693 }
1694 if (foldXmlAtTagOpen && chNext == '/') {
1695 levelCurrent--;
1696 }
1697 styler.ColourTo(i - 1, StateToPrint);
1698 if (chNext != '!')
1699 state = SCE_H_TAGUNKNOWN;
1700 } else if (ch == '&') {
1701 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1702 state = SCE_H_ENTITY;
1703 }
1704 break;
1705 case SCE_H_SGML_DEFAULT:
1706 case SCE_H_SGML_BLOCK_DEFAULT:
1707// if (scriptLanguage == eScriptSGMLblock)
1708// StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1709
1710 if (ch == '\"') {
1711 styler.ColourTo(i - 1, StateToPrint);
1712 state = SCE_H_SGML_DOUBLESTRING;
1713 } else if (ch == '\'') {
1714 styler.ColourTo(i - 1, StateToPrint);
1715 state = SCE_H_SGML_SIMPLESTRING;
1716 } else if ((ch == '-') && (chPrev == '-')) {
1717 if (static_cast<Sci_Position>(styler.GetStartSegment()) <= (i - 2)) {
1718 styler.ColourTo(i - 2, StateToPrint);
1719 }
1720 state = SCE_H_SGML_COMMENT;
1721 } else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
1722 styler.ColourTo(i - 2, StateToPrint);
1723 state = SCE_H_SGML_ENTITY;
1724 } else if (ch == '#') {
1725 styler.ColourTo(i - 1, StateToPrint);
1726 state = SCE_H_SGML_SPECIAL;
1727 } else if (ch == '[') {
1728 styler.ColourTo(i - 1, StateToPrint);
1729 scriptLanguage = eScriptSGMLblock;
1730 state = SCE_H_SGML_BLOCK_DEFAULT;
1731 } else if (ch == ']') {
1732 if (scriptLanguage == eScriptSGMLblock) {
1733 styler.ColourTo(i, StateToPrint);
1734 scriptLanguage = eScriptSGML;
1735 } else {
1736 styler.ColourTo(i - 1, StateToPrint);
1737 styler.ColourTo(i, SCE_H_SGML_ERROR);
1738 }
1739 state = SCE_H_SGML_DEFAULT;
1740 } else if (scriptLanguage == eScriptSGMLblock) {
1741 if ((ch == '!') && (chPrev == '<')) {
1742 styler.ColourTo(i - 2, StateToPrint);
1743 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1744 state = SCE_H_SGML_COMMAND;
1745 } else if (ch == '>') {
1746 styler.ColourTo(i - 1, StateToPrint);
1747 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1748 }
1749 }
1750 break;
1751 case SCE_H_SGML_COMMAND:
1752 if ((ch == '-') && (chPrev == '-')) {
1753 styler.ColourTo(i - 2, StateToPrint);
1754 state = SCE_H_SGML_COMMENT;
1755 } else if (!issgmlwordchar(ch)) {
1756 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1757 styler.ColourTo(i - 1, StateToPrint);
1758 state = SCE_H_SGML_1ST_PARAM;
1759 } else {
1760 state = SCE_H_SGML_ERROR;
1761 }
1762 }
1763 break;
1764 case SCE_H_SGML_1ST_PARAM:
1765 // wait for the beginning of the word
1766 if ((ch == '-') && (chPrev == '-')) {
1767 if (scriptLanguage == eScriptSGMLblock) {
1768 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1769 } else {
1770 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1771 }
1772 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1773 } else if (issgmlwordchar(ch)) {
1774 if (scriptLanguage == eScriptSGMLblock) {
1775 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1776 } else {
1777 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1778 }
1779 // find the length of the word
1780 int size = 1;
1781 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1782 size++;
1783 styler.ColourTo(i + size - 1, StateToPrint);
1784 i += size - 1;
1785 visibleChars += size - 1;
1786 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1787 if (scriptLanguage == eScriptSGMLblock) {
1788 state = SCE_H_SGML_BLOCK_DEFAULT;
1789 } else {
1790 state = SCE_H_SGML_DEFAULT;
1791 }
1792 continue;
1793 }
1794 break;
1795 case SCE_H_SGML_ERROR:
1796 if ((ch == '-') && (chPrev == '-')) {
1797 styler.ColourTo(i - 2, StateToPrint);
1798 state = SCE_H_SGML_COMMENT;
1799 }
1800 break;
1801 case SCE_H_SGML_DOUBLESTRING:
1802 if (ch == '\"') {
1803 styler.ColourTo(i, StateToPrint);
1804 state = SCE_H_SGML_DEFAULT;
1805 }
1806 break;
1807 case SCE_H_SGML_SIMPLESTRING:
1808 if (ch == '\'') {
1809 styler.ColourTo(i, StateToPrint);
1810 state = SCE_H_SGML_DEFAULT;
1811 }
1812 break;
1813 case SCE_H_SGML_COMMENT:
1814 if ((ch == '-') && (chPrev == '-')) {
1815 styler.ColourTo(i, StateToPrint);
1816 state = SCE_H_SGML_DEFAULT;
1817 }
1818 break;
1819 case SCE_H_CDATA:
1820 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1821 styler.ColourTo(i, StateToPrint);
1822 state = SCE_H_DEFAULT;
1823 levelCurrent--;
1824 }
1825 break;
1826 case SCE_H_COMMENT:
1827 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1828 styler.ColourTo(i, StateToPrint);
1829 state = SCE_H_DEFAULT;
1830 levelCurrent--;
1831 }
1832 break;
1833 case SCE_H_SGML_1ST_PARAM_COMMENT:
1834 if ((ch == '-') && (chPrev == '-')) {
1835 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1836 state = SCE_H_SGML_1ST_PARAM;
1837 }
1838 break;
1839 case SCE_H_SGML_SPECIAL:
1840 if (!(IsASCII(ch) && isupper(ch))) {
1841 styler.ColourTo(i - 1, StateToPrint);
1842 if (isalnum(ch)) {
1843 state = SCE_H_SGML_ERROR;
1844 } else {
1845 state = SCE_H_SGML_DEFAULT;
1846 }
1847 }
1848 break;
1849 case SCE_H_SGML_ENTITY:
1850 if (ch == ';') {
1851 styler.ColourTo(i, StateToPrint);
1852 state = SCE_H_SGML_DEFAULT;
1853 } else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1854 styler.ColourTo(i, SCE_H_SGML_ERROR);
1855 state = SCE_H_SGML_DEFAULT;
1856 }
1857 break;
1858 case SCE_H_ENTITY:
1859 if (ch == ';') {
1860 styler.ColourTo(i, StateToPrint);
1861 state = SCE_H_DEFAULT;
1862 }
1863 if (ch != '#' && !(IsASCII(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1864 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1865 if (!IsASCII(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1866 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1867 else
1868 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1869 state = SCE_H_DEFAULT;
1870 }
1871 break;
1872 case SCE_H_TAGUNKNOWN:
1873 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1874 int eClass = classifyTagHTML(styler.GetStartSegment(),
1875 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts, nonFoldingTags);
1876 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1877 if (!tagClosing) {
1878 inScriptType = eNonHtmlScript;
1879 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1880 } else {
1881 scriptLanguage = eScriptNone;
1882 }
1883 eClass = SCE_H_TAG;
1884 }
1885 if (ch == '>') {
1886 styler.ColourTo(i, eClass);
1887 if (inScriptType == eNonHtmlScript) {
1888 state = StateForScript(scriptLanguage);
1889 } else {
1890 state = SCE_H_DEFAULT;
1891 }
1892 tagOpened = false;
1893 if (!(foldXmlAtTagOpen || tagDontFold)) {
1894 if (tagClosing) {
1895 levelCurrent--;
1896 } else {
1897 levelCurrent++;
1898 }
1899 }
1900 tagClosing = false;
1901 } else if (ch == '/' && chNext == '>') {
1902 if (eClass == SCE_H_TAGUNKNOWN) {
1903 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1904 } else {
1905 styler.ColourTo(i - 1, StateToPrint);
1906 styler.ColourTo(i + 1, SCE_H_TAGEND);
1907 }
1908 i++;
1909 ch = chNext;
1910 state = SCE_H_DEFAULT;
1911 tagOpened = false;
1912 if (foldXmlAtTagOpen) {
1913 levelCurrent--;
1914 }
1915 } else {
1916 if (eClass != SCE_H_TAGUNKNOWN) {
1917 if (eClass == SCE_H_SGML_DEFAULT) {
1918 state = SCE_H_SGML_DEFAULT;
1919 } else {
1920 state = SCE_H_OTHER;
1921 }
1922 }
1923 }
1924 }
1925 break;
1926 case SCE_H_ATTRIBUTE:
1927 if (!setAttributeContinue.Contains(ch)) {
1928 if (inScriptType == eNonHtmlScript) {
1929 const int scriptLanguagePrev = scriptLanguage;
1930 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1931 scriptLanguage = clientScript;
1932 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1933 inScriptType = eHtml;
1934 }
1935 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1936 if (ch == '>') {
1937 styler.ColourTo(i, SCE_H_TAG);
1938 if (inScriptType == eNonHtmlScript) {
1939 state = StateForScript(scriptLanguage);
1940 } else {
1941 state = SCE_H_DEFAULT;
1942 }
1943 tagOpened = false;
1944 if (!(foldXmlAtTagOpen || tagDontFold)) {
1945 if (tagClosing) {
1946 levelCurrent--;
1947 } else {
1948 levelCurrent++;
1949 }
1950 }
1951 tagClosing = false;
1952 } else if (ch == '=') {
1953 styler.ColourTo(i, SCE_H_OTHER);
1954 state = SCE_H_VALUE;
1955 } else {
1956 state = SCE_H_OTHER;
1957 }
1958 }
1959 break;
1960 case SCE_H_OTHER:
1961 if (ch == '>') {
1962 styler.ColourTo(i - 1, StateToPrint);
1963 styler.ColourTo(i, SCE_H_TAG);
1964 if (inScriptType == eNonHtmlScript) {
1965 state = StateForScript(scriptLanguage);
1966 } else {
1967 state = SCE_H_DEFAULT;
1968 }
1969 tagOpened = false;
1970 if (!(foldXmlAtTagOpen || tagDontFold)) {
1971 if (tagClosing) {
1972 levelCurrent--;
1973 } else {
1974 levelCurrent++;
1975 }
1976 }
1977 tagClosing = false;
1978 } else if (ch == '\"') {
1979 styler.ColourTo(i - 1, StateToPrint);
1980 state = SCE_H_DOUBLESTRING;
1981 } else if (ch == '\'') {
1982 styler.ColourTo(i - 1, StateToPrint);
1983 state = SCE_H_SINGLESTRING;
1984 } else if (ch == '=') {
1985 styler.ColourTo(i, StateToPrint);
1986 state = SCE_H_VALUE;
1987 } else if (ch == '/' && chNext == '>') {
1988 styler.ColourTo(i - 1, StateToPrint);
1989 styler.ColourTo(i + 1, SCE_H_TAGEND);
1990 i++;
1991 ch = chNext;
1992 state = SCE_H_DEFAULT;
1993 tagOpened = false;
1994 if (foldXmlAtTagOpen) {
1995 levelCurrent--;
1996 }
1997 } else if (ch == '?' && chNext == '>') {
1998 styler.ColourTo(i - 1, StateToPrint);
1999 styler.ColourTo(i + 1, SCE_H_XMLEND);
2000 i++;
2001 ch = chNext;
2002 state = SCE_H_DEFAULT;
2003 } else if (setHTMLWord.Contains(ch)) {
2004 styler.ColourTo(i - 1, StateToPrint);
2005 state = SCE_H_ATTRIBUTE;
2006 }
2007 break;
2008 case SCE_H_DOUBLESTRING:
2009 if (ch == '\"') {
2010 if (inScriptType == eNonHtmlScript) {
2011 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
2012 }
2013 styler.ColourTo(i, SCE_H_DOUBLESTRING);
2014 state = SCE_H_OTHER;
2015 }
2016 break;
2017 case SCE_H_SINGLESTRING:
2018 if (ch == '\'') {
2019 if (inScriptType == eNonHtmlScript) {
2020 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
2021 }
2022 styler.ColourTo(i, SCE_H_SINGLESTRING);
2023 state = SCE_H_OTHER;
2024 }
2025 break;
2026 case SCE_H_VALUE:
2027 if (!setHTMLWord.Contains(ch)) {
2028 if (ch == '\"' && chPrev == '=') {
2029 // Should really test for being first character
2030 state = SCE_H_DOUBLESTRING;
2031 } else if (ch == '\'' && chPrev == '=') {
2032 state = SCE_H_SINGLESTRING;
2033 } else {
2034 if (IsNumber(styler.GetStartSegment(), styler)) {
2035 styler.ColourTo(i - 1, SCE_H_NUMBER);
2036 } else {
2037 styler.ColourTo(i - 1, StateToPrint);
2038 }
2039 if (ch == '>') {
2040 styler.ColourTo(i, SCE_H_TAG);
2041 if (inScriptType == eNonHtmlScript) {
2042 state = StateForScript(scriptLanguage);
2043 } else {
2044 state = SCE_H_DEFAULT;
2045 }
2046 tagOpened = false;
2047 if (!tagDontFold) {
2048 if (tagClosing) {
2049 levelCurrent--;
2050 } else {
2051 levelCurrent++;
2052 }
2053 }
2054 tagClosing = false;
2055 } else {
2056 state = SCE_H_OTHER;
2057 }
2058 }
2059 }
2060 break;
2061 case SCE_HJ_DEFAULT:
2062 case SCE_HJ_START:
2063 case SCE_HJ_SYMBOLS:
2064 if (IsAWordStart(ch)) {
2065 styler.ColourTo(i - 1, StateToPrint);
2066 state = SCE_HJ_WORD;
2067 } else if (ch == '/' && chNext == '*') {
2068 styler.ColourTo(i - 1, StateToPrint);
2069 if (chNext2 == '*')
2070 state = SCE_HJ_COMMENTDOC;
2071 else
2072 state = SCE_HJ_COMMENT;
2073 if (chNext2 == '/') {
2074 // Eat the * so it isn't used for the end of the comment
2075 i++;
2076 }
2077 } else if (ch == '/' && chNext == '/') {
2078 styler.ColourTo(i - 1, StateToPrint);
2079 state = SCE_HJ_COMMENTLINE;
2080 } else if (ch == '/' && setOKBeforeJSRE.Contains(chPrevNonWhite)) {
2081 styler.ColourTo(i - 1, StateToPrint);
2082 state = SCE_HJ_REGEX;
2083 } else if (ch == '\"') {
2084 styler.ColourTo(i - 1, StateToPrint);
2085 state = SCE_HJ_DOUBLESTRING;
2086 } else if (ch == '\'') {
2087 styler.ColourTo(i - 1, StateToPrint);
2088 state = SCE_HJ_SINGLESTRING;
2089 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2090 styler.SafeGetCharAt(i + 3) == '-') {
2091 styler.ColourTo(i - 1, StateToPrint);
2092 state = SCE_HJ_COMMENTLINE;
2093 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2094 styler.ColourTo(i - 1, StateToPrint);
2095 state = SCE_HJ_COMMENTLINE;
2096 i += 2;
2097 } else if (IsOperator(ch)) {
2098 styler.ColourTo(i - 1, StateToPrint);
2099 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2100 state = SCE_HJ_DEFAULT;
2101 } else if ((ch == ' ') || (ch == '\t')) {
2102 if (state == SCE_HJ_START) {
2103 styler.ColourTo(i - 1, StateToPrint);
2104 state = SCE_HJ_DEFAULT;
2105 }
2106 }
2107 break;
2108 case SCE_HJ_WORD:
2109 if (!IsAWordChar(ch)) {
2110 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
2111 //styler.ColourTo(i - 1, eHTJSKeyword);
2112 state = SCE_HJ_DEFAULT;
2113 if (ch == '/' && chNext == '*') {
2114 if (chNext2 == '*')
2115 state = SCE_HJ_COMMENTDOC;
2116 else
2117 state = SCE_HJ_COMMENT;
2118 } else if (ch == '/' && chNext == '/') {
2119 state = SCE_HJ_COMMENTLINE;
2120 } else if (ch == '\"') {
2121 state = SCE_HJ_DOUBLESTRING;
2122 } else if (ch == '\'') {
2123 state = SCE_HJ_SINGLESTRING;
2124 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2125 styler.ColourTo(i - 1, StateToPrint);
2126 state = SCE_HJ_COMMENTLINE;
2127 i += 2;
2128 } else if (IsOperator(ch)) {
2129 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2130 state = SCE_HJ_DEFAULT;
2131 }
2132 }
2133 break;
2134 case SCE_HJ_COMMENT:
2135 case SCE_HJ_COMMENTDOC:
2136 if (ch == '/' && chPrev == '*') {
2137 styler.ColourTo(i, StateToPrint);
2138 state = SCE_HJ_DEFAULT;
2139 ch = ' ';
2140 }
2141 break;
2142 case SCE_HJ_COMMENTLINE:
2143 if (ch == '\r' || ch == '\n') {
2144 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
2145 state = SCE_HJ_DEFAULT;
2146 ch = ' ';
2147 }
2148 break;
2149 case SCE_HJ_DOUBLESTRING:
2150 if (ch == '\\') {
2151 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2152 i++;
2153 }
2154 } else if (ch == '\"') {
2155 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
2156 state = SCE_HJ_DEFAULT;
2157 } else if (isLineEnd(ch)) {
2158 styler.ColourTo(i - 1, StateToPrint);
2159 state = SCE_HJ_STRINGEOL;
2160 }
2161 break;
2162 case SCE_HJ_SINGLESTRING:
2163 if (ch == '\\') {
2164 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2165 i++;
2166 }
2167 } else if (ch == '\'') {
2168 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
2169 state = SCE_HJ_DEFAULT;
2170 } else if (isLineEnd(ch)) {
2171 styler.ColourTo(i - 1, StateToPrint);
2172 if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
2173 state = SCE_HJ_STRINGEOL;
2174 }
2175 }
2176 break;
2177 case SCE_HJ_STRINGEOL:
2178 if (!isLineEnd(ch)) {
2179 styler.ColourTo(i - 1, StateToPrint);
2180 state = SCE_HJ_DEFAULT;
2181 } else if (!isLineEnd(chNext)) {
2182 styler.ColourTo(i, StateToPrint);
2183 state = SCE_HJ_DEFAULT;
2184 }
2185 break;
2186 case SCE_HJ_REGEX:
2187 if (ch == '\r' || ch == '\n' || ch == '/') {
2188 if (ch == '/') {
2189 while (IsASCII(chNext) && islower(chNext)) { // gobble regex flags
2190 i++;
2191 ch = chNext;
2192 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2193 }
2194 }
2195 styler.ColourTo(i, StateToPrint);
2196 state = SCE_HJ_DEFAULT;
2197 } else if (ch == '\\') {
2198 // Gobble up the quoted character
2199 if (chNext == '\\' || chNext == '/') {
2200 i++;
2201 ch = chNext;
2202 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2203 }
2204 }
2205 break;
2206 case SCE_HB_DEFAULT:
2207 case SCE_HB_START:
2208 if (IsAWordStart(ch)) {
2209 styler.ColourTo(i - 1, StateToPrint);
2210 state = SCE_HB_WORD;
2211 } else if (ch == '\'') {
2212 styler.ColourTo(i - 1, StateToPrint);
2213 state = SCE_HB_COMMENTLINE;
2214 } else if (ch == '\"') {
2215 styler.ColourTo(i - 1, StateToPrint);
2216 state = SCE_HB_STRING;
2217 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2218 styler.SafeGetCharAt(i + 3) == '-') {
2219 styler.ColourTo(i - 1, StateToPrint);
2220 state = SCE_HB_COMMENTLINE;
2221 } else if (IsOperator(ch)) {
2222 styler.ColourTo(i - 1, StateToPrint);
2223 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2224 state = SCE_HB_DEFAULT;
2225 } else if ((ch == ' ') || (ch == '\t')) {
2226 if (state == SCE_HB_START) {
2227 styler.ColourTo(i - 1, StateToPrint);
2228 state = SCE_HB_DEFAULT;
2229 }
2230 }
2231 break;
2232 case SCE_HB_WORD:
2233 if (!IsAWordChar(ch)) {
2234 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
2235 if (state == SCE_HB_DEFAULT) {
2236 if (ch == '\"') {
2237 state = SCE_HB_STRING;
2238 } else if (ch == '\'') {
2239 state = SCE_HB_COMMENTLINE;
2240 } else if (IsOperator(ch)) {
2241 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2242 state = SCE_HB_DEFAULT;
2243 }
2244 }
2245 }
2246 break;
2247 case SCE_HB_STRING:
2248 if (ch == '\"') {
2249 styler.ColourTo(i, StateToPrint);
2250 state = SCE_HB_DEFAULT;
2251 } else if (ch == '\r' || ch == '\n') {
2252 styler.ColourTo(i - 1, StateToPrint);
2253 state = SCE_HB_STRINGEOL;
2254 }
2255 break;
2256 case SCE_HB_COMMENTLINE:
2257 if (ch == '\r' || ch == '\n') {
2258 styler.ColourTo(i - 1, StateToPrint);
2259 state = SCE_HB_DEFAULT;
2260 }
2261 break;
2262 case SCE_HB_STRINGEOL:
2263 if (!isLineEnd(ch)) {
2264 styler.ColourTo(i - 1, StateToPrint);
2265 state = SCE_HB_DEFAULT;
2266 } else if (!isLineEnd(chNext)) {
2267 styler.ColourTo(i, StateToPrint);
2268 state = SCE_HB_DEFAULT;
2269 }
2270 break;
2271 case SCE_HP_DEFAULT:
2272 case SCE_HP_START:
2273 if (IsAWordStart(ch)) {
2274 styler.ColourTo(i - 1, StateToPrint);
2275 state = SCE_HP_WORD;
2276 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2277 styler.SafeGetCharAt(i + 3) == '-') {
2278 styler.ColourTo(i - 1, StateToPrint);
2279 state = SCE_HP_COMMENTLINE;
2280 } else if (ch == '#') {
2281 styler.ColourTo(i - 1, StateToPrint);
2282 state = SCE_HP_COMMENTLINE;
2283 } else if (ch == '\"') {
2284 styler.ColourTo(i - 1, StateToPrint);
2285 if (chNext == '\"' && chNext2 == '\"') {
2286 i += 2;
2287 state = SCE_HP_TRIPLEDOUBLE;
2288 ch = ' ';
2289 chPrev = ' ';
2290 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2291 } else {
2292 // state = statePrintForState(SCE_HP_STRING,inScriptType);
2293 state = SCE_HP_STRING;
2294 }
2295 } else if (ch == '\'') {
2296 styler.ColourTo(i - 1, StateToPrint);
2297 if (chNext == '\'' && chNext2 == '\'') {
2298 i += 2;
2299 state = SCE_HP_TRIPLE;
2300 ch = ' ';
2301 chPrev = ' ';
2302 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2303 } else {
2304 state = SCE_HP_CHARACTER;
2305 }
2306 } else if (IsOperator(ch)) {
2307 styler.ColourTo(i - 1, StateToPrint);
2308 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2309 } else if ((ch == ' ') || (ch == '\t')) {
2310 if (state == SCE_HP_START) {
2311 styler.ColourTo(i - 1, StateToPrint);
2312 state = SCE_HP_DEFAULT;
2313 }
2314 }
2315 break;
2316 case SCE_HP_WORD:
2317 if (!IsAWordChar(ch)) {
2318 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
2319 state = SCE_HP_DEFAULT;
2320 if (ch == '#') {
2321 state = SCE_HP_COMMENTLINE;
2322 } else if (ch == '\"') {
2323 if (chNext == '\"' && chNext2 == '\"') {
2324 i += 2;
2325 state = SCE_HP_TRIPLEDOUBLE;
2326 ch = ' ';
2327 chPrev = ' ';
2328 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2329 } else {
2330 state = SCE_HP_STRING;
2331 }
2332 } else if (ch == '\'') {
2333 if (chNext == '\'' && chNext2 == '\'') {
2334 i += 2;
2335 state = SCE_HP_TRIPLE;
2336 ch = ' ';
2337 chPrev = ' ';
2338 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2339 } else {
2340 state = SCE_HP_CHARACTER;
2341 }
2342 } else if (IsOperator(ch)) {
2343 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2344 }
2345 }
2346 break;
2347 case SCE_HP_COMMENTLINE:
2348 if (ch == '\r' || ch == '\n') {
2349 styler.ColourTo(i - 1, StateToPrint);
2350 state = SCE_HP_DEFAULT;
2351 }
2352 break;
2353 case SCE_HP_STRING:
2354 if (ch == '\\') {
2355 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2356 i++;
2357 ch = chNext;
2358 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2359 }
2360 } else if (ch == '\"') {
2361 styler.ColourTo(i, StateToPrint);
2362 state = SCE_HP_DEFAULT;
2363 }
2364 break;
2365 case SCE_HP_CHARACTER:
2366 if (ch == '\\') {
2367 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2368 i++;
2369 ch = chNext;
2370 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2371 }
2372 } else if (ch == '\'') {
2373 styler.ColourTo(i, StateToPrint);
2374 state = SCE_HP_DEFAULT;
2375 }
2376 break;
2377 case SCE_HP_TRIPLE:
2378 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
2379 styler.ColourTo(i, StateToPrint);
2380 state = SCE_HP_DEFAULT;
2381 }
2382 break;
2383 case SCE_HP_TRIPLEDOUBLE:
2384 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
2385 styler.ColourTo(i, StateToPrint);
2386 state = SCE_HP_DEFAULT;
2387 }
2388 break;
2389 ///////////// start - PHP state handling
2390 case SCE_HPHP_WORD:
2391 if (!IsPhpWordChar(ch)) {
2392 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
2393 if (ch == '/' && chNext == '*') {
2394 i++;
2395 state = SCE_HPHP_COMMENT;
2396 } else if (ch == '/' && chNext == '/') {
2397 i++;
2398 state = SCE_HPHP_COMMENTLINE;
2399 } else if (ch == '#' && chNext != '[') {
2400 state = SCE_HPHP_COMMENTLINE;
2401 } else if (ch == '\"') {
2402 state = SCE_HPHP_HSTRING;
2403 phpStringDelimiter = "\"";
2404 } else if (styler.Match(i, "<<<")) {
2405 bool isSimpleString = false;
2406 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2407 if (!phpStringDelimiter.empty()) {
2408 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2409 if (foldHeredoc) levelCurrent++;
2410 }
2411 } else if (ch == '\'') {
2412 state = SCE_HPHP_SIMPLESTRING;
2413 phpStringDelimiter = "\'";
2414 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2415 state = SCE_HPHP_VARIABLE;
2416 } else if (IsOperator(ch)) {
2417 state = SCE_HPHP_OPERATOR;
2418 } else {
2419 state = SCE_HPHP_DEFAULT;
2420 }
2421 }
2422 break;
2423 case SCE_HPHP_NUMBER:
2424 if (phpNumber.check(chNext, chNext2)) {
2425 styler.ColourTo(i, phpNumber.isInvalid() ? SCE_HPHP_DEFAULT : SCE_HPHP_NUMBER);
2426 state = SCE_HPHP_DEFAULT;
2427 }
2428 break;
2429 case SCE_HPHP_VARIABLE:
2430 if (!IsPhpWordChar(chNext)) {
2431 styler.ColourTo(i, SCE_HPHP_VARIABLE);
2432 state = SCE_HPHP_DEFAULT;
2433 }
2434 break;
2435 case SCE_HPHP_COMMENT:
2436 if (ch == '/' && chPrev == '*') {
2437 styler.ColourTo(i, StateToPrint);
2438 state = SCE_HPHP_DEFAULT;
2439 }
2440 break;
2441 case SCE_HPHP_COMMENTLINE:
2442 if (ch == '\r' || ch == '\n') {
2443 styler.ColourTo(i - 1, StateToPrint);
2444 state = SCE_HPHP_DEFAULT;
2445 }
2446 break;
2447 case SCE_HPHP_HSTRING:
2448 if (ch == '\\' && ((phpStringDelimiter == "\"") || chNext == '$' || chNext == '{')) {
2449 // skip the next char
2450 i++;
2451 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
2452 && IsPhpWordStart(chNext2)) {
2453 styler.ColourTo(i - 1, StateToPrint);
2454 state = SCE_HPHP_COMPLEX_VARIABLE;
2455 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2456 styler.ColourTo(i - 1, StateToPrint);
2457 state = SCE_HPHP_HSTRING_VARIABLE;
2458 } else if (styler.Match(i, phpStringDelimiter.c_str())) {
2459 if (phpStringDelimiter == "\"") {
2460 styler.ColourTo(i, StateToPrint);
2461 state = SCE_HPHP_DEFAULT;
2462 } else if (lineStartVisibleChars == 1) {
2463 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2464 if (!IsPhpWordChar(styler.SafeGetCharAt(i + psdLength))) {
2465 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2466 styler.ColourTo(i, StateToPrint);
2467 state = SCE_HPHP_DEFAULT;
2468 if (foldHeredoc) levelCurrent--;
2469 }
2470 }
2471 }
2472 break;
2473 case SCE_HPHP_SIMPLESTRING:
2474 if (phpStringDelimiter == "\'") {
2475 if (ch == '\\') {
2476 // skip the next char
2477 i++;
2478 } else if (ch == '\'') {
2479 styler.ColourTo(i, StateToPrint);
2480 state = SCE_HPHP_DEFAULT;
2481 }
2482 } else if (lineStartVisibleChars == 1 && styler.Match(i, phpStringDelimiter.c_str())) {
2483 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2484 if (!IsPhpWordChar(styler.SafeGetCharAt(i + psdLength))) {
2485 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2486 styler.ColourTo(i, StateToPrint);
2487 state = SCE_HPHP_DEFAULT;
2488 if (foldHeredoc) levelCurrent--;
2489 }
2490 }
2491 break;
2492 case SCE_HPHP_HSTRING_VARIABLE:
2493 if (!IsPhpWordChar(chNext)) {
2494 styler.ColourTo(i, StateToPrint);
2495 state = SCE_HPHP_HSTRING;
2496 }
2497 break;
2498 case SCE_HPHP_COMPLEX_VARIABLE:
2499 if (ch == '}') {
2500 styler.ColourTo(i, StateToPrint);
2501 state = SCE_HPHP_HSTRING;
2502 }
2503 break;
2504 case SCE_HPHP_OPERATOR:
2505 case SCE_HPHP_DEFAULT:
2506 styler.ColourTo(i - 1, StateToPrint);
2507 if (phpNumber.init(ch, chNext, chNext2)) {
2508 if (phpNumber.isFinished()) {
2509 styler.ColourTo(i, phpNumber.isInvalid() ? SCE_HPHP_DEFAULT : SCE_HPHP_NUMBER);
2510 state = SCE_HPHP_DEFAULT;
2511 } else {
2512 state = SCE_HPHP_NUMBER;
2513 }
2514 } else if (IsAWordStart(ch)) {
2515 state = SCE_HPHP_WORD;
2516 } else if (ch == '/' && chNext == '*') {
2517 i++;
2518 state = SCE_HPHP_COMMENT;
2519 } else if (ch == '/' && chNext == '/') {
2520 i++;
2521 state = SCE_HPHP_COMMENTLINE;
2522 } else if (ch == '#' && chNext != '[') {
2523 state = SCE_HPHP_COMMENTLINE;
2524 } else if (ch == '\"') {
2525 state = SCE_HPHP_HSTRING;
2526 phpStringDelimiter = "\"";
2527 } else if (styler.Match(i, "<<<")) {
2528 bool isSimpleString = false;
2529 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2530 if (!phpStringDelimiter.empty()) {
2531 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2532 if (foldHeredoc) levelCurrent++;
2533 }
2534 } else if (ch == '\'') {
2535 state = SCE_HPHP_SIMPLESTRING;
2536 phpStringDelimiter = "\'";
2537 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2538 state = SCE_HPHP_VARIABLE;
2539 } else if (IsOperator(ch)) {
2540 state = SCE_HPHP_OPERATOR;
2541 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2542 state = SCE_HPHP_DEFAULT;
2543 }
2544 break;
2545 ///////////// end - PHP state handling
2546 }
2547
2548 // Some of the above terminated their lexeme but since the same character starts
2549 // the same class again, only reenter if non empty segment.
2550
2551 const bool nonEmptySegment = i >= static_cast<Sci_Position>(styler.GetStartSegment());
2552 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2553 if ((ch == '\"') && (nonEmptySegment)) {
2554 state = SCE_HB_STRING;
2555 } else if (ch == '\'') {
2556 state = SCE_HB_COMMENTLINE;
2557 } else if (IsAWordStart(ch)) {
2558 state = SCE_HB_WORD;
2559 } else if (IsOperator(ch)) {
2560 styler.ColourTo(i, SCE_HB_DEFAULT);
2561 }
2562 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2563 if ((ch == '\"') && (nonEmptySegment)) {
2564 state = SCE_HBA_STRING;
2565 } else if (ch == '\'') {
2566 state = SCE_HBA_COMMENTLINE;
2567 } else if (IsAWordStart(ch)) {
2568 state = SCE_HBA_WORD;
2569 } else if (IsOperator(ch)) {
2570 styler.ColourTo(i, SCE_HBA_DEFAULT);
2571 }
2572 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2573 if (ch == '/' && chNext == '*') {
2574 if (styler.SafeGetCharAt(i + 2) == '*')
2575 state = SCE_HJ_COMMENTDOC;
2576 else
2577 state = SCE_HJ_COMMENT;
2578 } else if (ch == '/' && chNext == '/') {
2579 state = SCE_HJ_COMMENTLINE;
2580 } else if ((ch == '\"') && (nonEmptySegment)) {
2581 state = SCE_HJ_DOUBLESTRING;
2582 } else if ((ch == '\'') && (nonEmptySegment)) {
2583 state = SCE_HJ_SINGLESTRING;
2584 } else if (IsAWordStart(ch)) {
2585 state = SCE_HJ_WORD;
2586 } else if (IsOperator(ch)) {
2587 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2588 }
2589 }
2590 }
2591
2592 switch (state) {
2593 case SCE_HJ_WORD:
2594 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2595 break;
2596 case SCE_HB_WORD:
2597 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2598 break;
2599 case SCE_HP_WORD:
2600 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType, isMako);
2601 break;
2602 case SCE_HPHP_WORD:
2603 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2604 break;
2605 default:
2606 StateToPrint = statePrintForState(state, inScriptType);
2607 if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc)
2608 styler.ColourTo(lengthDoc - 1, StateToPrint);
2609 break;
2610 }
2611
2612 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2613 if (fold) {
2614 const int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2615 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2616 }
2617 styler.Flush();
2618}
2619
// Lexer module objects for the lexers implemented by LexerHTML.
// Each definition passes four arguments to LexerModule: a SCLEX_* language
// identifier, a LexerHTML factory function, a lexer name string and a word list
// description array (htmlWordListDesc / phpscriptWordListDesc are not defined
// in this part of the file).
// NOTE(review): the LexerModule constructor is declared in LexerModule.h and is
// not visible here - presumably the name string is what hosts use to look the
// lexer up; confirm before renaming any of them.
// HTML: SCLEX_HTML, created through LexerFactoryHTML, named "hypertext".
2620LexerModule lmHTML(SCLEX_HTML, LexerHTML::LexerFactoryHTML, "hypertext", htmlWordListDesc);
// XML: SCLEX_XML, created through LexerFactoryXML; uses the same word list descriptions as HTML.
2621LexerModule lmXML(SCLEX_XML, LexerHTML::LexerFactoryXML, "xml", htmlWordListDesc);
// PHP script: SCLEX_PHPSCRIPT, created through LexerFactoryPHPScript, with its own word list descriptions.
2622LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, LexerHTML::LexerFactoryPHPScript, "phpscript", phpscriptWordListDesc);
2623