1// Scintilla source code edit control
2/** @file LexJAVA.cxx
3 ** Lexer for C++, C, Java, and JavaScript.
4 ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7// The License.txt file describes the conditions under which this software may be distributed.
8
9#include <cstdlib>
10#include <cassert>
11#include <cstring>
12
13#include <utility>
14#include <string>
15#include <string_view>
16#include <vector>
17#include <map>
18#include <algorithm>
19#include <iterator>
20#include <functional>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25
26#include "StringCopy.h"
27#include "WordList.h"
28#include "LexAccessor.h"
29#include "Accessor.h"
30#include "StyleContext.h"
31#include "CharacterSet.h"
32#include "LexerModule.h"
33#include "OptionSet.h"
34#include "SparseState.h"
35#include "SubStyles.h"
36
37using namespace Scintilla;
38using namespace Lexilla;
39
40namespace {
41 // Use an unnamed namespace to protect the functions and classes from name conflicts
42
43constexpr bool IsSpaceEquiv(int state) noexcept {
44 return (state <= SCE_JAVA_COMMENTDOC) ||
45 // including SCE_JAVA_DEFAULT, SCE_JAVA_COMMENT, SCE_JAVA_COMMENTLINE
46 (state == SCE_JAVA_COMMENTLINEDOC) || (state == SCE_JAVA_COMMENTDOCKEYWORD) ||
47 (state == SCE_JAVA_COMMENTDOCKEYWORDERROR);
48}
49
50// Preconditions: sc.currentPos points to a character after '+' or '-'.
51// The test for pos reaching 0 should be redundant,
52// and is in only for safety measures.
53// Limitation: this code will give the incorrect answer for code like
54// a = b+++/ptn/...
55// Putting a space between the '++' post-inc operator and the '+' binary op
56// fixes this, and is highly recommended for readability anyway.
57bool FollowsPostfixOperator(const StyleContext &sc, LexAccessor &styler) {
58 Sci_Position pos = sc.currentPos;
59 while (--pos > 0) {
60 const char ch = styler[pos];
61 if (ch == '+' || ch == '-') {
62 return styler[pos - 1] == ch;
63 }
64 }
65 return false;
66}
67
68bool followsReturnKeyword(const StyleContext &sc, LexAccessor &styler) {
69 // Don't look at styles, so no need to flush.
70 Sci_Position pos = sc.currentPos;
71 const Sci_Position currentLine = styler.GetLine(pos);
72 const Sci_Position lineStartPos = styler.LineStart(currentLine);
73 while (--pos > lineStartPos) {
74 const char ch = styler.SafeGetCharAt(pos);
75 if (ch != ' ' && ch != '\t') {
76 break;
77 }
78 }
79 const char *retBack = "nruter";
80 const char *s = retBack;
81 while (*s
82 && pos >= lineStartPos
83 && styler.SafeGetCharAt(pos) == *s) {
84 s++;
85 pos--;
86 }
87 return !*s;
88}
89
// True when ch is a horizontal blank: a space or a tab character.
constexpr bool IsSpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
93
94bool OnlySpaceOrTab(const std::string &s) noexcept {
95 for (const char ch : s) {
96 if (!IsSpaceOrTab(ch))
97 return false;
98 }
99 return true;
100}
101
// Splits text into the fields delimited by separator.
// Empty text produces no fields at all; otherwise there is always one more
// field than there are separators, so leading, trailing and adjacent
// separators produce empty fields.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
	std::vector<std::string> fields;
	if (text.empty()) {
		return fields;
	}
	std::string::const_iterator fieldStart = text.begin();
	for (std::string::const_iterator it = text.begin(); it != text.end(); ++it) {
		// The character is promoted to int for the comparison with separator.
		if (*it == separator) {
			fields.emplace_back(fieldStart, it);
			fieldStart = it + 1;
		}
	}
	// Whatever follows the last separator (possibly nothing) is the final field.
	fields.emplace_back(fieldStart, text.end());
	return fields;
}
113
// A pair of positions inside a token list: the opening "(" token and the
// ")" token that closes it.
struct BracketPair {
	using TokenIterator = std::vector<std::string>::iterator;
	// Position of the opening bracket token.
	TokenIterator itBracket;
	// Position of the matching closing bracket token.
	TokenIterator itEndBracket;
};
118
119BracketPair FindBracketPair(std::vector<std::string> &tokens) {
120 BracketPair bp;
121 std::vector<std::string>::iterator itTok = std::find(tokens.begin(), tokens.end(), "(");
122 bp.itBracket = tokens.end();
123 bp.itEndBracket = tokens.end();
124 if (itTok != tokens.end()) {
125 bp.itBracket = itTok;
126 size_t nest = 0;
127 while (itTok != tokens.end()) {
128 if (*itTok == "(") {
129 nest++;
130 } else if (*itTok == ")") {
131 nest--;
132 if (nest == 0) {
133 bp.itEndBracket = itTok;
134 return bp;
135 }
136 }
137 ++itTok;
138 }
139 }
140 bp.itBracket = tokens.end();
141 return bp;
142}
143
144void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
145 int activity, const WordList &markerList, bool caseSensitive){
146 if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
147 constexpr Sci_PositionU lengthMarker = 50;
148 char marker[lengthMarker+1] = "";
149 const Sci_PositionU currPos = sc.currentPos;
150 Sci_PositionU i = 0;
151 while (i < lengthMarker) {
152 const char ch = styler.SafeGetCharAt(currPos + i);
153 if (IsASpace(ch) || isoperator(ch)) {
154 break;
155 }
156 if (caseSensitive)
157 marker[i] = ch;
158 else
159 marker[i] = MakeLowerCase(ch);
160 i++;
161 }
162 marker[i] = '\0';
163 if (markerList.InList(marker)) {
164 sc.SetState(SCE_JAVA_TASKMARKER|activity);
165 }
166 }
167}
168
169class EscapeSequence {
170 const CharacterSet setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
171 const CharacterSet setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
172 const CharacterSet setNoneNumeric;
173 const CharacterSet *escapeSetValid = nullptr;
174 int digitsLeft = 0;
175public:
176 EscapeSequence() = default;
177 void resetEscapeState(int nextChar) {
178 digitsLeft = 0;
179 escapeSetValid = &setNoneNumeric;
180 if (nextChar == 'U') {
181 digitsLeft = 9;
182 escapeSetValid = &setHexDigits;
183 } else if (nextChar == 'u') {
184 digitsLeft = 5;
185 escapeSetValid = &setHexDigits;
186 } else if (nextChar == 'x') {
187 digitsLeft = 5;
188 escapeSetValid = &setHexDigits;
189 } else if (setOctDigits.Contains(nextChar)) {
190 digitsLeft = 3;
191 escapeSetValid = &setOctDigits;
192 }
193 }
194 bool atEscapeEnd(int currChar) const {
195 return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
196 }
197 void consumeDigit() noexcept {
198 digitsLeft--;
199 }
200};
201
202std::string GetRestOfLine(LexAccessor &styler, Sci_Position start, bool allowSpace) {
203 std::string restOfLine;
204 Sci_Position line = styler.GetLine(start);
205 Sci_Position pos = start;
206 Sci_Position endLine = styler.LineEnd(line);
207 char ch = styler.SafeGetCharAt(start, '\n');
208 while (pos < endLine) {
209 if (ch == '\\' && ((pos + 1) == endLine)) {
210 // Continuation line
211 line++;
212 pos = styler.LineStart(line);
213 endLine = styler.LineEnd(line);
214 ch = styler.SafeGetCharAt(pos, '\n');
215 } else {
216 const char chNext = styler.SafeGetCharAt(pos + 1, '\n');
217 if (ch == '/' && (chNext == '/' || chNext == '*'))
218 break;
219 if (allowSpace || (ch != ' ')) {
220 restOfLine += ch;
221 }
222 pos++;
223 ch = chNext;
224 }
225 }
226 return restOfLine;
227}
228
229constexpr bool IsStreamCommentStyle(int style) noexcept {
230 return style == SCE_JAVA_COMMENT ||
231 style == SCE_JAVA_COMMENTDOC ||
232 style == SCE_JAVA_COMMENTDOCKEYWORD ||
233 style == SCE_JAVA_COMMENTDOCKEYWORDERROR;
234}
235
236struct PPDefinition {
237 Sci_Position line;
238 std::string key;
239 std::string value;
240 bool isUndef;
241 std::string arguments;
242 PPDefinition(Sci_Position line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, const std::string &arguments_="") :
243 line(line_), key(key_), value(value_), isUndef(isUndef_), arguments(arguments_) {
244 }
245};
246
// Bit OR-ed into a style number to select the "inactive" variant of that style.
constexpr int inactiveFlag = 0x40;

// Tracks the state of preprocessor conditionals to allow showing active and
// inactive code in different styles.
// Only works up to 31 levels of conditional nesting; deeper levels are
// counted (so that matching #endif lines still balance) but are not recorded.
class LinePPState {
	// state is a bit mask with 1 bit per level
	// bit is 1 for level if section inactive, so any bits set = inactive style
	int state = 0;
	// ifTaken is a bit mask with 1 bit per level
	// bit is 1 for level if some branch at this level has been taken
	int ifTaken = 0;
	// level is the nesting level of #if constructs; -1 means outside any #if
	int level = -1;
	static const int maximumNestingLevel = 31;
	// Bit for the current nesting level.
	// Outside any conditional (level < 0) bit 0 is returned. For levels at or
	// beyond maximumNestingLevel the result is 0: nothing is recorded for
	// those levels, and shifting by such a count would overflow or exceed the
	// width of int, which is undefined behaviour.
	int maskLevel() const noexcept {
		if (level < 0) {
			return 1;
		}
		if (level >= maximumNestingLevel) {
			return 0;
		}
		return 1 << level;
	}
public:
	LinePPState() noexcept {
	}
	// True when the current nesting level is one of the recorded levels.
	bool ValidLevel() const noexcept {
		return level >= 0 && level < maximumNestingLevel;
	}
	// True when no enclosing recorded level is inactive.
	bool IsActive() const noexcept {
		return state == 0;
	}
	// True when at least one enclosing recorded level is inactive.
	bool IsInactive() const noexcept {
		return state != 0;
	}
	// Value to OR into a style: inactiveFlag when inactive, otherwise 0.
	int ActiveState() const noexcept {
		return state ? inactiveFlag : 0;
	}
	// True when some branch at the current level has already been taken.
	// Always false for levels nested too deeply to be recorded.
	bool CurrentIfTaken() const noexcept {
		return (ifTaken & maskLevel()) != 0;
	}
	// Enters a new conditional level; on is true when its first branch is active.
	void StartSection(bool on) noexcept {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Leaves the current conditional level, clearing what was recorded for it.
	void EndSection() noexcept {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flips the active/inactive bit of the current level and marks the level
	// as having had a branch taken.
	void InvertCurrentLevel() noexcept {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
314
315// Hold the preprocessor state for each line seen.
316// Currently one entry per line but could become sparse with just one entry per preprocessor line.
317class PPStates {
318 std::vector<LinePPState> vlls;
319public:
320 LinePPState ForLine(Sci_Position line) const noexcept {
321 if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
322 return vlls[line];
323 } else {
324 return LinePPState();
325 }
326 }
327 void Add(Sci_Position line, LinePPState lls) {
328 vlls.resize(line+1);
329 vlls[line] = lls;
330 }
331};
332
333// An individual named option for use in an OptionSet
334
// Options used for LexerJAVA.
// Each member is bound to a named property by OptionSetJAVA; the values
// given here are the defaults in effect before any property is set.
struct OptionsJAVA {
	// Lexing options.
	bool stylingWithinPreprocessor = false;
	bool identifiersAllowDollars = true;
	bool trackPreprocessor = true;
	bool updatePreprocessor = true;
	bool verbatimStringsAllowEscapes = false;
	bool triplequotedStrings = false;
	bool hashquotedStrings = false;
	bool backQuotedStrings = false;
	bool escapeSequence = false;
	// Folding options.
	bool fold = false;
	bool foldSyntaxBased = true;
	bool foldComment = false;
	bool foldCommentMultiline = true;
	bool foldCommentExplicit = true;
	// Both explicit fold markers start out empty.
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere = false;
	bool foldPreprocessor = false;
	bool foldPreprocessorAtElse = false;
	bool foldCompact = false;
	bool foldAtElse = false;

	OptionsJAVA() = default;
};
382
// Descriptions of the word lists accepted by the lexer, terminated by a null
// pointer. The position of each entry is the word list number.
const char *const javaWordLists[] = {
	/* 0 */ "Primary keywords and identifiers",
	/* 1 */ "Secondary keywords and identifiers",
	/* 2 */ "Documentation comment keywords",
	/* 3 */ "Global classes and typedefs",
	/* 4 */ "Preprocessor definitions",
	/* 5 */ "Task marker and error marker keywords",
	nullptr
};
392
393struct OptionSetJAVA : public OptionSet<OptionsJAVA> {
394 OptionSetJAVA() {
395 DefineProperty("styling.within.preprocessor", &OptionsJAVA::stylingWithinPreprocessor,
396 "For C++ code, determines whether all preprocessor code is styled in the "
397 "preprocessor style (0, the default) or only from the initial # to the end "
398 "of the command word(1).");
399
400 DefineProperty("lexer.java.allow.dollars", &OptionsJAVA::identifiersAllowDollars,
401 "Set to 0 to disallow the '$' character in identifiers with the java lexer.");
402
403 DefineProperty("lexer.java.track.preprocessor", &OptionsJAVA::trackPreprocessor,
404 "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
405
406 DefineProperty("lexer.java.update.preprocessor", &OptionsJAVA::updatePreprocessor,
407 "Set to 1 to update preprocessor definitions when #define found.");
408
409 DefineProperty("lexer.java.verbatim.strings.allow.escapes", &OptionsJAVA::verbatimStringsAllowEscapes,
410 "Set to 1 to allow verbatim strings to contain escape sequences.");
411
412 DefineProperty("lexer.java.triplequoted.strings", &OptionsJAVA::triplequotedStrings,
413 "Set to 1 to enable highlighting of triple-quoted strings.");
414
415 DefineProperty("lexer.java.hashquoted.strings", &OptionsJAVA::hashquotedStrings,
416 "Set to 1 to enable highlighting of hash-quoted strings.");
417
418 DefineProperty("lexer.java.backquoted.strings", &OptionsJAVA::backQuotedStrings,
419 "Set to 1 to enable highlighting of back-quoted raw strings .");
420
421 DefineProperty("lexer.java.escape.sequence", &OptionsJAVA::escapeSequence,
422 "Set to 1 to enable highlighting of escape sequences in strings");
423
424 DefineProperty("fold", &OptionsJAVA::fold);
425
426 DefineProperty("fold.java.syntax.based", &OptionsJAVA::foldSyntaxBased,
427 "Set this property to 0 to disable syntax based folding.");
428
429 DefineProperty("fold.comment", &OptionsJAVA::foldComment,
430 "This option enables folding multi-line comments and explicit fold points when using the Java lexer. "
431 "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
432 "at the end of a section that should fold.");
433
434 DefineProperty("fold.java.comment.multiline", &OptionsJAVA::foldCommentMultiline,
435 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
436
437 DefineProperty("fold.java.comment.explicit", &OptionsJAVA::foldCommentExplicit,
438 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
439
440 DefineProperty("fold.java.explicit.start", &OptionsJAVA::foldExplicitStart,
441 "The string to use for explicit fold start points, replacing the standard //{.");
442
443 DefineProperty("fold.java.explicit.end", &OptionsJAVA::foldExplicitEnd,
444 "The string to use for explicit fold end points, replacing the standard //}.");
445
446 DefineProperty("fold.java.explicit.anywhere", &OptionsJAVA::foldExplicitAnywhere,
447 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
448
449 DefineProperty("fold.java.preprocessor.at.else", &OptionsJAVA::foldPreprocessorAtElse,
450 "This option enables folding on a preprocessor #else or #endif line of an #if statement.");
451
452 DefineProperty("fold.preprocessor", &OptionsJAVA::foldPreprocessor,
453 "This option enables folding preprocessor directives when using the Java lexer. "
454 "Includes C#'s explicit #region and #endregion folding directives.");
455
456 DefineProperty("fold.compact", &OptionsJAVA::foldCompact);
457
458 DefineProperty("fold.at.else", &OptionsJAVA::foldAtElse,
459 "This option enables Java folding on a \"} else {\" line of an if statement.");
460
461 DefineWordListSets(javaWordLists);
462 }
463};
464
465const char styleSubable[] = {SCE_JAVA_IDENTIFIER, SCE_JAVA_COMMENTDOCKEYWORD, 0};
466
467LexicalClass lexicalClasses[] = {
468 // Lexer Java SCLEX_JAVA SCE_JAVA_:
469 0, "SCE_JAVA_DEFAULT", "default", "White space",
470 1, "SCE_JAVA_COMMENT", "comment", "Comment: /* */.",
471 2, "SCE_JAVA_COMMENTLINE", "comment line", "Line Comment: //.",
472 3, "SCE_JAVA_COMMENTDOC", "comment documentation", "Doc comment: block comments beginning with /** or /*!",
473 4, "SCE_JAVA_NUMBER", "literal numeric", "Number",
474 5, "SCE_JAVA_WORD", "keyword", "Keyword",
475 6, "SCE_JAVA_STRING", "literal string", "Double quoted string",
476 7, "SCE_JAVA_CHARACTER", "literal string character", "Single quoted string",
477 8, "SCE_JAVA_UUID", "literal uuid", "UUIDs (only in IDL)",
478 9, "SCE_JAVA_PREPROCESSOR", "preprocessor", "Preprocessor",
479 10, "SCE_JAVA_OPERATOR", "operator", "Operators",
480 11, "SCE_JAVA_IDENTIFIER", "identifier", "Identifiers",
481 12, "SCE_JAVA_STRINGEOL", "error literal string", "End of line where string is not closed",
482 13, "SCE_JAVA_VERBATIM", "literal string multiline raw", "Verbatim strings for C#",
483 14, "SCE_JAVA_REGEX", "literal regex", "Regular expressions for JavaScript",
484 15, "SCE_JAVA_COMMENTLINEDOC", "comment documentation line", "Doc Comment Line: line comments beginning with /// or //!.",
485 16, "SCE_JAVA_WORD2", "identifier", "Keywords2",
486 17, "SCE_JAVA_COMMENTDOCKEYWORD", "comment documentation keyword", "Comment keyword",
487 18, "SCE_JAVA_COMMENTDOCKEYWORDERROR", "error comment documentation keyword", "Comment keyword error",
488 19, "SCE_JAVA_GLOBALCLASS", "identifier", "Global class",
489 20, "SCE_JAVA_STRINGRAW", "literal string multiline raw", "Raw strings for C++0x",
490 21, "SCE_JAVA_TRIPLEVERBATIM", "literal string multiline raw", "Triple-quoted strings for Vala",
491 22, "SCE_JAVA_HASHQUOTEDSTRING", "literal string", "Hash-quoted strings for Pike",
492 23, "SCE_JAVA_PREPROCESSORCOMMENT", "comment preprocessor", "Preprocessor stream comment",
493 24, "SCE_JAVA_PREPROCESSORCOMMENTDOC", "comment preprocessor documentation", "Preprocessor stream doc comment",
494 25, "SCE_JAVA_USERLITERAL", "literal", "User defined literals",
495 26, "SCE_JAVA_TASKMARKER", "comment taskmarker", "Task Marker",
496 27, "SCE_JAVA_ESCAPESEQUENCE", "literal string escapesequence", "Escape sequence",
497};
498
499const int sizeLexicalClasses = static_cast<int>(std::size(lexicalClasses));
500
501}
502
503class LexerJAVA : public ILexer5 {
504 bool caseSensitive;
505 CharacterSet setWord;
506 CharacterSet setNegationOp;
507 CharacterSet setAddOp;
508 CharacterSet setMultOp;
509 CharacterSet setRelOp;
510 CharacterSet setLogicalOp;
511 CharacterSet setWordStart;
512 PPStates vlls;
513 std::vector<PPDefinition> ppDefineHistory;
514 WordList keywords;
515 WordList keywords2;
516 WordList keywords3;
517 WordList keywords4;
518 WordList ppDefinitions;
519 WordList markerList;
520 struct SymbolValue {
521 std::string value;
522 std::string arguments;
523 SymbolValue() noexcept = default;
524 SymbolValue(const std::string &value_, const std::string &arguments_) : value(value_), arguments(arguments_) {
525 }
526 SymbolValue &operator = (const std::string &value_) {
527 value = value_;
528 arguments.clear();
529 return *this;
530 }
531 bool IsMacro() const noexcept {
532 return !arguments.empty();
533 }
534 };
535 typedef std::map<std::string, SymbolValue> SymbolTable;
536 SymbolTable preprocessorDefinitionsStart;
537 OptionsJAVA options;
538 OptionSetJAVA osJava;
539 EscapeSequence escapeSeq;
540 SparseState<std::string> rawStringTerminators;
541 enum { ssIdentifier, ssDocKeyword };
542 SubStyles subStyles;
543 std::string returnBuffer;
544public:
545 explicit LexerJAVA(bool caseSensitive_) :
546 caseSensitive(caseSensitive_),
547 setWord(CharacterSet::setAlphaNum, "._", true),
548 setNegationOp(CharacterSet::setNone, "!"),
549 setAddOp(CharacterSet::setNone, "+-"),
550 setMultOp(CharacterSet::setNone, "*/%"),
551 setRelOp(CharacterSet::setNone, "=!<>"),
552 setLogicalOp(CharacterSet::setNone, "|&"),
553 subStyles(styleSubable, 0x80, 0x40, inactiveFlag) {
554 }
555 // Deleted so LexerJAVA objects can not be copied.
556 LexerJAVA(const LexerJAVA &) = delete;
557 LexerJAVA(LexerJAVA &&) = delete;
558 void operator=(const LexerJAVA &) = delete;
559 void operator=(LexerJAVA &&) = delete;
560 virtual ~LexerJAVA() {
561 }
562 void SCI_METHOD Release() noexcept override {
563 delete this;
564 }
565 int SCI_METHOD Version() const noexcept override {
566 return lvRelease5;
567 }
568 const char * SCI_METHOD PropertyNames() override {
569 return osJava.PropertyNames();
570 }
571 int SCI_METHOD PropertyType(const char *name) override {
572 return osJava.PropertyType(name);
573 }
574 const char * SCI_METHOD DescribeProperty(const char *name) override {
575 return osJava.DescribeProperty(name);
576 }
577 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
578 const char * SCI_METHOD DescribeWordListSets() override {
579 return osJava.DescribeWordListSets();
580 }
581 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
582 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
583 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
584
585 void * SCI_METHOD PrivateCall(int, void *) noexcept override {
586 return nullptr;
587 }
588
589 int SCI_METHOD LineEndTypesSupported() noexcept override {
590 return SC_LINE_END_TYPE_UNICODE;
591 }
592
593 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
594 return subStyles.Allocate(styleBase, numberStyles);
595 }
596 int SCI_METHOD SubStylesStart(int styleBase) override {
597 return subStyles.Start(styleBase);
598 }
599 int SCI_METHOD SubStylesLength(int styleBase) override {
600 return subStyles.Length(styleBase);
601 }
602 int SCI_METHOD StyleFromSubStyle(int subStyle) override {
603 const int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
604 const int inactive = subStyle & inactiveFlag;
605 return styleBase | inactive;
606 }
607 int SCI_METHOD PrimaryStyleFromStyle(int style) noexcept override {
608 return MaskActive(style);
609 }
610 void SCI_METHOD FreeSubStyles() override {
611 subStyles.Free();
612 }
613 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
614 subStyles.SetIdentifiers(style, identifiers);
615 }
616 int SCI_METHOD DistanceToSecondaryStyles() noexcept override {
617 return inactiveFlag;
618 }
619 const char * SCI_METHOD GetSubStyleBases() noexcept override {
620 return styleSubable;
621 }
622 int SCI_METHOD NamedStyles() override {
623 return std::max(subStyles.LastAllocated() + 1,
624 sizeLexicalClasses) +
625 inactiveFlag;
626 }
627 const char * SCI_METHOD NameOfStyle(int style) override {
628 if (style >= NamedStyles())
629 return "";
630 if (style < sizeLexicalClasses)
631 return lexicalClasses[style].name;
632 // TODO: inactive and substyles
633 return "";
634 }
635 const char * SCI_METHOD TagsOfStyle(int style) override {
636 if (style >= NamedStyles())
637 return "Excess";
638 returnBuffer.clear();
639 const int firstSubStyle = subStyles.FirstAllocated();
640 if (firstSubStyle >= 0) {
641 const int lastSubStyle = subStyles.LastAllocated();
642 if (((style >= firstSubStyle) && (style <= (lastSubStyle))) ||
643 ((style >= firstSubStyle + inactiveFlag) && (style <= (lastSubStyle + inactiveFlag)))) {
644 int styleActive = style;
645 if (style > lastSubStyle) {
646 returnBuffer = "inactive ";
647 styleActive -= inactiveFlag;
648 }
649 const int styleMain = StyleFromSubStyle(styleActive);
650 returnBuffer += lexicalClasses[styleMain].tags;
651 return returnBuffer.c_str();
652 }
653 }
654 if (style < sizeLexicalClasses)
655 return lexicalClasses[style].tags;
656 if (style >= inactiveFlag) {
657 returnBuffer = "inactive ";
658 const int styleActive = style - inactiveFlag;
659 if (styleActive < sizeLexicalClasses)
660 returnBuffer += lexicalClasses[styleActive].tags;
661 else
662 returnBuffer = "";
663 return returnBuffer.c_str();
664 }
665 return "";
666 }
667 const char * SCI_METHOD DescriptionOfStyle(int style) override {
668 if (style >= NamedStyles())
669 return "";
670 if (style < sizeLexicalClasses)
671 return lexicalClasses[style].description;
672 // TODO: inactive and substyles
673 return "";
674 }
675
676 // ILexer5 methods
677 const char * SCI_METHOD GetName() override {
678 return caseSensitive ? "java" : "javanocase";
679 }
680 int SCI_METHOD GetIdentifier() override {
681 return caseSensitive ? SCLEX_JAVA : SCLEX_JAVANOCASE;
682 }
683 const char * SCI_METHOD PropertyGet(const char *key) override;
684
685 static ILexer5 *LexerFactoryJAVA() {
686 return new LexerJAVA(true);
687 }
688 static ILexer5 *LexerFactoryJAVAInsensitive() {
689 return new LexerJAVA(false);
690 }
691 constexpr static int MaskActive(int style) noexcept {
692 return style & ~inactiveFlag;
693 }
694 void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
695 std::vector<std::string> Tokenize(const std::string &expr) const;
696 bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
697};
698
699Sci_Position SCI_METHOD LexerJAVA::PropertySet(const char *key, const char *val) {
700 if (osJava.PropertySet(&options, key, val)) {
701 if (strcmp(key, "lexer.java.allow.dollars") == 0) {
702 setWord = CharacterSet(CharacterSet::setAlphaNum, "._", true);
703 if (options.identifiersAllowDollars) {
704 setWord.Add('$');
705 }
706 }
707 return 0;
708 }
709 return -1;
710}
711
712const char * SCI_METHOD LexerJAVA::PropertyGet(const char *key) {
713 return osJava.PropertyGet(key);
714}
715
716Sci_Position SCI_METHOD LexerJAVA::WordListSet(int n, const char *wl) {
717 WordList *wordListN = nullptr;
718 switch (n) {
719 case 0:
720 wordListN = &keywords;
721 break;
722 case 1:
723 wordListN = &keywords2;
724 break;
725 case 2:
726 wordListN = &keywords3;
727 break;
728 case 3:
729 wordListN = &keywords4;
730 break;
731 case 4:
732 wordListN = &ppDefinitions;
733 break;
734 case 5:
735 wordListN = &markerList;
736 break;
737 }
738 Sci_Position firstModification = -1;
739 if (wordListN) {
740 WordList wlNew;
741 wlNew.Set(wl);
742 if (*wordListN != wlNew) {
743 wordListN->Set(wl);
744 firstModification = 0;
745 if (n == 4) {
746 // Rebuild preprocessorDefinitions
747 preprocessorDefinitionsStart.clear();
748 for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
749 const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
750 const char *cpEquals = strchr(cpDefinition, '=');
751 if (cpEquals) {
752 std::string name(cpDefinition, cpEquals - cpDefinition);
753 std::string val(cpEquals+1);
754 const size_t bracket = name.find('(');
755 const size_t bracketEnd = name.find(')');
756 if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
757 // Macro
758 std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
759 name = name.substr(0, bracket);
760 preprocessorDefinitionsStart[name] = SymbolValue(val, args);
761 } else {
762 preprocessorDefinitionsStart[name] = val;
763 }
764 } else {
765 std::string name(cpDefinition);
766 std::string val("1");
767 preprocessorDefinitionsStart[name] = val;
768 }
769 }
770 }
771 }
772 }
773 return firstModification;
774}
775
776void SCI_METHOD LexerJAVA::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
777 LexAccessor styler(pAccess);
778
779 CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
780 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
781
782 CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
783
784 setWordStart = CharacterSet(CharacterSet::setAlpha, "_", true);
785
786 CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
787
788 if (options.identifiersAllowDollars) {
789 setWordStart.Add('$');
790 }
791
792 int chPrevNonWhite = ' ';
793 int visibleChars = 0;
794 bool lastWordWasUUID = false;
795 int styleBeforeDCKeyword = SCE_JAVA_DEFAULT;
796 int styleBeforeTaskMarker = SCE_JAVA_DEFAULT;
797 bool continuationLine = false;
798 bool isIncludePreprocessor = false;
799 bool isStringInPreprocessor = false;
800 bool inRERange = false;
801 bool seenDocKeyBrace = false;
802
803 Sci_Position lineCurrent = styler.GetLine(startPos);
804 if ((MaskActive(initStyle) == SCE_JAVA_PREPROCESSOR) ||
805 (MaskActive(initStyle) == SCE_JAVA_COMMENTLINE) ||
806 (MaskActive(initStyle) == SCE_JAVA_COMMENTLINEDOC)) {
807 // Set continuationLine if last character of previous line is '\'
808 if (lineCurrent > 0) {
809 const Sci_Position endLinePrevious = styler.LineEnd(lineCurrent - 1);
810 if (endLinePrevious > 0) {
811 continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
812 }
813 }
814 }
815
816 // look back to set chPrevNonWhite properly for better regex colouring
817 if (startPos > 0) {
818 Sci_Position back = startPos;
819 while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
820 ;
821 if (MaskActive(styler.StyleAt(back)) == SCE_JAVA_OPERATOR) {
822 chPrevNonWhite = styler.SafeGetCharAt(back);
823 }
824 }
825
826 StyleContext sc(startPos, length, initStyle, styler);
827 LinePPState preproc = vlls.ForLine(lineCurrent);
828
829 bool definitionsChanged = false;
830
831 // Truncate ppDefineHistory before current line
832
833 if (!options.updatePreprocessor)
834 ppDefineHistory.clear();
835
836 std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(),
837 [lineCurrent](const PPDefinition &p) noexcept { return p.line >= lineCurrent; });
838 if (itInvalid != ppDefineHistory.end()) {
839 ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
840 definitionsChanged = true;
841 }
842
843 SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
844 for (const PPDefinition &ppDef : ppDefineHistory) {
845 if (ppDef.isUndef)
846 preprocessorDefinitions.erase(ppDef.key);
847 else
848 preprocessorDefinitions[ppDef.key] = SymbolValue(ppDef.value, ppDef.arguments);
849 }
850
851 std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
852 SparseState<std::string> rawSTNew(lineCurrent);
853
854 int activitySet = preproc.ActiveState();
855
856 const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_JAVA_IDENTIFIER);
857 const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_JAVA_COMMENTDOCKEYWORD);
858
859 Sci_PositionU lineEndNext = styler.LineEnd(lineCurrent);
860
861 for (; sc.More();) {
862
863 if (sc.atLineStart) {
864 // Using MaskActive() is not needed in the following statement.
865 // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
866 if ((sc.state == SCE_JAVA_STRING) || (sc.state == SCE_JAVA_CHARACTER)) {
867 // Prevent SCE_JAVA_STRINGEOL from leaking back to previous line which
868 // ends with a line continuation by locking in the state up to this position.
869 sc.SetState(sc.state);
870 }
871 if ((MaskActive(sc.state) == SCE_JAVA_PREPROCESSOR) && (!continuationLine)) {
872 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
873 }
874 // Reset states to beginning of colourise so no surprises
875 // if different sets of lines lexed.
876 visibleChars = 0;
877 lastWordWasUUID = false;
878 isIncludePreprocessor = false;
879 inRERange = false;
880 if (preproc.IsInactive()) {
881 activitySet = inactiveFlag;
882 sc.SetState(sc.state | activitySet);
883 }
884 }
885
886 if (sc.atLineEnd) {
887 lineCurrent++;
888 lineEndNext = styler.LineEnd(lineCurrent);
889 vlls.Add(lineCurrent, preproc);
890 if (rawStringTerminator != "") {
891 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
892 }
893 }
894
895 // Handle line continuation generically.
896 if (sc.ch == '\\') {
897 if ((sc.currentPos+1) >= lineEndNext) {
898 lineCurrent++;
899 lineEndNext = styler.LineEnd(lineCurrent);
900 vlls.Add(lineCurrent, preproc);
901 if (rawStringTerminator != "") {
902 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
903 }
904 sc.Forward();
905 if (sc.ch == '\r' && sc.chNext == '\n') {
906 // Even in UTF-8, \r and \n are separate
907 sc.Forward();
908 }
909 continuationLine = true;
910 sc.Forward();
911 continue;
912 }
913 }
914
915 const bool atLineEndBeforeSwitch = sc.atLineEnd;
916
917 // Determine if the current state should terminate.
918 switch (MaskActive(sc.state)) {
919 case SCE_JAVA_OPERATOR:
920 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
921 break;
922 case SCE_JAVA_NUMBER:
923 // We accept almost anything because of hex. and number suffixes
924 if (sc.ch == '_') {
925 sc.ChangeState(SCE_JAVA_USERLITERAL|activitySet);
926 } else if (!(setWord.Contains(sc.ch)
927 || (sc.ch == '\'')
928 || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
929 sc.chPrev == 'p' || sc.chPrev == 'P')))) {
930 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
931 }
932 break;
933 case SCE_JAVA_USERLITERAL:
934 if (!(setWord.Contains(sc.ch)))
935 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
936 break;
937 case SCE_JAVA_IDENTIFIER:
938 if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
939 char s[1000];
940 if (caseSensitive) {
941 sc.GetCurrent(s, sizeof(s));
942 } else {
943 sc.GetCurrentLowered(s, sizeof(s));
944 }
945 if (keywords.InList(s)) {
946 lastWordWasUUID = strcmp(s, "uuid") == 0;
947 sc.ChangeState(SCE_JAVA_WORD|activitySet);
948 } else if (keywords2.InList(s)) {
949 sc.ChangeState(SCE_JAVA_WORD2|activitySet);
950 } else if (keywords4.InList(s)) {
951 sc.ChangeState(SCE_JAVA_GLOBALCLASS|activitySet);
952 } else {
953 int subStyle = classifierIdentifiers.ValueFor(s);
954 if (subStyle >= 0) {
955 sc.ChangeState(subStyle|activitySet);
956 }
957 }
958 const bool literalString = sc.ch == '\"';
959 if (literalString || sc.ch == '\'') {
960 size_t lenS = strlen(s);
961 const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
962 if (raw)
963 s[lenS--] = '\0';
964 const bool valid =
965 (lenS == 0) ||
966 ((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
967 ((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
968 if (valid) {
969 if (literalString) {
970 if (raw) {
971 // Set the style of the string prefix to SCE_JAVA_STRINGRAW but then change to
972 // SCE_JAVA_DEFAULT as that allows the raw string start code to run.
973 sc.ChangeState(SCE_JAVA_STRINGRAW|activitySet);
974 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
975 } else {
976 sc.ChangeState(SCE_JAVA_STRING|activitySet);
977 }
978 } else {
979 sc.ChangeState(SCE_JAVA_CHARACTER|activitySet);
980 }
981 } else {
982 sc.SetState(SCE_JAVA_DEFAULT | activitySet);
983 }
984 } else {
985 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
986 }
987 }
988 break;
989 case SCE_JAVA_PREPROCESSOR:
990 if (options.stylingWithinPreprocessor) {
991 if (IsASpace(sc.ch) || (sc.ch == '(')) {
992 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
993 }
994 } else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
995 isStringInPreprocessor = false;
996 } else if (!isStringInPreprocessor) {
997 if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
998 isStringInPreprocessor = true;
999 } else if (sc.Match('/', '*')) {
1000 if (sc.Match("/**") || sc.Match("/*!")) {
1001 sc.SetState(SCE_JAVA_PREPROCESSORCOMMENTDOC|activitySet);
1002 } else {
1003 sc.SetState(SCE_JAVA_PREPROCESSORCOMMENT|activitySet);
1004 }
1005 sc.Forward(); // Eat the *
1006 } else if (sc.Match('/', '/')) {
1007 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1008 }
1009 }
1010 break;
1011 case SCE_JAVA_PREPROCESSORCOMMENT:
1012 case SCE_JAVA_PREPROCESSORCOMMENTDOC:
1013 if (sc.Match('*', '/')) {
1014 sc.Forward();
1015 sc.ForwardSetState(SCE_JAVA_PREPROCESSOR|activitySet);
1016 continue; // Without advancing in case of '\'.
1017 }
1018 break;
1019 case SCE_JAVA_COMMENT:
1020 if (sc.Match('*', '/')) {
1021 sc.Forward();
1022 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1023 } else {
1024 styleBeforeTaskMarker = SCE_JAVA_COMMENT;
1025 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
1026 }
1027 break;
1028 case SCE_JAVA_COMMENTDOC:
1029 if (sc.Match('*', '/')) {
1030 sc.Forward();
1031 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1032 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
1033 // Verify that we have the conditions to mark a comment-doc-keyword
1034 if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
1035 styleBeforeDCKeyword = SCE_JAVA_COMMENTDOC;
1036 sc.SetState(SCE_JAVA_COMMENTDOCKEYWORD|activitySet);
1037 }
1038 } else if ((sc.ch == '<' && sc.chNext != '/')
1039 || (sc.ch == '/' && sc.chPrev == '<')) { // XML comment style
1040 styleBeforeDCKeyword = SCE_JAVA_COMMENTDOC;
1041 sc.ForwardSetState(SCE_JAVA_COMMENTDOCKEYWORD | activitySet);
1042 }
1043 break;
1044 case SCE_JAVA_COMMENTLINE:
1045 if (sc.atLineStart && !continuationLine) {
1046 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1047 } else {
1048 styleBeforeTaskMarker = SCE_JAVA_COMMENTLINE;
1049 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
1050 }
1051 break;
1052 case SCE_JAVA_COMMENTLINEDOC:
1053 if (sc.atLineStart && !continuationLine) {
1054 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1055 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
1056 // Verify that we have the conditions to mark a comment-doc-keyword
1057 if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
1058 styleBeforeDCKeyword = SCE_JAVA_COMMENTLINEDOC;
1059 sc.SetState(SCE_JAVA_COMMENTDOCKEYWORD|activitySet);
1060 }
1061 } else if ((sc.ch == '<' && sc.chNext != '/')
1062 || (sc.ch == '/' && sc.chPrev == '<')) { // XML comment style
1063 styleBeforeDCKeyword = SCE_JAVA_COMMENTLINEDOC;
1064 sc.ForwardSetState(SCE_JAVA_COMMENTDOCKEYWORD | activitySet);
1065 }
1066 break;
1067 case SCE_JAVA_COMMENTDOCKEYWORD:
1068 if ((styleBeforeDCKeyword == SCE_JAVA_COMMENTDOC) && sc.Match('*', '/')) {
1069 sc.ChangeState(SCE_JAVA_COMMENTDOCKEYWORDERROR);
1070 sc.Forward();
1071 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1072 seenDocKeyBrace = false;
1073 } else if (sc.ch == '[' || sc.ch == '{') {
1074 seenDocKeyBrace = true;
1075 } else if (!setDoxygen.Contains(sc.ch)
1076 && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
1077 char s[100];
1078 if (caseSensitive) {
1079 sc.GetCurrent(s, sizeof(s));
1080 } else {
1081 sc.GetCurrentLowered(s, sizeof(s));
1082 }
1083 if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
1084 sc.ChangeState(SCE_JAVA_COMMENTDOCKEYWORDERROR|activitySet);
1085 } else if (!keywords3.InList(s + 1) && !keywords3.InList(s)) {
1086 int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
1087 if (subStyleCDKW >= 0) {
1088 sc.ChangeState(subStyleCDKW|activitySet);
1089 } else {
1090 sc.ChangeState(SCE_JAVA_COMMENTDOCKEYWORDERROR|activitySet);
1091 }
1092 }
1093 sc.SetState(styleBeforeDCKeyword|activitySet);
1094 seenDocKeyBrace = false;
1095 } else if (sc.ch == '>') {
1096 char s[100];
1097 if (caseSensitive) {
1098 sc.GetCurrent(s, sizeof(s));
1099 } else {
1100 sc.GetCurrentLowered(s, sizeof(s));
1101 }
1102 if (!keywords3.InList(s)) {
1103 int subStyleCDKW = classifierDocKeyWords.ValueFor(s + 1);
1104 if (subStyleCDKW >= 0) {
1105 sc.ChangeState(subStyleCDKW | activitySet);
1106 } else {
1107 sc.ChangeState(SCE_JAVA_COMMENTDOCKEYWORDERROR | activitySet);
1108 }
1109 }
1110 sc.SetState(styleBeforeDCKeyword | activitySet);
1111 seenDocKeyBrace = false;
1112 }
1113 break;
1114 case SCE_JAVA_STRING:
1115 if (sc.atLineEnd) {
1116 sc.ChangeState(SCE_JAVA_STRINGEOL|activitySet);
1117 } else if (isIncludePreprocessor) {
1118 if (sc.ch == '>') {
1119 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1120 isIncludePreprocessor = false;
1121 }
1122 } else if (sc.ch == '\\') {
1123 if (options.escapeSequence) {
1124 sc.SetState(SCE_JAVA_ESCAPESEQUENCE|activitySet);
1125 escapeSeq.resetEscapeState(sc.chNext);
1126 }
1127 sc.Forward(); // Skip all characters after the backslash
1128 } else if (sc.ch == '\"') {
1129 if (sc.chNext == '_') {
1130 sc.ChangeState(SCE_JAVA_USERLITERAL|activitySet);
1131 } else {
1132 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1133 }
1134 }
1135 break;
1136 case SCE_JAVA_ESCAPESEQUENCE:
1137 escapeSeq.consumeDigit();
1138 if (!escapeSeq.atEscapeEnd(sc.ch)) {
1139 break;
1140 }
1141 if (sc.ch == '"') {
1142 sc.SetState(SCE_JAVA_STRING|activitySet);
1143 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1144 } else if (sc.ch == '\\') {
1145 escapeSeq.resetEscapeState(sc.chNext);
1146 sc.Forward();
1147 } else {
1148 sc.SetState(SCE_JAVA_STRING|activitySet);
1149 if (sc.atLineEnd) {
1150 sc.ChangeState(SCE_JAVA_STRINGEOL|activitySet);
1151 }
1152 }
1153 break;
1154 case SCE_JAVA_HASHQUOTEDSTRING:
1155 if (sc.ch == '\\') {
1156 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1157 sc.Forward();
1158 }
1159 } else if (sc.ch == '\"') {
1160 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1161 }
1162 break;
1163 case SCE_JAVA_STRINGRAW:
1164 if (sc.Match(rawStringTerminator.c_str())) {
1165 for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
1166 sc.Forward();
1167 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1168 rawStringTerminator = "";
1169 }
1170 break;
1171 case SCE_JAVA_CHARACTER:
1172 if (sc.atLineEnd) {
1173 sc.ChangeState(SCE_JAVA_STRINGEOL|activitySet);
1174 } else if (sc.ch == '\\') {
1175 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1176 sc.Forward();
1177 }
1178 } else if (sc.ch == '\'') {
1179 if (sc.chNext == '_') {
1180 sc.ChangeState(SCE_JAVA_USERLITERAL|activitySet);
1181 } else {
1182 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1183 }
1184 }
1185 break;
1186 case SCE_JAVA_REGEX:
1187 if (sc.atLineStart) {
1188 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1189 } else if (!inRERange && sc.ch == '/') {
1190 sc.Forward();
1191 while (IsLowerCase(sc.ch))
1192 sc.Forward(); // gobble regex flags
1193 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1194 } else if (sc.ch == '\\' && ((sc.currentPos+1) < lineEndNext)) {
1195 // Gobble up the escaped character
1196 sc.Forward();
1197 } else if (sc.ch == '[') {
1198 inRERange = true;
1199 } else if (sc.ch == ']') {
1200 inRERange = false;
1201 }
1202 break;
1203 case SCE_JAVA_STRINGEOL:
1204 if (sc.atLineStart) {
1205 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1206 }
1207 break;
1208 case SCE_JAVA_VERBATIM:
1209 if (options.verbatimStringsAllowEscapes && (sc.ch == '\\')) {
1210 sc.Forward(); // Skip all characters after the backslash
1211 } else if (sc.ch == '\"') {
1212 if (sc.chNext == '\"') {
1213 sc.Forward();
1214 } else {
1215 sc.ForwardSetState(SCE_JAVA_DEFAULT|activitySet);
1216 }
1217 }
1218 break;
1219 case SCE_JAVA_TRIPLEVERBATIM:
1220 if (sc.Match(R"(""")")) {
1221 while (sc.Match('"')) {
1222 sc.Forward();
1223 }
1224 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1225 }
1226 break;
1227 case SCE_JAVA_UUID:
1228 if (sc.atLineEnd || sc.ch == ')') {
1229 sc.SetState(SCE_JAVA_DEFAULT|activitySet);
1230 }
1231 break;
1232 case SCE_JAVA_TASKMARKER:
1233 if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1234 sc.SetState(styleBeforeTaskMarker|activitySet);
1235 styleBeforeTaskMarker = SCE_JAVA_DEFAULT;
1236 }
1237 }
1238
1239 if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1240 // State exit processing consumed characters up to end of line.
1241 lineCurrent++;
1242 lineEndNext = styler.LineEnd(lineCurrent);
1243 vlls.Add(lineCurrent, preproc);
1244 }
1245
1246 const bool atLineEndBeforeStateEntry = sc.atLineEnd;
1247
1248 // Determine if a new state should be entered.
1249 if (MaskActive(sc.state) == SCE_JAVA_DEFAULT) {
1250 if (sc.Match('@', '\"')) {
1251 sc.SetState(SCE_JAVA_VERBATIM|activitySet);
1252 sc.Forward();
1253 } else if (options.triplequotedStrings && sc.Match(R"(""")")) {
1254 sc.SetState(SCE_JAVA_TRIPLEVERBATIM|activitySet);
1255 sc.Forward(2);
1256 } else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1257 sc.SetState(SCE_JAVA_HASHQUOTEDSTRING|activitySet);
1258 sc.Forward();
1259 } else if (options.backQuotedStrings && sc.Match('`')) {
1260 sc.SetState(SCE_JAVA_STRINGRAW|activitySet);
1261 rawStringTerminator = "`";
1262 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1263 if (lastWordWasUUID) {
1264 sc.SetState(SCE_JAVA_UUID|activitySet);
1265 lastWordWasUUID = false;
1266 } else {
1267 sc.SetState(SCE_JAVA_NUMBER|activitySet);
1268 }
1269 } else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1270 if (lastWordWasUUID) {
1271 sc.SetState(SCE_JAVA_UUID|activitySet);
1272 lastWordWasUUID = false;
1273 } else {
1274 sc.SetState(SCE_JAVA_IDENTIFIER|activitySet);
1275 }
1276 } else if (sc.Match('/', '*')) {
1277 if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
1278 sc.SetState(SCE_JAVA_COMMENTDOC|activitySet);
1279 } else {
1280 sc.SetState(SCE_JAVA_COMMENT|activitySet);
1281 }
1282 sc.Forward(); // Eat the * so it isn't used for the end of the comment
1283 } else if (sc.Match('/', '/')) {
1284 if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1285 // Support of Qt/Doxygen doc. style
1286 sc.SetState(SCE_JAVA_COMMENTLINEDOC|activitySet);
1287 else
1288 sc.SetState(SCE_JAVA_COMMENTLINE|activitySet);
1289 } else if (sc.ch == '/'
1290 && (setOKBeforeRE.Contains(chPrevNonWhite)
1291 || followsReturnKeyword(sc, styler))
1292 && (!setCouldBePostOp.Contains(chPrevNonWhite)
1293 || !FollowsPostfixOperator(sc, styler))) {
1294 sc.SetState(SCE_JAVA_REGEX|activitySet); // JavaScript's RegEx
1295 inRERange = false;
1296 } else if (sc.ch == '\"') {
1297 if (sc.chPrev == 'R') {
1298 styler.Flush();
1299 if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_JAVA_STRINGRAW) {
1300 sc.SetState(SCE_JAVA_STRINGRAW|activitySet);
1301 rawStringTerminator = ")";
1302 for (Sci_Position termPos = sc.currentPos + 1;; termPos++) {
1303 const char chTerminator = styler.SafeGetCharAt(termPos, '(');
1304 if (chTerminator == '(')
1305 break;
1306 rawStringTerminator += chTerminator;
1307 }
1308 rawStringTerminator += '\"';
1309 } else {
1310 sc.SetState(SCE_JAVA_STRING|activitySet);
1311 }
1312 } else {
1313 sc.SetState(SCE_JAVA_STRING|activitySet);
1314 }
1315 isIncludePreprocessor = false; // ensure that '>' won't end the string
1316 } else if (isIncludePreprocessor && sc.ch == '<') {
1317 sc.SetState(SCE_JAVA_STRING|activitySet);
1318 } else if (sc.ch == '\'') {
1319 sc.SetState(SCE_JAVA_CHARACTER|activitySet);
1320 } else if (sc.ch == '#' && visibleChars == 0) {
1321 // Preprocessor commands are alone on their line
1322 sc.SetState(SCE_JAVA_PREPROCESSOR|activitySet);
1323 // Skip whitespace between # and preprocessor word
1324 do {
1325 sc.Forward();
1326 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1327 if (sc.Match("include")) {
1328 isIncludePreprocessor = true;
1329 } else {
1330 if (options.trackPreprocessor && IsAlphaNumeric(sc.ch)) {
1331 // If #if is nested too deeply (>31 levels) the active/inactive appearance
1332 // will stop reflecting the code.
1333 if (sc.Match("ifdef") || sc.Match("ifndef")) {
1334 const bool isIfDef = sc.Match("ifdef");
1335 const int startRest = isIfDef ? 5 : 6;
1336 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + startRest + 1, false);
1337 bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1338 preproc.StartSection(isIfDef == foundDef);
1339 } else if (sc.Match("if")) {
1340 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1341 const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1342 preproc.StartSection(ifGood);
1343 } else if (sc.Match("else")) {
1344 // #else is shown as active if either preceding or following section is active
1345 // as that means that it contributed to the result.
1346 if (preproc.ValidLevel()) {
1347 // If #else has no corresponding #if then take no action as invalid
1348 if (!preproc.CurrentIfTaken()) {
1349 // Inactive, may become active if parent scope active
1350 assert(sc.state == (SCE_JAVA_PREPROCESSOR | inactiveFlag));
1351 preproc.InvertCurrentLevel();
1352 activitySet = preproc.ActiveState();
1353 // If following is active then show "else" as active
1354 if (!activitySet)
1355 sc.ChangeState(SCE_JAVA_PREPROCESSOR);
1356 } else if (preproc.IsActive()) {
1357 // Active -> inactive
1358 assert(sc.state == SCE_JAVA_PREPROCESSOR);
1359 preproc.InvertCurrentLevel();
1360 activitySet = preproc.ActiveState();
1361 // Continue to show "else" as active as it ends active section.
1362 }
1363 }
1364 } else if (sc.Match("elif")) {
1365 // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1366 // #elif is shown as active if either preceding or following section is active
1367 // as that means that it contributed to the result.
1368 if (preproc.ValidLevel()) {
1369 if (!preproc.CurrentIfTaken()) {
1370 // Inactive, if expression true then may become active if parent scope active
1371 assert(sc.state == (SCE_JAVA_PREPROCESSOR | inactiveFlag));
1372 // Similar to #if
1373 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 4, true);
1374 const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1375 if (ifGood) {
1376 preproc.InvertCurrentLevel();
1377 activitySet = preproc.ActiveState();
1378 if (!activitySet)
1379 sc.ChangeState(SCE_JAVA_PREPROCESSOR);
1380 }
1381 } else if (preproc.IsActive()) {
1382 // Active -> inactive
1383 assert(sc.state == SCE_JAVA_PREPROCESSOR);
1384 preproc.InvertCurrentLevel();
1385 activitySet = preproc.ActiveState();
1386 // Continue to show "elif" as active as it ends active section.
1387 }
1388 }
1389 } else if (sc.Match("endif")) {
1390 preproc.EndSection();
1391 activitySet = preproc.ActiveState();
1392 sc.ChangeState(SCE_JAVA_PREPROCESSOR|activitySet);
1393 } else if (sc.Match("define")) {
1394 if (options.updatePreprocessor && preproc.IsActive()) {
1395 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1396 size_t startName = 0;
1397 while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1398 startName++;
1399 size_t endName = startName;
1400 while ((endName < restOfLine.length()) && setWord.Contains(restOfLine[endName]))
1401 endName++;
1402 std::string key = restOfLine.substr(startName, endName-startName);
1403 if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1404 // Macro
1405 size_t endArgs = endName;
1406 while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1407 endArgs++;
1408 std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1409 size_t startValue = endArgs+1;
1410 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1411 startValue++;
1412 std::string value;
1413 if (startValue < restOfLine.length())
1414 value = restOfLine.substr(startValue);
1415 preprocessorDefinitions[key] = SymbolValue(value, args);
1416 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1417 definitionsChanged = true;
1418 } else {
1419 // Value
1420 size_t startValue = endName;
1421 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1422 startValue++;
1423 std::string value = restOfLine.substr(startValue);
1424 if (OnlySpaceOrTab(value))
1425 value = "1"; // No value defaults to 1
1426 preprocessorDefinitions[key] = value;
1427 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1428 definitionsChanged = true;
1429 }
1430 }
1431 } else if (sc.Match("undef")) {
1432 if (options.updatePreprocessor && preproc.IsActive()) {
1433 const std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, false);
1434 std::vector<std::string> tokens = Tokenize(restOfLine);
1435 if (tokens.size() >= 1) {
1436 const std::string key = tokens[0];
1437 preprocessorDefinitions.erase(key);
1438 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1439 definitionsChanged = true;
1440 }
1441 }
1442 }
1443 }
1444 }
1445 } else if (isoperator(sc.ch)) {
1446 sc.SetState(SCE_JAVA_OPERATOR|activitySet);
1447 }
1448 }
1449
1450 if (sc.atLineEnd && !atLineEndBeforeStateEntry) {
1451 // State entry processing consumed characters up to end of line.
1452 lineCurrent++;
1453 lineEndNext = styler.LineEnd(lineCurrent);
1454 vlls.Add(lineCurrent, preproc);
1455 }
1456
1457 if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1458 chPrevNonWhite = sc.ch;
1459 visibleChars++;
1460 }
1461 continuationLine = false;
1462 sc.Forward();
1463 }
1464 const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1465 if (definitionsChanged || rawStringsChanged)
1466 styler.ChangeLexerState(startPos, startPos + length);
1467 sc.Complete();
1468}
1469
1470 // Store both the current line's fold level and the next line's in the
1471 // level store to make it easy to pick up with each increment
1472 // and to make it possible to fiddle the current level for "} else {".
1473
1474void SCI_METHOD LexerJAVA::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1475
1476 if (!options.fold)
1477 return;
1478
1479 LexAccessor styler(pAccess);
1480
1481 const Sci_PositionU endPos = startPos + length;
1482 int visibleChars = 0;
1483 bool inLineComment = false;
1484 Sci_Position lineCurrent = styler.GetLine(startPos);
1485 int levelCurrent = SC_FOLDLEVELBASE;
1486 if (lineCurrent > 0)
1487 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1488 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
1489 int levelMinCurrent = levelCurrent;
1490 int levelNext = levelCurrent;
1491 char chNext = styler[startPos];
1492 int styleNext = MaskActive(styler.StyleAt(startPos));
1493 int style = MaskActive(initStyle);
1494 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
1495 for (Sci_PositionU i = startPos; i < endPos; i++) {
1496 const char ch = chNext;
1497 chNext = styler.SafeGetCharAt(i + 1);
1498 const int stylePrev = style;
1499 style = styleNext;
1500 styleNext = MaskActive(styler.StyleAt(i + 1));
1501 const bool atEOL = i == (lineStartNext-1);
1502 if ((style == SCE_JAVA_COMMENTLINE) || (style == SCE_JAVA_COMMENTLINEDOC))
1503 inLineComment = true;
1504 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
1505 if (!IsStreamCommentStyle(stylePrev)) {
1506 levelNext++;
1507 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
1508 // Comments don't end at end of line and the next character may be unstyled.
1509 levelNext--;
1510 }
1511 }
1512 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_JAVA_COMMENTLINE) || options.foldExplicitAnywhere)) {
1513 if (userDefinedFoldMarkers) {
1514 if (styler.Match(i, options.foldExplicitStart.c_str())) {
1515 levelNext++;
1516 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1517 levelNext--;
1518 }
1519 } else {
1520 if ((ch == '/') && (chNext == '/')) {
1521 const char chNext2 = styler.SafeGetCharAt(i + 2);
1522 if (chNext2 == '{') {
1523 levelNext++;
1524 } else if (chNext2 == '}') {
1525 levelNext--;
1526 }
1527 }
1528 }
1529 }
1530 if (options.foldPreprocessor && (style == SCE_JAVA_PREPROCESSOR)) {
1531 if (ch == '#') {
1532 Sci_PositionU j = i + 1;
1533 while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1534 j++;
1535 }
1536 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1537 levelNext++;
1538 } else if (styler.Match(j, "end")) {
1539 levelNext--;
1540 }
1541
1542 if (options.foldPreprocessorAtElse && (styler.Match(j, "else") || styler.Match(j, "elif"))) {
1543 levelMinCurrent--;
1544 }
1545 }
1546 }
1547 if (options.foldSyntaxBased && (style == SCE_JAVA_OPERATOR)) {
1548 if (ch == '{' || ch == '[' || ch == '(') {
1549 // Measure the minimum before a '{' to allow
1550 // folding on "} else {"
1551 if (options.foldAtElse && levelMinCurrent > levelNext) {
1552 levelMinCurrent = levelNext;
1553 }
1554 levelNext++;
1555 } else if (ch == '}' || ch == ']' || ch == ')') {
1556 levelNext--;
1557 }
1558 }
1559 if (!IsASpace(ch))
1560 visibleChars++;
1561 if (atEOL || (i == endPos-1)) {
1562 int levelUse = levelCurrent;
1563 if ((options.foldSyntaxBased && options.foldAtElse) ||
1564 (options.foldPreprocessor && options.foldPreprocessorAtElse)
1565 ) {
1566 levelUse = levelMinCurrent;
1567 }
1568 int lev = levelUse | levelNext << 16;
1569 if (visibleChars == 0 && options.foldCompact)
1570 lev |= SC_FOLDLEVELWHITEFLAG;
1571 if (levelUse < levelNext)
1572 lev |= SC_FOLDLEVELHEADERFLAG;
1573 if (lev != styler.LevelAt(lineCurrent)) {
1574 styler.SetLevel(lineCurrent, lev);
1575 }
1576 lineCurrent++;
1577 lineStartNext = styler.LineStart(lineCurrent+1);
1578 levelCurrent = levelNext;
1579 levelMinCurrent = levelCurrent;
1580 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
1581 // There is an empty line at end of file so give it same level and empty
1582 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1583 }
1584 visibleChars = 0;
1585 inLineComment = false;
1586 }
1587 }
1588}
1589
1590void LexerJAVA::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1591
1592 // Remove whitespace tokens
1593 tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1594
1595 // Evaluate defined statements to either 0 or 1
1596 for (size_t i=0; (i+1)<tokens.size();) {
1597 if (tokens[i] == "defined") {
1598 const char *val = "0";
1599 if (tokens[i+1] == "(") {
1600 if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1601 // defined()
1602 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1603 } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1604 // defined(<identifier>)
1605 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1606 if (it != preprocessorDefinitions.end()) {
1607 val = "1";
1608 }
1609 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1610 } else {
1611 // Spurious '(' so erase as more likely to result in false
1612 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1613 }
1614 } else {
1615 // defined <identifier>
1616 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1617 if (it != preprocessorDefinitions.end()) {
1618 val = "1";
1619 }
1620 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1621 }
1622 tokens[i] = val;
1623 } else {
1624 i++;
1625 }
1626 }
1627
1628 // Evaluate identifiers
1629 constexpr size_t maxIterations = 100;
1630 size_t iterations = 0; // Limit number of iterations in case there is a recursive macro.
1631 for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1632 iterations++;
1633 if (setWordStart.Contains(tokens[i][0])) {
1634 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1635 if (it != preprocessorDefinitions.end()) {
1636 // Tokenize value
1637 std::vector<std::string> macroTokens = Tokenize(it->second.value);
1638 if (it->second.IsMacro()) {
1639 if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1640 // Create map of argument name to value
1641 std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1642 std::map<std::string, std::string> arguments;
1643 size_t arg = 0;
1644 size_t tok = i+2;
1645 while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1646 if (tokens.at(tok) != ",") {
1647 arguments[argumentNames.at(arg)] = tokens.at(tok);
1648 arg++;
1649 }
1650 tok++;
1651 }
1652
1653 // Remove invocation
1654 tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1655
1656 // Substitute values into macro
1657 macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1658
1659 for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1660 if (setWordStart.Contains(macroTokens[iMacro][0])) {
1661 std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1662 if (itFind != arguments.end()) {
1663 // TODO: Possible that value will be expression so should insert tokenized form
1664 macroTokens[iMacro] = itFind->second;
1665 }
1666 }
1667 iMacro++;
1668 }
1669
1670 // Insert results back into tokens
1671 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1672
1673 } else {
1674 i++;
1675 }
1676 } else {
1677 // Remove invocation
1678 tokens.erase(tokens.begin() + i);
1679 // Insert results back into tokens
1680 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1681 }
1682 } else {
1683 // Identifier not found and value defaults to zero
1684 tokens[i] = "0";
1685 }
1686 } else {
1687 i++;
1688 }
1689 }
1690
1691 // Find bracketed subexpressions and recurse on them
1692 BracketPair bracketPair = FindBracketPair(tokens);
1693 while (bracketPair.itBracket != tokens.end()) {
1694 std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1695 EvaluateTokens(inBracket, preprocessorDefinitions);
1696
1697 // The insertion is done before the removal because there were failures with the opposite approach
1698 tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1699
1700 bracketPair = FindBracketPair(tokens);
1701 tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1702
1703 bracketPair = FindBracketPair(tokens);
1704 }
1705
1706 // Evaluate logical negations
1707 for (size_t j=0; (j+1)<tokens.size();) {
1708 if (setNegationOp.Contains(tokens[j][0])) {
1709 int isTrue = atoi(tokens[j+1].c_str());
1710 if (tokens[j] == "!")
1711 isTrue = !isTrue;
1712 std::vector<std::string>::iterator itInsert =
1713 tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1714 tokens.insert(itInsert, isTrue ? "1" : "0");
1715 } else {
1716 j++;
1717 }
1718 }
1719
1720 // Evaluate expressions in precedence order
1721 enum precedence { precMult, precAdd, precRelative
1722 , precLogical, /* end marker */ precLast };
1723 for (int prec = precMult; prec < precLast; prec++) {
1724 // Looking at 3 tokens at a time so end at 2 before end
1725 for (size_t k=0; (k+2)<tokens.size();) {
1726 const char chOp = tokens[k+1][0];
1727 if (
1728 ((prec==precMult) && setMultOp.Contains(chOp)) ||
1729 ((prec==precAdd) && setAddOp.Contains(chOp)) ||
1730 ((prec==precRelative) && setRelOp.Contains(chOp)) ||
1731 ((prec==precLogical) && setLogicalOp.Contains(chOp))
1732 ) {
1733 const int valA = atoi(tokens[k].c_str());
1734 const int valB = atoi(tokens[k+2].c_str());
1735 int result = 0;
1736 if (tokens[k+1] == "+")
1737 result = valA + valB;
1738 else if (tokens[k+1] == "-")
1739 result = valA - valB;
1740 else if (tokens[k+1] == "*")
1741 result = valA * valB;
1742 else if (tokens[k+1] == "/")
1743 result = valA / (valB ? valB : 1);
1744 else if (tokens[k+1] == "%")
1745 result = valA % (valB ? valB : 1);
1746 else if (tokens[k+1] == "<")
1747 result = valA < valB;
1748 else if (tokens[k+1] == "<=")
1749 result = valA <= valB;
1750 else if (tokens[k+1] == ">")
1751 result = valA > valB;
1752 else if (tokens[k+1] == ">=")
1753 result = valA >= valB;
1754 else if (tokens[k+1] == "==")
1755 result = valA == valB;
1756 else if (tokens[k+1] == "!=")
1757 result = valA != valB;
1758 else if (tokens[k+1] == "||")
1759 result = valA || valB;
1760 else if (tokens[k+1] == "&&")
1761 result = valA && valB;
1762 std::vector<std::string>::iterator itInsert =
1763 tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1764 tokens.insert(itInsert, std::to_string(result));
1765 } else {
1766 k++;
1767 }
1768 }
1769 }
1770}
1771
1772std::vector<std::string> LexerJAVA::Tokenize(const std::string &expr) const {
1773 // Break into tokens
1774 std::vector<std::string> tokens;
1775 const char *cp = expr.c_str();
1776 while (*cp) {
1777 std::string word;
1778 if (setWord.Contains(*cp)) {
1779 // Identifiers and numbers
1780 while (setWord.Contains(*cp)) {
1781 word += *cp;
1782 cp++;
1783 }
1784 } else if (IsSpaceOrTab(*cp)) {
1785 while (IsSpaceOrTab(*cp)) {
1786 word += *cp;
1787 cp++;
1788 }
1789 } else if (setRelOp.Contains(*cp)) {
1790 word += *cp;
1791 cp++;
1792 if (setRelOp.Contains(*cp)) {
1793 word += *cp;
1794 cp++;
1795 }
1796 } else if (setLogicalOp.Contains(*cp)) {
1797 word += *cp;
1798 cp++;
1799 if (setLogicalOp.Contains(*cp)) {
1800 word += *cp;
1801 cp++;
1802 }
1803 } else {
1804 // Should handle strings, characters, and comments here
1805 word += *cp;
1806 cp++;
1807 }
1808 tokens.push_back(word);
1809 }
1810 return tokens;
1811}
1812
1813bool LexerJAVA::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1814 std::vector<std::string> tokens = Tokenize(expr);
1815
1816 EvaluateTokens(tokens, preprocessorDefinitions);
1817
1818 // "0" or "" -> false else true
1819 const bool isFalse = tokens.empty() ||
1820 ((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1821 return !isFalse;
1822}
1823
// Lexer module objects for this file. "java" (SCLEX_JAVA) is built through
// LexerFactoryJAVA and "javanocase" (SCLEX_JAVANOCASE) through
// LexerFactoryJAVAInsensitive; both are passed the same javaWordLists.
1824 LexerModule lmJAVA(SCLEX_JAVA, LexerJAVA::LexerFactoryJAVA, "java", javaWordLists);
1825 LexerModule lmJAVANoCase(SCLEX_JAVANOCASE, LexerJAVA::LexerFactoryJAVAInsensitive, "javanocase", javaWordLists);
1826