1// Scintilla source code edit control
2/** @file LexErrorList.cxx
3 ** Lexer for error lists. Used for the output pane in SciTE.
4 **/
5// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#include <stdlib.h>
9#include <string.h>
10#include <stdio.h>
11#include <stdarg.h>
12#include <assert.h>
13#include <ctype.h>
14
15#include <string>
16#include <string_view>
17
18#include "ILexer.h"
19#include "Scintilla.h"
20#include "SciLexer.h"
21
22#include "WordList.h"
23#include "LexAccessor.h"
24#include "Accessor.h"
25#include "StyleContext.h"
26#include "CharacterSet.h"
27#include "LexerModule.h"
28
29using namespace Lexilla;
30
31namespace {
32
// Report whether the NUL-terminated string haystack begins with needle.
// An empty needle matches every haystack.
bool strstart(const char *haystack, const char *needle) noexcept {
	for (; *needle != '\0'; haystack++, needle++) {
		// A haystack that ends early differs here because its NUL never equals
		// the (non-NUL) needle character.
		if (*haystack != *needle) {
			return false;
		}
	}
	return true;
}
36
// True for the ASCII decimal digits '0' through '9'.
constexpr bool Is0To9(char ch) noexcept {
	return !((ch < '0') || (ch > '9'));
}
40
// True for the ASCII digits '1' through '9'; '0' is deliberately excluded.
constexpr bool Is1To9(char ch) noexcept {
	return !((ch < '1') || (ch > '9'));
}
44
45bool IsAlphabetic(int ch) {
46 return IsASCII(ch) && isalpha(ch);
47}
48
49inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
50 return (styler[i] == '\n') ||
51 ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
52}
53
54bool IsGccExcerpt(const char *s) noexcept {
55 while (*s) {
56 if (s[0] == ' ' && s[1] == '|' && (s[2] == ' ' || s[2] == '+')) {
57 return true;
58 }
59 if (!(s[0] == ' ' || s[0] == '+' || Is0To9(s[0]))) {
60 return false;
61 }
62 s++;
63 }
64 return true;
65}
66
67int RecogniseErrorListLine(const char *lineBuffer, Sci_PositionU lengthLine, Sci_Position &startValue) {
68 if (lineBuffer[0] == '>') {
69 // Command or return status
70 return SCE_ERR_CMD;
71 } else if (lineBuffer[0] == '<') {
72 // Diff removal.
73 return SCE_ERR_DIFF_DELETION;
74 } else if (lineBuffer[0] == '!') {
75 return SCE_ERR_DIFF_CHANGED;
76 } else if (lineBuffer[0] == '+') {
77 if (strstart(lineBuffer, "+++ ")) {
78 return SCE_ERR_DIFF_MESSAGE;
79 } else {
80 return SCE_ERR_DIFF_ADDITION;
81 }
82 } else if (lineBuffer[0] == '-') {
83 if (strstart(lineBuffer, "--- ")) {
84 return SCE_ERR_DIFF_MESSAGE;
85 } else {
86 return SCE_ERR_DIFF_DELETION;
87 }
88 } else if (strstart(lineBuffer, "cf90-")) {
89 // Absoft Pro Fortran 90/95 v8.2 error and/or warning message
90 return SCE_ERR_ABSF;
91 } else if (strstart(lineBuffer, "fortcom:")) {
92 // Intel Fortran Compiler v8.0 error/warning message
93 return SCE_ERR_IFORT;
94 } else if (strstr(lineBuffer, "File \"") && strstr(lineBuffer, ", line ")) {
95 return SCE_ERR_PYTHON;
96 } else if (strstr(lineBuffer, " in ") && strstr(lineBuffer, " on line ")) {
97 return SCE_ERR_PHP;
98 } else if ((strstart(lineBuffer, "Error ") ||
99 strstart(lineBuffer, "Warning ")) &&
100 strstr(lineBuffer, " at (") &&
101 strstr(lineBuffer, ") : ") &&
102 (strstr(lineBuffer, " at (") < strstr(lineBuffer, ") : "))) {
103 // Intel Fortran Compiler error/warning message
104 return SCE_ERR_IFC;
105 } else if (strstart(lineBuffer, "Error ")) {
106 // Borland error message
107 return SCE_ERR_BORLAND;
108 } else if (strstart(lineBuffer, "Warning ")) {
109 // Borland warning message
110 return SCE_ERR_BORLAND;
111 } else if (strstr(lineBuffer, "at line ") &&
112 (strstr(lineBuffer, "at line ") < (lineBuffer + lengthLine)) &&
113 strstr(lineBuffer, "file ") &&
114 (strstr(lineBuffer, "file ") < (lineBuffer + lengthLine))) {
115 // Lua 4 error message
116 return SCE_ERR_LUA;
117 } else if (strstr(lineBuffer, " at ") &&
118 (strstr(lineBuffer, " at ") < (lineBuffer + lengthLine)) &&
119 strstr(lineBuffer, " line ") &&
120 (strstr(lineBuffer, " line ") < (lineBuffer + lengthLine)) &&
121 (strstr(lineBuffer, " at ") + 4 < (strstr(lineBuffer, " line ")))) {
122 // perl error message:
123 // <message> at <file> line <line>
124 return SCE_ERR_PERL;
125 } else if ((lengthLine >= 6) &&
126 (memcmp(lineBuffer, " at ", 6) == 0) &&
127 strstr(lineBuffer, ":line ")) {
128 // A .NET traceback
129 return SCE_ERR_NET;
130 } else if (strstart(lineBuffer, "Line ") &&
131 strstr(lineBuffer, ", file ")) {
132 // Essential Lahey Fortran error message
133 return SCE_ERR_ELF;
134 } else if (strstart(lineBuffer, "line ") &&
135 strstr(lineBuffer, " column ")) {
136 // HTML tidy style: line 42 column 1
137 return SCE_ERR_TIDY;
138 } else if (strstart(lineBuffer, "\tat ") &&
139 strstr(lineBuffer, "(") &&
140 strstr(lineBuffer, ".java:")) {
141 // Java stack back trace
142 return SCE_ERR_JAVA_STACK;
143 } else if (strstart(lineBuffer, "In file included from ") ||
144 strstart(lineBuffer, " from ")) {
145 // GCC showing include path to following error
146 return SCE_ERR_GCC_INCLUDED_FROM;
147 } else if (strstart(lineBuffer, "NMAKE : fatal error")) {
148 // Microsoft nmake fatal error:
149 // NMAKE : fatal error <code>: <program> : return code <return>
150 return SCE_ERR_MS;
151 } else if (strstr(lineBuffer, "warning LNK") ||
152 strstr(lineBuffer, "error LNK")) {
153 // Microsoft linker warning:
154 // {<object> : } (warning|error) LNK9999
155 return SCE_ERR_MS;
156 } else if (IsGccExcerpt(lineBuffer)) {
157 // GCC code excerpt and pointer to issue
158 // 73 | GTimeVal last_popdown;
159 // | ^~~~~~~~~~~~
160 return SCE_ERR_GCC_EXCERPT;
161 } else {
162 // Look for one of the following formats:
163 // GCC: <filename>:<line>:<message>
164 // Microsoft: <filename>(<line>) :<message>
165 // Common: <filename>(<line>): warning|error|note|remark|catastrophic|fatal
166 // Common: <filename>(<line>) warning|error|note|remark|catastrophic|fatal
167 // Microsoft: <filename>(<line>,<column>)<message>
168 // CTags: <identifier>\t<filename>\t<message>
169 // Lua 5 traceback: \t<filename>:<line>:<message>
170 // Lua 5.1: <exe>: <filename>:<line>:<message>
171 const bool initialTab = (lineBuffer[0] == '\t');
172 bool initialColonPart = false;
173 bool canBeCtags = !initialTab; // For ctags must have an identifier with no spaces then a tab
174 enum { stInitial,
175 stGccStart, stGccDigit, stGccColumn, stGcc,
176 stMsStart, stMsDigit, stMsBracket, stMsVc, stMsDigitComma, stMsDotNet,
177 stCtagsStart, stCtagsFile, stCtagsStartString, stCtagsStringDollar, stCtags,
178 stUnrecognized
179 } state = stInitial;
180 for (Sci_PositionU i = 0; i < lengthLine; i++) {
181 const char ch = lineBuffer[i];
182 char chNext = ' ';
183 if ((i + 1) < lengthLine)
184 chNext = lineBuffer[i + 1];
185 if (state == stInitial) {
186 if (ch == ':') {
187 // May be GCC, or might be Lua 5 (Lua traceback same but with tab prefix)
188 if ((chNext != '\\') && (chNext != '/') && (chNext != ' ')) {
189 // This check is not completely accurate as may be on
190 // GTK+ with a file name that includes ':'.
191 state = stGccStart;
192 } else if (chNext == ' ') { // indicates a Lua 5.1 error message
193 initialColonPart = true;
194 }
195 } else if ((ch == '(') && Is1To9(chNext) && (!initialTab)) {
196 // May be Microsoft
197 // Check against '0' often removes phone numbers
198 state = stMsStart;
199 } else if ((ch == '\t') && canBeCtags) {
200 // May be CTags
201 state = stCtagsStart;
202 } else if (ch == ' ') {
203 canBeCtags = false;
204 }
205 } else if (state == stGccStart) { // <filename>:
206 state = ((ch == '-') || Is0To9(ch)) ? stGccDigit : stUnrecognized;
207 } else if (state == stGccDigit) { // <filename>:<line>
208 if (ch == ':') {
209 state = stGccColumn; // :9.*: is GCC
210 startValue = i + 1;
211 } else if (!Is0To9(ch)) {
212 state = stUnrecognized;
213 }
214 } else if (state == stGccColumn) { // <filename>:<line>:<column>
215 if (!Is0To9(ch)) {
216 state = stGcc;
217 if (ch == ':')
218 startValue = i + 1;
219 break;
220 }
221 } else if (state == stMsStart) { // <filename>(
222 state = Is0To9(ch) ? stMsDigit : stUnrecognized;
223 } else if (state == stMsDigit) { // <filename>(<line>
224 if (ch == ',') {
225 state = stMsDigitComma;
226 } else if (ch == ')') {
227 state = stMsBracket;
228 } else if ((ch != ' ') && !Is0To9(ch)) {
229 state = stUnrecognized;
230 }
231 } else if (state == stMsBracket) { // <filename>(<line>)
232 if ((ch == ' ') && (chNext == ':')) {
233 state = stMsVc;
234 } else if ((ch == ':' && chNext == ' ') || (ch == ' ')) {
235 // Possibly Delphi.. don't test against chNext as it's one of the strings below.
236 char word[512];
237 unsigned numstep;
238 if (ch == ' ')
239 numstep = 1; // ch was ' ', handle as if it's a delphi errorline, only add 1 to i.
240 else
241 numstep = 2; // otherwise add 2.
242 Sci_PositionU chPos = 0;
243 for (Sci_PositionU j = i + numstep; j < lengthLine && IsAlphabetic(lineBuffer[j]) && chPos < sizeof(word) - 1; j++)
244 word[chPos++] = lineBuffer[j];
245 word[chPos] = 0;
246 if (!CompareCaseInsensitive(word, "error") || !CompareCaseInsensitive(word, "warning") ||
247 !CompareCaseInsensitive(word, "fatal") || !CompareCaseInsensitive(word, "catastrophic") ||
248 !CompareCaseInsensitive(word, "note") || !CompareCaseInsensitive(word, "remark")) {
249 state = stMsVc;
250 } else {
251 state = stUnrecognized;
252 }
253 } else {
254 state = stUnrecognized;
255 }
256 } else if (state == stMsDigitComma) { // <filename>(<line>,
257 if (ch == ')') {
258 state = stMsDotNet;
259 break;
260 } else if ((ch != ' ') && !Is0To9(ch)) {
261 state = stUnrecognized;
262 }
263 } else if (state == stCtagsStart) {
264 if (ch == '\t') {
265 state = stCtagsFile;
266 }
267 } else if (state == stCtagsFile) {
268 if ((lineBuffer[i - 1] == '\t') &&
269 ((ch == '/' && chNext == '^') || Is0To9(ch))) {
270 state = stCtags;
271 break;
272 } else if ((ch == '/') && (chNext == '^')) {
273 state = stCtagsStartString;
274 }
275 } else if ((state == stCtagsStartString) && ((lineBuffer[i] == '$') && (lineBuffer[i + 1] == '/'))) {
276 state = stCtagsStringDollar;
277 break;
278 }
279 }
280 if (state == stGcc) {
281 return initialColonPart ? SCE_ERR_LUA : SCE_ERR_GCC;
282 } else if ((state == stMsVc) || (state == stMsDotNet)) {
283 return SCE_ERR_MS;
284 } else if ((state == stCtagsStringDollar) || (state == stCtags)) {
285 return SCE_ERR_CTAG;
286 } else if (initialColonPart && strstr(lineBuffer, ": warning C")) {
287 // Microsoft warning without line number
288 // <filename>: warning C9999
289 return SCE_ERR_MS;
290 } else {
291 return SCE_ERR_DEFAULT;
292 }
293 }
294}
295
// Introducer of the escape sequences handled below: ESC (octal 033) followed by '['.
#define CSI "\033["

// True when ch terminates an escape sequence scan: either the string's NUL
// terminator or any character in the range '@' to '~' inclusive.
constexpr bool SequenceEnd(int ch) noexcept {
	if (ch == 0) {
		return true;
	}
	return ('@' <= ch) && (ch <= '~');
}
301
302int StyleFromSequence(const char *seq) noexcept {
303 int bold = 0;
304 int colour = 0;
305 while (!SequenceEnd(*seq)) {
306 if (Is0To9(*seq)) {
307 int base = *seq - '0';
308 if (Is0To9(seq[1])) {
309 base = base * 10;
310 base += seq[1] - '0';
311 seq++;
312 }
313 if (base == 0) {
314 colour = 0;
315 bold = 0;
316 }
317 else if (base == 1) {
318 bold = 1;
319 }
320 else if (base >= 30 && base <= 37) {
321 colour = base - 30;
322 }
323 }
324 seq++;
325 }
326 return SCE_ERR_ES_BLACK + bold * 8 + colour;
327}
328
329void ColouriseErrorListLine(
330 const std::string &lineBuffer,
331 Sci_PositionU endPos,
332 Accessor &styler,
333 bool valueSeparate,
334 bool escapeSequences) {
335 Sci_Position startValue = -1;
336 const Sci_PositionU lengthLine = lineBuffer.length();
337 const int style = RecogniseErrorListLine(lineBuffer.c_str(), lengthLine, startValue);
338 if (escapeSequences && strstr(lineBuffer.c_str(), CSI)) {
339 const Sci_Position startPos = endPos - lengthLine;
340 const char *linePortion = lineBuffer.c_str();
341 Sci_Position startPortion = startPos;
342 int portionStyle = style;
343 while (const char *startSeq = strstr(linePortion, CSI)) {
344 if (startSeq > linePortion) {
345 styler.ColourTo(startPortion + static_cast<int>(startSeq - linePortion), portionStyle);
346 }
347 const char *endSeq = startSeq + 2;
348 while (!SequenceEnd(*endSeq))
349 endSeq++;
350 const Sci_Position endSeqPosition = startPortion + static_cast<Sci_Position>(endSeq - linePortion) + 1;
351 switch (*endSeq) {
352 case 0:
353 styler.ColourTo(endPos, SCE_ERR_ESCSEQ_UNKNOWN);
354 return;
355 case 'm': // Colour command
356 styler.ColourTo(endSeqPosition, SCE_ERR_ESCSEQ);
357 portionStyle = StyleFromSequence(startSeq+2);
358 break;
359 case 'K': // Erase to end of line -> ignore
360 styler.ColourTo(endSeqPosition, SCE_ERR_ESCSEQ);
361 break;
362 default:
363 styler.ColourTo(endSeqPosition, SCE_ERR_ESCSEQ_UNKNOWN);
364 portionStyle = style;
365 }
366 startPortion = endSeqPosition;
367 linePortion = endSeq + 1;
368 }
369 styler.ColourTo(endPos, portionStyle);
370 } else {
371 if (valueSeparate && (startValue >= 0)) {
372 styler.ColourTo(endPos - (lengthLine - startValue), style);
373 styler.ColourTo(endPos, SCE_ERR_VALUE);
374 } else {
375 styler.ColourTo(endPos, style);
376 }
377 }
378}
379
380void ColouriseErrorListDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) {
381 std::string lineBuffer;
382 styler.StartAt(startPos);
383 styler.StartSegment(startPos);
384
385 // property lexer.errorlist.value.separate
386 // For lines in the output pane that are matches from Find in Files or GCC-style
387 // diagnostics, style the path and line number separately from the rest of the
388 // line with style 21 used for the rest of the line.
389 // This allows matched text to be more easily distinguished from its location.
390 const bool valueSeparate = styler.GetPropertyInt("lexer.errorlist.value.separate", 0) != 0;
391
392 // property lexer.errorlist.escape.sequences
393 // Set to 1 to interpret escape sequences.
394 const bool escapeSequences = styler.GetPropertyInt("lexer.errorlist.escape.sequences") != 0;
395
396 for (Sci_PositionU i = startPos; i < startPos + length; i++) {
397 lineBuffer.push_back(styler[i]);
398 if (AtEOL(styler, i)) {
399 // End of line met, colourise it
400 ColouriseErrorListLine(lineBuffer, i, styler, valueSeparate, escapeSequences);
401 lineBuffer.clear();
402 }
403 }
404 if (!lineBuffer.empty()) { // Last line does not have ending characters
405 ColouriseErrorListLine(lineBuffer, startPos + length - 1, styler, valueSeparate, escapeSequences);
406 }
407}
408
// Word list descriptions for this lexer: none, so the array holds only the
// terminating null pointer.
const char *const emptyWordListDesc[] = { nullptr };
412
413}
414
// Module object for this lexer: identifier SCLEX_ERRORLIST, colouring function
// ColouriseErrorListDoc, name "errorlist" and the empty word list description.
// NOTE(review): the 0 argument presumably means "no folding function" - confirm
// against the LexerModule constructor.
LexerModule lmErrorList(SCLEX_ERRORLIST, ColouriseErrorListDoc, "errorlist", 0, emptyWordListDesc);
416