1// Scintilla source code edit control
2/** @file LexYAML.cxx
3 ** Lexer for YAML.
4 **/
5// Copyright 2003- by Sean O'Dell <sean@celsoft.com>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#include <stdlib.h>
9#include <string.h>
10#include <stdio.h>
11#include <stdarg.h>
12#include <assert.h>
13#include <ctype.h>
14
15#include <string>
16#include <string_view>
17
18#include "ILexer.h"
19#include "Scintilla.h"
20#include "SciLexer.h"
21
22#include "WordList.h"
23#include "LexAccessor.h"
24#include "Accessor.h"
25#include "StyleContext.h"
26#include "CharacterSet.h"
27#include "LexerModule.h"
28
29using namespace Lexilla;
30
// Descriptions of the keyword lists used by this lexer; the table is
// terminated by a null entry and is handed to the LexerModule below.
static const char *const yamlWordListDesc[] = { "Keywords", nullptr };
35
36static inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
37 return (styler[i] == '\n') ||
38 ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
39}
40
/**
 * Check for space, tab, line feed, or carriage return.
 * See YAML 1.2 spec sections 5.4. Line Break Characters and 5.5. White Space Characters.
 * @return true for exactly those four characters, false for anything else.
 */
static constexpr bool IsWhiteSpaceOrEOL(char ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
48
/**
 * Count the space characters at the start of a NUL-terminated line buffer.
 * Only ' ' is counted; a tab or any other character ends the run.
 * @return the number of leading spaces, or 0 when @a lineBuffer is null.
 */
static unsigned int SpaceCount(char* lineBuffer) {
	unsigned int spaces = 0;
	if (lineBuffer != nullptr) {
		while (lineBuffer[spaces] == ' ')
			spaces++;
	}
	return spaces;
}
60
61static bool KeywordAtChar(char* lineBuffer, char* startComment, const WordList &keywords) {
62 if (lineBuffer == NULL || startComment <= lineBuffer)
63 return false;
64 char* endValue = startComment - 1;
65 while (endValue >= lineBuffer && *endValue == ' ')
66 endValue--;
67 Sci_PositionU len = static_cast<Sci_PositionU>(endValue - lineBuffer) + 1;
68 char s[100];
69 if (len > (sizeof(s) / sizeof(s[0]) - 1))
70 return false;
71 strncpy(s, lineBuffer, len);
72 s[len] = '\0';
73 return (keywords.InList(s));
74}
75
// Per-line state layout used by ColouriseYAMLLine: the state kind is stored
// above the low YAML_STATE_BITSIZE bits, and the low bits carry an indent
// amount for the block-text states (zero otherwise).
#define YAML_STATE_BITSIZE 16
#define YAML_STATE_MASK (0xFFFFu << YAML_STATE_BITSIZE)
#define YAML_STATE_DOCUMENT (1 << YAML_STATE_BITSIZE)    // "---" / "..." marker line
#define YAML_STATE_VALUE (2 << YAML_STATE_BITSIZE)       // "key: value" line
#define YAML_STATE_COMMENT (3 << YAML_STATE_BITSIZE)     // whole-line comment
#define YAML_STATE_TEXT_PARENT (4 << YAML_STATE_BITSIZE) // line that opens a '|' or '>' block
#define YAML_STATE_TEXT (5 << YAML_STATE_BITSIZE)        // line inside such a block
83
84static void ColouriseYAMLLine(
85 char *lineBuffer,
86 Sci_PositionU currentLine,
87 Sci_PositionU lengthLine,
88 Sci_PositionU startLine,
89 Sci_PositionU endPos,
90 WordList &keywords,
91 Accessor &styler) {
92
93 Sci_PositionU i = 0;
94 bool bInQuotes = false;
95 unsigned int indentAmount = SpaceCount(lineBuffer);
96
97 if (currentLine > 0) {
98 int parentLineState = styler.GetLineState(currentLine - 1);
99
100 if ((parentLineState&YAML_STATE_MASK) == YAML_STATE_TEXT || (parentLineState&YAML_STATE_MASK) == YAML_STATE_TEXT_PARENT) {
101 unsigned int parentIndentAmount = parentLineState&(~YAML_STATE_MASK);
102 if (indentAmount > parentIndentAmount) {
103 styler.SetLineState(currentLine, YAML_STATE_TEXT | parentIndentAmount);
104 styler.ColourTo(endPos, SCE_YAML_TEXT);
105 return;
106 }
107 }
108 }
109 styler.SetLineState(currentLine, 0);
110 if (strncmp(lineBuffer, "---", 3) == 0 || strncmp(lineBuffer, "...", 3) == 0) { // Document marker
111 styler.SetLineState(currentLine, YAML_STATE_DOCUMENT);
112 styler.ColourTo(endPos, SCE_YAML_DOCUMENT);
113 return;
114 }
115 // Skip initial spaces
116 while ((i < lengthLine) && lineBuffer[i] == ' ') { // YAML always uses space, never TABS or anything else
117 i++;
118 }
119 if (lineBuffer[i] == '\t') { // if we skipped all spaces, and we are NOT inside a text block, this is wrong
120 styler.ColourTo(endPos, SCE_YAML_ERROR);
121 return;
122 }
123 if (lineBuffer[i] == '#') { // Comment
124 styler.SetLineState(currentLine, YAML_STATE_COMMENT);
125 styler.ColourTo(endPos, SCE_YAML_COMMENT);
126 return;
127 }
128 while (i < lengthLine) {
129 if (lineBuffer[i] == '\'' || lineBuffer[i] == '\"') {
130 bInQuotes = !bInQuotes;
131 } else if (lineBuffer[i] == '#' && isspacechar(lineBuffer[i - 1]) && !bInQuotes) {
132 styler.ColourTo(startLine + i - 1, SCE_YAML_DEFAULT);
133 styler.ColourTo(endPos, SCE_YAML_COMMENT);
134 return;
135 } else if (lineBuffer[i] == ':' && !bInQuotes && (IsWhiteSpaceOrEOL(lineBuffer[i + 1]) || i == lengthLine - 1)) {
136 styler.ColourTo(startLine + i - 1, SCE_YAML_IDENTIFIER);
137 styler.ColourTo(startLine + i, SCE_YAML_OPERATOR);
138 // Non-folding scalar
139 i++;
140 while ((i < lengthLine) && isspacechar(lineBuffer[i]))
141 i++;
142 Sci_PositionU endValue = lengthLine - 1;
143 while ((endValue >= i) && isspacechar(lineBuffer[endValue]))
144 endValue--;
145 lineBuffer[endValue + 1] = '\0';
146 if (lineBuffer[i] == '|' || lineBuffer[i] == '>') {
147 i++;
148 if (lineBuffer[i] == '+' || lineBuffer[i] == '-')
149 i++;
150 while ((i < lengthLine) && isspacechar(lineBuffer[i]))
151 i++;
152 if (lineBuffer[i] == '\0') {
153 styler.SetLineState(currentLine, YAML_STATE_TEXT_PARENT | indentAmount);
154 styler.ColourTo(endPos, SCE_YAML_DEFAULT);
155 return;
156 } else if (lineBuffer[i] == '#') {
157 styler.SetLineState(currentLine, YAML_STATE_TEXT_PARENT | indentAmount);
158 styler.ColourTo(startLine + i - 1, SCE_YAML_DEFAULT);
159 styler.ColourTo(endPos, SCE_YAML_COMMENT);
160 return;
161 } else {
162 styler.ColourTo(endPos, SCE_YAML_ERROR);
163 return;
164 }
165 } else if (lineBuffer[i] == '#') {
166 styler.ColourTo(startLine + i - 1, SCE_YAML_DEFAULT);
167 styler.ColourTo(endPos, SCE_YAML_COMMENT);
168 return;
169 }
170 Sci_PositionU startComment = i;
171 bInQuotes = false;
172 while (startComment < lengthLine) { // Comment must be space padded
173 if (lineBuffer[startComment] == '\'' || lineBuffer[startComment] == '\"')
174 bInQuotes = !bInQuotes;
175 if (lineBuffer[startComment] == '#' && isspacechar(lineBuffer[startComment - 1]) && !bInQuotes)
176 break;
177 startComment++;
178 }
179 styler.SetLineState(currentLine, YAML_STATE_VALUE);
180 if (lineBuffer[i] == '&' || lineBuffer[i] == '*') {
181 styler.ColourTo(startLine + startComment - 1, SCE_YAML_REFERENCE);
182 if (startComment < lengthLine)
183 styler.ColourTo(endPos, SCE_YAML_COMMENT);
184 return;
185 }
186 if (KeywordAtChar(&lineBuffer[i], &lineBuffer[startComment], keywords)) { // Convertible value (true/false, etc.)
187 styler.ColourTo(startLine + startComment - 1, SCE_YAML_KEYWORD);
188 if (startComment < lengthLine)
189 styler.ColourTo(endPos, SCE_YAML_COMMENT);
190 return;
191 }
192 Sci_PositionU i2 = i;
193 while ((i < startComment) && lineBuffer[i]) {
194 if (!(IsASCII(lineBuffer[i]) && isdigit(lineBuffer[i])) && lineBuffer[i] != '-'
195 && lineBuffer[i] != '.' && lineBuffer[i] != ',' && lineBuffer[i] != ' ') {
196 styler.ColourTo(startLine + startComment - 1, SCE_YAML_DEFAULT);
197 if (startComment < lengthLine)
198 styler.ColourTo(endPos, SCE_YAML_COMMENT);
199 return;
200 }
201 i++;
202 }
203 if (i > i2) {
204 styler.ColourTo(startLine + startComment - 1, SCE_YAML_NUMBER);
205 if (startComment < lengthLine)
206 styler.ColourTo(endPos, SCE_YAML_COMMENT);
207 return;
208 }
209 break; // shouldn't get here, but just in case, the rest of the line is coloured the default
210 }
211 i++;
212 }
213 styler.ColourTo(endPos, SCE_YAML_DEFAULT);
214}
215
216static void ColouriseYAMLDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *keywordLists[], Accessor &styler) {
217 char lineBuffer[1024] = "";
218 styler.StartAt(startPos);
219 styler.StartSegment(startPos);
220 Sci_PositionU linePos = 0;
221 Sci_PositionU startLine = startPos;
222 Sci_PositionU endPos = startPos + length;
223 Sci_PositionU maxPos = styler.Length();
224 Sci_PositionU lineCurrent = styler.GetLine(startPos);
225
226 for (Sci_PositionU i = startPos; i < maxPos && i < endPos; i++) {
227 lineBuffer[linePos++] = styler[i];
228 if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
229 // End of line (or of line buffer) met, colourise it
230 lineBuffer[linePos] = '\0';
231 ColouriseYAMLLine(lineBuffer, lineCurrent, linePos, startLine, i, *keywordLists[0], styler);
232 linePos = 0;
233 startLine = i + 1;
234 lineCurrent++;
235 }
236 }
237 if (linePos > 0) { // Last line does not have ending characters
238 ColouriseYAMLLine(lineBuffer, lineCurrent, linePos, startLine, startPos + length - 1, *keywordLists[0], styler);
239 }
240}
241
242static bool IsCommentLine(Sci_Position line, Accessor &styler) {
243 Sci_Position pos = styler.LineStart(line);
244 if (styler[pos] == '#')
245 return true;
246 return false;
247}
248
249static void FoldYAMLDoc(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/,
250 WordList *[], Accessor &styler) {
251 const Sci_Position maxPos = startPos + length;
252 const Sci_Position maxLines = styler.GetLine(maxPos - 1); // Requested last line
253 const Sci_Position docLines = styler.GetLine(styler.Length() - 1); // Available last line
254 const bool foldComment = styler.GetPropertyInt("fold.comment.yaml") != 0;
255
256 // Backtrack to previous non-blank line so we can determine indent level
257 // for any white space lines
258 // and so we can fix any preceding fold level (which is why we go back
259 // at least one line in all cases)
260 int spaceFlags = 0;
261 Sci_Position lineCurrent = styler.GetLine(startPos);
262 int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
263 while (lineCurrent > 0) {
264 lineCurrent--;
265 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
266 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
267 (!IsCommentLine(lineCurrent, styler)))
268 break;
269 }
270 int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
271
272 // Set up initial loop state
273 int prevComment = 0;
274 if (lineCurrent >= 1)
275 prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler);
276
277 // Process all characters to end of requested range
278 // or comment that hangs over the end of the range. Cap processing in all cases
279 // to end of document (in case of unclosed comment at end).
280 while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevComment)) {
281
282 // Gather info
283 int lev = indentCurrent;
284 Sci_Position lineNext = lineCurrent + 1;
285 int indentNext = indentCurrent;
286 if (lineNext <= docLines) {
287 // Information about next line is only available if not at end of document
288 indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
289 }
290 const int comment = foldComment && IsCommentLine(lineCurrent, styler);
291 const int comment_start = (comment && !prevComment && (lineNext <= docLines) &&
292 IsCommentLine(lineNext, styler) && (lev > SC_FOLDLEVELBASE));
293 const int comment_continue = (comment && prevComment);
294 if (!comment)
295 indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
296 if (indentNext & SC_FOLDLEVELWHITEFLAG)
297 indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
298
299 if (comment_start) {
300 // Place fold point at start of a block of comments
301 lev |= SC_FOLDLEVELHEADERFLAG;
302 } else if (comment_continue) {
303 // Add level to rest of lines in the block
304 lev = lev + 1;
305 }
306
307 // Skip past any blank lines for next indent level info; we skip also
308 // comments (all comments, not just those starting in column 0)
309 // which effectively folds them into surrounding code rather
310 // than screwing up folding.
311
312 while ((lineNext < docLines) &&
313 ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
314 (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
315
316 lineNext++;
317 indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
318 }
319
320 const int levelAfterComments = indentNext & SC_FOLDLEVELNUMBERMASK;
321 const int levelBeforeComments = Maximum(indentCurrentLevel,levelAfterComments);
322
323 // Now set all the indent levels on the lines we skipped
324 // Do this from end to start. Once we encounter one line
325 // which is indented more than the line after the end of
326 // the comment-block, use the level of the block before
327
328 Sci_Position skipLine = lineNext;
329 int skipLevel = levelAfterComments;
330
331 while (--skipLine > lineCurrent) {
332 int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
333
334 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
335 skipLevel = levelBeforeComments;
336
337 int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
338
339 styler.SetLevel(skipLine, skipLevel | whiteFlag);
340 }
341
342 // Set fold header on non-comment line
343 if (!comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG) ) {
344 if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
345 lev |= SC_FOLDLEVELHEADERFLAG;
346 }
347
348 // Keep track of block comment state of previous line
349 prevComment = comment_start || comment_continue;
350
351 // Set fold level for this line and move to next line
352 styler.SetLevel(lineCurrent, lev);
353 indentCurrent = indentNext;
354 lineCurrent = lineNext;
355 }
356
357 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
358 // header flag set; the loop above is crafted to take care of this case!
359 //styler.SetLevel(lineCurrent, indentCurrent);
360}
361
362LexerModule lmYAML(SCLEX_YAML, ColouriseYAMLDoc, "yaml", FoldYAMLDoc, yamlWordListDesc);
363