1// Scintilla source code edit control
2/** @file LexCOBOL.cxx
3 ** Lexer for COBOL
4 ** Based on LexPascal.cxx
5 ** Written by Laurent le Tynevez
6 ** Updated by Simon Steele <s.steele@pnotepad.org> September 2002
7 ** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments)
8 ** Updated by Rod Falck, Aug 2006 Converted to COBOL
9 **/
10
11#include <stdlib.h>
12#include <string.h>
13#include <stdio.h>
14#include <stdarg.h>
15#include <assert.h>
16#include <ctype.h>
17
18#include <string>
19#include <string_view>
20
21#include "ILexer.h"
22#include "Scintilla.h"
23#include "SciLexer.h"
24
25#include "WordList.h"
26#include "LexAccessor.h"
27#include "Accessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31
32using namespace Lexilla;
33
34#define IN_DIVISION 0x01
35#define IN_DECLARATIVES 0x02
36#define IN_SECTION 0x04
37#define IN_PARAGRAPH 0x08
38#define IN_FLAGS 0xF
39#define NOT_HEADER 0x10
40
41inline bool isCOBOLoperator(char ch)
42 {
43 return isoperator(ch);
44 }
45
46inline bool isCOBOLwordchar(char ch)
47 {
48 return IsASCII(ch) && (isalnum(ch) || ch == '-');
49
50 }
51
52inline bool isCOBOLwordstart(char ch)
53 {
54 return IsASCII(ch) && isalnum(ch);
55 }
56
57static int CountBits(int nBits)
58 {
59 int count = 0;
60 for (int i = 0; i < 32; ++i)
61 {
62 count += nBits & 1;
63 nBits >>= 1;
64 }
65 return count;
66 }
67
68static void getRange(Sci_PositionU start,
69 Sci_PositionU end,
70 Accessor &styler,
71 char *s,
72 Sci_PositionU len) {
73 Sci_PositionU i = 0;
74 while ((i < end - start + 1) && (i < len-1)) {
75 s[i] = static_cast<char>(tolower(styler[start + i]));
76 i++;
77 }
78 s[i] = '\0';
79}
80
81static void ColourTo(Accessor &styler, Sci_PositionU end, unsigned int attr) {
82 styler.ColourTo(end, attr);
83}
84
85
86static int classifyWordCOBOL(Sci_PositionU start, Sci_PositionU end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) {
87 int ret = 0;
88
89 WordList& a_keywords = *keywordlists[0];
90 WordList& b_keywords = *keywordlists[1];
91 WordList& c_keywords = *keywordlists[2];
92
93 char s[100];
94 s[0] = '\0';
95 s[1] = '\0';
96 getRange(start, end, styler, s, sizeof(s));
97
98 char chAttr = SCE_C_IDENTIFIER;
99 if (isdigit(s[0]) || (s[0] == '.') || (s[0] == 'v')) {
100 chAttr = SCE_C_NUMBER;
101 char *p = s + 1;
102 while (*p) {
103 if ((!isdigit(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) {
104 chAttr = SCE_C_IDENTIFIER;
105 break;
106 }
107 ++p;
108 }
109 }
110 else {
111 if (a_keywords.InList(s)) {
112 chAttr = SCE_C_WORD;
113 }
114 else if (b_keywords.InList(s)) {
115 chAttr = SCE_C_WORD2;
116 }
117 else if (c_keywords.InList(s)) {
118 chAttr = SCE_C_UUID;
119 }
120 }
121 if (*bAarea) {
122 if (strcmp(s, "division") == 0) {
123 ret = IN_DIVISION;
124 // we've determined the containment, anything else is just ignored for those purposes
125 *bAarea = false;
126 } else if (strcmp(s, "declaratives") == 0) {
127 ret = IN_DIVISION | IN_DECLARATIVES;
128 if (nContainment & IN_DECLARATIVES)
129 ret |= NOT_HEADER | IN_SECTION;
130 // we've determined the containment, anything else is just ignored for those purposes
131 *bAarea = false;
132 } else if (strcmp(s, "section") == 0) {
133 ret = (nContainment &~ IN_PARAGRAPH) | IN_SECTION;
134 // we've determined the containment, anything else is just ignored for those purposes
135 *bAarea = false;
136 } else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) {
137 ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER;
138 } else {
139 ret = nContainment | IN_PARAGRAPH;
140 }
141 }
142 ColourTo(styler, end, chAttr);
143 return ret;
144}
145
146static void ColouriseCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
147 Accessor &styler) {
148
149 styler.StartAt(startPos);
150
151 int state = initStyle;
152 if (state == SCE_C_CHARACTER) // Does not leak onto next line
153 state = SCE_C_DEFAULT;
154 char chPrev = ' ';
155 char chNext = styler[startPos];
156 Sci_PositionU lengthDoc = startPos + length;
157
158 int nContainment;
159
160 Sci_Position currentLine = styler.GetLine(startPos);
161 if (currentLine > 0) {
162 styler.SetLineState(currentLine, styler.GetLineState(currentLine-1));
163 nContainment = styler.GetLineState(currentLine);
164 nContainment &= ~NOT_HEADER;
165 } else {
166 styler.SetLineState(currentLine, 0);
167 nContainment = 0;
168 }
169
170 styler.StartSegment(startPos);
171 bool bNewLine = true;
172 bool bAarea = !isspacechar(chNext);
173 int column = 0;
174 for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
175 char ch = chNext;
176
177 chNext = styler.SafeGetCharAt(i + 1);
178
179 ++column;
180
181 if (bNewLine) {
182 column = 0;
183 }
184 if (column <= 1 && !bAarea) {
185 bAarea = !isspacechar(ch);
186 }
187 bool bSetNewLine = false;
188 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
189 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
190 // Avoid triggering two times on Dos/Win
191 // End of line
192 if (state == SCE_C_CHARACTER) {
193 ColourTo(styler, i, state);
194 state = SCE_C_DEFAULT;
195 }
196 styler.SetLineState(currentLine, nContainment);
197 currentLine++;
198 bSetNewLine = true;
199 if (nContainment & NOT_HEADER)
200 nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION);
201 }
202
203 if (styler.IsLeadByte(ch)) {
204 chNext = styler.SafeGetCharAt(i + 2);
205 chPrev = ' ';
206 i += 1;
207 continue;
208 }
209
210 if (state == SCE_C_DEFAULT) {
211 if (isCOBOLwordstart(ch) || (ch == '$' && IsASCII(chNext) && isalpha(chNext))) {
212 ColourTo(styler, i-1, state);
213 state = SCE_C_IDENTIFIER;
214 } else if (column == 6 && ch == '*') {
215 // Cobol comment line: asterisk in column 7.
216 ColourTo(styler, i-1, state);
217 state = SCE_C_COMMENTLINE;
218 } else if (ch == '*' && chNext == '>') {
219 // Cobol inline comment: asterisk, followed by greater than.
220 ColourTo(styler, i-1, state);
221 state = SCE_C_COMMENTLINE;
222 } else if (column == 0 && ch == '*' && chNext != '*') {
223 ColourTo(styler, i-1, state);
224 state = SCE_C_COMMENTLINE;
225 } else if (column == 0 && ch == '/' && chNext != '*') {
226 ColourTo(styler, i-1, state);
227 state = SCE_C_COMMENTLINE;
228 } else if (column == 0 && ch == '*' && chNext == '*') {
229 ColourTo(styler, i-1, state);
230 state = SCE_C_COMMENTDOC;
231 } else if (column == 0 && ch == '/' && chNext == '*') {
232 ColourTo(styler, i-1, state);
233 state = SCE_C_COMMENTDOC;
234 } else if (ch == '"') {
235 ColourTo(styler, i-1, state);
236 state = SCE_C_STRING;
237 } else if (ch == '\'') {
238 ColourTo(styler, i-1, state);
239 state = SCE_C_CHARACTER;
240 } else if (ch == '?' && column == 0) {
241 ColourTo(styler, i-1, state);
242 state = SCE_C_PREPROCESSOR;
243 } else if (isCOBOLoperator(ch)) {
244 ColourTo(styler, i-1, state);
245 ColourTo(styler, i, SCE_C_OPERATOR);
246 }
247 } else if (state == SCE_C_IDENTIFIER) {
248 if (!isCOBOLwordchar(ch)) {
249 int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea);
250
251 if(lStateChange != 0) {
252 styler.SetLineState(currentLine, lStateChange);
253 nContainment = lStateChange;
254 }
255
256 state = SCE_C_DEFAULT;
257 chNext = styler.SafeGetCharAt(i + 1);
258 if (ch == '"') {
259 state = SCE_C_STRING;
260 } else if (ch == '\'') {
261 state = SCE_C_CHARACTER;
262 } else if (isCOBOLoperator(ch)) {
263 ColourTo(styler, i, SCE_C_OPERATOR);
264 }
265 }
266 } else {
267 if (state == SCE_C_PREPROCESSOR) {
268 if ((ch == '\r' || ch == '\n') && !(chPrev == '\\' || chPrev == '\r')) {
269 ColourTo(styler, i-1, state);
270 state = SCE_C_DEFAULT;
271 }
272 } else if (state == SCE_C_COMMENT) {
273 if (ch == '\r' || ch == '\n') {
274 ColourTo(styler, i, state);
275 state = SCE_C_DEFAULT;
276 }
277 } else if (state == SCE_C_COMMENTDOC) {
278 if (ch == '\r' || ch == '\n') {
279 if (((i > styler.GetStartSegment() + 2) || (
280 (initStyle == SCE_C_COMMENTDOC) &&
281 (styler.GetStartSegment() == static_cast<Sci_PositionU>(startPos))))) {
282 ColourTo(styler, i, state);
283 state = SCE_C_DEFAULT;
284 }
285 }
286 } else if (state == SCE_C_COMMENTLINE) {
287 if (ch == '\r' || ch == '\n') {
288 ColourTo(styler, i-1, state);
289 state = SCE_C_DEFAULT;
290 }
291 } else if (state == SCE_C_STRING) {
292 if (ch == '"') {
293 ColourTo(styler, i, state);
294 state = SCE_C_DEFAULT;
295 }
296 } else if (state == SCE_C_CHARACTER) {
297 if (ch == '\'') {
298 ColourTo(styler, i, state);
299 state = SCE_C_DEFAULT;
300 }
301 }
302 }
303 chPrev = ch;
304 bNewLine = bSetNewLine;
305 if (bNewLine)
306 {
307 bAarea = false;
308 }
309 }
310 ColourTo(styler, lengthDoc - 1, state);
311}
312
313static void FoldCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
314 Accessor &styler) {
315 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
316 Sci_PositionU endPos = startPos + length;
317 int visibleChars = 0;
318 Sci_Position lineCurrent = styler.GetLine(startPos);
319 int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF;
320 char chNext = styler[startPos];
321
322 bool bNewLine = true;
323 bool bAarea = !isspacechar(chNext);
324 int column = 0;
325 bool bComment = false;
326 for (Sci_PositionU i = startPos; i < endPos; i++) {
327 char ch = chNext;
328 chNext = styler.SafeGetCharAt(i + 1);
329 ++column;
330
331 if (bNewLine) {
332 column = 0;
333 bComment = (ch == '*' || ch == '/' || ch == '?');
334 }
335 if (column <= 1 && !bAarea) {
336 bAarea = !isspacechar(ch);
337 }
338 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
339 if (atEOL) {
340 int nContainment = styler.GetLineState(lineCurrent);
341 int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE;
342 if (bAarea && !bComment)
343 --lev;
344 if (visibleChars == 0 && foldCompact)
345 lev |= SC_FOLDLEVELWHITEFLAG;
346 if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment)
347 lev |= SC_FOLDLEVELHEADERFLAG;
348 if (lev != styler.LevelAt(lineCurrent)) {
349 styler.SetLevel(lineCurrent, lev);
350 }
351 if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) {
352 // this level is at the same level or less than the previous line
353 // therefore these is nothing for the previous header to collapse, so remove the header
354 styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG);
355 }
356 levelPrev = lev;
357 visibleChars = 0;
358 bAarea = false;
359 bNewLine = true;
360 lineCurrent++;
361 } else {
362 bNewLine = false;
363 }
364
365
366 if (!isspacechar(ch))
367 visibleChars++;
368 }
369
370 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
371 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
372 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
373}
374
375static const char * const COBOLWordListDesc[] = {
376 "A Keywords",
377 "B Keywords",
378 "Extended Keywords",
379 0
380};
381
382LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc);
383