1 | // Scintilla source code edit control |
2 | /** @file LexCOBOL.cxx |
3 | ** Lexer for COBOL |
4 | ** Based on LexPascal.cxx |
5 | ** Written by Laurent le Tynevez |
6 | ** Updated by Simon Steele <s.steele@pnotepad.org> September 2002 |
7 | ** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments) |
8 | ** Updated by Rod Falck, Aug 2006 Converted to COBOL |
9 | **/ |
10 | |
11 | #include <stdlib.h> |
12 | #include <string.h> |
13 | #include <stdio.h> |
14 | #include <stdarg.h> |
15 | #include <assert.h> |
16 | #include <ctype.h> |
17 | |
18 | #include <string> |
19 | #include <string_view> |
20 | |
21 | #include "ILexer.h" |
22 | #include "Scintilla.h" |
23 | #include "SciLexer.h" |
24 | |
25 | #include "WordList.h" |
26 | #include "LexAccessor.h" |
27 | #include "Accessor.h" |
28 | #include "StyleContext.h" |
29 | #include "CharacterSet.h" |
30 | #include "LexerModule.h" |
31 | |
32 | using namespace Lexilla; |
33 | |
34 | #define IN_DIVISION 0x01 |
35 | #define IN_DECLARATIVES 0x02 |
36 | #define IN_SECTION 0x04 |
37 | #define IN_PARAGRAPH 0x08 |
38 | #define IN_FLAGS 0xF |
39 | #define 0x10 |
40 | |
41 | inline bool isCOBOLoperator(char ch) |
42 | { |
43 | return isoperator(ch); |
44 | } |
45 | |
46 | inline bool isCOBOLwordchar(char ch) |
47 | { |
48 | return IsASCII(ch) && (isalnum(ch) || ch == '-'); |
49 | |
50 | } |
51 | |
52 | inline bool isCOBOLwordstart(char ch) |
53 | { |
54 | return IsASCII(ch) && isalnum(ch); |
55 | } |
56 | |
57 | static int CountBits(int nBits) |
58 | { |
59 | int count = 0; |
60 | for (int i = 0; i < 32; ++i) |
61 | { |
62 | count += nBits & 1; |
63 | nBits >>= 1; |
64 | } |
65 | return count; |
66 | } |
67 | |
68 | static void getRange(Sci_PositionU start, |
69 | Sci_PositionU end, |
70 | Accessor &styler, |
71 | char *s, |
72 | Sci_PositionU len) { |
73 | Sci_PositionU i = 0; |
74 | while ((i < end - start + 1) && (i < len-1)) { |
75 | s[i] = static_cast<char>(tolower(styler[start + i])); |
76 | i++; |
77 | } |
78 | s[i] = '\0'; |
79 | } |
80 | |
81 | static void ColourTo(Accessor &styler, Sci_PositionU end, unsigned int attr) { |
82 | styler.ColourTo(end, attr); |
83 | } |
84 | |
85 | |
86 | static int classifyWordCOBOL(Sci_PositionU start, Sci_PositionU end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) { |
87 | int ret = 0; |
88 | |
89 | WordList& a_keywords = *keywordlists[0]; |
90 | WordList& b_keywords = *keywordlists[1]; |
91 | WordList& c_keywords = *keywordlists[2]; |
92 | |
93 | char s[100]; |
94 | s[0] = '\0'; |
95 | s[1] = '\0'; |
96 | getRange(start, end, styler, s, sizeof(s)); |
97 | |
98 | char chAttr = SCE_C_IDENTIFIER; |
99 | if (isdigit(s[0]) || (s[0] == '.') || (s[0] == 'v')) { |
100 | chAttr = SCE_C_NUMBER; |
101 | char *p = s + 1; |
102 | while (*p) { |
103 | if ((!isdigit(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) { |
104 | chAttr = SCE_C_IDENTIFIER; |
105 | break; |
106 | } |
107 | ++p; |
108 | } |
109 | } |
110 | else { |
111 | if (a_keywords.InList(s)) { |
112 | chAttr = SCE_C_WORD; |
113 | } |
114 | else if (b_keywords.InList(s)) { |
115 | chAttr = SCE_C_WORD2; |
116 | } |
117 | else if (c_keywords.InList(s)) { |
118 | chAttr = SCE_C_UUID; |
119 | } |
120 | } |
121 | if (*bAarea) { |
122 | if (strcmp(s, "division" ) == 0) { |
123 | ret = IN_DIVISION; |
124 | // we've determined the containment, anything else is just ignored for those purposes |
125 | *bAarea = false; |
126 | } else if (strcmp(s, "declaratives" ) == 0) { |
127 | ret = IN_DIVISION | IN_DECLARATIVES; |
128 | if (nContainment & IN_DECLARATIVES) |
129 | ret |= NOT_HEADER | IN_SECTION; |
130 | // we've determined the containment, anything else is just ignored for those purposes |
131 | *bAarea = false; |
132 | } else if (strcmp(s, "section" ) == 0) { |
133 | ret = (nContainment &~ IN_PARAGRAPH) | IN_SECTION; |
134 | // we've determined the containment, anything else is just ignored for those purposes |
135 | *bAarea = false; |
136 | } else if (strcmp(s, "end" ) == 0 && (nContainment & IN_DECLARATIVES)) { |
137 | ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER; |
138 | } else { |
139 | ret = nContainment | IN_PARAGRAPH; |
140 | } |
141 | } |
142 | ColourTo(styler, end, chAttr); |
143 | return ret; |
144 | } |
145 | |
146 | static void ColouriseCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], |
147 | Accessor &styler) { |
148 | |
149 | styler.StartAt(startPos); |
150 | |
151 | int state = initStyle; |
152 | if (state == SCE_C_CHARACTER) // Does not leak onto next line |
153 | state = SCE_C_DEFAULT; |
154 | char chPrev = ' '; |
155 | char chNext = styler[startPos]; |
156 | Sci_PositionU lengthDoc = startPos + length; |
157 | |
158 | int nContainment; |
159 | |
160 | Sci_Position currentLine = styler.GetLine(startPos); |
161 | if (currentLine > 0) { |
162 | styler.SetLineState(currentLine, styler.GetLineState(currentLine-1)); |
163 | nContainment = styler.GetLineState(currentLine); |
164 | nContainment &= ~NOT_HEADER; |
165 | } else { |
166 | styler.SetLineState(currentLine, 0); |
167 | nContainment = 0; |
168 | } |
169 | |
170 | styler.StartSegment(startPos); |
171 | bool bNewLine = true; |
172 | bool bAarea = !isspacechar(chNext); |
173 | int column = 0; |
174 | for (Sci_PositionU i = startPos; i < lengthDoc; i++) { |
175 | char ch = chNext; |
176 | |
177 | chNext = styler.SafeGetCharAt(i + 1); |
178 | |
179 | ++column; |
180 | |
181 | if (bNewLine) { |
182 | column = 0; |
183 | } |
184 | if (column <= 1 && !bAarea) { |
185 | bAarea = !isspacechar(ch); |
186 | } |
187 | bool bSetNewLine = false; |
188 | if ((ch == '\r' && chNext != '\n') || (ch == '\n')) { |
189 | // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) |
190 | // Avoid triggering two times on Dos/Win |
191 | // End of line |
192 | if (state == SCE_C_CHARACTER) { |
193 | ColourTo(styler, i, state); |
194 | state = SCE_C_DEFAULT; |
195 | } |
196 | styler.SetLineState(currentLine, nContainment); |
197 | currentLine++; |
198 | bSetNewLine = true; |
199 | if (nContainment & NOT_HEADER) |
200 | nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION); |
201 | } |
202 | |
203 | if (styler.IsLeadByte(ch)) { |
204 | chNext = styler.SafeGetCharAt(i + 2); |
205 | chPrev = ' '; |
206 | i += 1; |
207 | continue; |
208 | } |
209 | |
210 | if (state == SCE_C_DEFAULT) { |
211 | if (isCOBOLwordstart(ch) || (ch == '$' && IsASCII(chNext) && isalpha(chNext))) { |
212 | ColourTo(styler, i-1, state); |
213 | state = SCE_C_IDENTIFIER; |
214 | } else if (column == 6 && ch == '*') { |
215 | // Cobol comment line: asterisk in column 7. |
216 | ColourTo(styler, i-1, state); |
217 | state = SCE_C_COMMENTLINE; |
218 | } else if (ch == '*' && chNext == '>') { |
219 | // Cobol inline comment: asterisk, followed by greater than. |
220 | ColourTo(styler, i-1, state); |
221 | state = SCE_C_COMMENTLINE; |
222 | } else if (column == 0 && ch == '*' && chNext != '*') { |
223 | ColourTo(styler, i-1, state); |
224 | state = SCE_C_COMMENTLINE; |
225 | } else if (column == 0 && ch == '/' && chNext != '*') { |
226 | ColourTo(styler, i-1, state); |
227 | state = SCE_C_COMMENTLINE; |
228 | } else if (column == 0 && ch == '*' && chNext == '*') { |
229 | ColourTo(styler, i-1, state); |
230 | state = SCE_C_COMMENTDOC; |
231 | } else if (column == 0 && ch == '/' && chNext == '*') { |
232 | ColourTo(styler, i-1, state); |
233 | state = SCE_C_COMMENTDOC; |
234 | } else if (ch == '"') { |
235 | ColourTo(styler, i-1, state); |
236 | state = SCE_C_STRING; |
237 | } else if (ch == '\'') { |
238 | ColourTo(styler, i-1, state); |
239 | state = SCE_C_CHARACTER; |
240 | } else if (ch == '?' && column == 0) { |
241 | ColourTo(styler, i-1, state); |
242 | state = SCE_C_PREPROCESSOR; |
243 | } else if (isCOBOLoperator(ch)) { |
244 | ColourTo(styler, i-1, state); |
245 | ColourTo(styler, i, SCE_C_OPERATOR); |
246 | } |
247 | } else if (state == SCE_C_IDENTIFIER) { |
248 | if (!isCOBOLwordchar(ch)) { |
249 | int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea); |
250 | |
251 | if(lStateChange != 0) { |
252 | styler.SetLineState(currentLine, lStateChange); |
253 | nContainment = lStateChange; |
254 | } |
255 | |
256 | state = SCE_C_DEFAULT; |
257 | chNext = styler.SafeGetCharAt(i + 1); |
258 | if (ch == '"') { |
259 | state = SCE_C_STRING; |
260 | } else if (ch == '\'') { |
261 | state = SCE_C_CHARACTER; |
262 | } else if (isCOBOLoperator(ch)) { |
263 | ColourTo(styler, i, SCE_C_OPERATOR); |
264 | } |
265 | } |
266 | } else { |
267 | if (state == SCE_C_PREPROCESSOR) { |
268 | if ((ch == '\r' || ch == '\n') && !(chPrev == '\\' || chPrev == '\r')) { |
269 | ColourTo(styler, i-1, state); |
270 | state = SCE_C_DEFAULT; |
271 | } |
272 | } else if (state == SCE_C_COMMENT) { |
273 | if (ch == '\r' || ch == '\n') { |
274 | ColourTo(styler, i, state); |
275 | state = SCE_C_DEFAULT; |
276 | } |
277 | } else if (state == SCE_C_COMMENTDOC) { |
278 | if (ch == '\r' || ch == '\n') { |
279 | if (((i > styler.GetStartSegment() + 2) || ( |
280 | (initStyle == SCE_C_COMMENTDOC) && |
281 | (styler.GetStartSegment() == static_cast<Sci_PositionU>(startPos))))) { |
282 | ColourTo(styler, i, state); |
283 | state = SCE_C_DEFAULT; |
284 | } |
285 | } |
286 | } else if (state == SCE_C_COMMENTLINE) { |
287 | if (ch == '\r' || ch == '\n') { |
288 | ColourTo(styler, i-1, state); |
289 | state = SCE_C_DEFAULT; |
290 | } |
291 | } else if (state == SCE_C_STRING) { |
292 | if (ch == '"') { |
293 | ColourTo(styler, i, state); |
294 | state = SCE_C_DEFAULT; |
295 | } |
296 | } else if (state == SCE_C_CHARACTER) { |
297 | if (ch == '\'') { |
298 | ColourTo(styler, i, state); |
299 | state = SCE_C_DEFAULT; |
300 | } |
301 | } |
302 | } |
303 | chPrev = ch; |
304 | bNewLine = bSetNewLine; |
305 | if (bNewLine) |
306 | { |
307 | bAarea = false; |
308 | } |
309 | } |
310 | ColourTo(styler, lengthDoc - 1, state); |
311 | } |
312 | |
313 | static void FoldCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], |
314 | Accessor &styler) { |
315 | bool foldCompact = styler.GetPropertyInt("fold.compact" , 1) != 0; |
316 | Sci_PositionU endPos = startPos + length; |
317 | int visibleChars = 0; |
318 | Sci_Position lineCurrent = styler.GetLine(startPos); |
319 | int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF; |
320 | char chNext = styler[startPos]; |
321 | |
322 | bool bNewLine = true; |
323 | bool bAarea = !isspacechar(chNext); |
324 | int column = 0; |
325 | bool = false; |
326 | for (Sci_PositionU i = startPos; i < endPos; i++) { |
327 | char ch = chNext; |
328 | chNext = styler.SafeGetCharAt(i + 1); |
329 | ++column; |
330 | |
331 | if (bNewLine) { |
332 | column = 0; |
333 | bComment = (ch == '*' || ch == '/' || ch == '?'); |
334 | } |
335 | if (column <= 1 && !bAarea) { |
336 | bAarea = !isspacechar(ch); |
337 | } |
338 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); |
339 | if (atEOL) { |
340 | int nContainment = styler.GetLineState(lineCurrent); |
341 | int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE; |
342 | if (bAarea && !bComment) |
343 | --lev; |
344 | if (visibleChars == 0 && foldCompact) |
345 | lev |= SC_FOLDLEVELWHITEFLAG; |
346 | if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment) |
347 | lev |= SC_FOLDLEVELHEADERFLAG; |
348 | if (lev != styler.LevelAt(lineCurrent)) { |
349 | styler.SetLevel(lineCurrent, lev); |
350 | } |
351 | if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) { |
352 | // this level is at the same level or less than the previous line |
353 | // therefore these is nothing for the previous header to collapse, so remove the header |
354 | styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG); |
355 | } |
356 | levelPrev = lev; |
357 | visibleChars = 0; |
358 | bAarea = false; |
359 | bNewLine = true; |
360 | lineCurrent++; |
361 | } else { |
362 | bNewLine = false; |
363 | } |
364 | |
365 | |
366 | if (!isspacechar(ch)) |
367 | visibleChars++; |
368 | } |
369 | |
370 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later |
371 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; |
372 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); |
373 | } |
374 | |
375 | static const char * const COBOLWordListDesc[] = { |
376 | "A Keywords" , |
377 | "B Keywords" , |
378 | "Extended Keywords" , |
379 | 0 |
380 | }; |
381 | |
382 | LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL" , FoldCOBOLDoc, COBOLWordListDesc); |
383 | |