1// Scintilla source code edit control
2/** @file LexStata.cxx
3 ** Lexer for Stata
4 **/
5// Author: Luke Rasmussen (luke.rasmussen@gmail.com)
6//
7// The License.txt file describes the conditions under which this software may
8// be distributed.
9//
10// Developed as part of the StatTag project at Northwestern University Feinberg
11// School of Medicine with funding from Northwestern University Clinical and
12// Translational Sciences Institute through CTSA grant UL1TR001422. This work
13// has not been reviewed or endorsed by NCATS or the NIH.
14
15#include <stdlib.h>
16#include <string.h>
17#include <stdio.h>
18#include <stdarg.h>
19#include <assert.h>
20#include <ctype.h>
21
22#include <string>
23#include <string_view>
24
25#include "ILexer.h"
26#include "Scintilla.h"
27#include "SciLexer.h"
28
29#include "WordList.h"
30#include "LexAccessor.h"
31#include "Accessor.h"
32#include "StyleContext.h"
33#include "CharacterSet.h"
34#include "LexerModule.h"
35
36using namespace Lexilla;
37
38static void ColouriseStataDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
39 Accessor &styler) {
40
41 WordList &keywords = *keywordlists[0];
42 WordList &types = *keywordlists[1];
43
44 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
45 CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
46 CharacterSet setWord(CharacterSet::setAlphaNum, "._", 0x80, true);
47
48 StyleContext sc(startPos, length, initStyle, styler);
49 bool lineHasNonCommentChar = false;
50 for (; sc.More(); sc.Forward()) {
51 if (sc.atLineStart) {
52 lineHasNonCommentChar = false;
53 }
54
55 // Determine if the current state should terminate.
56 switch (sc.state) {
57 case SCE_STATA_OPERATOR:
58 sc.SetState(SCE_STATA_DEFAULT);
59 break;
60 case SCE_STATA_NUMBER:
61 // We accept almost anything because of hex. and number suffixes
62 if (!setWord.Contains(sc.ch)) {
63 sc.SetState(SCE_STATA_DEFAULT);
64 }
65 break;
66 case SCE_STATA_IDENTIFIER:
67 if (!setWord.Contains(sc.ch) || (sc.ch == '.')) {
68 char s[1000];
69 sc.GetCurrent(s, sizeof(s));
70 if (keywords.InList(s)) {
71 sc.ChangeState(SCE_STATA_WORD);
72 }
73 else if (types.InList(s)) {
74 sc.ChangeState(SCE_STATA_TYPE);
75 }
76 sc.SetState(SCE_STATA_DEFAULT);
77 }
78 break;
79 case SCE_STATA_COMMENTBLOCK:
80 if (sc.Match('*', '/')) {
81 sc.Forward();
82 sc.ForwardSetState(SCE_STATA_DEFAULT);
83 }
84 break;
85 case SCE_STATA_COMMENT:
86 case SCE_STATA_COMMENTLINE:
87 if (sc.atLineStart) {
88 sc.SetState(SCE_STATA_DEFAULT);
89 }
90 break;
91 case SCE_STATA_STRING:
92 if (sc.ch == '\\') {
93 // Per Stata documentation, the following characters are the only ones that can
94 // be escaped (not our typical set of quotes, etc.):
95 // https://www.stata.com/support/faqs/programming/backslashes-and-macros/
96 if (sc.chNext == '$' || sc.chNext == '`' || sc.chNext == '\\') {
97 sc.Forward();
98 }
99 }
100 else if (sc.ch == '\"') {
101 sc.ForwardSetState(SCE_STATA_DEFAULT);
102 }
103 break;
104 }
105
106 // Determine if a new state should be entered.
107 if (sc.state == SCE_STATA_DEFAULT) {
108 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
109 lineHasNonCommentChar = true;
110 sc.SetState(SCE_STATA_NUMBER);
111 }
112 else if (setWordStart.Contains(sc.ch)) {
113 lineHasNonCommentChar = true;
114 sc.SetState(SCE_STATA_IDENTIFIER);
115 }
116 else if (sc.Match('*') && !lineHasNonCommentChar) {
117 sc.SetState(SCE_STATA_COMMENT);
118 }
119 else if (sc.Match('/', '*')) {
120 sc.SetState(SCE_STATA_COMMENTBLOCK);
121 sc.Forward(); // Eat the * so it isn't used for the end of the comment
122 }
123 else if (sc.Match('/', '/')) {
124 sc.SetState(SCE_STATA_COMMENTLINE);
125 }
126 else if (sc.ch == '\"') {
127 lineHasNonCommentChar = true;
128 sc.SetState(SCE_STATA_STRING);
129 }
130 else if (isoperator(sc.ch)) {
131 lineHasNonCommentChar = true;
132 sc.SetState(SCE_STATA_OPERATOR);
133 }
134 }
135 }
136
137 sc.Complete();
138}
139
140// Store both the current line's fold level and the next lines in the
141// level store to make it easy to pick up with each increment
142// and to make it possible to fiddle the current level for "} else {".
143static void FoldStataDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
144 Accessor &styler) {
145 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
146 bool foldAtElse = styler.GetPropertyInt("fold.at.else", 0) != 0;
147 Sci_PositionU endPos = startPos + length;
148 int visibleChars = 0;
149 Sci_Position lineCurrent = styler.GetLine(startPos);
150 int levelCurrent = SC_FOLDLEVELBASE;
151 if (lineCurrent > 0)
152 levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
153 int levelMinCurrent = levelCurrent;
154 int levelNext = levelCurrent;
155 char chNext = styler[startPos];
156 int styleNext = styler.StyleAt(startPos);
157 for (Sci_PositionU i = startPos; i < endPos; i++) {
158 char ch = chNext;
159 chNext = styler.SafeGetCharAt(i + 1);
160 int style = styleNext;
161 styleNext = styler.StyleAt(i + 1);
162 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
163 if (style == SCE_R_OPERATOR) {
164 if (ch == '{') {
165 // Measure the minimum before a '{' to allow
166 // folding on "} else {"
167 if (levelMinCurrent > levelNext) {
168 levelMinCurrent = levelNext;
169 }
170 levelNext++;
171 }
172 else if (ch == '}') {
173 levelNext--;
174 }
175 }
176 if (atEOL) {
177 int levelUse = levelCurrent;
178 if (foldAtElse) {
179 levelUse = levelMinCurrent;
180 }
181 int lev = levelUse | levelNext << 16;
182 if (visibleChars == 0 && foldCompact)
183 lev |= SC_FOLDLEVELWHITEFLAG;
184 if (levelUse < levelNext)
185 lev |= SC_FOLDLEVELHEADERFLAG;
186 if (lev != styler.LevelAt(lineCurrent)) {
187 styler.SetLevel(lineCurrent, lev);
188 }
189 lineCurrent++;
190 levelCurrent = levelNext;
191 levelMinCurrent = levelCurrent;
192 visibleChars = 0;
193 }
194 if (!isspacechar(ch))
195 visibleChars++;
196 }
197}
198
199
// Descriptions of the keyword lists, in index order: list 0 holds the
// language keywords and list 1 the type names. A null pointer ends the array.
static const char * const StataWordLists[] = { "Language Keywords", "Types", nullptr };
205
206LexerModule lmStata(SCLEX_STATA, ColouriseStataDoc, "stata", FoldStataDoc, StataWordLists);
207