// Copyright 2008-2010 Sergiu Dotenco. The License.txt file describes the
// conditions under which this software may be distributed.
3
/**
 * @file LexBibTeX.cxx
 * @brief General BibTeX coloring scheme.
 * @author Sergiu Dotenco
 * @date April 18, 2009
 */
10
11#include <stdlib.h>
12#include <string.h>
13
14#include <cassert>
15#include <cctype>
16
17#include <string>
18#include <string_view>
19#include <algorithm>
20#include <functional>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25
26#include "PropSetSimple.h"
27#include "WordList.h"
28#include "LexAccessor.h"
29#include "Accessor.h"
30#include "StyleContext.h"
31#include "CharacterSet.h"
32#include "LexerModule.h"
33
34using namespace Lexilla;
35
36namespace {
37 bool IsAlphabetic(unsigned int ch)
38 {
39 return IsASCII(ch) && std::isalpha(ch) != 0;
40 }
41 bool IsAlphaNumeric(char ch)
42 {
43 return IsASCII(ch) && std::isalnum(ch);
44 }
45
46 bool EqualCaseInsensitive(const char* a, const char* b)
47 {
48 return CompareCaseInsensitive(a, b) == 0;
49 }
50
51 bool EntryWithoutKey(const char* name)
52 {
53 return EqualCaseInsensitive(name,"string");
54 }
55
/// Maps an opening delimiter to its closing counterpart: '(' gives ')' and
/// '{' gives '}'. Any other character is returned unchanged.
char GetClosingBrace(char openbrace)
{
	if (openbrace == '(')
		return ')';

	if (openbrace == '{')
		return '}';

	return openbrace;
}
67
/// Returns true when @p ch is an '@' that is not escaped, i.e. the preceding
/// character @p prev is not a backslash.
bool IsEntryStart(char prev, char ch)
{
	if (ch != '@')
		return false;

	return prev != '\\';
}
72
73 bool IsEntryStart(const StyleContext& sc)
74 {
75 return IsEntryStart(sc.chPrev, sc.ch);
76 }
77
78 void ColorizeBibTeX(Sci_PositionU start_pos, Sci_Position length, int /*init_style*/, WordList* keywordlists[], Accessor& styler)
79 {
80 WordList &EntryNames = *keywordlists[0];
81 bool fold_compact = styler.GetPropertyInt("fold.compact", 1) != 0;
82
83 std::string buffer;
84 buffer.reserve(25);
85
86 // We always colorize a section from the beginning, so let's
87 // search for the @ character which isn't escaped, i.e. \@
88 while (start_pos > 0 && !IsEntryStart(styler.SafeGetCharAt(start_pos - 1),
89 styler.SafeGetCharAt(start_pos))) {
90 --start_pos; ++length;
91 }
92
93 styler.StartAt(start_pos);
94 styler.StartSegment(start_pos);
95
96 Sci_Position current_line = styler.GetLine(start_pos);
97 int prev_level = styler.LevelAt(current_line) & SC_FOLDLEVELNUMBERMASK;
98 int current_level = prev_level;
99 int visible_chars = 0;
100
101 bool in_comment = false ;
102 StyleContext sc(start_pos, length, SCE_BIBTEX_DEFAULT, styler);
103
104 bool going = sc.More(); // needed because of a fuzzy end of file state
105 char closing_brace = 0;
106 bool collect_entry_name = false;
107
108 for (; going; sc.Forward()) {
109 if (!sc.More())
110 going = false; // we need to go one behind the end of text
111
112 if (in_comment) {
113 if (sc.atLineEnd) {
114 sc.SetState(SCE_BIBTEX_DEFAULT);
115 in_comment = false;
116 }
117 }
118 else {
119 // Found @entry
120 if (IsEntryStart(sc)) {
121 sc.SetState(SCE_BIBTEX_UNKNOWN_ENTRY);
122 sc.Forward();
123 ++current_level;
124
125 buffer.clear();
126 collect_entry_name = true;
127 }
128 else if ((sc.state == SCE_BIBTEX_ENTRY || sc.state == SCE_BIBTEX_UNKNOWN_ENTRY)
129 && (sc.ch == '{' || sc.ch == '(')) {
130 // Entry name colorization done
131 // Found either a { or a ( after entry's name, e.g. @entry(...) @entry{...}
132 // Closing counterpart needs to be stored.
133 closing_brace = GetClosingBrace(sc.ch);
134
135 sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize { (
136
137 // @string doesn't have any key
138 if (EntryWithoutKey(buffer.c_str()))
139 sc.ForwardSetState(SCE_BIBTEX_PARAMETER);
140 else
141 sc.ForwardSetState(SCE_BIBTEX_KEY); // Key/label colorization
142 }
143
144 // Need to handle the case where entry's key is empty
145 // e.g. @book{,...}
146 if (sc.state == SCE_BIBTEX_KEY && sc.ch == ',') {
147 // Key/label colorization done
148 sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the ,
149 sc.ForwardSetState(SCE_BIBTEX_PARAMETER); // Parameter colorization
150 }
151 else if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == '=') {
152 sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the =
153 sc.ForwardSetState(SCE_BIBTEX_VALUE); // Parameter value colorization
154
155 Sci_Position start = sc.currentPos;
156
157 // We need to handle multiple situations:
158 // 1. name"one two {three}"
159 // 2. name={one {one two {two}} three}
160 // 3. year=2005
161
162 // Skip ", { until we encounter the first alphanumerical character
163 while (sc.More() && !(IsAlphaNumeric(sc.ch) || sc.ch == '"' || sc.ch == '{'))
164 sc.Forward();
165
166 if (sc.More()) {
167 // Store " or {
168 char ch = sc.ch;
169
170 // Not interested in alphanumerical characters
171 if (IsAlphaNumeric(ch))
172 ch = 0;
173
174 int skipped = 0;
175
176 if (ch) {
177 // Skip preceding " or { such as in name={{test}}.
178 // Remember how many characters have been skipped
179 // Make sure that empty values, i.e. "" are also handled correctly
180 while (sc.More() && (sc.ch == ch && (ch != '"' || skipped < 1))) {
181 sc.Forward();
182 ++skipped;
183 }
184 }
185
186 // Closing counterpart for " is the same character
187 if (ch == '{')
188 ch = '}';
189
190 // We have reached the parameter value
191 // In case the open character was a alnum char, skip until , is found
192 // otherwise until skipped == 0
193 while (sc.More() && (skipped > 0 || (!ch && !(sc.ch == ',' || sc.ch == closing_brace)))) {
194 // Make sure the character isn't escaped
195 if (sc.chPrev != '\\') {
196 // Parameter value contains a { which is the 2nd case described above
197 if (sc.ch == '{')
198 ++skipped; // Remember it
199 else if (sc.ch == '}')
200 --skipped;
201 else if (skipped == 1 && sc.ch == ch && ch == '"') // Don't ignore cases like {"o}
202 skipped = 0;
203 }
204
205 sc.Forward();
206 }
207 }
208
209 // Don't colorize the ,
210 sc.SetState(SCE_BIBTEX_DEFAULT);
211
212 // Skip until the , or entry's closing closing_brace is found
213 // since this parameter might be the last one
214 while (sc.More() && !(sc.ch == ',' || sc.ch == closing_brace))
215 sc.Forward();
216
217 int state = SCE_BIBTEX_PARAMETER; // The might be more parameters
218
219 // We've reached the closing closing_brace for the bib entry
220 // in case no " or {} has been used to enclose the value,
221 // as in 3rd case described above
222 if (sc.ch == closing_brace) {
223 --current_level;
224 // Make sure the text between entries is not colored
225 // using parameter's style
226 state = SCE_BIBTEX_DEFAULT;
227 }
228
229 Sci_Position end = sc.currentPos;
230 current_line = styler.GetLine(end);
231
232 // We have possibly skipped some lines, so the folding levels
233 // have to be adjusted separately
234 for (Sci_Position i = styler.GetLine(start); i <= styler.GetLine(end); ++i)
235 styler.SetLevel(i, prev_level);
236
237 sc.ForwardSetState(state);
238 }
239
240 if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == closing_brace) {
241 sc.SetState(SCE_BIBTEX_DEFAULT);
242 --current_level;
243 }
244
245 // Non escaped % found which represents a comment until the end of the line
246 if (sc.chPrev != '\\' && sc.ch == '%') {
247 in_comment = true;
248 sc.SetState(SCE_BIBTEX_COMMENT);
249 }
250 }
251
252 if (sc.state == SCE_BIBTEX_UNKNOWN_ENTRY || sc.state == SCE_BIBTEX_ENTRY) {
253 if (!IsAlphabetic(sc.ch) && collect_entry_name)
254 collect_entry_name = false;
255
256 if (collect_entry_name) {
257 buffer += static_cast<char>(tolower(sc.ch));
258 if (EntryNames.InList(buffer.c_str()))
259 sc.ChangeState(SCE_BIBTEX_ENTRY);
260 else
261 sc.ChangeState(SCE_BIBTEX_UNKNOWN_ENTRY);
262 }
263 }
264
265 if (sc.atLineEnd) {
266 int level = prev_level;
267
268 if (visible_chars == 0 && fold_compact)
269 level |= SC_FOLDLEVELWHITEFLAG;
270
271 if ((current_level > prev_level))
272 level |= SC_FOLDLEVELHEADERFLAG;
273 // else if (current_level < prev_level)
274 // level |= SC_FOLDLEVELBOXFOOTERFLAG; // Deprecated
275
276 if (level != styler.LevelAt(current_line)) {
277 styler.SetLevel(current_line, level);
278 }
279
280 ++current_line;
281 prev_level = current_level;
282 visible_chars = 0;
283 }
284
285 if (!isspacechar(sc.ch))
286 ++visible_chars;
287 }
288
289 sc.Complete();
290
291 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
292 int flagsNext = styler.LevelAt(current_line) & ~SC_FOLDLEVELNUMBERMASK;
293 styler.SetLevel(current_line, prev_level | flagsNext);
294 }
295}
// Descriptions of the keyword lists used by the BibTeX lexer,
// terminated by a null pointer.
static const char * const BibTeXWordLists[] = {
	"Entry Names",
	nullptr,
};
300
301
302LexerModule lmBibTeX(SCLEX_BIBTEX, ColorizeBibTeX, "bib", 0, BibTeXWordLists);
303
// Entry Names
// article, book, booklet, conference, inbook,
// incollection, inproceedings, manual, mastersthesis,
// misc, phdthesis, proceedings, techreport, unpublished,
// string, url
309
310