LexJSON.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexJSON.cxx]

1	// Scintilla source code edit control
2	/**
3	* @file LexJSON.cxx
4	* @date February 19, 2016
5	* @brief Lexer for JSON and JSON-LD formats
6	* @author nkmathew
7	*
8	* The License.txt file describes the conditions under which this software may
9	* be distributed.
10	*
11	*/
12
13	#include <cstdlib>
14	#include <cassert>
15	#include <cctype>
16	#include <cstdio>
17
18	#include <string>
19	#include <string_view>
20	#include <vector>
21	#include <map>
22	#include <functional>
23
24	#include "ILexer.h"
25	#include "Scintilla.h"
26	#include "SciLexer.h"
27	#include "WordList.h"
28	#include "LexAccessor.h"
29	#include "StyleContext.h"
30	#include "CharacterSet.h"
31	#include "LexerModule.h"
32	#include "OptionSet.h"
33	#include "DefaultLexer.h"
34
35	using namespace Scintilla;
36	using namespace Lexilla;
37
/**
 * Human-readable descriptions of the keyword lists accepted by this lexer.
 *
 * Index 0 describes the plain JSON keywords and index 1 the JSON-LD keywords;
 * the array is terminated by a null pointer.
 */
static const char *const JSONWordListDesc[] = {
	"JSON Keywords",
	"JSON-LD Keywords",
	nullptr
};
43
44	/**
45	* Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the
46	* colon separating the prefix and suffix
47	*
48	* https://www.w3.org/TR/json-ld/#dfn-compact-iri
49	*/
50	struct CompactIRI {
51	int colonCount;
52	bool foundInvalidChar;
53	CharacterSet setCompactIRI;
54	CompactIRI() {
55	colonCount = `0`;
56	foundInvalidChar = false;
57	setCompactIRI = CharacterSet (CharacterSet::setAlpha, "$_-");
58	}
59	void resetState() {
60	colonCount = `0`;
61	foundInvalidChar = false;
62	}
63	void checkChar(int ch) {
64	if (ch == `':'`) {
65	colonCount++;
66	} else {
67	foundInvalidChar \|= !setCompactIRI.Contains(ch);
68	}
69	}
70	bool shouldHighlight() const {
71	return !foundInvalidChar && colonCount == `1`;
72	}
73	};
74
75	/**
76	* Keeps track of escaped characters in strings as per:
77	*
78	* https://tools.ietf.org/html/rfc7159#section-7
79	*/
80	struct EscapeSequence {
81	int digitsLeft;
82	CharacterSet setHexDigits;
83	CharacterSet setEscapeChars;
84	EscapeSequence() {
85	digitsLeft = `0`;
86	setHexDigits = CharacterSet (CharacterSet::setDigits, "ABCDEFabcdef");
87	setEscapeChars = CharacterSet (CharacterSet::setNone, "\\\"tnbfru/");
88	}
89	// Returns true if the following character is a valid escaped character
90	bool newSequence(int nextChar) {
91	digitsLeft = `0`;
92	if (nextChar == `'u'`) {
93	digitsLeft = `5`;
94	} else if (!setEscapeChars.Contains(nextChar)) {
95	return false;
96	}
97	return true;
98	}
99	bool atEscapeEnd() const {
100	return digitsLeft <= `0`;
101	}
102	bool isInvalidChar(int currChar) const {
103	return !setHexDigits.Contains(currChar);
104	}
105	};
106
/**
 * User-configurable switches for the JSON lexer.
 *
 * Every option starts out disabled; in-class initializers guarantee that no
 * member is ever left uninitialized however the struct is constructed.
 */
struct OptionsJSON {
	// Bound to the "fold.compact" property
	bool foldCompact = false;
	// Bound to the "fold" property
	bool fold = false;
	// Bound to the "lexer.json.allow.comments" property
	bool allowComments = false;
	// Bound to the "lexer.json.escape.sequence" property
	bool escapeSequence = false;
};
119
120	struct OptionSetJSON : public OptionSet<OptionsJSON> {
121	OptionSetJSON() {
122	DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence,
123	"Set to 1 to enable highlighting of escape sequences in strings");
124
125	DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments,
126	"Set to 1 to enable highlighting of line/block comments in JSON");
127
128	DefineProperty("fold.compact", &OptionsJSON::foldCompact);
129	DefineProperty("fold", &OptionsJSON::fold);
130	DefineWordListSets(JSONWordListDesc);
131	}
132	};
133
134	class LexerJSON : public DefaultLexer {
135	OptionsJSON options;
136	OptionSetJSON optSetJSON;
137	EscapeSequence escapeSeq;
138	WordList keywordsJSON;
139	WordList keywordsJSONLD;
140	CharacterSet setOperators;
141	CharacterSet setURL;
142	CharacterSet setKeywordJSONLD;
143	CharacterSet setKeywordJSON;
144	CompactIRI compactIRI;
145
146	static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
147	Sci_Position i = `0`;
148	while (i < `50`) {
149	i++;
150	char curr = styler.SafeGetCharAt(start+i, `'\0'`);
151	char next = styler.SafeGetCharAt(start+i+`1`, `'\0'`);
152	bool atEOL = (curr == `'\r'` && next != `'\n'`) \|\| (curr == `'\n'`);
153	if (curr == ch) {
154	return true;
155	} else if (!isspacechar(curr) \|\| atEOL) {
156	return false;
157	}
158	}
159	return false;
160	}
161
162	/**
163	* Looks for the colon following the end quote
164	*
165	* Assumes property names of lengths no longer than a 100 characters.
166	* The colon is also expected to be less than 50 spaces after the end
167	* quote for the string to be considered a property name
168	*/
169	static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
170	Sci_Position i = `0`;
171	bool escaped = false;
172	while (i < `100`) {
173	i++;
174	char curr = styler.SafeGetCharAt(start+i, `'\0'`);
175	if (escaped) {
176	escaped = false;
177	continue;
178	}
179	escaped = curr == `'\\'`;
180	if (curr == `'"'`) {
181	return IsNextNonWhitespace(styler, start+i, `':'`);
182	} else if (!curr) {
183	return false;
184	}
185	}
186	return false;
187	}
188
189	static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet,
190	StyleContext &context, LexAccessor &styler) {
191	char word[`51`];
192	Sci_Position currPos = (Sci_Position) context.currentPos;
193	int i = `0`;
194	while (i < `50`) {
195	char ch = styler.SafeGetCharAt(currPos + i);
196	if (!wordSet.Contains(ch)) {
197	break;
198	}
199	word[i] = ch;
200	i++;
201	}
202	word[i] = `'\0'`;
203	return keywordList.InList(word);
204	}
205
206	public:
207	LexerJSON() :
208	DefaultLexer ("json", SCLEX_JSON),
209	setOperators (CharacterSet::setNone, "[{}]:,"),
210	setURL (CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="),
211	setKeywordJSONLD (CharacterSet::setAlpha, ":@"),
212	setKeywordJSON (CharacterSet::setAlpha, "$_") {
213	}
214	virtual ~LexerJSON() {}
215	int SCI_METHOD Version() const override {
216	return lvRelease5;
217	}
218	void SCI_METHOD Release() override {
219	delete this;
220	}
221	const char *SCI_METHOD PropertyNames() override {
222	return optSetJSON.PropertyNames();
223	}
224	int SCI_METHOD PropertyType(const char *name) override {
225	return optSetJSON.PropertyType(name);
226	}
227	const char SCI_METHOD DescribeProperty(const* char *name) override {
228	return optSetJSON.DescribeProperty(name);
229	}
230	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override {
231	if (optSetJSON.PropertySet(&options, key, val)) {
232	return `0`;
233	}
234	return -`1`;
235	}
236	const char * SCI_METHOD PropertyGet(const char *key) override {
237	return optSetJSON.PropertyGet(key);
238	}
239	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override {
240	WordList *wordListN = `0`;
241	switch (n) {
242	case `0`:
243	wordListN = &keywordsJSON;
244	break;
245	case `1`:
246	wordListN = &keywordsJSONLD;
247	break;
248	}
249	Sci_Position firstModification = -`1`;
250	if (wordListN) {
251	WordList wlNew;
252	wlNew.Set(wl);
253	if (*wordListN != wlNew) {
254	wordListN->Set(wl);
255	firstModification = `0`;
256	}
257	}
258	return firstModification;
259	}
260	void SCI_METHOD PrivateCall(int, void* *) override {
261	return `0`;
262	}
263	static ILexer5 *LexerFactoryJSON() {
264	return new LexerJSON;
265	}
266	const char *SCI_METHOD DescribeWordListSets() override {
267	return optSetJSON.DescribeWordListSets();
268	}
269	void SCI_METHOD Lex(Sci_PositionU startPos,
270	Sci_Position length,
271	int initStyle,
272	IDocument *pAccess) override;
273	void SCI_METHOD Fold(Sci_PositionU startPos,
274	Sci_Position length,
275	int initStyle,
276	IDocument *pAccess) override;
277	};
278
279	void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
280	Sci_Position length,
281	int initStyle,
282	IDocument *pAccess) {
283	LexAccessor styler(pAccess);
284	StyleContext context(startPos, length, initStyle, styler);
285	int stringStyleBefore = SCE_JSON_STRING;
286	while (context.More()) {
287	switch (context.state) {
288	case SCE_JSON_BLOCKCOMMENT:
289	if (context.Match("*/")) {
290	context.Forward();
291	context.ForwardSetState(SCE_JSON_DEFAULT);
292	}
293	break;
294	case SCE_JSON_LINECOMMENT:
295	if (context.atLineEnd) {
296	context.SetState(SCE_JSON_DEFAULT);
297	}
298	break;
299	case SCE_JSON_STRINGEOL:
300	if (context.atLineStart) {
301	context.SetState(SCE_JSON_DEFAULT);
302	}
303	break;
304	case SCE_JSON_ESCAPESEQUENCE:
305	escapeSeq.digitsLeft--;
306	if (!escapeSeq.atEscapeEnd()) {
307	if (escapeSeq.isInvalidChar(context.ch)) {
308	context.SetState(SCE_JSON_ERROR);
309	}
310	break;
311	}
312	if (context.ch == `'"'`) {
313	context.SetState(stringStyleBefore);
314	context.ForwardSetState(SCE_C_DEFAULT);
315	} else if (context.ch == `'\\'`) {
316	if (!escapeSeq.newSequence(context.chNext)) {
317	context.SetState(SCE_JSON_ERROR);
318	}
319	context.Forward();
320	} else {
321	context.SetState(stringStyleBefore);
322	if (context.atLineEnd) {
323	context.ChangeState(SCE_JSON_STRINGEOL);
324	}
325	}
326	break;
327	case SCE_JSON_PROPERTYNAME:
328	case SCE_JSON_STRING:
329	if (context.ch == `'"'`) {
330	if (compactIRI.shouldHighlight()) {
331	context.ChangeState(SCE_JSON_COMPACTIRI);
332	context.ForwardSetState(SCE_JSON_DEFAULT);
333	compactIRI.resetState();
334	} else {
335	context.ForwardSetState(SCE_JSON_DEFAULT);
336	}
337	} else if (context.atLineEnd) {
338	context.ChangeState(SCE_JSON_STRINGEOL);
339	} else if (context.ch == `'\\'`) {
340	stringStyleBefore = context.state;
341	if (options.escapeSequence) {
342	context.SetState(SCE_JSON_ESCAPESEQUENCE);
343	if (!escapeSeq.newSequence(context.chNext)) {
344	context.SetState(SCE_JSON_ERROR);
345	}
346	}
347	context.Forward();
348	} else if (context.Match("https://") \|\|
349	context.Match("http://") \|\|
350	context.Match("ssh://") \|\|
351	context.Match("git://") \|\|
352	context.Match("svn://") \|\|
353	context.Match("ftp://") \|\|
354	context.Match("mailto:")) {
355	// Handle most common URI schemes only
356	stringStyleBefore = context.state;
357	context.SetState(SCE_JSON_URI);
358	} else if (context.ch == `'@'`) {
359	// https://www.w3.org/TR/json-ld/#dfn-keyword
360	if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) {
361	stringStyleBefore = context.state;
362	context.SetState(SCE_JSON_LDKEYWORD);
363	}
364	} else {
365	compactIRI.checkChar(context.ch);
366	}
367	break;
368	case SCE_JSON_LDKEYWORD:
369	case SCE_JSON_URI:
370	if ((!setKeywordJSONLD.Contains(context.ch) &&
371	(context.state == SCE_JSON_LDKEYWORD)) \|\|
372	(!setURL.Contains(context.ch))) {
373	context.SetState(stringStyleBefore);
374	}
375	if (context.ch == `'"'`) {
376	context.ForwardSetState(SCE_JSON_DEFAULT);
377	} else if (context.atLineEnd) {
378	context.ChangeState(SCE_JSON_STRINGEOL);
379	}
380	break;
381	case SCE_JSON_OPERATOR:
382	case SCE_JSON_NUMBER:
383	context.SetState(SCE_JSON_DEFAULT);
384	break;
385	case SCE_JSON_ERROR:
386	if (context.atLineEnd) {
387	context.SetState(SCE_JSON_DEFAULT);
388	}
389	break;
390	case SCE_JSON_KEYWORD:
391	if (!setKeywordJSON.Contains(context.ch)) {
392	context.SetState(SCE_JSON_DEFAULT);
393	}
394	break;
395	}
396	if (context.state == SCE_JSON_DEFAULT) {
397	if (context.ch == `'"'`) {
398	compactIRI.resetState();
399	context.SetState(SCE_JSON_STRING);
400	Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
401	if (AtPropertyName(styler, currPos)) {
402	context.SetState(SCE_JSON_PROPERTYNAME);
403	}
404	} else if (setOperators.Contains(context.ch)) {
405	context.SetState(SCE_JSON_OPERATOR);
406	} else if (options.allowComments && context.Match("/*")) {
407	context.SetState(SCE_JSON_BLOCKCOMMENT);
408	context.Forward();
409	} else if (options.allowComments && context.Match("//")) {
410	context.SetState(SCE_JSON_LINECOMMENT);
411	} else if (setKeywordJSON.Contains(context.ch)) {
412	if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) {
413	context.SetState(SCE_JSON_KEYWORD);
414	}
415	}
416	bool numberStart =
417	IsADigit(context.ch) && (context.chPrev == `'+'`\|\|
418	context.chPrev == `'-'` \|\|
419	context.atLineStart \|\|
420	IsASpace(context.chPrev) \|\|
421	setOperators.Contains(context.chPrev));
422	bool exponentPart =
423	tolower(context.ch) == `'e'` &&
424	IsADigit(context.chPrev) &&
425	(IsADigit(context.chNext) \|\|
426	context.chNext == `'+'` \|\|
427	context.chNext == `'-'`);
428	bool signPart =
429	(context.ch == `'-'` \|\| context.ch == `'+'`) &&
430	((tolower(context.chPrev) == `'e'` && IsADigit(context.chNext)) \|\|
431	((IsASpace(context.chPrev) \|\| setOperators.Contains(context.chPrev))
432	&& IsADigit(context.chNext)));
433	bool adjacentDigit =
434	IsADigit(context.ch) && IsADigit(context.chPrev);
435	bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == `'e'`;
436	bool dotPart = context.ch == `'.'` &&
437	IsADigit(context.chPrev) &&
438	IsADigit(context.chNext);
439	bool afterDot = IsADigit(context.ch) && context.chPrev == `'.'`;
440	if (numberStart \|\|
441	exponentPart \|\|
442	signPart \|\|
443	adjacentDigit \|\|
444	dotPart \|\|
445	afterExponent \|\|
446	afterDot) {
447	context.SetState(SCE_JSON_NUMBER);
448	} else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
449	context.SetState(SCE_JSON_ERROR);
450	}
451	}
452	context.Forward();
453	}
454	context.Complete();
455	}
456
457	void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos,
458	Sci_Position length,
459	int,
460	IDocument *pAccess) {
461	if (!options.fold) {
462	return;
463	}
464	LexAccessor styler(pAccess);
465	Sci_PositionU currLine = styler.GetLine(startPos);
466	Sci_PositionU endPos = startPos + length;
467	int currLevel = SC_FOLDLEVELBASE;
468	if (currLine > `0`)
469	currLevel = styler.LevelAt(currLine - `1`) >> `16`;
470	int nextLevel = currLevel;
471	int visibleChars = `0`;
472	for (Sci_PositionU i = startPos; i < endPos; i++) {
473	char curr = styler.SafeGetCharAt(i);
474	char next = styler.SafeGetCharAt(i+`1`);
475	bool atEOL = (curr == `'\r'` && next != `'\n'`) \|\| (curr == `'\n'`);
476	if (styler.StyleAt(i) == SCE_JSON_OPERATOR) {
477	if (curr == `'{'` \|\| curr == `'['`) {
478	nextLevel++;
479	} else if (curr == `'}'` \|\| curr == `']'`) {
480	nextLevel--;
481	}
482	}
483	if (atEOL \|\| i == (endPos-`1`)) {
484	int level = currLevel \| nextLevel << `16`;
485	if (!visibleChars && options.foldCompact) {
486	level \|= SC_FOLDLEVELWHITEFLAG;
487	} else if (nextLevel > currLevel) {
488	level \|= SC_FOLDLEVELHEADERFLAG;
489	}
490	if (level != styler.LevelAt(currLine)) {
491	styler.SetLevel(currLine, level);
492	}
493	currLine++;
494	currLevel = nextLevel;
495	visibleChars = `0`;
496	}
497	if (!isspacechar(curr)) {
498	visibleChars++;
499	}
500	}
501	}
502
503	LexerModule lmJSON(SCLEX_JSON,
504	LexerJSON::LexerFactoryJSON,
505	"json",
506	JSONWordListDesc);
507

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexJSON.cxx