LexEDIFACT.cxx source code [DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexEDIFACT.cxx]

1	// Scintilla Lexer for EDIFACT
2	// @file LexEDIFACT.cxx
3	// Written by Iain Clarke, IMCSoft & Inobiz AB.
4	// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
5	// and more readably here: https://en.wikipedia.org/wiki/EDIFACT
6	// This code is subject to the same license terms as the rest of the scintilla project:
7	// The License.txt file describes the conditions under which this software may be distributed.
8	//
9
10	// Header order must match order in scripts/HeaderOrder.txt
11	#include <cstdlib>
12	#include <cassert>
13	#include <cstring>
14	#include <cctype>
15
16	#include <string>
17	#include <string_view>
18
19	#include "ILexer.h"
20	#include "Scintilla.h"
21	#include "SciLexer.h"
22
23	#include "LexAccessor.h"
24	#include "LexerModule.h"
25	#include "DefaultLexer.h"
26
27	using namespace Scintilla;
28	using namespace Lexilla;
29
30	class LexerEDIFACT : public DefaultLexer
31	{
32	public:
33	LexerEDIFACT();
34	virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer
35
36	static ILexer5 *Factory() {
37	return new LexerEDIFACT;
38	}
39
40	int SCI_METHOD Version() const override
41	{
42	return lvRelease5;
43	}
44	void SCI_METHOD Release() override
45	{
46	delete this;
47	}
48
49	const char * SCI_METHOD PropertyNames() override
50	{
51	return "fold\nlexer.edifact.highlight.un.all";
52	}
53	int SCI_METHOD PropertyType(const char *) override
54	{
55	return SC_TYPE_BOOLEAN; // Only one property!
56	}
57	const char * SCI_METHOD DescribeProperty(const char *name) override
58	{
59	if (!strcmp(name, "fold"))
60	return "Whether to apply folding to document or not";
61	if (!strcmp(name, "lexer.edifact.highlight.un.all"))
62	return "Whether to apply UN* highlighting to all UN segments, or just to UNH";
63	return NULL;
64	}
65
66	Sci_Position SCI_METHOD PropertySet(const char key, const* char *val) override
67	{
68	if (!strcmp(key, "fold"))
69	{
70	m_bFold = strcmp(val, "0") ? true : false;
71	return `0`;
72	}
73	if (!strcmp(key, "lexer.edifact.highlight.un.all")) // GetProperty
74	{
75	m_bHighlightAllUN = strcmp(val, "0") ? true : false;
76	return `0`;
77	}
78	return -`1`;
79	}
80
81	const char * SCI_METHOD PropertyGet(const char *key) override
82	{
83	m_lastPropertyValue = "";
84	if (!strcmp(key, "fold"))
85	{
86	m_lastPropertyValue = m_bFold ? "1" : "0";
87	}
88	if (!strcmp(key, "lexer.edifact.highlight.un.all")) // GetProperty
89	{
90	m_lastPropertyValue = m_bHighlightAllUN ? "1" : "0";
91	}
92	return m_lastPropertyValue.c_str();
93	}
94
95	const char * SCI_METHOD DescribeWordListSets() override
96	{
97	return NULL;
98	}
99	Sci_Position SCI_METHOD WordListSet(int, const char *) override
100	{
101	return -`1`;
102	}
103	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
104	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
105	void * SCI_METHOD PrivateCall(int, void *) override
106	{
107	return NULL;
108	}
109
110	protected:
111	Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
112	Sci_Position FindPreviousEnd(IDocument pAccess, Sci_Position startPos) const*;
113	Sci_Position ForwardPastWhitespace(IDocument pAccess, Sci_Position startPos, Sci_Position MaxLength) const*;
114	int DetectSegmentHeader(char SegmentHeader[`3`]) const;
115
116	bool m_bFold;
117
118	// property lexer.edifact.highlight.un.all
119	// Set to 0 to highlight only UNA segments, or 1 to highlight all UNx segments.
120	bool m_bHighlightAllUN;
121
122	char m_chComponent;
123	char m_chData;
124	char m_chDecimal;
125	char m_chRelease;
126	char m_chSegment;
127
128	std::string m_lastPropertyValue;
129	};
130
131	LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
132
133	///////////////////////////////////////////////////////////////////////////////
134
135
136
137	///////////////////////////////////////////////////////////////////////////////
138
139	LexerEDIFACT::LexerEDIFACT() : DefaultLexer ("edifact", SCLEX_EDIFACT)
140	{
141	m_bFold = false;
142	m_bHighlightAllUN = false;
143	m_chComponent = `':'`;
144	m_chData = `'+'`;
145	m_chDecimal = `'.'`;
146	m_chRelease = `'?'`;
147	m_chSegment = `'\''`;
148	}
149
150	void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
151	{
152	Sci_PositionU posFinish = startPos + length;
153	InitialiseFromUNA(pAccess, posFinish);
154
155	// Look backwards for a ' or a document beginning
156	Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
157	// And jump past the ' if this was not the beginning of the document
158	if (posCurrent != `0`)
159	posCurrent++;
160
161	// Style buffer, so we're not issuing loads of notifications
162	LexAccessor styler (pAccess);
163	pAccess->StartStyling(posCurrent);
164	styler.StartSegment(posCurrent);
165	Sci_Position posSegmentStart = -`1`;
166
167	while ((posCurrent < posFinish) && (posSegmentStart == -`1`))
168	{
169	posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
170	// Mark whitespace as default
171	styler.ColourTo(posCurrent - `1`, SCE_EDI_DEFAULT);
172	if (posCurrent >= posFinish)
173	break;
174
175	// Does is start with 3 charaters? ie, UNH
176	char SegmentHeader[`4`] = { `0` };
177	pAccess->GetCharRange(SegmentHeader, posCurrent, `3`);
178
179	int SegmentStyle = DetectSegmentHeader(SegmentHeader);
180	if (SegmentStyle == SCE_EDI_BADSEGMENT)
181	break;
182	if (SegmentStyle == SCE_EDI_UNA)
183	{
184	posCurrent += `9`;
185	styler.ColourTo(posCurrent - `1`, SCE_EDI_UNA); // UNA
186	continue;
187	}
188	posSegmentStart = posCurrent;
189	posCurrent += `3`;
190
191	styler.ColourTo(posCurrent - `1`, SegmentStyle); // UNH etc
192
193	// Colour in the rest of the segment
194	for (char c; posCurrent < posFinish; posCurrent++)
195	{
196	pAccess->GetCharRange(&c, posCurrent, `1`);
197
198	if (c == m_chRelease) // ? escape character, check first, in case of ?'
199	posCurrent++;
200	else if (c == m_chSegment) // '
201	{
202	// Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
203	Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
204	Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
205	if (lineSegmentStart == lineSegmentEnd)
206	styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
207	else
208	styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
209	posSegmentStart = -`1`;
210	posCurrent++;
211	break;
212	}
213	else if (c == m_chComponent) // :
214	styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
215	else if (c == m_chData) // +
216	styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
217	else
218	styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
219	}
220	}
221	styler.Flush();
222
223	if (posSegmentStart == -`1`)
224	return;
225
226	pAccess->StartStyling(posSegmentStart);
227	pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
228	}
229
230	void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
231	{
232	if (!m_bFold)
233	return;
234
235	Sci_PositionU endPos = startPos + length;
236	startPos = FindPreviousEnd(pAccess, startPos);
237	char c;
238	char SegmentHeader[`4`] = { `0` };
239
240	bool AwaitingSegment = true;
241	Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
242	int levelCurrentStyle = SC_FOLDLEVELBASE;
243	if (currLine > `0`)
244	levelCurrentStyle = pAccess->GetLevel(currLine - `1`); // bottom 12 bits are level
245	int indentCurrent = levelCurrentStyle & SC_FOLDLEVELNUMBERMASK;
246	int indentNext = indentCurrent;
247
248	while (startPos < endPos)
249	{
250	pAccess->GetCharRange(&c, startPos, `1`);
251	switch (c)
252	{
253	case `'\t'`:
254	case `'\r'`:
255	case `' '`:
256	startPos++;
257	continue;
258	case `'\n'`:
259	currLine = pAccess->LineFromPosition(startPos);
260	pAccess->SetLevel(currLine, levelCurrentStyle \| indentCurrent);
261	startPos++;
262	levelCurrentStyle = SC_FOLDLEVELBASE;
263	indentCurrent = indentNext;
264	continue;
265	}
266	if (c == m_chRelease)
267	{
268	startPos += `2`;
269	continue;
270	}
271	if (c == m_chSegment)
272	{
273	AwaitingSegment = true;
274	startPos++;
275	continue;
276	}
277
278	if (!AwaitingSegment)
279	{
280	startPos++;
281	continue;
282	}
283
284	// Segment!
285	pAccess->GetCharRange(SegmentHeader, startPos, `3`);
286	if (SegmentHeader[`0`] != `'U'` \|\| SegmentHeader[`1`] != `'N'`)
287	{
288	startPos++;
289	continue;
290	}
291
292	AwaitingSegment = false;
293	switch (SegmentHeader[`2`])
294	{
295	case `'H'`:
296	case `'G'`:
297	indentNext++;
298	levelCurrentStyle = SC_FOLDLEVELBASE \| SC_FOLDLEVELHEADERFLAG;
299	break;
300
301	case `'T'`:
302	case `'E'`:
303	if (indentNext > `0`)
304	indentNext--;
305	break;
306	}
307
308	startPos += `3`;
309	}
310	}
311
312	Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
313	{
314	MaxLength -= `9`; // drop 9 chars, to give us room for UNA:+.? '
315
316	Sci_PositionU startPos = `0`;
317	startPos += ForwardPastWhitespace(pAccess, `0`, MaxLength);
318	if (startPos < MaxLength)
319	{
320	char bufUNA[`9`];
321	pAccess->GetCharRange(bufUNA, startPos, `9`);
322
323	// Check it's UNA segment
324	if (!memcmp(bufUNA, "UNA", `3`))
325	{
326	m_chComponent = bufUNA[`3`];
327	m_chData = bufUNA[`4`];
328	m_chDecimal = bufUNA[`5`];
329	m_chRelease = bufUNA[`6`];
330	// bufUNA [7] should be space - reserved.
331	m_chSegment = bufUNA[`8`];
332
333	return `0`; // success!
334	}
335	}
336
337	// We failed to find a UNA, so drop to defaults
338	m_chComponent = `':'`;
339	m_chData = `'+'`;
340	m_chDecimal = `'.'`;
341	m_chRelease = `'?'`;
342	m_chSegment = `'\''`;
343
344	return -`1`;
345	}
346
347	Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument pAccess, Sci_Position startPos, Sci_Position MaxLength) const*
348	{
349	char c;
350
351	while (startPos < MaxLength)
352	{
353	pAccess->GetCharRange(&c, startPos, `1`);
354	switch (c)
355	{
356	case `'\t'`:
357	case `'\r'`:
358	case `'\n'`:
359	case `' '`:
360	break;
361	default:
362	return startPos;
363	}
364
365	startPos++;
366	}
367
368	return MaxLength;
369	}
370
371	int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[`3`]) const
372	{
373	if (
374	SegmentHeader[`0`] < `'A'` \|\| SegmentHeader[`0`] > `'Z'` \|\|
375	SegmentHeader[`1`] < `'A'` \|\| SegmentHeader[`1`] > `'Z'` \|\|
376	SegmentHeader[`2`] < `'A'` \|\| SegmentHeader[`2`] > `'Z'`)
377	return SCE_EDI_BADSEGMENT;
378
379	if (!memcmp(SegmentHeader, "UNA", `3`))
380	return SCE_EDI_UNA;
381
382	if (m_bHighlightAllUN && !memcmp(SegmentHeader, "UN", `2`))
383	return SCE_EDI_UNH;
384	else if (!memcmp(SegmentHeader, "UNH", `3`))
385	return SCE_EDI_UNH;
386	else if (!memcmp(SegmentHeader, "UNG", `3`))
387	return SCE_EDI_UNH;
388
389	return SCE_EDI_SEGMENTSTART;
390	}
391
392	// Look backwards for a ' or a document beginning
393	Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument pAccess, Sci_Position startPos) const*
394	{
395	for (char c; startPos > `0`; startPos--)
396	{
397	pAccess->GetCharRange(&c, startPos, `1`);
398	if (c == m_chSegment)
399	return startPos;
400	}
401	// We didn't find a ', so just go with the beginning
402	return `0`;
403	}
404
405
406

Browse the source code of DeepinIDE/3rdparty/unioncode-scintilla515/lexilla/lexers/LexEDIFACT.cxx