1// Scintilla Lexer for EDIFACT
2// @file LexEDIFACT.cxx
3// Written by Iain Clarke, IMCSoft & Inobiz AB.
4// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
5// and more readably here: https://en.wikipedia.org/wiki/EDIFACT
6// This code is subject to the same license terms as the rest of the scintilla project:
7// The License.txt file describes the conditions under which this software may be distributed.
8//
9
10// Header order must match order in scripts/HeaderOrder.txt
11#include <cstdlib>
12#include <cassert>
13#include <cstring>
14#include <cctype>
15
16#include <string>
17#include <string_view>
18
19#include "ILexer.h"
20#include "Scintilla.h"
21#include "SciLexer.h"
22
23#include "LexAccessor.h"
24#include "LexerModule.h"
25#include "DefaultLexer.h"
26
27using namespace Scintilla;
28using namespace Lexilla;
29
30class LexerEDIFACT : public DefaultLexer
31{
32public:
33 LexerEDIFACT();
34 virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer
35
36 static ILexer5 *Factory() {
37 return new LexerEDIFACT;
38 }
39
40 int SCI_METHOD Version() const override
41 {
42 return lvRelease5;
43 }
44 void SCI_METHOD Release() override
45 {
46 delete this;
47 }
48
49 const char * SCI_METHOD PropertyNames() override
50 {
51 return "fold\nlexer.edifact.highlight.un.all";
52 }
53 int SCI_METHOD PropertyType(const char *) override
54 {
55 return SC_TYPE_BOOLEAN; // Only one property!
56 }
57 const char * SCI_METHOD DescribeProperty(const char *name) override
58 {
59 if (!strcmp(name, "fold"))
60 return "Whether to apply folding to document or not";
61 if (!strcmp(name, "lexer.edifact.highlight.un.all"))
62 return "Whether to apply UN* highlighting to all UN segments, or just to UNH";
63 return NULL;
64 }
65
66 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override
67 {
68 if (!strcmp(key, "fold"))
69 {
70 m_bFold = strcmp(val, "0") ? true : false;
71 return 0;
72 }
73 if (!strcmp(key, "lexer.edifact.highlight.un.all")) // GetProperty
74 {
75 m_bHighlightAllUN = strcmp(val, "0") ? true : false;
76 return 0;
77 }
78 return -1;
79 }
80
81 const char * SCI_METHOD PropertyGet(const char *key) override
82 {
83 m_lastPropertyValue = "";
84 if (!strcmp(key, "fold"))
85 {
86 m_lastPropertyValue = m_bFold ? "1" : "0";
87 }
88 if (!strcmp(key, "lexer.edifact.highlight.un.all")) // GetProperty
89 {
90 m_lastPropertyValue = m_bHighlightAllUN ? "1" : "0";
91 }
92 return m_lastPropertyValue.c_str();
93 }
94
95 const char * SCI_METHOD DescribeWordListSets() override
96 {
97 return NULL;
98 }
99 Sci_Position SCI_METHOD WordListSet(int, const char *) override
100 {
101 return -1;
102 }
103 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
104 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
105 void * SCI_METHOD PrivateCall(int, void *) override
106 {
107 return NULL;
108 }
109
110protected:
111 Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
112 Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const;
113 Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const;
114 int DetectSegmentHeader(char SegmentHeader[3]) const;
115
116 bool m_bFold;
117
118 // property lexer.edifact.highlight.un.all
119 // Set to 0 to highlight only UNA segments, or 1 to highlight all UNx segments.
120 bool m_bHighlightAllUN;
121
122 char m_chComponent;
123 char m_chData;
124 char m_chDecimal;
125 char m_chRelease;
126 char m_chSegment;
127
128 std::string m_lastPropertyValue;
129};
130
131LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
132
133///////////////////////////////////////////////////////////////////////////////
134
135
136
137///////////////////////////////////////////////////////////////////////////////
138
139LexerEDIFACT::LexerEDIFACT() : DefaultLexer("edifact", SCLEX_EDIFACT)
140{
141 m_bFold = false;
142 m_bHighlightAllUN = false;
143 m_chComponent = ':';
144 m_chData = '+';
145 m_chDecimal = '.';
146 m_chRelease = '?';
147 m_chSegment = '\'';
148}
149
150void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
151{
152 Sci_PositionU posFinish = startPos + length;
153 InitialiseFromUNA(pAccess, posFinish);
154
155 // Look backwards for a ' or a document beginning
156 Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
157 // And jump past the ' if this was not the beginning of the document
158 if (posCurrent != 0)
159 posCurrent++;
160
161 // Style buffer, so we're not issuing loads of notifications
162 LexAccessor styler (pAccess);
163 pAccess->StartStyling(posCurrent);
164 styler.StartSegment(posCurrent);
165 Sci_Position posSegmentStart = -1;
166
167 while ((posCurrent < posFinish) && (posSegmentStart == -1))
168 {
169 posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
170 // Mark whitespace as default
171 styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
172 if (posCurrent >= posFinish)
173 break;
174
175 // Does is start with 3 charaters? ie, UNH
176 char SegmentHeader[4] = { 0 };
177 pAccess->GetCharRange(SegmentHeader, posCurrent, 3);
178
179 int SegmentStyle = DetectSegmentHeader(SegmentHeader);
180 if (SegmentStyle == SCE_EDI_BADSEGMENT)
181 break;
182 if (SegmentStyle == SCE_EDI_UNA)
183 {
184 posCurrent += 9;
185 styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA
186 continue;
187 }
188 posSegmentStart = posCurrent;
189 posCurrent += 3;
190
191 styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc
192
193 // Colour in the rest of the segment
194 for (char c; posCurrent < posFinish; posCurrent++)
195 {
196 pAccess->GetCharRange(&c, posCurrent, 1);
197
198 if (c == m_chRelease) // ? escape character, check first, in case of ?'
199 posCurrent++;
200 else if (c == m_chSegment) // '
201 {
202 // Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
203 Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
204 Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
205 if (lineSegmentStart == lineSegmentEnd)
206 styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
207 else
208 styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
209 posSegmentStart = -1;
210 posCurrent++;
211 break;
212 }
213 else if (c == m_chComponent) // :
214 styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
215 else if (c == m_chData) // +
216 styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
217 else
218 styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
219 }
220 }
221 styler.Flush();
222
223 if (posSegmentStart == -1)
224 return;
225
226 pAccess->StartStyling(posSegmentStart);
227 pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
228}
229
230void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
231{
232 if (!m_bFold)
233 return;
234
235 Sci_PositionU endPos = startPos + length;
236 startPos = FindPreviousEnd(pAccess, startPos);
237 char c;
238 char SegmentHeader[4] = { 0 };
239
240 bool AwaitingSegment = true;
241 Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
242 int levelCurrentStyle = SC_FOLDLEVELBASE;
243 if (currLine > 0)
244 levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
245 int indentCurrent = levelCurrentStyle & SC_FOLDLEVELNUMBERMASK;
246 int indentNext = indentCurrent;
247
248 while (startPos < endPos)
249 {
250 pAccess->GetCharRange(&c, startPos, 1);
251 switch (c)
252 {
253 case '\t':
254 case '\r':
255 case ' ':
256 startPos++;
257 continue;
258 case '\n':
259 currLine = pAccess->LineFromPosition(startPos);
260 pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
261 startPos++;
262 levelCurrentStyle = SC_FOLDLEVELBASE;
263 indentCurrent = indentNext;
264 continue;
265 }
266 if (c == m_chRelease)
267 {
268 startPos += 2;
269 continue;
270 }
271 if (c == m_chSegment)
272 {
273 AwaitingSegment = true;
274 startPos++;
275 continue;
276 }
277
278 if (!AwaitingSegment)
279 {
280 startPos++;
281 continue;
282 }
283
284 // Segment!
285 pAccess->GetCharRange(SegmentHeader, startPos, 3);
286 if (SegmentHeader[0] != 'U' || SegmentHeader[1] != 'N')
287 {
288 startPos++;
289 continue;
290 }
291
292 AwaitingSegment = false;
293 switch (SegmentHeader[2])
294 {
295 case 'H':
296 case 'G':
297 indentNext++;
298 levelCurrentStyle = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
299 break;
300
301 case 'T':
302 case 'E':
303 if (indentNext > 0)
304 indentNext--;
305 break;
306 }
307
308 startPos += 3;
309 }
310}
311
312Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
313{
314 MaxLength -= 9; // drop 9 chars, to give us room for UNA:+.? '
315
316 Sci_PositionU startPos = 0;
317 startPos += ForwardPastWhitespace(pAccess, 0, MaxLength);
318 if (startPos < MaxLength)
319 {
320 char bufUNA[9];
321 pAccess->GetCharRange(bufUNA, startPos, 9);
322
323 // Check it's UNA segment
324 if (!memcmp(bufUNA, "UNA", 3))
325 {
326 m_chComponent = bufUNA[3];
327 m_chData = bufUNA[4];
328 m_chDecimal = bufUNA[5];
329 m_chRelease = bufUNA[6];
330 // bufUNA [7] should be space - reserved.
331 m_chSegment = bufUNA[8];
332
333 return 0; // success!
334 }
335 }
336
337 // We failed to find a UNA, so drop to defaults
338 m_chComponent = ':';
339 m_chData = '+';
340 m_chDecimal = '.';
341 m_chRelease = '?';
342 m_chSegment = '\'';
343
344 return -1;
345}
346
347Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
348{
349 char c;
350
351 while (startPos < MaxLength)
352 {
353 pAccess->GetCharRange(&c, startPos, 1);
354 switch (c)
355 {
356 case '\t':
357 case '\r':
358 case '\n':
359 case ' ':
360 break;
361 default:
362 return startPos;
363 }
364
365 startPos++;
366 }
367
368 return MaxLength;
369}
370
371int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const
372{
373 if (
374 SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' ||
375 SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' ||
376 SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z')
377 return SCE_EDI_BADSEGMENT;
378
379 if (!memcmp(SegmentHeader, "UNA", 3))
380 return SCE_EDI_UNA;
381
382 if (m_bHighlightAllUN && !memcmp(SegmentHeader, "UN", 2))
383 return SCE_EDI_UNH;
384 else if (!memcmp(SegmentHeader, "UNH", 3))
385 return SCE_EDI_UNH;
386 else if (!memcmp(SegmentHeader, "UNG", 3))
387 return SCE_EDI_UNH;
388
389 return SCE_EDI_SEGMENTSTART;
390}
391
392// Look backwards for a ' or a document beginning
393Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
394{
395 for (char c; startPos > 0; startPos--)
396 {
397 pAccess->GetCharRange(&c, startPos, 1);
398 if (c == m_chSegment)
399 return startPos;
400 }
401 // We didn't find a ', so just go with the beginning
402 return 0;
403}
404
405
406