1// Scintilla Lexer for X12
2// @file LexX12.cxx
3// Written by Iain Clarke, IMCSoft & Inobiz AB.
4// X12 official documentation is behind a paywall, but there's a description of the syntax here:
5// http://www.rawlinsecconsulting.com/x12tutorial/x12syn.html
6// This code is subject to the same license terms as the rest of the scintilla project:
7// The License.txt file describes the conditions under which this software may be distributed.
8//
9
10// Header order must match order in scripts/HeaderOrder.txt
11#include <cstdlib>
12#include <cassert>
13#include <cstring>
14#include <cctype>
15
16#include <string>
17#include <string_view>
18
19#include <vector>
20#include <algorithm>
21
22#include "ILexer.h"
23#include "Scintilla.h"
24#include "SciLexer.h"
25#include "LexerModule.h"
26#include "DefaultLexer.h"
27
28using namespace Scintilla;
29using namespace Lexilla;
30
31class LexerX12 : public DefaultLexer
32{
33public:
34 LexerX12();
35 virtual ~LexerX12() {} // virtual destructor, as we inherit from ILexer
36
37 static ILexer5 *Factory() {
38 return new LexerX12;
39 }
40
41 int SCI_METHOD Version() const override
42 {
43 return lvRelease5;
44 }
45 void SCI_METHOD Release() override
46 {
47 delete this;
48 }
49
50 const char * SCI_METHOD PropertyNames() override
51 {
52 return "fold";
53 }
54 int SCI_METHOD PropertyType(const char *) override
55 {
56 return SC_TYPE_BOOLEAN; // Only one property!
57 }
58 const char * SCI_METHOD DescribeProperty(const char *name) override
59 {
60 if (!strcmp(name, "fold"))
61 return "Whether to apply folding to document or not";
62 return "";
63 }
64
65 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override
66 {
67 if (!strcmp(key, "fold"))
68 {
69 m_bFold = strcmp(val, "0") ? true : false;
70 return 0;
71 }
72 return -1;
73 }
74 const char * SCI_METHOD PropertyGet(const char *) override {
75 return "";
76 }
77 const char * SCI_METHOD DescribeWordListSets() override
78 {
79 return "";
80 }
81 Sci_Position SCI_METHOD WordListSet(int, const char *) override
82 {
83 return -1;
84 }
85 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
86 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
87 void * SCI_METHOD PrivateCall(int, void *) override
88 {
89 return NULL;
90 }
91
92protected:
93 struct Terminator
94 {
95 int Style = SCE_X12_BAD;
96 Sci_PositionU pos = 0;
97 Sci_PositionU length = 0;
98 int FoldChange = 0;
99 };
100 Terminator InitialiseFromISA(IDocument *pAccess);
101 Sci_PositionU FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const;
102 Terminator DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const;
103 Terminator FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator = false) const;
104
105 bool m_bFold = false;
106 char m_SeparatorSubElement = 0;
107 char m_SeparatorElement = 0;
108 std::string m_SeparatorSegment; // might be multiple characters
109 std::string m_LineFeed;
110};
111
// Module object for the X12 lexer, built from the SCLEX_X12 id, the LexerX12::Factory
// creation function and the language name "x12".
// NOTE(review): presumably this object is what the lexer catalogue looks up by id/name --
// confirm against LexerModule.
LexerModule lmX12(SCLEX_X12, LexerX12::Factory, "x12");
113
114///////////////////////////////////////////////////////////////////////////////
115
116
117
118///////////////////////////////////////////////////////////////////////////////
119
120LexerX12::LexerX12() : DefaultLexer("x12", SCLEX_X12)
121{
122}
123
124void LexerX12::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
125{
126 Sci_PositionU posFinish = startPos + length;
127
128 Terminator T = InitialiseFromISA(pAccess);
129
130 if (T.Style == SCE_X12_BAD)
131 {
132 if (T.pos < startPos)
133 T.pos = startPos; // we may be colouring in batches.
134 pAccess->StartStyling(startPos);
135 pAccess->SetStyleFor(T.pos - startPos, SCE_X12_ENVELOPE);
136 pAccess->SetStyleFor(posFinish - T.pos, SCE_X12_BAD);
137 return;
138 }
139
140 // Look backwards for a segment start or a document beginning
141 Sci_PositionU posCurrent = FindPreviousSegmentStart (pAccess, startPos);
142
143 // Style buffer, so we're not issuing loads of notifications
144 pAccess->StartStyling(posCurrent);
145
146 while (posCurrent < posFinish)
147 {
148 // Look for first element marker, so we can denote segment
149 T = DetectSegmentHeader(pAccess, posCurrent);
150 if (T.Style == SCE_X12_BAD)
151 break;
152
153 pAccess->SetStyleFor(T.pos - posCurrent, T.Style);
154 pAccess->SetStyleFor(T.length, SCE_X12_SEP_ELEMENT);
155 posCurrent = T.pos + T.length;
156
157 while (T.Style != SCE_X12_BAD && T.Style != SCE_X12_SEGMENTEND) // Break on bad or segment ending
158 {
159 T = FindNextTerminator(pAccess, posCurrent, false);
160 if (T.Style == SCE_X12_BAD)
161 break;
162
163 int Style = T.Style;
164
165 pAccess->SetStyleFor(T.pos - posCurrent, SCE_X12_DEFAULT);
166 pAccess->SetStyleFor(T.length, Style);
167 posCurrent = T.pos + T.length;
168 }
169 if (T.Style == SCE_X12_BAD)
170 break;
171 }
172
173 pAccess->SetStyleFor(posFinish - posCurrent, SCE_X12_BAD);
174}
175
176void LexerX12::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
177{
178 if (!m_bFold)
179 return;
180
181 // Are we even foldable?
182 // check for cr,lf,cr+lf.
183 if (m_LineFeed.empty())
184 return;
185
186 Sci_PositionU posFinish = startPos + length;
187
188 // Look backwards for a segment start or a document beginning
189 startPos = FindPreviousSegmentStart(pAccess, startPos);
190 Terminator T;
191
192 Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
193 int levelCurrentStyle = SC_FOLDLEVELBASE;
194 int indentCurrent = 0;
195 if (currLine > 0)
196 {
197 levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
198 indentCurrent = levelCurrentStyle & (SC_FOLDLEVELBASE - 1); // indent from previous line
199 Sci_PositionU posLine = pAccess->LineStart(currLine - 1);
200 T = DetectSegmentHeader(pAccess, posLine);
201 indentCurrent += T.FoldChange;
202 }
203
204 while (startPos < posFinish)
205 {
206 T = DetectSegmentHeader(pAccess, startPos);
207 int indentNext = indentCurrent + T.FoldChange;
208 if (indentNext < 0)
209 indentNext = 0;
210
211 levelCurrentStyle = (T.FoldChange > 0) ? (SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG) : SC_FOLDLEVELBASE;
212
213 currLine = pAccess->LineFromPosition(startPos);
214 pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
215
216 T = FindNextTerminator(pAccess, startPos, true);
217
218 if (T.Style == SCE_X12_BAD)
219 break;
220
221 startPos = T.pos + T.length;
222 indentCurrent = indentNext;
223 }
224}
225
226LexerX12::Terminator LexerX12::InitialiseFromISA(IDocument *pAccess)
227{
228 Sci_Position length = pAccess->Length();
229 if (length <= 108)
230 return { SCE_X12_BAD, 0 };
231
232 pAccess->GetCharRange(&m_SeparatorElement, 3, 1);
233 pAccess->GetCharRange(&m_SeparatorSubElement, 104, 1);
234
235 // Look for GS, as that's the next segment. Anything between 105 and GS is our segment separator.
236 Sci_Position posGS;
237 char bufGS[3] = { 0 };
238 for (posGS = 105; posGS < length - 2; posGS++)
239 {
240 pAccess->GetCharRange(bufGS, posGS, 2);
241 if (bufGS[0] == 'G' && bufGS[1] == 'S')
242 {
243 m_SeparatorSegment.resize(posGS - 105);
244 pAccess->GetCharRange(&m_SeparatorSegment.at(0), 105, posGS - 105);
245
246 // Is some of that CR+LF?
247 size_t nPos = m_SeparatorSegment.find_last_not_of("\r\n");
248 m_LineFeed = m_SeparatorSegment.substr(nPos + 1);
249 m_SeparatorSegment = m_SeparatorSegment.substr(0, nPos + 1);
250 break;
251 }
252 }
253 if (m_SeparatorSegment.empty() && m_LineFeed.empty())
254 {
255 return { SCE_X12_BAD, 105 };
256 }
257
258 // Validate we have an element separator, and it's not silly!
259 if (m_SeparatorElement == '\0' || m_SeparatorElement == '\n' || m_SeparatorElement == '\r')
260 return { SCE_X12_BAD, 3 };
261
262 // Validate we have an element separator, and it's not silly!
263 if (m_SeparatorSubElement == '\0' || m_SeparatorSubElement == '\n' || m_SeparatorSubElement == '\r')
264 return { SCE_X12_BAD, 103 };
265 if (m_SeparatorElement == m_SeparatorSubElement)
266 return { SCE_X12_BAD, 104 };
267 for (auto& c : m_SeparatorSegment)
268 {
269 if (m_SeparatorElement == c)
270 return { SCE_X12_BAD, 105 };
271 if (m_SeparatorSubElement == c)
272 return { SCE_X12_BAD, 105 };
273 }
274
275 // Check we have element markers at all the right places! ISA element has fixed entries.
276 std::vector<Sci_PositionU> ElementMarkers = { 3, 6, 17, 20, 31, 34, 50, 53, 69, 76, 81, 83, 89, 99, 101, 103 };
277 for (auto i : ElementMarkers)
278 {
279 char c;
280 pAccess->GetCharRange(&c, i, 1);
281 if (c != m_SeparatorElement)
282 return { SCE_X12_BAD, i };
283 }
284 // Check we have no element markers anywhere else!
285 for (Sci_PositionU i = 0; i < 105; i++)
286 {
287 if (std::find(ElementMarkers.begin(), ElementMarkers.end(), i) != ElementMarkers.end())
288 continue;
289
290 char c;
291 pAccess->GetCharRange(&c, i, 1);
292 if (c == m_SeparatorElement)
293 return { SCE_X12_BAD, i };
294 }
295
296 return { SCE_X12_ENVELOPE };
297}
298
299Sci_PositionU LexerX12::FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const
300{
301 Sci_PositionU length = pAccess->Length();
302 std::string bufTest = m_SeparatorSegment + m_LineFeed; // quick way of making the lengths the same
303 std::string bufCompare = bufTest;
304
305 for (; startPos > 0; startPos--)
306 {
307 if (startPos + bufTest.size() > length)
308 continue;
309
310 pAccess->GetCharRange(&bufTest.at(0), startPos, bufTest.size());
311 if (bufTest == bufCompare)
312 {
313 return startPos + bufTest.size();
314 }
315 }
316 // We didn't find a ', so just go with the beginning
317 return 0;
318}
319
320LexerX12::Terminator LexerX12::DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const
321{
322 Sci_PositionU Length = pAccess->Length();
323 Length -= pos;
324 char c, Buf[4] = { 0 }; // max 3 + separator
325 for (Sci_PositionU posOffset = 0; posOffset < std::size(Buf) && posOffset < Length; posOffset++)
326 {
327 pAccess->GetCharRange(&c, pos + posOffset, 1);
328 if (c != m_SeparatorElement)
329 {
330 Buf[posOffset] = c;
331 continue;
332 }
333
334 // check for special segments, involved in folding start/stop.
335 if (memcmp(Buf, "ISA", 3) == 0)
336 return { SCE_X12_ENVELOPE, pos + posOffset, 1, +1 };
337 if (memcmp(Buf, "IEA", 3) == 0)
338 return { SCE_X12_ENVELOPE, pos + posOffset, 1, -1 };
339 if (memcmp(Buf, "GS", 2) == 0)
340 return { SCE_X12_FUNCTIONGROUP, pos + posOffset, 1, +1 };
341 if (memcmp(Buf, "GE", 2) == 0)
342 return { SCE_X12_FUNCTIONGROUP, pos + posOffset, 1, -1 };
343 if (memcmp(Buf, "ST", 2) == 0)
344 return { SCE_X12_TRANSACTIONSET, pos + posOffset, 1, +1 };
345 if (memcmp(Buf, "SE", 2) == 0)
346 return { SCE_X12_TRANSACTIONSET, pos + posOffset, 1, -1 };
347 return { SCE_X12_SEGMENTHEADER, pos + posOffset, 1, 0 };
348 }
349 return { SCE_X12_BAD, pos, 0, 0 };
350}
351
352LexerX12::Terminator LexerX12::FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator) const
353{
354 char c;
355 Sci_PositionU length = pAccess->Length();
356 std::string bufTestSegment = m_SeparatorSegment; // quick way of making the lengths the same
357 std::string bufTestLineFeed = m_LineFeed; // quick way of making the lengths the same
358
359
360 while (pos < (Sci_PositionU)length)
361 {
362 pAccess->GetCharRange(&c, pos, 1);
363 if (pos + m_SeparatorSegment.size() > length)
364 bufTestSegment.clear(); // going up - so once we can't get this, we're done with the buffer.
365 else if (!bufTestSegment.empty())
366 pAccess->GetCharRange(&bufTestSegment.at(0), pos, bufTestSegment.size());
367 if (pos + m_LineFeed.size() > length)
368 bufTestLineFeed.clear(); // going up - so once we can't get this, we're done with the buffer.
369 else if (!bufTestLineFeed.empty())
370 pAccess->GetCharRange(&bufTestLineFeed.at(0), pos, bufTestLineFeed.size());
371
372 if (!bJustSegmentTerminator && c == m_SeparatorElement)
373 return { SCE_X12_SEP_ELEMENT, pos, 1 };
374 else if (!bJustSegmentTerminator && c == m_SeparatorSubElement)
375 return { SCE_X12_SEP_SUBELEMENT, pos, 1 };
376 else if (!m_SeparatorSegment.empty() && bufTestSegment == m_SeparatorSegment)
377 {
378 if (m_LineFeed.empty())
379 return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() };
380 // is this the end?
381 if (pos + m_SeparatorSegment.size() == length)
382 return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() };
383 // Check if we're followed by a linefeed.
384 if (pos + m_SeparatorSegment.size() + m_LineFeed.size() > length)
385 return { SCE_X12_BAD, pos };
386 bufTestSegment = m_LineFeed;
387 pAccess->GetCharRange(&bufTestSegment.at(0), pos + m_SeparatorSegment.size(), bufTestSegment.size());
388 if (bufTestSegment == m_LineFeed)
389 return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() + m_LineFeed.size() };
390 break;
391 }
392 else if (m_SeparatorSegment.empty() && bufTestLineFeed == m_LineFeed)
393 {
394 return { SCE_X12_SEGMENTEND, pos, m_LineFeed.size() };
395 }
396 pos++;
397 }
398
399 return { SCE_X12_BAD, pos };
400}
401