1// Scintilla source code edit control
2/** @file LexVHDL.cxx
3 ** Lexer for VHDL
4 ** Written by Phil Reid,
5 ** Based on:
6 ** - The Verilog Lexer by Avi Yegudin
7 ** - The Fortran Lexer by Chuan-jian Shen
8 ** - The C++ lexer by Neil Hodgson
9 **/
10// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
11// The License.txt file describes the conditions under which this software may be distributed.
12
13#include <stdlib.h>
14#include <string.h>
15#include <stdio.h>
16#include <stdarg.h>
17#include <assert.h>
18#include <ctype.h>
19
20#include <string>
21#include <string_view>
22
23#include "ILexer.h"
24#include "Scintilla.h"
25#include "SciLexer.h"
26
27#include "WordList.h"
28#include "LexAccessor.h"
29#include "Accessor.h"
30#include "StyleContext.h"
31#include "CharacterSet.h"
32#include "LexerModule.h"
33
34using namespace Lexilla;
35
36static void ColouriseVHDLDoc(
37 Sci_PositionU startPos,
38 Sci_Position length,
39 int initStyle,
40 WordList *keywordlists[],
41 Accessor &styler);
42
43
44/***************************************/
/**
 * Tell whether @a ch can be part of a word: an ASCII letter or digit, '.' or '_'.
 *
 * @param ch Character value to classify.
 * @return true only for values in [0, 0x80) that are alphanumeric, '.' or '_'.
 */
static inline bool IsAWordChar(const int ch) {
  // The <ctype.h> classifiers are undefined for negative arguments other than EOF,
  // so reject negative values (e.g. a sign-extended byte) before calling isalnum().
  return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
}
48
49/***************************************/
/**
 * Tell whether @a ch can begin a word: an ASCII letter or digit, or '_'.
 *
 * @param ch Character value to classify.
 * @return true only for values in [0, 0x80) that are alphanumeric or '_'.
 */
static inline bool IsAWordStart(const int ch) {
  // The <ctype.h> classifiers are undefined for negative arguments other than EOF,
  // so reject negative values (e.g. a sign-extended byte) before calling isalnum().
  return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_');
}
53
54/***************************************/
/**
 * Tell whether @a ch is a blank: a space, a horizontal tab (0x09) or a vertical tab (0x0b).
 * Line-end characters are not blanks.
 */
static inline bool IsABlank(unsigned int ch) {
  switch (ch) {
  case ' ':
  case 0x09:
  case 0x0b:
    return true;
  default:
    return false;
  }
}
58
59/***************************************/
60static void ColouriseVHDLDoc(
61 Sci_PositionU startPos,
62 Sci_Position length,
63 int initStyle,
64 WordList *keywordlists[],
65 Accessor &styler)
66{
67 WordList &Keywords = *keywordlists[0];
68 WordList &Operators = *keywordlists[1];
69 WordList &Attributes = *keywordlists[2];
70 WordList &Functions = *keywordlists[3];
71 WordList &Packages = *keywordlists[4];
72 WordList &Types = *keywordlists[5];
73 WordList &User = *keywordlists[6];
74
75 StyleContext sc(startPos, length, initStyle, styler);
76 bool isExtendedId = false; // true when parsing an extended identifier
77
78 while (sc.More())
79 {
80 bool advance = true;
81
82 // Determine if the current state should terminate.
83 if (sc.state == SCE_VHDL_OPERATOR) {
84 sc.SetState(SCE_VHDL_DEFAULT);
85 } else if (sc.state == SCE_VHDL_NUMBER) {
86 if (!IsAWordChar(sc.ch) && (sc.ch != '#')) {
87 sc.SetState(SCE_VHDL_DEFAULT);
88 }
89 } else if (sc.state == SCE_VHDL_IDENTIFIER) {
90 if (!isExtendedId && (!IsAWordChar(sc.ch) || (sc.ch == '.'))) {
91 char s[100];
92 sc.GetCurrentLowered(s, sizeof(s));
93 if (Keywords.InList(s)) {
94 sc.ChangeState(SCE_VHDL_KEYWORD);
95 } else if (Operators.InList(s)) {
96 sc.ChangeState(SCE_VHDL_STDOPERATOR);
97 } else if (Attributes.InList(s)) {
98 sc.ChangeState(SCE_VHDL_ATTRIBUTE);
99 } else if (Functions.InList(s)) {
100 sc.ChangeState(SCE_VHDL_STDFUNCTION);
101 } else if (Packages.InList(s)) {
102 sc.ChangeState(SCE_VHDL_STDPACKAGE);
103 } else if (Types.InList(s)) {
104 sc.ChangeState(SCE_VHDL_STDTYPE);
105 } else if (User.InList(s)) {
106 sc.ChangeState(SCE_VHDL_USERWORD);
107 }
108 sc.SetState(SCE_VHDL_DEFAULT);
109 } else if (isExtendedId && ((sc.ch == '\\') || sc.atLineEnd)) {
110 // extended identifiers are terminated by backslash, check for end of line in case we have invalid syntax
111 isExtendedId = false;
112 sc.ForwardSetState(SCE_VHDL_DEFAULT);
113 advance = false;
114 }
115 } else if (sc.state == SCE_VHDL_COMMENT || sc.state == SCE_VHDL_COMMENTLINEBANG) {
116 if (sc.atLineEnd) {
117 sc.SetState(SCE_VHDL_DEFAULT);
118 }
119 } else if (sc.state == SCE_VHDL_STRING) {
120 if (sc.ch == '"') {
121 advance = false;
122 sc.Forward();
123 if (sc.ch == '"')
124 sc.Forward();
125 else
126 sc.SetState(SCE_VHDL_DEFAULT);
127 } else if (sc.atLineEnd) {
128 advance = false;
129 sc.ChangeState(SCE_VHDL_STRINGEOL);
130 sc.ForwardSetState(SCE_VHDL_DEFAULT);
131 }
132 } else if (sc.state == SCE_VHDL_BLOCK_COMMENT){
133 if(sc.ch == '*' && sc.chNext == '/'){
134 advance = false;
135 sc.Forward();
136 sc.ForwardSetState(SCE_VHDL_DEFAULT);
137 }
138 }
139
140 // Determine if a new state should be entered.
141 if (sc.state == SCE_VHDL_DEFAULT) {
142 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
143 sc.SetState(SCE_VHDL_NUMBER);
144 } else if (IsAWordStart(sc.ch)) {
145 sc.SetState(SCE_VHDL_IDENTIFIER);
146 } else if (sc.Match('-', '-')) {
147 if (sc.Match("--!")) // Nice to have a different comment style
148 sc.SetState(SCE_VHDL_COMMENTLINEBANG);
149 else
150 sc.SetState(SCE_VHDL_COMMENT);
151 } else if (sc.Match('/', '*')){
152 sc.SetState(SCE_VHDL_BLOCK_COMMENT);
153 } else if (sc.ch == '"') {
154 sc.SetState(SCE_VHDL_STRING);
155 } else if (sc.ch == '\'') {
156 if (sc.GetRelative(2) == '\''){
157 if (sc.chNext != '(' || sc.GetRelative(4) != '\''){
158 // Can only be a character literal
159 sc.SetState(SCE_VHDL_STRING);
160 sc.Forward();
161 sc.Forward();
162 sc.ForwardSetState(SCE_VHDL_DEFAULT);
163 advance = false;
164 } // else can be a tick or a character literal, need more context, eg.: identifier'('x')
165 } // else can only be a tick
166 } else if (sc.ch == '\\') {
167 isExtendedId = true;
168 sc.SetState(SCE_VHDL_IDENTIFIER);
169 } else if (isoperator(static_cast<char>(sc.ch))) {
170 sc.SetState(SCE_VHDL_OPERATOR);
171 }
172 }
173
174 if (advance)
175 sc.Forward();
176 }
177 sc.Complete();
178}
179//=============================================================================
180static bool IsCommentLine(Sci_Position line, Accessor &styler) {
181 Sci_Position pos = styler.LineStart(line);
182 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
183 for (Sci_Position i = pos; i < eol_pos; i++) {
184 char ch = styler[i];
185 char chNext = styler[i+1];
186 if ((ch == '-') && (chNext == '-'))
187 return true;
188 else if (ch != ' ' && ch != '\t')
189 return false;
190 }
191 return false;
192}
193static bool IsCommentBlockStart(Sci_Position line, Accessor &styler)
194{
195 Sci_Position pos = styler.LineStart(line);
196 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
197 for (Sci_Position i = pos; i < eol_pos; i++) {
198 char ch = styler[i];
199 char chNext = styler[i+1];
200 char style = styler.StyleAt(i);
201 if ((style == SCE_VHDL_BLOCK_COMMENT) && (ch == '/') && (chNext == '*'))
202 return true;
203 }
204 return false;
205}
206
207static bool IsCommentBlockEnd(Sci_Position line, Accessor &styler)
208{
209 Sci_Position pos = styler.LineStart(line);
210 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
211
212 for (Sci_Position i = pos; i < eol_pos; i++) {
213 char ch = styler[i];
214 char chNext = styler[i+1];
215 char style = styler.StyleAt(i);
216 if ((style == SCE_VHDL_BLOCK_COMMENT) && (ch == '*') && (chNext == '/'))
217 return true;
218 }
219 return false;
220}
221
222static bool IsCommentStyle(char style)
223{
224 return style == SCE_VHDL_BLOCK_COMMENT || style == SCE_VHDL_COMMENT || style == SCE_VHDL_COMMENTLINEBANG;
225}
226
227//=============================================================================
228// Folding the code
229static void FoldNoBoxVHDLDoc(
230 Sci_PositionU startPos,
231 Sci_Position length,
232 int,
233 Accessor &styler)
234{
235 // Decided it would be smarter to have the lexer have all keywords included. Therefore I
236 // don't check if the style for the keywords that I use to adjust the levels.
237 char words[] =
238 "architecture begin block case component else elsif end entity generate loop package process record then "
239 "procedure protected function when units";
240 WordList keywords;
241 keywords.Set(words);
242
243 bool foldComment = styler.GetPropertyInt("fold.comment", 1) != 0;
244 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
245 bool foldAtElse = styler.GetPropertyInt("fold.at.else", 1) != 0;
246 bool foldAtBegin = styler.GetPropertyInt("fold.at.Begin", 1) != 0;
247 bool foldAtParenthese = styler.GetPropertyInt("fold.at.Parenthese", 1) != 0;
248 //bool foldAtWhen = styler.GetPropertyInt("fold.at.When", 1) != 0; //< fold at when in case statements
249
250 int visibleChars = 0;
251 Sci_PositionU endPos = startPos + length;
252
253 Sci_Position lineCurrent = styler.GetLine(startPos);
254 int levelCurrent = SC_FOLDLEVELBASE;
255 if(lineCurrent > 0)
256 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
257 //int levelMinCurrent = levelCurrent;
258 int levelMinCurrentElse = levelCurrent; ///< Used for folding at 'else'
259 int levelMinCurrentBegin = levelCurrent; ///< Used for folding at 'begin'
260 int levelNext = levelCurrent;
261
262 /***************************************/
263 Sci_Position lastStart = 0;
264 char prevWord[32] = "";
265
266 /***************************************/
267 // Find prev word
268 // The logic for going up or down a level depends on a the previous keyword
269 // This code could be cleaned up.
270 Sci_Position end = 0;
271 Sci_PositionU j;
272 for(j = startPos; j>0; j--)
273 {
274 char ch = styler.SafeGetCharAt(j);
275 char chPrev = styler.SafeGetCharAt(j-1);
276 int style = styler.StyleAt(j);
277 int stylePrev = styler.StyleAt(j-1);
278 if ((!IsCommentStyle(style)) && (stylePrev != SCE_VHDL_STRING))
279 {
280 if(IsAWordChar(chPrev) && !IsAWordChar(ch))
281 {
282 end = j-1;
283 }
284 }
285 if ((!IsCommentStyle(style)) && (style != SCE_VHDL_STRING))
286 {
287 if(!IsAWordChar(chPrev) && IsAWordStart(ch) && (end != 0))
288 {
289 char s[32];
290 Sci_PositionU k;
291 for(k=0; (k<31 ) && (k<end-j+1 ); k++) {
292 s[k] = static_cast<char>(tolower(styler[j+k]));
293 }
294 s[k] = '\0';
295
296 if(keywords.InList(s)) {
297 strcpy(prevWord, s);
298 break;
299 }
300 }
301 }
302 }
303 for(j=j+static_cast<Sci_PositionU>(strlen(prevWord)); j<endPos; j++)
304 {
305 char ch = styler.SafeGetCharAt(j);
306 int style = styler.StyleAt(j);
307 if ((!IsCommentStyle(style)) && (style != SCE_VHDL_STRING))
308 {
309 if((ch == ';') && (strcmp(prevWord, "end") == 0))
310 {
311 strcpy(prevWord, ";");
312 }
313 }
314 }
315
316 char chNext = styler[startPos];
317 char chPrev = '\0';
318 char chNextNonBlank;
319 int styleNext = styler.StyleAt(startPos);
320 //Platform::DebugPrintf("Line[%04d] Prev[%20s] ************************* Level[%x]\n", lineCurrent+1, prevWord, levelCurrent);
321
322 /***************************************/
323 for (Sci_PositionU i = startPos; i < endPos; i++)
324 {
325 char ch = chNext;
326 chNext = styler.SafeGetCharAt(i + 1);
327 chPrev = styler.SafeGetCharAt(i - 1);
328 chNextNonBlank = chNext;
329 Sci_PositionU j = i+1;
330 while(IsABlank(chNextNonBlank) && j<endPos)
331 {
332 j ++ ;
333 chNextNonBlank = styler.SafeGetCharAt(j);
334 }
335 int style = styleNext;
336 styleNext = styler.StyleAt(i + 1);
337 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
338
339 if (foldComment && atEOL)
340 {
341 if(IsCommentLine(lineCurrent, styler))
342 {
343 if(!IsCommentLine(lineCurrent-1, styler) && IsCommentLine(lineCurrent+1, styler))
344 {
345 levelNext++;
346 }
347 else if(IsCommentLine(lineCurrent-1, styler) && !IsCommentLine(lineCurrent+1, styler))
348 {
349 levelNext--;
350 }
351 }
352 else
353 {
354 if (IsCommentBlockStart(lineCurrent, styler) && !IsCommentBlockEnd(lineCurrent, styler))
355 {
356 levelNext++;
357 }
358 else if (IsCommentBlockEnd(lineCurrent, styler) && !IsCommentBlockStart(lineCurrent, styler))
359 {
360 levelNext--;
361 }
362 }
363 }
364
365 if ((style == SCE_VHDL_OPERATOR) && foldAtParenthese)
366 {
367 if(ch == '(') {
368 levelNext++;
369 } else if (ch == ')') {
370 levelNext--;
371 }
372 }
373
374 if ((!IsCommentStyle(style)) && (style != SCE_VHDL_STRING))
375 {
376 if((ch == ';') && (strcmp(prevWord, "end") == 0))
377 {
378 strcpy(prevWord, ";");
379 }
380
381 if(!IsAWordChar(chPrev) && IsAWordStart(ch))
382 {
383 lastStart = i;
384 }
385
386 if(IsAWordChar(ch) && !IsAWordChar(chNext)) {
387 char s[32];
388 Sci_PositionU k;
389 for(k=0; (k<31 ) && (k<i-lastStart+1 ); k++) {
390 s[k] = static_cast<char>(tolower(styler[lastStart+k]));
391 }
392 s[k] = '\0';
393
394 if(keywords.InList(s))
395 {
396 if (
397 strcmp(s, "architecture") == 0 ||
398 strcmp(s, "case") == 0 ||
399 strcmp(s, "generate") == 0 ||
400 strcmp(s, "block") == 0 ||
401 strcmp(s, "loop") == 0 ||
402 strcmp(s, "package") ==0 ||
403 strcmp(s, "process") == 0 ||
404 strcmp(s, "protected") == 0 ||
405 strcmp(s, "record") == 0 ||
406 strcmp(s, "then") == 0 ||
407 strcmp(s, "units") == 0)
408 {
409 if (strcmp(prevWord, "end") != 0)
410 {
411 if (levelMinCurrentElse > levelNext) {
412 levelMinCurrentElse = levelNext;
413 }
414 levelNext++;
415 }
416 } else if (
417 strcmp(s, "component") == 0 ||
418 strcmp(s, "entity") == 0 ||
419 strcmp(s, "configuration") == 0 )
420 {
421 if (strcmp(prevWord, "end") != 0)
422 { // check for instantiated unit by backward searching for the colon.
423 Sci_PositionU pos = lastStart;
424 char chAtPos=0, styleAtPos;
425 do{// skip white spaces
426 if(!pos)
427 break;
428 pos--;
429 styleAtPos = styler.StyleAt(pos);
430 chAtPos = styler.SafeGetCharAt(pos);
431 }while(pos &&
432 (chAtPos == ' ' || chAtPos == '\t' ||
433 chAtPos == '\n' || chAtPos == '\r' ||
434 IsCommentStyle(styleAtPos)));
435
436 // check for a colon (':') before the instantiated units "entity", "component" or "configuration". Don't fold thereafter.
437 if (chAtPos != ':')
438 {
439 if (levelMinCurrentElse > levelNext) {
440 levelMinCurrentElse = levelNext;
441 }
442 levelNext++;
443 }
444 }
445 } else if (
446 strcmp(s, "procedure") == 0 ||
447 strcmp(s, "function") == 0)
448 {
449 if (strcmp(prevWord, "end") != 0) // check for "end procedure" etc.
450 { // This code checks to see if the procedure / function is a definition within a "package"
451 // rather than the actual code in the body.
452 int BracketLevel = 0;
453 for(Sci_Position pos=i+1; pos<styler.Length(); pos++)
454 {
455 int styleAtPos = styler.StyleAt(pos);
456 char chAtPos = styler.SafeGetCharAt(pos);
457 if(chAtPos == '(') BracketLevel++;
458 if(chAtPos == ')') BracketLevel--;
459 if(
460 (BracketLevel == 0) &&
461 (!IsCommentStyle(styleAtPos)) &&
462 (styleAtPos != SCE_VHDL_STRING) &&
463 !iswordchar(styler.SafeGetCharAt(pos-1)) &&
464 (chAtPos|' ')=='i' && (styler.SafeGetCharAt(pos+1)|' ')=='s' &&
465 !iswordchar(styler.SafeGetCharAt(pos+2)))
466 {
467 if (levelMinCurrentElse > levelNext) {
468 levelMinCurrentElse = levelNext;
469 }
470 levelNext++;
471 break;
472 }
473 if((BracketLevel == 0) && (chAtPos == ';'))
474 {
475 break;
476 }
477 }
478 }
479
480 } else if (strcmp(s, "end") == 0) {
481 levelNext--;
482 } else if(strcmp(s, "elsif") == 0) { // elsif is followed by then so folding occurs correctly
483 levelNext--;
484 } else if (strcmp(s, "else") == 0) {
485 if(strcmp(prevWord, "when") != 0) // ignore a <= x when y else z;
486 {
487 levelMinCurrentElse = levelNext - 1; // VHDL else is all on its own so just dec. the min level
488 }
489 } else if(
490 ((strcmp(s, "begin") == 0) && (strcmp(prevWord, "architecture") == 0)) ||
491 ((strcmp(s, "begin") == 0) && (strcmp(prevWord, "function") == 0)) ||
492 ((strcmp(s, "begin") == 0) && (strcmp(prevWord, "procedure") == 0)))
493 {
494 levelMinCurrentBegin = levelNext - 1;
495 }
496 //Platform::DebugPrintf("Line[%04d] Prev[%20s] Cur[%20s] Level[%x]\n", lineCurrent+1, prevWord, s, levelCurrent);
497 strcpy(prevWord, s);
498 }
499 }
500 }
501 if (atEOL) {
502 int levelUse = levelCurrent;
503
504 if (foldAtElse && (levelMinCurrentElse < levelUse)) {
505 levelUse = levelMinCurrentElse;
506 }
507 if (foldAtBegin && (levelMinCurrentBegin < levelUse)) {
508 levelUse = levelMinCurrentBegin;
509 }
510 int lev = levelUse | levelNext << 16;
511 if (visibleChars == 0 && foldCompact)
512 lev |= SC_FOLDLEVELWHITEFLAG;
513
514 if (levelUse < levelNext)
515 lev |= SC_FOLDLEVELHEADERFLAG;
516 if (lev != styler.LevelAt(lineCurrent)) {
517 styler.SetLevel(lineCurrent, lev);
518 }
519 //Platform::DebugPrintf("Line[%04d] ---------------------------------------------------- Level[%x]\n", lineCurrent+1, levelCurrent);
520 lineCurrent++;
521 levelCurrent = levelNext;
522 //levelMinCurrent = levelCurrent;
523 levelMinCurrentElse = levelCurrent;
524 levelMinCurrentBegin = levelCurrent;
525 visibleChars = 0;
526 }
527 /***************************************/
528 if (!isspacechar(ch)) visibleChars++;
529 }
530
531 /***************************************/
532// Platform::DebugPrintf("Line[%04d] ---------------------------------------------------- Level[%x]\n", lineCurrent+1, levelCurrent);
533}
534
535//=============================================================================
536static void FoldVHDLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *[],
537 Accessor &styler) {
538 FoldNoBoxVHDLDoc(startPos, length, initStyle, styler);
539}
540
541//=============================================================================
// Descriptions of the word lists, in the order ColouriseVHDLDoc indexes them.
// The array is terminated by a null pointer.
static const char * const VHDLWordLists[] = {
  "Keywords",
  "Operators",
  "Attributes",
  "Standard Functions",
  "Standard Packages",
  "Standard Types",
  "User Words",
  nullptr,
};
552
553
554LexerModule lmVHDL(SCLEX_VHDL, ColouriseVHDLDoc, "vhdl", FoldVHDLDoc, VHDLWordLists);
555
556
557// Keyword:
558// access after alias all architecture array assert attribute begin block body buffer bus case component
559// configuration constant disconnect downto else elsif end entity exit file for function generate generic
560// group guarded if impure in inertial inout is label library linkage literal loop map new next null of
561// on open others out package port postponed procedure process pure range record register reject report
562// return select severity shared signal subtype then to transport type unaffected units until use variable
563// wait when while with
564//
565// Operators:
566// abs and mod nand nor not or rem rol ror sla sll sra srl xnor xor
567//
568// Attributes:
569// left right low high ascending image value pos val succ pred leftof rightof base range reverse_range
570// length delayed stable quiet transaction event active last_event last_active last_value driving
571// driving_value simple_name path_name instance_name
572//
573// Std Functions:
574// now readline read writeline write endfile resolved to_bit to_bitvector to_stdulogic to_stdlogicvector
575// to_stdulogicvector to_x01 to_x01z to_UX01 rising_edge falling_edge is_x shift_left shift_right rotate_left
576// rotate_right resize to_integer to_unsigned to_signed std_match to_01
577//
578// Std Packages:
579// std ieee work standard textio std_logic_1164 std_logic_arith std_logic_misc std_logic_signed
580// std_logic_textio std_logic_unsigned numeric_bit numeric_std math_complex math_real vital_primitives
581// vital_timing
582//
583// Std Types:
584// boolean bit character severity_level integer real time delay_length natural positive string bit_vector
585// file_open_kind file_open_status line text side width std_ulogic std_ulogic_vector std_logic
586// std_logic_vector X01 X01Z UX01 UX01Z unsigned signed
587//
588
589