1// Scintilla source code edit control
2// Encoding: UTF-8
3/** @file LexMMIXAL.cxx
4 ** Lexer for MMIX Assembler Language.
5 ** Written by Christoph Hösler <christoph.hoesler@student.uni-tuebingen.de>
6 ** For information about MMIX visit http://www-cs-faculty.stanford.edu/~knuth/mmix.html
7 **/
8// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
9// The License.txt file describes the conditions under which this software may be distributed.
10
11#include <stdlib.h>
12#include <string.h>
13#include <stdio.h>
14#include <stdarg.h>
15#include <assert.h>
16#include <ctype.h>
17
18#include <string>
19#include <string_view>
20
21#include "ILexer.h"
22#include "Scintilla.h"
23#include "SciLexer.h"
24
25#include "WordList.h"
26#include "LexAccessor.h"
27#include "Accessor.h"
28#include "StyleContext.h"
29#include "CharacterSet.h"
30#include "LexerModule.h"
31
32using namespace Lexilla;
33
34
/**
 * Tell whether @a ch may appear inside an MMIXAL word (label, opcode or
 * symbol reference): an ASCII letter or digit, ':' or '_'.
 *
 * @param ch Character value to classify; any int is accepted.
 * @return true when @a ch is a word character, false otherwise (including
 *         for every value outside the ASCII range).
 */
static inline bool IsAWordChar(const int ch) {
	// isalnum() is only defined for values representable as unsigned char
	// (and EOF), so reject negatives as well as values >= 0x80 before calling it.
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == ':' || ch == '_';
}
38
39static inline bool isMMIXALOperator(char ch) {
40 if (IsASCII(ch) && isalnum(ch))
41 return false;
42 if (ch == '+' || ch == '-' || ch == '|' || ch == '^' ||
43 ch == '*' || ch == '/' ||
44 ch == '%' || ch == '<' || ch == '>' || ch == '&' ||
45 ch == '~' || ch == '$' ||
46 ch == ',' || ch == '(' || ch == ')' ||
47 ch == '[' || ch == ']')
48 return true;
49 return false;
50}
51
52static void ColouriseMMIXALDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
53 Accessor &styler) {
54
55 WordList &opcodes = *keywordlists[0];
56 WordList &special_register = *keywordlists[1];
57 WordList &predef_symbols = *keywordlists[2];
58
59 StyleContext sc(startPos, length, initStyle, styler);
60
61 for (; sc.More(); sc.Forward())
62 {
63 // No EOL continuation
64 if (sc.atLineStart) {
65 if (sc.ch == '@' && sc.chNext == 'i') {
66 sc.SetState(SCE_MMIXAL_INCLUDE);
67 } else {
68 sc.SetState(SCE_MMIXAL_LEADWS);
69 }
70 }
71
72 // Check if first non whitespace character in line is alphanumeric
73 if (sc.state == SCE_MMIXAL_LEADWS && !isspace(sc.ch)) { // LEADWS
74 if(!IsAWordChar(sc.ch)) {
75 sc.SetState(SCE_MMIXAL_COMMENT);
76 } else {
77 if(sc.atLineStart) {
78 sc.SetState(SCE_MMIXAL_LABEL);
79 } else {
80 sc.SetState(SCE_MMIXAL_OPCODE_PRE);
81 }
82 }
83 }
84
85 // Determine if the current state should terminate.
86 if (sc.state == SCE_MMIXAL_OPERATOR) { // OPERATOR
87 sc.SetState(SCE_MMIXAL_OPERANDS);
88 } else if (sc.state == SCE_MMIXAL_NUMBER) { // NUMBER
89 if (!isdigit(sc.ch)) {
90 if (IsAWordChar(sc.ch)) {
91 sc.ChangeState(SCE_MMIXAL_REF);
92 sc.SetState(SCE_MMIXAL_REF);
93 } else {
94 sc.SetState(SCE_MMIXAL_OPERANDS);
95 }
96 }
97 } else if (sc.state == SCE_MMIXAL_LABEL) { // LABEL
98 if (!IsAWordChar(sc.ch) ) {
99 sc.SetState(SCE_MMIXAL_OPCODE_PRE);
100 }
101 } else if (sc.state == SCE_MMIXAL_REF) { // REF
102 if (!IsAWordChar(sc.ch) ) {
103 char s0[100];
104 sc.GetCurrent(s0, sizeof(s0));
105 const char *s = s0;
106 if (*s == ':') { // ignore base prefix for match
107 ++s;
108 }
109 if (special_register.InList(s)) {
110 sc.ChangeState(SCE_MMIXAL_REGISTER);
111 } else if (predef_symbols.InList(s)) {
112 sc.ChangeState(SCE_MMIXAL_SYMBOL);
113 }
114 sc.SetState(SCE_MMIXAL_OPERANDS);
115 }
116 } else if (sc.state == SCE_MMIXAL_OPCODE_PRE) { // OPCODE_PRE
117 if (!isspace(sc.ch)) {
118 sc.SetState(SCE_MMIXAL_OPCODE);
119 }
120 } else if (sc.state == SCE_MMIXAL_OPCODE) { // OPCODE
121 if (!IsAWordChar(sc.ch) ) {
122 char s[100];
123 sc.GetCurrent(s, sizeof(s));
124 if (opcodes.InList(s)) {
125 sc.ChangeState(SCE_MMIXAL_OPCODE_VALID);
126 } else {
127 sc.ChangeState(SCE_MMIXAL_OPCODE_UNKNOWN);
128 }
129 sc.SetState(SCE_MMIXAL_OPCODE_POST);
130 }
131 } else if (sc.state == SCE_MMIXAL_STRING) { // STRING
132 if (sc.ch == '\"') {
133 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
134 } else if (sc.atLineEnd) {
135 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
136 }
137 } else if (sc.state == SCE_MMIXAL_CHAR) { // CHAR
138 if (sc.ch == '\'') {
139 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
140 } else if (sc.atLineEnd) {
141 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
142 }
143 } else if (sc.state == SCE_MMIXAL_REGISTER) { // REGISTER
144 if (!isdigit(sc.ch)) {
145 sc.SetState(SCE_MMIXAL_OPERANDS);
146 }
147 } else if (sc.state == SCE_MMIXAL_HEX) { // HEX
148 if (!isxdigit(sc.ch)) {
149 sc.SetState(SCE_MMIXAL_OPERANDS);
150 }
151 }
152
153 // Determine if a new state should be entered.
154 if (sc.state == SCE_MMIXAL_OPCODE_POST || // OPCODE_POST
155 sc.state == SCE_MMIXAL_OPERANDS) { // OPERANDS
156 if (sc.state == SCE_MMIXAL_OPERANDS && isspace(sc.ch)) {
157 sc.SetState(SCE_MMIXAL_COMMENT);
158 } else if (isdigit(sc.ch)) {
159 sc.SetState(SCE_MMIXAL_NUMBER);
160 } else if (IsAWordChar(sc.ch) || sc.Match('@')) {
161 sc.SetState(SCE_MMIXAL_REF);
162 } else if (sc.Match('\"')) {
163 sc.SetState(SCE_MMIXAL_STRING);
164 } else if (sc.Match('\'')) {
165 sc.SetState(SCE_MMIXAL_CHAR);
166 } else if (sc.Match('$')) {
167 sc.SetState(SCE_MMIXAL_REGISTER);
168 } else if (sc.Match('#')) {
169 sc.SetState(SCE_MMIXAL_HEX);
170 } else if (isMMIXALOperator(static_cast<char>(sc.ch))) {
171 sc.SetState(SCE_MMIXAL_OPERATOR);
172 }
173 }
174 }
175 sc.Complete();
176}
177
// Descriptions of the keyword lists used by this lexer, in the order in which
// ColouriseMMIXALDoc indexes keywordlists. The array is null-terminated.
static const char * const MMIXALWordListDesc[] = {
	"Operation Codes",      // keywordlists[0]
	"Special Register",     // keywordlists[1]
	"Predefined Symbols",   // keywordlists[2]
	nullptr
};
184
185LexerModule lmMMIXAL(SCLEX_MMIXAL, ColouriseMMIXALDoc, "mmixal", 0, MMIXALWordListDesc);
186
187