1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#include "tokenstream.h"
5#include "../math/math.h"
6
7namespace embree
8{
9 /* shorthands for common sets of characters */
10 const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
11 const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
12 const std::string TokenStream::numbers = "0123456789";
13 const std::string TokenStream::separators = "\n\t\r ";
14 const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
15
/* Fills a 256 entry lookup table used to categorize characters:
 * afterwards map[b] is true exactly for those byte values b that
 * occur in chrs, and false for every other byte value. */
static void createCharMap(bool map[256], const std::string& chrs) {
  /* start from an all-false table */
  for (bool* entry = map; entry != map + 256; ++entry)
    *entry = false;
  /* index through uint8_t so that characters with the high bit set map to 128..255 */
  for (const char ch : chrs)
    map[uint8_t(ch)] = true;
}
21
22 /* build full tokenizer that takes list of valid characters and keywords */
23 TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from
24 const std::string& alpha, //< valid characters for identifiers
25 const std::string& seps, //< characters that act as separators
26 const std::vector<std::string>& symbols) //< symbols
27 : cin(cin), symbols(symbols)
28 {
29 createCharMap(isAlphaMap,alpha);
30 createCharMap(isSepMap,seps);
31 createCharMap(isStringCharMap,stringChars);
32 }
33
34 bool TokenStream::decDigits(std::string& str_o)
35 {
36 bool ok = false;
37 std::string str;
38 if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get();
39 while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
40 if (ok) str_o += str;
41 else cin->unget(str.size());
42 return ok;
43 }
44
45 bool TokenStream::decDigits1(std::string& str_o)
46 {
47 bool ok = false;
48 std::string str;
49 while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
50 if (ok) str_o += str; else cin->unget(str.size());
51 return ok;
52 }
53
54 bool TokenStream::trySymbol(const std::string& symbol)
55 {
56 size_t pos = 0;
57 while (pos < symbol.size()) {
58 if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; }
59 cin->drop(); pos++;
60 }
61 return true;
62 }
63
64 bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
65 {
66 for (size_t i=0; i<symbols.size(); i++) {
67 if (!trySymbol(symbols[i])) continue;
68 token = Token(symbols[i],Token::TY_SYMBOL,loc);
69 return true;
70 }
71 return false;
72 }
73
74 bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
75 {
76 bool ok = false;
77 std::string str;
78 if (trySymbol("nan")) {
79 token = Token(float(nan));
80 return true;
81 }
82 if (trySymbol("+inf")) {
83 token = Token(float(pos_inf));
84 return true;
85 }
86 if (trySymbol("-inf")) {
87 token = Token(float(neg_inf));
88 return true;
89 }
90
91 if (decDigits(str))
92 {
93 if (cin->peek() == '.') {
94 str += (char)cin->get();
95 decDigits(str);
96 if (cin->peek() == 'e' || cin->peek() == 'E') {
97 str += (char)cin->get();
98 if (decDigits(str)) ok = true; // 1.[2]E2
99 }
100 else ok = true; // 1.[2]
101 }
102 else if (cin->peek() == 'e' || cin->peek() == 'E') {
103 str += (char)cin->get();
104 if (decDigits(str)) ok = true; // 1E2
105 }
106 }
107 else
108 {
109 if (cin->peek() == '.') {
110 str += (char)cin->get();
111 if (decDigits(str)) {
112 if (cin->peek() == 'e' || cin->peek() == 'E') {
113 str += (char)cin->get();
114 if (decDigits(str)) ok = true; // .3E2
115 }
116 else ok = true; // .3
117 }
118 }
119 }
120 if (ok) {
121 token = Token((float)atof(str.c_str()),loc);
122 }
123 else cin->unget(str.size());
124 return ok;
125 }
126
127 bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {
128 std::string str;
129 if (decDigits(str)) {
130 token = Token(atoi(str.c_str()),loc);
131 return true;
132 }
133 return false;
134 }
135
136 bool TokenStream::tryString(Token& token, const ParseLocation& loc)
137 {
138 std::string str;
139 if (cin->peek() != '\"') return false;
140 cin->drop();
141 while (cin->peek() != '\"') {
142 const int c = cin->get();
143 if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
144 str += (char)c;
145 }
146 cin->drop();
147 token = Token(str,Token::TY_STRING,loc);
148 return true;
149 }
150
151 bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
152 {
153 std::string str;
154 if (!isAlpha(cin->peek())) return false;
155 str += (char)cin->get();
156 while (isAlphaNum(cin->peek())) str += (char)cin->get();
157 token = Token(str,Token::TY_IDENTIFIER,loc);
158 return true;
159 }
160
161 void TokenStream::skipSeparators()
162 {
163 /* skip separators */
164 while (cin->peek() != EOF && isSeparator(cin->peek()))
165 cin->drop();
166 }
167
168 Token TokenStream::next()
169 {
170 Token token;
171 skipSeparators();
172 ParseLocation loc = cin->loc();
173 if (trySymbols (token,loc)) return token; /**< try to parse a symbol */
174 if (tryFloat (token,loc)) return token; /**< try to parse float */
175 if (tryInt (token,loc)) return token; /**< try to parse integer */
176 if (tryString (token,loc)) return token; /**< try to parse string */
177 if (tryIdentifier(token,loc)) return token; /**< try to parse identifier */
178 if (cin->peek() == EOF ) return Token(loc); /**< return EOF token */
179 return Token((char)cin->get(),loc); /**< return invalid character token */
180 }
181}
182