1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
#include "stream.h"
#include <string>
#include <utility>
#include <vector>
9
10namespace embree
11{
12 /*! token class */
13 class Token
14 {
15 public:
16
17 enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };
18
19 Token ( const ParseLocation& loc = ParseLocation()) : ty(TY_EOF ), loc(loc) {}
20 Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {}
21 Token (int i, const ParseLocation& loc = ParseLocation()) : ty(TY_INT ), i(i), loc(loc) {}
22 Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {}
23 Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {}
24
25 static Token Eof() { return Token(); }
26 static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
27 static Token Str(std::string str) { return Token(str,TY_STRING); }
28 static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }
29
30 char Char() const {
31 if (ty == TY_CHAR) return c;
32 THROW_RUNTIME_ERROR(loc.str()+": character expected");
33 }
34
35 int Int() const {
36 if (ty == TY_INT) return i;
37 THROW_RUNTIME_ERROR(loc.str()+": integer expected");
38 }
39
40 float Float(bool cast = true) const {
41 if (ty == TY_FLOAT) return f;
42 if (ty == TY_INT && cast) return (float)i;
43 THROW_RUNTIME_ERROR(loc.str()+": float expected");
44 }
45
46 std::string Identifier() const {
47 if (ty == TY_IDENTIFIER) return str;
48 THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
49 }
50
51 std::string String() const {
52 if (ty == TY_STRING) return str;
53 THROW_RUNTIME_ERROR(loc.str()+": string expected");
54 }
55
56 std::string Symbol() const {
57 if (ty == TY_SYMBOL) return str;
58 THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
59 }
60
61 const ParseLocation& Location() const { return loc; }
62
63 friend bool operator==(const Token& a, const Token& b)
64 {
65 if (a.ty != b.ty) return false;
66 if (a.ty == TY_CHAR) return a.c == b.c;
67 if (a.ty == TY_INT) return a.i == b.i;
68 if (a.ty == TY_FLOAT) return a.f == b.f;
69 if (a.ty == TY_IDENTIFIER) return a.str == b.str;
70 if (a.ty == TY_STRING) return a.str == b.str;
71 if (a.ty == TY_SYMBOL) return a.str == b.str;
72 return true;
73 }
74
75 friend bool operator!=(const Token& a, const Token& b) {
76 return !(a == b);
77 }
78
79 friend bool operator <( const Token& a, const Token& b ) {
80 if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
81 if (a.ty == TY_CHAR) return a.c < b.c;
82 if (a.ty == TY_INT) return a.i < b.i;
83 if (a.ty == TY_FLOAT) return a.f < b.f;
84 if (a.ty == TY_IDENTIFIER) return a.str < b.str;
85 if (a.ty == TY_STRING) return a.str < b.str;
86 if (a.ty == TY_SYMBOL) return a.str < b.str;
87 return false;
88 }
89
90 friend std::ostream& operator<<(std::ostream& cout, const Token& t)
91 {
92 if (t.ty == TY_EOF) return cout << "eof";
93 if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")";
94 if (t.ty == TY_INT) return cout << "Int(" << t.i << ")";
95 if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")";
96 if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")";
97 if (t.ty == TY_STRING) return cout << "String(" << t.str << ")";
98 if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")";
99 return cout << "unknown";
100 }
101
102 private:
103 Type ty; //< the type of the token
104 union {
105 char c; //< data for char tokens
106 int i; //< data for int tokens
107 float f; //< data for float tokens
108 };
109 std::string str; //< data for string and identifier tokens
110 ParseLocation loc; //< the location the token is from
111 };
112
113 /*! build full tokenizer that takes list of valid characters and keywords */
114 class TokenStream : public Stream<Token>
115 {
116 public:
117
118 /*! shorthands for common sets of characters */
119 static const std::string alpha;
120 static const std::string ALPHA;
121 static const std::string numbers;
122 static const std::string separators;
123 static const std::string stringChars;
124
125 public:
126 TokenStream(const Ref<Stream<int> >& cin,
127 const std::string& alpha, //< valid characters for identifiers
128 const std::string& seps, //< characters that act as separators
129 const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols
130 public:
131 ParseLocation location() { return cin->loc(); }
132 Token next();
133 bool trySymbol(const std::string& symbol);
134
135 private:
136 void skipSeparators();
137 bool decDigits(std::string& str);
138 bool decDigits1(std::string& str);
139 bool trySymbols(Token& token, const ParseLocation& loc);
140 bool tryFloat(Token& token, const ParseLocation& loc);
141 bool tryInt(Token& token, const ParseLocation& loc);
142 bool tryString(Token& token, const ParseLocation& loc);
143 bool tryIdentifier(Token& token, const ParseLocation& loc);
144
145 Ref<Stream<int> > cin;
146 bool isSepMap[256];
147 bool isAlphaMap[256];
148 bool isStringCharMap[256];
149 std::vector<std::string> symbols;
150
151 /*! checks if a character is a separator */
152 __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
153
154 /*! checks if a character is a number */
155 __forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; }
156
157 /*! checks if a character is valid inside a string */
158 __forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
159
160 /*! checks if a character is legal for an identifier */
161 __forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; }
162 __forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); }
163 };
164}
165