1// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#ifndef RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_
6#define RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_
7
8#if defined(DART_PRECOMPILED_RUNTIME)
9#error "AOT runtime should not use compiler sources (including header files)"
10#endif // defined(DART_PRECOMPILED_RUNTIME)
11
12#include "platform/text_buffer.h"
13
14#include "vm/allocation.h"
15#include "vm/growable_array.h"
16#include "vm/hash_map.h"
17#include "vm/zone.h"
18
19namespace dart {
20
// X-macro listing every concrete S-expression atom kind. V is applied once
// per kind as V(Name, ValueType), where SExp<Name> is the class for that kind
// and ValueType is the C++ type of the value it wraps.
#define FOR_EACH_S_EXPRESSION_ATOM(V)                                          \
  V(Bool, bool)                                                                \
  V(Double, double)                                                            \
  V(Integer, int64_t)                                                          \
  V(String, const char*)                                                       \
  V(Symbol, const char*)

// X-macro listing every concrete S-expression kind: all atoms, then List.
// List does not wrap a single value, so `_` is passed as a placeholder for
// the value type.
#define FOR_EACH_S_EXPRESSION(V)                                               \
  FOR_EACH_S_EXPRESSION_ATOM(V)                                                \
  V(List, _)

// X-macro listing the abstract S-expression kinds (again with a placeholder
// in the value type position).
#define FOR_EACH_ABSTRACT_S_EXPRESSION(V) V(Atom, _)

// Forward-declare SExp<Name> for every concrete and abstract kind. Only the
// name is used; the second argument is ignored.
#define FORWARD_DECLARATION(name, unused_value_type) class SExp##name;
FOR_EACH_S_EXPRESSION(FORWARD_DECLARATION)
FOR_EACH_ABSTRACT_S_EXPRESSION(FORWARD_DECLARATION)
#undef FORWARD_DECLARATION
38
39// Abstract base class for S-expressions used as an intermediate form for the
40// IL serializer. These aren't true (LISP-like) S-expressions, as the atoms
41// are more restricted and the lists have extra information. Here is an
42// illustrative BNF-style grammar of the current serialized form of
43// S-expressions that includes non-whitespace literal tokens:
44//
45// <s-exp> ::= <atom> | <list>
// <atom> ::= <bool> | <double> | <integer> | <string> | <symbol>
47// <list> ::= '(' <s-exp>* <extra-info>? ')'
48// <extra-info> ::= '{' <extra-elem>* '}'
49// <extra-elem> ::= <symbol> <s-exp> ','
50//
51// Here, <string>s are double-quoted strings with backslash escaping and
52// <symbol>s are sequences of consecutive non-whitespace characters that do not
53// include commas (,), parentheses (()), curly braces ({}), or the double-quote
54// character (").
55//
56// In addition, the <extra-info> is considered a map from symbol labels to
57// S-expression values, and as such each symbol used as a key in an <extra-info>
58// block should only appear once as a key within that block.
59class SExpression : public ZoneAllocated {
60 public:
61 explicit SExpression(intptr_t start = kInvalidPos) : start_(start) {}
62 virtual ~SExpression() {}
63
64 static intptr_t const kInvalidPos = -1;
65
66 static SExpression* FromCString(Zone* zone, const char* cstr);
67 const char* ToCString(Zone* zone) const;
68 intptr_t start() const { return start_; }
69
70#define S_EXPRESSION_TYPE_CHECK(name, value_type) \
71 bool Is##name() const { return (As##name() != nullptr); } \
72 SExp##name* As##name() { \
73 auto const const_this = const_cast<const SExpression*>(this); \
74 return const_cast<SExp##name*>(const_this->As##name()); \
75 } \
76 virtual const SExp##name* As##name() const { return nullptr; }
77
78 FOR_EACH_S_EXPRESSION(S_EXPRESSION_TYPE_CHECK)
79 FOR_EACH_ABSTRACT_S_EXPRESSION(S_EXPRESSION_TYPE_CHECK)
80
81 virtual const char* DebugName() const = 0;
82 virtual bool Equals(SExpression* sexp) const = 0;
83 virtual void SerializeTo(Zone* zone,
84 BaseTextBuffer* buffer,
85 const char* indent,
86 intptr_t width = 80) const = 0;
87 virtual void SerializeToLine(BaseTextBuffer* buffer) const = 0;
88
89 private:
90 // Starting character position of the s-expression in the original
91 // serialization, if it was deserialized.
92 intptr_t const start_;
93 DISALLOW_COPY_AND_ASSIGN(SExpression);
94};
95
96class SExpAtom : public SExpression {
97 public:
98 explicit SExpAtom(intptr_t start = kInvalidPos) : SExpression(start) {}
99
100 virtual const SExpAtom* AsAtom() const { return this; }
101 // No atoms have sub-elements, so they always print to a single line.
102 virtual void SerializeTo(Zone* zone,
103 BaseTextBuffer* buffer,
104 const char* indent,
105 intptr_t width = 80) const {
106 SerializeToLine(buffer);
107 }
108
109 private:
110 DISALLOW_COPY_AND_ASSIGN(SExpAtom);
111};
112
113#define DEFINE_S_EXPRESSION_TYPE_CHECK(name) \
114 virtual const SExp##name* As##name() const { return this; } \
115 virtual const char* DebugName() const { return #name; }
116
117// The various concrete S-expression atom classes are thin wrappers around
118// their contained value that includes serialization and type check methods.
119#define DEFINE_S_EXPRESSION_ATOM_CLASS(name, value_type) \
120 class SExp##name : public SExpAtom { \
121 public: \
122 explicit SExp##name(value_type val, intptr_t start = kInvalidPos) \
123 : SExpAtom(start), val_(val) {} \
124 value_type value() const { return val_; } \
125 virtual bool Equals(SExpression* sexp) const; \
126 bool Equals(value_type val) const; \
127 virtual void SerializeToLine(BaseTextBuffer* buffer) const; \
128 DEFINE_S_EXPRESSION_TYPE_CHECK(name) \
129 private: \
130 value_type const val_; \
131 DISALLOW_COPY_AND_ASSIGN(SExp##name); \
132 };
133
134FOR_EACH_S_EXPRESSION_ATOM(DEFINE_S_EXPRESSION_ATOM_CLASS)
135
136// A list of S-expressions. Unlike normal S-expressions, an S-expression list
137// also contains a hash map kept separate from the elements, which we use for
138// extra non-argument information for IL instructions.
139class SExpList : public SExpression {
140 public:
141 explicit SExpList(Zone* zone, intptr_t start = kInvalidPos)
142 : SExpression(start), contents_(zone, 2), extra_info_(zone) {}
143
144 using ExtraInfoHashMap = CStringMap<SExpression*>;
145
146 void Add(SExpression* sexp);
147 void AddExtra(const char* label, SExpression* value);
148
149 SExpression* At(intptr_t i) const { return contents_.At(i); }
150 intptr_t Length() const { return contents_.length(); }
151
152 intptr_t ExtraLength() const { return extra_info_.Length(); }
153 ExtraInfoHashMap::Iterator ExtraIterator() const {
154 return extra_info_.GetIterator();
155 }
156 bool ExtraHasKey(const char* cstr) const { return extra_info_.HasKey(cstr); }
157 SExpression* ExtraLookupValue(const char* cstr) const {
158 return extra_info_.LookupValue(cstr);
159 }
160
161 // Shortcut for retrieving the tag from a tagged list (list that contains an
162 // initial symbol). Returns nullptr if the list is not a tagged list.
163 SExpSymbol* Tag() const {
164 if (Length() == 0 || !At(0)->IsSymbol()) return nullptr;
165 return At(0)->AsSymbol();
166 }
167
168 DEFINE_S_EXPRESSION_TYPE_CHECK(List)
169 virtual bool Equals(SExpression* sexp) const;
170 virtual void SerializeTo(Zone* zone,
171 BaseTextBuffer* buffer,
172 const char* indent,
173 intptr_t width = 80) const;
174 virtual void SerializeToLine(BaseTextBuffer* buffer) const;
175
176 private:
177 static const char* const kElemIndent;
178 static const char* const kExtraIndent;
179
180 void SerializeExtraInfoTo(Zone* zone,
181 BaseTextBuffer* buffer,
182 const char* indent,
183 int width) const;
184 void SerializeExtraInfoToLine(BaseTextBuffer* buffer) const;
185
186 ZoneGrowableArray<SExpression*> contents_;
187 ExtraInfoHashMap extra_info_;
188
189 DISALLOW_COPY_AND_ASSIGN(SExpList);
190};
191
192class SExpParser : public ValueObject {
193 public:
194 SExpParser(Zone* zone, const char* cstr)
195 : SExpParser(zone, cstr, strlen(cstr)) {}
196 SExpParser(Zone* zone, const char* cstr, intptr_t len)
197 : zone_(zone),
198 buffer_(ASSERT_NOTNULL(cstr)),
199 buffer_size_(strnlen(cstr, len)),
200 cur_label_(nullptr),
201 cur_value_(nullptr),
202 list_stack_(zone, 2),
203 in_extra_stack_(zone, 2),
204 extra_start_stack_(zone, 2),
205 cur_label_stack_(zone, 2),
206 error_message_(nullptr) {}
207
208 // Constants used in serializing and deserializing S-expressions.
209 static const char* const kBoolTrueSymbol;
210 static const char* const kBoolFalseSymbol;
211 static char const kDoubleExponentChar;
212 static const char* const kDoubleInfinitySymbol;
213 static const char* const kDoubleNaNSymbol;
214
215 struct ErrorStrings : AllStatic {
216 static const char* const kOpenString;
217 static const char* const kBadUnicodeEscape;
218 static const char* const kOpenSExpList;
219 static const char* const kOpenMap;
220 static const char* const kNestedMap;
221 static const char* const kMapOutsideList;
222 static const char* const kNonSymbolLabel;
223 static const char* const kNoMapLabel;
224 static const char* const kRepeatedMapLabel;
225 static const char* const kNoMapValue;
226 static const char* const kExtraMapValue;
227 static const char* const kUnexpectedComma;
228 static const char* const kUnexpectedRightParen;
229 static const char* const kUnexpectedRightCurly;
230 };
231
232 intptr_t error_pos() const { return error_pos_; }
233 const char* error_message() const { return error_message_; }
234
235 const char* Input() const { return buffer_; }
236 SExpression* Parse();
237 DART_NORETURN void ReportError() const;
238
239 private:
240#define S_EXP_TOKEN_LIST(M) \
241 M(LeftParen) \
242 M(RightParen) \
243 M(Comma) \
244 M(LeftCurly) \
245 M(RightCurly) \
246 M(QuotedString) \
247 M(Integer) \
248 M(Double) \
249 M(Boolean) \
250 M(Symbol)
251
252 // clang-format off
253#define DEFINE_S_EXP_TOKEN_ENUM_LINE(name) k##name,
254 enum TokenType {
255 S_EXP_TOKEN_LIST(DEFINE_S_EXP_TOKEN_ENUM_LINE)
256 kMaxTokens,
257 };
258#undef DEFINE_S_EXP_TOKEN_ENUM
259 // clang-format on
260
261 class Token : public ZoneAllocated {
262 public:
263 Token(TokenType type, const char* cstr, intptr_t len)
264 : type_(type), cstr_(cstr), len_(len) {}
265
266 TokenType type() const { return type_; }
267 intptr_t length() const { return len_; }
268 const char* cstr() const { return cstr_; }
269 const char* DebugName() const { return TokenNames[type()]; }
270 const char* ToCString(Zone* zone);
271
272 private:
273 static const char* const TokenNames[kMaxTokens];
274
275 TokenType const type_;
276 const char* const cstr_;
277 intptr_t const len_;
278 };
279
280 SExpression* TokenToSExpression(Token* token);
281 Token* GetNextToken();
282 void Reset();
283 void StoreError(intptr_t pos, const char* format, ...) PRINTF_ATTRIBUTE(3, 4);
284
285 static bool IsSymbolContinue(char c);
286
287 Zone* const zone_;
288 const char* const buffer_;
289 intptr_t const buffer_size_;
290 intptr_t cur_pos_ = 0;
291 bool in_extra_ = false;
292 intptr_t extra_start_ = -1;
293 const char* cur_label_;
294 SExpression* cur_value_;
295 ZoneGrowableArray<SExpList*> list_stack_;
296 ZoneGrowableArray<bool> in_extra_stack_;
297 ZoneGrowableArray<intptr_t> extra_start_stack_;
298 ZoneGrowableArray<const char*> cur_label_stack_;
299 intptr_t error_pos_ = -1;
300 const char* error_message_;
301};
302
303} // namespace dart
304
305#endif // RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_
306