| 1 | // Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file | 
|---|
| 2 | // for details. All rights reserved. Use of this source code is governed by a | 
|---|
| 3 | // BSD-style license that can be found in the LICENSE file. | 
|---|
| 4 |  | 
|---|
| 5 | #ifndef RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_ | 
|---|
| 6 | #define RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_ | 
|---|
| 7 |  | 
|---|
| 8 | #if defined(DART_PRECOMPILED_RUNTIME) | 
|---|
| 9 | #error "AOT runtime should not use compiler sources (including header files)" | 
|---|
| 10 | #endif  // defined(DART_PRECOMPILED_RUNTIME) | 
|---|
| 11 |  | 
|---|
| 12 | #include "platform/text_buffer.h" | 
|---|
| 13 |  | 
|---|
| 14 | #include "vm/allocation.h" | 
|---|
| 15 | #include "vm/growable_array.h" | 
|---|
| 16 | #include "vm/hash_map.h" | 
|---|
| 17 | #include "vm/zone.h" | 
|---|
| 18 |  | 
|---|
| 19 | namespace dart { | 
|---|
| 20 |  | 
|---|
// X-macro listing every concrete S-expression atom kind. M is invoked as
// M(name, value_type), where SExp<name> is the class name and value_type is
// the C++ type of the value the atom wraps.
#define FOR_EACH_S_EXPRESSION_ATOM(M)                                          \
  M(Bool, bool)                                                                \
  M(Double, double)                                                            \
  M(Integer, int64_t)                                                          \
  M(String, const char*)                                                       \
  M(Symbol, const char*)

// X-macro listing every concrete S-expression kind: all atoms plus lists.
// Lists have no single wrapped value, so a placeholder (_) is passed as the
// second argument.
#define FOR_EACH_S_EXPRESSION(M)                                               \
  FOR_EACH_S_EXPRESSION_ATOM(M)                                                \
  M(List, _)

// X-macro listing the abstract (non-leaf) S-expression kinds.
#define FOR_EACH_ABSTRACT_S_EXPRESSION(M) M(Atom, _)

// Forward-declare SExp<name> for every concrete and abstract kind so the
// type-check helpers on the base class can mention them before definition.
#define FORWARD_DECLARATION(name, value_type) class SExp##name;
FOR_EACH_S_EXPRESSION(FORWARD_DECLARATION)
FOR_EACH_ABSTRACT_S_EXPRESSION(FORWARD_DECLARATION)
#undef FORWARD_DECLARATION

| 39 | // Abstract base class for S-expressions used as an intermediate form for the | 
|---|
| 40 | // IL serializer. These aren't true (LISP-like) S-expressions, as the atoms | 
|---|
| 41 | // are more restricted and the lists have extra information. Here is an | 
|---|
| 42 | // illustrative BNF-style grammar of the current serialized form of | 
|---|
| 43 | // S-expressions that includes non-whitespace literal tokens: | 
|---|
| 44 | // | 
|---|
| 45 | // <s-exp>      ::= <atom> | <list> | 
|---|
| 46 | // <atom>       ::= <bool> | <integer> | <string> | <symbol> | 
|---|
| 47 | // <list>       ::= '(' <s-exp>* <extra-info>? ')' | 
|---|
| 48 | // <extra-info> ::= '{' <extra-elem>* '}' | 
|---|
| 49 | // <extra-elem> ::= <symbol> <s-exp> ',' | 
|---|
| 50 | // | 
|---|
| 51 | // Here, <string>s are double-quoted strings with backslash escaping and | 
|---|
| 52 | // <symbol>s are sequences of consecutive non-whitespace characters that do not | 
|---|
| 53 | // include commas (,), parentheses (()), curly braces ({}), or the double-quote | 
|---|
| 54 | // character ("). | 
|---|
| 55 | // | 
|---|
| 56 | // In addition, the <extra-info> is considered a map from symbol labels to | 
|---|
| 57 | // S-expression values, and as such each symbol used as a key in an <extra-info> | 
|---|
| 58 | // block should only appear once as a key within that block. | 
|---|
| 59 | class SExpression : public ZoneAllocated { | 
|---|
| 60 | public: | 
|---|
| 61 | explicit SExpression(intptr_t start = kInvalidPos) : start_(start) {} | 
|---|
| 62 | virtual ~SExpression() {} | 
|---|
| 63 |  | 
|---|
| 64 | static intptr_t const kInvalidPos = -1; | 
|---|
| 65 |  | 
|---|
| 66 | static SExpression* FromCString(Zone* zone, const char* cstr); | 
|---|
| 67 | const char* ToCString(Zone* zone) const; | 
|---|
| 68 | intptr_t start() const { return start_; } | 
|---|
| 69 |  | 
|---|
| 70 | #define S_EXPRESSION_TYPE_CHECK(name, value_type)                              \ | 
|---|
| 71 | bool Is##name() const { return (As##name() != nullptr); }                    \ | 
|---|
| 72 | SExp##name* As##name() {                                                     \ | 
|---|
| 73 | auto const const_this = const_cast<const SExpression*>(this);              \ | 
|---|
| 74 | return const_cast<SExp##name*>(const_this->As##name());                    \ | 
|---|
| 75 | }                                                                            \ | 
|---|
| 76 | virtual const SExp##name* As##name() const { return nullptr; } | 
|---|
| 77 |  | 
|---|
| 78 | FOR_EACH_S_EXPRESSION(S_EXPRESSION_TYPE_CHECK) | 
|---|
| 79 | FOR_EACH_ABSTRACT_S_EXPRESSION(S_EXPRESSION_TYPE_CHECK) | 
|---|
| 80 |  | 
|---|
| 81 | virtual const char* DebugName() const = 0; | 
|---|
| 82 | virtual bool Equals(SExpression* sexp) const = 0; | 
|---|
| 83 | virtual void SerializeTo(Zone* zone, | 
|---|
| 84 | BaseTextBuffer* buffer, | 
|---|
| 85 | const char* indent, | 
|---|
| 86 | intptr_t width = 80) const = 0; | 
|---|
| 87 | virtual void SerializeToLine(BaseTextBuffer* buffer) const = 0; | 
|---|
| 88 |  | 
|---|
| 89 | private: | 
|---|
| 90 | // Starting character position of the s-expression in the original | 
|---|
| 91 | // serialization, if it was deserialized. | 
|---|
| 92 | intptr_t const start_; | 
|---|
| 93 | DISALLOW_COPY_AND_ASSIGN(SExpression); | 
|---|
| 94 | }; | 
|---|
| 95 |  | 
|---|
| 96 | class SExpAtom : public SExpression { | 
|---|
| 97 | public: | 
|---|
| 98 | explicit SExpAtom(intptr_t start = kInvalidPos) : SExpression(start) {} | 
|---|
| 99 |  | 
|---|
| 100 | virtual const SExpAtom* AsAtom() const { return this; } | 
|---|
| 101 | // No atoms have sub-elements, so they always print to a single line. | 
|---|
| 102 | virtual void SerializeTo(Zone* zone, | 
|---|
| 103 | BaseTextBuffer* buffer, | 
|---|
| 104 | const char* indent, | 
|---|
| 105 | intptr_t width = 80) const { | 
|---|
| 106 | SerializeToLine(buffer); | 
|---|
| 107 | } | 
|---|
| 108 |  | 
|---|
| 109 | private: | 
|---|
| 110 | DISALLOW_COPY_AND_ASSIGN(SExpAtom); | 
|---|
| 111 | }; | 
|---|
| 112 |  | 
|---|
| 113 | #define DEFINE_S_EXPRESSION_TYPE_CHECK(name)                                   \ | 
|---|
| 114 | virtual const SExp##name* As##name() const { return this; }                  \ | 
|---|
| 115 | virtual const char* DebugName() const { return #name; } | 
|---|
| 116 |  | 
|---|
| 117 | // The various concrete S-expression atom classes are thin wrappers around | 
|---|
| 118 | // their contained value that includes serialization and type check methods. | 
|---|
| 119 | #define DEFINE_S_EXPRESSION_ATOM_CLASS(name, value_type)                       \ | 
|---|
| 120 | class SExp##name : public SExpAtom {                                         \ | 
|---|
| 121 | public:                                                                     \ | 
|---|
| 122 | explicit SExp##name(value_type val, intptr_t start = kInvalidPos)          \ | 
|---|
| 123 | : SExpAtom(start), val_(val) {}                                        \ | 
|---|
| 124 | value_type value() const { return val_; }                                  \ | 
|---|
| 125 | virtual bool Equals(SExpression* sexp) const;                              \ | 
|---|
| 126 | bool Equals(value_type val) const;                                         \ | 
|---|
| 127 | virtual void SerializeToLine(BaseTextBuffer* buffer) const;                \ | 
|---|
| 128 | DEFINE_S_EXPRESSION_TYPE_CHECK(name)                                       \ | 
|---|
| 129 | private:                                                                    \ | 
|---|
| 130 | value_type const val_;                                                     \ | 
|---|
| 131 | DISALLOW_COPY_AND_ASSIGN(SExp##name);                                      \ | 
|---|
| 132 | }; | 
|---|
| 133 |  | 
|---|
| 134 | FOR_EACH_S_EXPRESSION_ATOM(DEFINE_S_EXPRESSION_ATOM_CLASS) | 
|---|
| 135 |  | 
|---|
| 136 | // A list of S-expressions. Unlike normal S-expressions, an S-expression list | 
|---|
| 137 | // also contains a hash map kept separate from the elements, which we use for | 
|---|
| 138 | // extra non-argument information for IL instructions. | 
|---|
| 139 | class SExpList : public SExpression { | 
|---|
| 140 | public: | 
|---|
| 141 | explicit SExpList(Zone* zone, intptr_t start = kInvalidPos) | 
|---|
| 142 | : SExpression(start), contents_(zone, 2), extra_info_(zone) {} | 
|---|
| 143 |  | 
|---|
| 144 | using  = CStringMap<SExpression*>; | 
|---|
| 145 |  | 
|---|
| 146 | void Add(SExpression* sexp); | 
|---|
| 147 | void (const char* label, SExpression* value); | 
|---|
| 148 |  | 
|---|
| 149 | SExpression* At(intptr_t i) const { return contents_.At(i); } | 
|---|
| 150 | intptr_t Length() const { return contents_.length(); } | 
|---|
| 151 |  | 
|---|
| 152 | intptr_t () const { return extra_info_.Length(); } | 
|---|
| 153 | ExtraInfoHashMap::Iterator () const { | 
|---|
| 154 | return extra_info_.GetIterator(); | 
|---|
| 155 | } | 
|---|
| 156 | bool (const char* cstr) const { return extra_info_.HasKey(cstr); } | 
|---|
| 157 | SExpression* (const char* cstr) const { | 
|---|
| 158 | return extra_info_.LookupValue(cstr); | 
|---|
| 159 | } | 
|---|
| 160 |  | 
|---|
| 161 | // Shortcut for retrieving the tag from a tagged list (list that contains an | 
|---|
| 162 | // initial symbol). Returns nullptr if the list is not a tagged list. | 
|---|
| 163 | SExpSymbol* Tag() const { | 
|---|
| 164 | if (Length() == 0 || !At(0)->IsSymbol()) return nullptr; | 
|---|
| 165 | return At(0)->AsSymbol(); | 
|---|
| 166 | } | 
|---|
| 167 |  | 
|---|
| 168 | DEFINE_S_EXPRESSION_TYPE_CHECK(List) | 
|---|
| 169 | virtual bool Equals(SExpression* sexp) const; | 
|---|
| 170 | virtual void SerializeTo(Zone* zone, | 
|---|
| 171 | BaseTextBuffer* buffer, | 
|---|
| 172 | const char* indent, | 
|---|
| 173 | intptr_t width = 80) const; | 
|---|
| 174 | virtual void SerializeToLine(BaseTextBuffer* buffer) const; | 
|---|
| 175 |  | 
|---|
| 176 | private: | 
|---|
| 177 | static const char* const kElemIndent; | 
|---|
| 178 | static const char* const ; | 
|---|
| 179 |  | 
|---|
| 180 | void (Zone* zone, | 
|---|
| 181 | BaseTextBuffer* buffer, | 
|---|
| 182 | const char* indent, | 
|---|
| 183 | int width) const; | 
|---|
| 184 | void (BaseTextBuffer* buffer) const; | 
|---|
| 185 |  | 
|---|
| 186 | ZoneGrowableArray<SExpression*> contents_; | 
|---|
| 187 | ExtraInfoHashMap ; | 
|---|
| 188 |  | 
|---|
| 189 | DISALLOW_COPY_AND_ASSIGN(SExpList); | 
|---|
| 190 | }; | 
|---|
| 191 |  | 
|---|
| 192 | class SExpParser : public ValueObject { | 
|---|
| 193 | public: | 
|---|
| 194 | SExpParser(Zone* zone, const char* cstr) | 
|---|
| 195 | : SExpParser(zone, cstr, strlen(cstr)) {} | 
|---|
| 196 | SExpParser(Zone* zone, const char* cstr, intptr_t len) | 
|---|
| 197 | : zone_(zone), | 
|---|
| 198 | buffer_(ASSERT_NOTNULL(cstr)), | 
|---|
| 199 | buffer_size_(strnlen(cstr, len)), | 
|---|
| 200 | cur_label_(nullptr), | 
|---|
| 201 | cur_value_(nullptr), | 
|---|
| 202 | list_stack_(zone, 2), | 
|---|
| 203 | in_extra_stack_(zone, 2), | 
|---|
| 204 | extra_start_stack_(zone, 2), | 
|---|
| 205 | cur_label_stack_(zone, 2), | 
|---|
| 206 | error_message_(nullptr) {} | 
|---|
| 207 |  | 
|---|
| 208 | // Constants used in serializing and deserializing S-expressions. | 
|---|
| 209 | static const char* const kBoolTrueSymbol; | 
|---|
| 210 | static const char* const kBoolFalseSymbol; | 
|---|
| 211 | static char const kDoubleExponentChar; | 
|---|
| 212 | static const char* const kDoubleInfinitySymbol; | 
|---|
| 213 | static const char* const kDoubleNaNSymbol; | 
|---|
| 214 |  | 
|---|
| 215 | struct ErrorStrings : AllStatic { | 
|---|
| 216 | static const char* const kOpenString; | 
|---|
| 217 | static const char* const kBadUnicodeEscape; | 
|---|
| 218 | static const char* const kOpenSExpList; | 
|---|
| 219 | static const char* const kOpenMap; | 
|---|
| 220 | static const char* const kNestedMap; | 
|---|
| 221 | static const char* const kMapOutsideList; | 
|---|
| 222 | static const char* const kNonSymbolLabel; | 
|---|
| 223 | static const char* const kNoMapLabel; | 
|---|
| 224 | static const char* const kRepeatedMapLabel; | 
|---|
| 225 | static const char* const kNoMapValue; | 
|---|
| 226 | static const char* const ; | 
|---|
| 227 | static const char* const kUnexpectedComma; | 
|---|
| 228 | static const char* const kUnexpectedRightParen; | 
|---|
| 229 | static const char* const kUnexpectedRightCurly; | 
|---|
| 230 | }; | 
|---|
| 231 |  | 
|---|
| 232 | intptr_t error_pos() const { return error_pos_; } | 
|---|
| 233 | const char* error_message() const { return error_message_; } | 
|---|
| 234 |  | 
|---|
| 235 | const char* Input() const { return buffer_; } | 
|---|
| 236 | SExpression* Parse(); | 
|---|
| 237 | DART_NORETURN void ReportError() const; | 
|---|
| 238 |  | 
|---|
| 239 | private: | 
|---|
| 240 | #define S_EXP_TOKEN_LIST(M)                                                    \ | 
|---|
| 241 | M(LeftParen)                                                                 \ | 
|---|
| 242 | M(RightParen)                                                                \ | 
|---|
| 243 | M(Comma)                                                                     \ | 
|---|
| 244 | M(LeftCurly)                                                                 \ | 
|---|
| 245 | M(RightCurly)                                                                \ | 
|---|
| 246 | M(QuotedString)                                                              \ | 
|---|
| 247 | M(Integer)                                                                   \ | 
|---|
| 248 | M(Double)                                                                    \ | 
|---|
| 249 | M(Boolean)                                                                   \ | 
|---|
| 250 | M(Symbol) | 
|---|
| 251 |  | 
|---|
| 252 | // clang-format off | 
|---|
| 253 | #define DEFINE_S_EXP_TOKEN_ENUM_LINE(name) k##name, | 
|---|
| 254 | enum TokenType { | 
|---|
| 255 | S_EXP_TOKEN_LIST(DEFINE_S_EXP_TOKEN_ENUM_LINE) | 
|---|
| 256 | kMaxTokens, | 
|---|
| 257 | }; | 
|---|
| 258 | #undef DEFINE_S_EXP_TOKEN_ENUM | 
|---|
| 259 | // clang-format on | 
|---|
| 260 |  | 
|---|
| 261 | class Token : public ZoneAllocated { | 
|---|
| 262 | public: | 
|---|
| 263 | Token(TokenType type, const char* cstr, intptr_t len) | 
|---|
| 264 | : type_(type), cstr_(cstr), len_(len) {} | 
|---|
| 265 |  | 
|---|
| 266 | TokenType type() const { return type_; } | 
|---|
| 267 | intptr_t length() const { return len_; } | 
|---|
| 268 | const char* cstr() const { return cstr_; } | 
|---|
| 269 | const char* DebugName() const { return TokenNames[type()]; } | 
|---|
| 270 | const char* ToCString(Zone* zone); | 
|---|
| 271 |  | 
|---|
| 272 | private: | 
|---|
| 273 | static const char* const TokenNames[kMaxTokens]; | 
|---|
| 274 |  | 
|---|
| 275 | TokenType const type_; | 
|---|
| 276 | const char* const cstr_; | 
|---|
| 277 | intptr_t const len_; | 
|---|
| 278 | }; | 
|---|
| 279 |  | 
|---|
| 280 | SExpression* TokenToSExpression(Token* token); | 
|---|
| 281 | Token* GetNextToken(); | 
|---|
| 282 | void Reset(); | 
|---|
| 283 | void StoreError(intptr_t pos, const char* format, ...) PRINTF_ATTRIBUTE(3, 4); | 
|---|
| 284 |  | 
|---|
| 285 | static bool IsSymbolContinue(char c); | 
|---|
| 286 |  | 
|---|
| 287 | Zone* const zone_; | 
|---|
| 288 | const char* const buffer_; | 
|---|
| 289 | intptr_t const buffer_size_; | 
|---|
| 290 | intptr_t cur_pos_ = 0; | 
|---|
| 291 | bool  = false; | 
|---|
| 292 | intptr_t  = -1; | 
|---|
| 293 | const char* cur_label_; | 
|---|
| 294 | SExpression* cur_value_; | 
|---|
| 295 | ZoneGrowableArray<SExpList*> list_stack_; | 
|---|
| 296 | ZoneGrowableArray<bool> ; | 
|---|
| 297 | ZoneGrowableArray<intptr_t> ; | 
|---|
| 298 | ZoneGrowableArray<const char*> cur_label_stack_; | 
|---|
| 299 | intptr_t error_pos_ = -1; | 
|---|
| 300 | const char* error_message_; | 
|---|
| 301 | }; | 
|---|
| 302 |  | 
|---|
| 303 | }  // namespace dart | 
|---|
| 304 |  | 
|---|
| 305 | #endif  // RUNTIME_VM_COMPILER_BACKEND_SEXPRESSION_H_ | 
|---|
| 306 |  | 
|---|