// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__

#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/arena.h>
#include <google/protobuf/port.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/arenastring.h>
#include <google/protobuf/endian.h>
#include <google/protobuf/implicit_weak_message.h>
#include <google/protobuf/inlined_string_field.h>
#include <google/protobuf/metadata_lite.h>
#include <google/protobuf/repeated_field.h>
#include <google/protobuf/wire_format_lite.h>

// Must be included last.
#include <google/protobuf/port_def.inc>

namespace google {
namespace protobuf {

class UnknownFieldSet;
class DescriptorPool;
class MessageFactory;

namespace internal {

// Template code below needs to know about the existence of these functions.
PROTOBUF_EXPORT void WriteVarint(uint32_t num, uint64_t val, std::string* s);
PROTOBUF_EXPORT void WriteLengthDelimited(uint32_t num, StringPiece val,
                                          std::string* s);
// Inline because it is just forwarding to s->WriteVarint.
inline void WriteVarint(uint32_t num, uint64_t val, UnknownFieldSet* s);
inline void WriteLengthDelimited(uint32_t num, StringPiece val,
                                 UnknownFieldSet* s);

// The basic abstraction the parser is designed for is a slight modification
// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
// stream as a series of buffers that concatenate to the full stream.
// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------] chunk 1
//                      [----------------------------] chunk 2
//                                          chunk 3 [--------------]
//
// Where the '-' represent the bytes which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk, or if there
// is no next chunk, it must at least still be valid to read those bytes.
// Again, pictorially, we now have
//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                    [------------------------....] chunk 2
//                                    chunk 3 [------------------..**]
//                                                      chunk 4 [--****]
// Here '-' means the bytes of the stream or chunk and '.' means bytes past
// the chunk that match up with the start of the next chunk. Above, each chunk
// has 4 '.' after the chunk. In case these 'overflow' bytes represent bytes
// past the stream, indicated by '*' above, their values are unspecified. It
// is still legal to read them (i.e. it should not segfault). Reading past the
// end should be detected by the user and indicated as an error.
//
// The reason for this, admittedly, unconventional invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important
// ways. Firstly it alleviates having to perform bounds checks if a piece of
// code is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wireformat is such that reading a key/value pair
// is always less than 16 bytes. This removes the need to switch to the next
// buffer in the middle of reading primitive values. Hence there is no need to
// store and load the current position.

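// A minimal sketch (illustrative only, not part of this header) of what the
// slop invariant buys: with kSlopBytes guaranteed readable past ptr, a
// fixed-width field can be decoded with one unconditional load instead of a
// per-read bounds check.
//
//   uint64_t ReadFixed64Unchecked(const char* ptr) {
//     uint64_t v;
//     std::memcpy(&v, ptr, sizeof(v));  // always safe inside the slop region
//     return v;
//   }
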
class PROTOBUF_EXPORT EpsCopyInputStream {
 public:
  enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };

  explicit EpsCopyInputStream(bool enable_aliasing)
      : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}

  void BackUp(const char* ptr) {
    GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
    int count;
    if (next_chunk_ == buffer_) {
      count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
    } else {
      count = size_ + static_cast<int>(buffer_end_ - ptr);
    }
    if (count > 0) StreamBackUp(count);
  }

  // If the return value is negative it's an error.
  PROTOBUF_NODISCARD int PushLimit(const char* ptr, int limit) {
    GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
    // This add is safe due to the invariant above, because
    // ptr - buffer_end_ <= kSlopBytes.
    limit += static_cast<int>(ptr - buffer_end_);
    limit_end_ = buffer_end_ + (std::min)(0, limit);
    auto old_limit = limit_;
    limit_ = limit;
    return old_limit - limit;
  }

  PROTOBUF_NODISCARD bool PopLimit(int delta) {
    if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
    limit_ = limit_ + delta;
    // TODO(gerbens) We could remove this line and hoist the code to
    // DoneFallback. Study the perf/bin-size effects.
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return true;
  }
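
  // A hedged usage sketch of how the two calls above pair up when entering a
  // length-delimited field (this mirrors the ReadSizeAndPushLimitAndDepth
  // pseudo-code documented in ParseContext below; ParseBody is a hypothetical
  // stand-in for whatever parses the payload):
  //
  //   int size = ReadSize(&ptr);           // length prefix
  //   if (!ptr) return nullptr;
  //   int old = PushLimit(ptr, size);      // narrow the stream to `size`
  //   if (old < 0) return nullptr;         // corrupt length prefix
  //   ptr = ParseBody(ptr);                // runs until Done() hits the limit
  //   if (!PopLimit(old)) return nullptr;  // restore the enclosing limit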

  PROTOBUF_NODISCARD const char* Skip(const char* ptr, int size) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      return ptr + size;
    }
    return SkipFallback(ptr, size);
  }
  PROTOBUF_NODISCARD const char* ReadString(const char* ptr, int size,
                                            std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->assign(ptr, size);
      return ptr + size;
    }
    return ReadStringFallback(ptr, size, s);
  }
  PROTOBUF_NODISCARD const char* AppendString(const char* ptr, int size,
                                              std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->append(ptr, size);
      return ptr + size;
    }
    return AppendStringFallback(ptr, size, s);
  }
  // Implemented in arenastring.cc.
  PROTOBUF_NODISCARD const char* ReadArenaString(const char* ptr,
                                                 ArenaStringPtr* s,
                                                 Arena* arena);

  template <typename Tag, typename T>
  PROTOBUF_NODISCARD const char* ReadRepeatedFixed(const char* ptr,
                                                   Tag expected_tag,
                                                   RepeatedField<T>* out);

  template <typename T>
  PROTOBUF_NODISCARD const char* ReadPackedFixed(const char* ptr, int size,
                                                 RepeatedField<T>* out);
  template <typename Add>
  PROTOBUF_NODISCARD const char* ReadPackedVarint(const char* ptr, Add add);

  uint32_t LastTag() const { return last_tag_minus_1_ + 1; }
  bool ConsumeEndGroup(uint32_t start_tag) {
    bool res = last_tag_minus_1_ == start_tag;
    last_tag_minus_1_ = 0;
    return res;
  }
  bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
  bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
  void SetLastTag(uint32_t tag) { last_tag_minus_1_ = tag - 1; }
  void SetEndOfStream() { last_tag_minus_1_ = 1; }
  bool IsExceedingLimit(const char* ptr) {
    return ptr > limit_end_ &&
           (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
  }
  bool AliasingEnabled() const { return aliasing_ != kNoAliasing; }
  int BytesUntilLimit(const char* ptr) const {
    return limit_ + static_cast<int>(buffer_end_ - ptr);
  }
  // Returns true if more data is available; if false is returned, one has to
  // call Done for further checks.
  bool DataAvailable(const char* ptr) { return ptr < limit_end_; }

 protected:
  // Returns true if a limit (either an explicit limit or the end of stream)
  // is reached. It aligns *ptr across buffer seams.
  // If the limit is exceeded it returns true and ptr is set to null.
  bool DoneWithCheck(const char** ptr, int d) {
    GOOGLE_DCHECK(*ptr);
    if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
    int overrun = static_cast<int>(*ptr - buffer_end_);
    GOOGLE_DCHECK_LE(overrun, kSlopBytes);  // Guaranteed by parse loop.
    if (overrun ==
        limit_) {  // No need to flip buffers if we ended on a limit.
      // If we actually overran the buffer and next_chunk_ is null, it means
      // the stream ended and we passed the stream end.
      if (overrun > 0 && next_chunk_ == nullptr) *ptr = nullptr;
      return true;
    }
    auto res = DoneFallback(overrun, d);
    *ptr = res.first;
    return res.second;
  }

  const char* InitFrom(StringPiece flat) {
    overall_limit_ = 0;
    if (flat.size() > kSlopBytes) {
      limit_ = kSlopBytes;
      limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
      next_chunk_ = buffer_;
      if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
      return flat.data();
    } else {
      std::memcpy(buffer_, flat.data(), flat.size());
      limit_ = 0;
      limit_end_ = buffer_end_ = buffer_ + flat.size();
      next_chunk_ = nullptr;
      if (aliasing_ == kOnPatch) {
        aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
                    reinterpret_cast<std::uintptr_t>(buffer_);
      }
      return buffer_;
    }
  }

  const char* InitFrom(io::ZeroCopyInputStream* zcis);

  const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
    if (limit == -1) return InitFrom(zcis);
    overall_limit_ = limit;
    auto res = InitFrom(zcis);
    limit_ = limit - static_cast<int>(buffer_end_ - res);
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return res;
  }

 private:
  const char* limit_end_;  // buffer_end_ + min(limit_, 0)
  const char* buffer_end_;
  const char* next_chunk_;
  int size_;
  int limit_;  // relative to buffer_end_
  io::ZeroCopyInputStream* zcis_ = nullptr;
  char buffer_[2 * kSlopBytes] = {};
  enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
  std::uintptr_t aliasing_ = kNoAliasing;
  // This variable is used to communicate how the parse ended, in order to
  // completely verify the parsed data. A wire-format parse can end because of
  // one of the following conditions:
  // 1) A parse can end on a pushed limit.
  // 2) A parse can end on End Of Stream (EOS).
  // 3) A parse can end on a 0 tag (only valid for a toplevel message).
  // 4) A parse can end on an end-group tag.
  // This variable is set to 0, which indicates case 1. If the parse
  // terminated due to EOS (case 2), it's set to 1. In case the parse ended
  // due to a terminating tag (cases 3 and 4), it's set to (tag - 1).
  // This var doesn't really belong in EpsCopyInputStream and should be part
  // of the ParseContext, but case 2 is most easily and optimally implemented
  // in DoneFallback.
  uint32_t last_tag_minus_1_ = 0;
  int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
  // Pretty random large number that seems like a safe allocation on most
  // systems. TODO(gerbens) do we need to set this as a build flag?
  enum { kSafeStringSize = 50000000 };

  // Advances to the next buffer chunk and returns a pointer to the same
  // logical place in the stream as set by overrun. Overrun indicates the
  // position in the slop region where the parse was left
  // (0 <= overrun <= kSlopBytes). Returns true if at a limit, at which point
  // the returned pointer may be null if there was an error. The invariant of
  // this function is that it's guaranteed that kSlopBytes bytes can be
  // accessed from the returned ptr. This function might advance more than one
  // buffer in the underlying ZeroCopyInputStream.
  std::pair<const char*, bool> DoneFallback(int overrun, int depth);
  // Advances to the next buffer; at most one call to Next() on the underlying
  // ZeroCopyInputStream is made. This function DOES NOT match the returned
  // pointer to where in the slop region the parse ends, hence no overrun
  // parameter. This is useful for string operations where you always copy
  // to the end of the buffer (including the slop region).
  const char* Next();
  // overrun is the location in the slop region where the stream currently is
  // (0 <= overrun <= kSlopBytes). It prevents flipping to the next buffer of
  // the ZeroCopyInputStream in case the parse will end in the last kSlopBytes
  // of the current buffer. depth is the current depth of nested groups (or
  // negative if the use case does not need careful tracking).
  inline const char* NextBuffer(int overrun, int depth);
  const char* SkipFallback(const char* ptr, int size);
  const char* AppendStringFallback(const char* ptr, int size, std::string* str);
  const char* ReadStringFallback(const char* ptr, int size, std::string* str);
  bool StreamNext(const void** data) {
    bool res = zcis_->Next(data, &size_);
    if (res) overall_limit_ -= size_;
    return res;
  }
  void StreamBackUp(int count) {
    zcis_->BackUp(count);
    overall_limit_ += count;
  }

  template <typename A>
  const char* AppendSize(const char* ptr, int size, const A& append) {
    int chunk_size = buffer_end_ + kSlopBytes - ptr;
    do {
      GOOGLE_DCHECK(size > chunk_size);
      if (next_chunk_ == nullptr) return nullptr;
      append(ptr, chunk_size);
      ptr += chunk_size;
      size -= chunk_size;
      // TODO(gerbens) Next calls NextBuffer which generates buffers with
      // overlap and thus incurs cost of copying the slop regions. This is not
      // necessary for reading strings. We should just call Next buffers.
      if (limit_ <= kSlopBytes) return nullptr;
      ptr = Next();
      if (ptr == nullptr) return nullptr;  // passed the limit
      ptr += kSlopBytes;
      chunk_size = buffer_end_ + kSlopBytes - ptr;
    } while (size > chunk_size);
    append(ptr, size);
    return ptr + size;
  }

  // AppendUntilEnd appends data until a limit (either a PushLimit or the end
  // of the stream). Normal payloads are from length-delimited fields which
  // have an explicit size. Reading until a limit only comes up when the
  // string takes the place of a protobuf, i.e. RawMessage/StringRawMessage,
  // lazy fields and implicit weak messages. We keep these methods private and
  // friend them.
  template <typename A>
  const char* AppendUntilEnd(const char* ptr, const A& append) {
    if (ptr - buffer_end_ > limit_) return nullptr;
    while (limit_ > kSlopBytes) {
      size_t chunk_size = buffer_end_ + kSlopBytes - ptr;
      append(ptr, chunk_size);
      ptr = Next();
      if (ptr == nullptr) return limit_end_;
      ptr += kSlopBytes;
    }
    auto end = buffer_end_ + limit_;
    GOOGLE_DCHECK(end >= ptr);
    append(ptr, end - ptr);
    return end;
  }

  PROTOBUF_NODISCARD const char* AppendString(const char* ptr,
                                              std::string* str) {
    return AppendUntilEnd(
        ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
  }
  friend class ImplicitWeakMessage;
};

using LazyEagerVerifyFnType = const char* (*)(const char* ptr,
                                              ParseContext* ctx);
using LazyEagerVerifyFnRef = std::remove_pointer<LazyEagerVerifyFnType>::type&;

// ParseContext holds all data that is global to the entire parse. Most
// importantly it contains the input stream, but it also tracks recursion
// depth and stores the end-group tag, in case a parser ended on an end-group,
// to verify matching start/end-group tags.
class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
 public:
  struct Data {
    const DescriptorPool* pool = nullptr;
    MessageFactory* factory = nullptr;
    Arena* arena = nullptr;
  };

  template <typename... T>
  ParseContext(int depth, bool aliasing, const char** start, T&&... args)
      : EpsCopyInputStream(aliasing), depth_(depth) {
    *start = InitFrom(std::forward<T>(args)...);
  }

  void TrackCorrectEnding() { group_depth_ = 0; }

  bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }

  int depth() const { return depth_; }

  Data& data() { return data_; }
  const Data& data() const { return data_; }

  const char* ParseMessage(MessageLite* msg, const char* ptr);

  // Spawns a child parsing context that inherits key properties. The new
  // context inherits the following:
  // --depth_, data_, check_required_fields_, lazy_parse_mode_
  // The spawned context always disables aliasing (different input).
  template <typename... T>
  ParseContext Spawn(const char** start, T&&... args) {
    ParseContext spawned(depth_, false, start, std::forward<T>(args)...);
    // Transfer key context states.
    spawned.data_ = data_;
    return spawned;
  }

  // This overload supports those few cases where ParseMessage is called
  // on a class that is not actually a proto message.
  // TODO(jorg): Eliminate this use case.
  template <typename T,
            typename std::enable_if<!std::is_base_of<MessageLite, T>::value,
                                    bool>::type = true>
  PROTOBUF_NODISCARD const char* ParseMessage(T* msg, const char* ptr);

  template <typename T>
  PROTOBUF_NODISCARD PROTOBUF_NDEBUG_INLINE const char* ParseGroup(
      T* msg, const char* ptr, uint32_t tag) {
    if (--depth_ < 0) return nullptr;
    group_depth_++;
    ptr = msg->_InternalParse(ptr, this);
    group_depth_--;
    depth_++;
    if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
    return ptr;
  }

 private:
  // Out-of-line routine to save space in ParseContext::ParseMessage<T>
  //   int old;
  //   ptr = ReadSizeAndPushLimitAndDepth(ptr, &old)
  // is equivalent to:
  //   int size = ReadSize(&ptr);
  //   if (!ptr) return nullptr;
  //   int old = PushLimit(ptr, size);
  //   if (--depth_ < 0) return nullptr;
  PROTOBUF_NODISCARD const char* ReadSizeAndPushLimitAndDepth(const char* ptr,
                                                              int* old_limit);

  // The context keeps an internal stack to keep track of the recursive
  // part of the parse state.
  // Current depth of the active parser; depth counts down.
  // This is used to limit recursion depth (to prevent overflow on malicious
  // data), but is also used to index into stack_ to store the current state.
  int depth_;
  // Unfortunately necessary for the fringe case of ending on a 0 or end-group
  // tag in the last kSlopBytes of a ZeroCopyInputStream chunk.
  int group_depth_ = INT_MIN;
  Data data_;
};

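// A hedged usage sketch (mirroring how message_lite.cc drives the parser; the
// recursion-limit accessor and the _InternalParse call shape are internal
// details, not a stable public API):
//
//   const char* ptr;
//   internal::ParseContext ctx(
//       io::CodedInputStream::GetDefaultRecursionLimit(), false, &ptr,
//       StringPiece(data, size));
//   ptr = msg->_InternalParse(ptr, &ctx);
//   // For flat input the context ends on its pushed limit on success.
//   bool ok = ptr != nullptr && ctx.EndedAtLimit();
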
template <uint32_t tag>
bool ExpectTag(const char* ptr) {
  if (tag < 128) {
    return *ptr == static_cast<char>(tag);
  } else {
    static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
    char buf[2] = {static_cast<char>(tag | 0x80), static_cast<char>(tag >> 7)};
    return std::memcmp(ptr, buf, 2) == 0;
  }
}
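
// For orientation (a worked example, not library code): a wire-format tag is
// (field_number << 3) | wire_type, varint-encoded. Field 1 with wire type 2
// (length-delimited) gives tag (1 << 3) | 2 = 0x0A, a single byte, so
// ExpectTag<10>(ptr) reduces to the one-byte compare above.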

template <int>
struct EndianHelper;

template <>
struct EndianHelper<1> {
  static uint8_t Load(const void* p) {
    return *static_cast<const uint8_t*>(p);
  }
};

template <>
struct EndianHelper<2> {
  static uint16_t Load(const void* p) {
    uint16_t tmp;
    std::memcpy(&tmp, p, 2);
    return little_endian::ToHost(tmp);
  }
};

template <>
struct EndianHelper<4> {
  static uint32_t Load(const void* p) {
    uint32_t tmp;
    std::memcpy(&tmp, p, 4);
    return little_endian::ToHost(tmp);
  }
};

template <>
struct EndianHelper<8> {
  static uint64_t Load(const void* p) {
    uint64_t tmp;
    std::memcpy(&tmp, p, 8);
    return little_endian::ToHost(tmp);
  }
};

template <typename T>
T UnalignedLoad(const char* p) {
  auto tmp = EndianHelper<sizeof(T)>::Load(p);
  T res;
  memcpy(&res, &tmp, sizeof(T));
  return res;
}
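
// Illustrative use (assuming ptr has at least 8 readable bytes, which the
// slop invariant guarantees mid-buffer): a fixed64 field is simply
//
//   uint64_t value = UnalignedLoad<uint64_t>(ptr);
//   ptr += 8;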

PROTOBUF_EXPORT
std::pair<const char*, uint32_t> VarintParseSlow32(const char* p, uint32_t res);
PROTOBUF_EXPORT
std::pair<const char*, uint64_t> VarintParseSlow64(const char* p, uint32_t res);

inline const char* VarintParseSlow(const char* p, uint32_t res, uint32_t* out) {
  auto tmp = VarintParseSlow32(p, res);
  *out = tmp.second;
  return tmp.first;
}

inline const char* VarintParseSlow(const char* p, uint32_t res, uint64_t* out) {
  auto tmp = VarintParseSlow64(p, res);
  *out = tmp.second;
  return tmp.first;
}

template <typename T>
PROTOBUF_NODISCARD const char* VarintParse(const char* p, T* out) {
  auto ptr = reinterpret_cast<const uint8_t*>(p);
  uint32_t res = ptr[0];
  if (!(res & 0x80)) {
    *out = res;
    return p + 1;
  }
  uint32_t byte = ptr[1];
  res += (byte - 1) << 7;
  if (!(byte & 0x80)) {
    *out = res;
    return p + 2;
  }
  return VarintParseSlow(p, res, out);
}
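
// A worked example of the two-byte fast path above: 300 encodes as 0xAC 0x02.
// The first byte gives res = 0xAC; its continuation bit is set, so we add
// (0x02 - 1) << 7 = 0x80, i.e. res = 0xAC + 0x80 = 0x12C = 300. Subtracting 1
// from the second byte before shifting cancels the first byte's continuation
// bit, which is why no masking is needed.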

// Used for tags; could read up to 5 bytes, which must be available. Caller
// must ensure it's safe to call.

PROTOBUF_EXPORT
std::pair<const char*, uint32_t> ReadTagFallback(const char* p, uint32_t res);

// Same as VarintParse but accepts at most 5 bytes.
inline const char* ReadTag(const char* p, uint32_t* out,
                           uint32_t /*max_tag*/ = 0) {
  uint32_t res = static_cast<uint8_t>(p[0]);
  if (res < 128) {
    *out = res;
    return p + 1;
  }
  uint32_t second = static_cast<uint8_t>(p[1]);
  res += (second - 1) << 7;
  if (second < 128) {
    *out = res;
    return p + 2;
  }
  auto tmp = ReadTagFallback(p, res);
  *out = tmp.second;
  return tmp.first;
}

// As above, but optimized to consume very few registers while still being
// fast. ReadTagInlined is useful for callers that don't mind the extra code
// but would like to avoid an extern function call causing spills into the
// stack.
//
// Two support routines for ReadTagInlined come first...
template <class T>
PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE constexpr T RotateLeft(
    T x, int s) noexcept {
  return static_cast<T>(x << (s & (std::numeric_limits<T>::digits - 1))) |
         static_cast<T>(x >> ((-s) & (std::numeric_limits<T>::digits - 1)));
}

PROTOBUF_NODISCARD inline PROTOBUF_ALWAYS_INLINE uint64_t
RotRight7AndReplaceLowByte(uint64_t res, const char& byte) {
#if defined(__x86_64__) && defined(__GNUC__)
  // This will only use one register for `res`.
  // `byte` comes as a reference to allow the compiler to generate code like:
  //
  //   rorq    $7, %rcx
  //   movb    1(%rax), %cl
  //
  // which avoids loading the incoming bytes into a separate register first.
  asm("ror $7,%0\n\t"
      "movb %1,%b0"
      : "+r"(res)
      : "m"(byte));
#else
  res = RotateLeft(res, -7);
  res = res & ~0xFF;
  res |= 0xFF & byte;
#endif
  return res;
}

inline PROTOBUF_ALWAYS_INLINE const char* ReadTagInlined(const char* ptr,
                                                         uint32_t* out) {
  uint64_t res = 0xFF & ptr[0];
  if (PROTOBUF_PREDICT_FALSE(res >= 128)) {
    res = RotRight7AndReplaceLowByte(res, ptr[1]);
    if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
      res = RotRight7AndReplaceLowByte(res, ptr[2]);
      if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
        res = RotRight7AndReplaceLowByte(res, ptr[3]);
        if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
          // Note: this wouldn't work if res were 32-bit,
          // because then replacing the low byte would overwrite
          // the bottom 4 bits of the result.
          res = RotRight7AndReplaceLowByte(res, ptr[4]);
          if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
            // The proto format does not permit longer than 5-byte encodings
            // for tags.
            *out = 0;
            return nullptr;
          }
          *out = static_cast<uint32_t>(RotateLeft(res, 28));
#if defined(__GNUC__)
          // Note: this asm statement prevents the compiler from
          // trying to share the "return ptr + constant" among all
          // branches.
          asm("" : "+r"(ptr));
#endif
          return ptr + 5;
        }
        *out = static_cast<uint32_t>(RotateLeft(res, 21));
        return ptr + 4;
      }
      *out = static_cast<uint32_t>(RotateLeft(res, 14));
      return ptr + 3;
    }
    *out = static_cast<uint32_t>(RotateLeft(res, 7));
    return ptr + 2;
  }
  *out = static_cast<uint32_t>(res);
  return ptr + 1;
}
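
// To see why the rotations recombine the bytes (a worked example, not library
// code): for the two-byte tag 0xAC 0x02 (value 300), the first rotation moves
// the first byte's 7 payload bits near the top of the 64-bit word while its
// continuation bit lands in the low byte, which is then overwritten by 0x02.
// The final RotateLeft(res, 7) realigns everything: the first byte's payload
// returns to bits 0-6 and the second byte's payload lands at bits 7-13,
// yielding 0x12C = 300 after truncation to 32 bits.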

// Decodes 2 consecutive bytes of a varint and returns the value, shifted left
// by 1. It simultaneously updates *ptr to *ptr + 1 or *ptr + 2, depending on
// whether the first byte's continuation bit is set.
// If bit 15 of the return value is set (equivalent to the continuation bits
// of both bytes being set) the varint continues; otherwise the parse is done.
// On x86 this compiles to:
//   movsx eax, dil
//   and edi, eax
//   add eax, edi
//   adc [rsi], 1
inline uint32_t DecodeTwoBytes(const char** ptr) {
  uint32_t value = UnalignedLoad<uint16_t>(*ptr);
  // Sign extend the low byte continuation bit.
  uint32_t x = static_cast<int8_t>(value);
  value &= x;  // Mask out the high byte iff no continuation.
  // This add is an amazing operation: it cancels the low byte continuation
  // bit from x, transferring it to the carry. Simultaneously it also shifts
  // the 7 LSB left by one, tightly against the high byte varint bits. Hence
  // value now contains the unpacked value shifted left by 1.
  value += x;
  // Use the carry to update the ptr appropriately.
  *ptr += value < x ? 2 : 1;
  return value;
}
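
// Worked example (illustrative): for bytes 0x96 0x01 (the varint for 150),
// the 16-bit load gives value = 0x0196. Sign-extending the low byte yields
// x = 0xFFFFFF96, so value &= x keeps 0x0196, and value += x wraps to 0x12C
// with value < x, so *ptr advances by 2. The caller shifts right by 1 to
// recover 0x96 = 150.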

// More efficient varint parsing for big varints.
inline const char* ParseBigVarint(const char* p, uint64_t* out) {
  auto pnew = p;
  auto tmp = DecodeTwoBytes(&pnew);
  uint64_t res = tmp >> 1;
  if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
    *out = res;
    return pnew;
  }
  for (std::uint32_t i = 1; i < 5; i++) {
    pnew = p + 2 * i;
    tmp = DecodeTwoBytes(&pnew);
    res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
    if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
      *out = res;
      return pnew;
    }
  }
  return nullptr;
}

PROTOBUF_EXPORT
std::pair<const char*, int32_t> ReadSizeFallback(const char* p, uint32_t first);
// Used for sizes; could read up to 5 bytes, which must be available.
// Additionally it makes sure the unsigned value fits an int32_t, otherwise
// *pp is set to nullptr. Caller must ensure it's safe to call.
inline uint32_t ReadSize(const char** pp) {
  auto p = *pp;
  uint32_t res = static_cast<uint8_t>(p[0]);
  if (res < 128) {
    *pp = p + 1;
    return res;
  }
  auto x = ReadSizeFallback(p, res);
  *pp = x.first;
  return x.second;
}

// Some convenience functions to simplify the generated parse loop code.
// Returning the value and updating the buffer pointer allows for nicer
// function composition. We rely on the compiler to inline these.
// Also, in debug compiles, locally scoped variables tend to generate stack
// frames that scale as O(num fields).
inline uint64_t ReadVarint64(const char** p) {
  uint64_t tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}

inline uint32_t ReadVarint32(const char** p) {
  uint32_t tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}

inline int64_t ReadVarintZigZag64(const char** p) {
  uint64_t tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode64(tmp);
}

inline int32_t ReadVarintZigZag32(const char** p) {
  uint64_t tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode32(static_cast<uint32_t>(tmp));
}
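
// ZigZag, for reference (a worked example): sint fields map signed values to
// unsigned ones so that small negatives stay small on the wire: 0 -> 0,
// -1 -> 1, 1 -> 2, -2 -> 3, ... Decoding computes (n >> 1) ^ -(n & 1), so an
// encoded 3 yields (3 >> 1) ^ -1 = 1 ^ -1 = -2.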

template <typename T, typename std::enable_if<
                          !std::is_base_of<MessageLite, T>::value, bool>::type>
PROTOBUF_NODISCARD const char* ParseContext::ParseMessage(T* msg,
                                                          const char* ptr) {
  int old;
  ptr = ReadSizeAndPushLimitAndDepth(ptr, &old);
  ptr = ptr ? msg->_InternalParse(ptr, this) : nullptr;
  depth_++;
  if (!PopLimit(old)) return nullptr;
  return ptr;
}

template <typename Tag, typename T>
const char* EpsCopyInputStream::ReadRepeatedFixed(const char* ptr,
                                                  Tag expected_tag,
                                                  RepeatedField<T>* out) {
  do {
    out->Add(UnalignedLoad<T>(ptr));
    ptr += sizeof(T);
    if (PROTOBUF_PREDICT_FALSE(ptr >= limit_end_)) return ptr;
  } while (UnalignedLoad<Tag>(ptr) == expected_tag && (ptr += sizeof(Tag)));
  return ptr;
}

// Uncomment any of the following lines to debug which parse function is
// failing.

#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                 \
    /*  ::raise(SIGINT);  */                          \
    /*  GOOGLE_LOG(ERROR) << "Parse failure";  */     \
    return ret;                                       \
  }

#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)

template <typename T>
const char* EpsCopyInputStream::ReadPackedFixed(const char* ptr, int size,
                                                RepeatedField<T>* out) {
  GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
  int nbytes = buffer_end_ + kSlopBytes - ptr;
  while (size > nbytes) {
    int num = nbytes / sizeof(T);
    int old_entries = out->size();
    out->Reserve(old_entries + num);
    int block_size = num * sizeof(T);
    auto dst = out->AddNAlreadyReserved(num);
#ifdef PROTOBUF_LITTLE_ENDIAN
    std::memcpy(dst, ptr, block_size);
#else
    for (int i = 0; i < num; i++)
      dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
#endif
    size -= block_size;
    if (limit_ <= kSlopBytes) return nullptr;
    ptr = Next();
    if (ptr == nullptr) return nullptr;
    ptr += kSlopBytes - (nbytes - block_size);
    nbytes = buffer_end_ + kSlopBytes - ptr;
  }
  int num = size / sizeof(T);
  int old_entries = out->size();
  out->Reserve(old_entries + num);
  int block_size = num * sizeof(T);
  auto dst = out->AddNAlreadyReserved(num);
#ifdef PROTOBUF_LITTLE_ENDIAN
  std::memcpy(dst, ptr, block_size);
#else
  for (int i = 0; i < num; i++) dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
#endif
  ptr += block_size;
  if (size != block_size) return nullptr;
  return ptr;
}

template <typename Add>
const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
  while (ptr < end) {
    uint64_t varint;
    ptr = VarintParse(ptr, &varint);
    if (ptr == nullptr) return nullptr;
    add(varint);
  }
  return ptr;
}

template <typename Add>
const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
  int size = ReadSize(&ptr);
  GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
  int chunk_size = buffer_end_ - ptr;
  while (size > chunk_size) {
    ptr = ReadPackedVarintArray(ptr, buffer_end_, add);
    if (ptr == nullptr) return nullptr;
    int overrun = ptr - buffer_end_;
    GOOGLE_DCHECK(overrun >= 0 && overrun <= kSlopBytes);
    if (size - chunk_size <= kSlopBytes) {
      // The current buffer contains all the information needed, so we don't
      // need to flip buffers. However we must parse from a buffer with enough
      // space so we are not prone to a buffer overflow.
      char buf[kSlopBytes + 10] = {};
      std::memcpy(buf, buffer_end_, kSlopBytes);
      GOOGLE_CHECK_LE(size - chunk_size, kSlopBytes);
      auto end = buf + (size - chunk_size);
      auto res = ReadPackedVarintArray(buf + overrun, end, add);
      if (res == nullptr || res != end) return nullptr;
      return buffer_end_ + (res - buf);
    }
    size -= overrun + chunk_size;
    GOOGLE_DCHECK_GT(size, 0);
    // We must flip buffers.
    if (limit_ <= kSlopBytes) return nullptr;
    ptr = Next();
    if (ptr == nullptr) return nullptr;
    ptr += overrun;
    chunk_size = buffer_end_ - ptr;
  }
  auto end = ptr + size;
  ptr = ReadPackedVarintArray(ptr, end, add);
  return end == ptr ? ptr : nullptr;
}

// Helpers for verification of UTF-8.
PROTOBUF_EXPORT
bool VerifyUTF8(StringPiece s, const char* field_name);

inline bool VerifyUTF8(const std::string* s, const char* field_name) {
  return VerifyUTF8(*s, field_name);
}

// All the string parsers, with or without UTF-8 checking, and for all CTypes.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* InlineGreedyStringParser(
    std::string* s, const char* ptr, ParseContext* ctx);

template <typename T>
PROTOBUF_NODISCARD const char* FieldParser(uint64_t tag, T& field_parser,
                                           const char* ptr,
                                           ParseContext* ctx) {
  uint32_t number = tag >> 3;
  GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
  using WireType = internal::WireFormatLite::WireType;
  switch (tag & 7) {
    case WireType::WIRETYPE_VARINT: {
      uint64_t value;
      ptr = VarintParse(ptr, &value);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      field_parser.AddVarint(number, value);
      break;
    }
    case WireType::WIRETYPE_FIXED64: {
      uint64_t value = UnalignedLoad<uint64_t>(ptr);
      ptr += 8;
      field_parser.AddFixed64(number, value);
      break;
    }
    case WireType::WIRETYPE_LENGTH_DELIMITED: {
      ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_START_GROUP: {
      ptr = field_parser.ParseGroup(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_END_GROUP: {
      GOOGLE_LOG(FATAL) << "Can't happen";
      break;
    }
    case WireType::WIRETYPE_FIXED32: {
      uint32_t value = UnalignedLoad<uint32_t>(ptr);
      ptr += 4;
      field_parser.AddFixed32(number, value);
      break;
    }
    default:
      return nullptr;
  }
  return ptr;
}

template <typename T>
PROTOBUF_NODISCARD const char* WireFormatParser(T& field_parser,
                                                const char* ptr,
                                                ParseContext* ctx) {
  while (!ctx->Done(&ptr)) {
    uint32_t tag;
    ptr = ReadTag(ptr, &tag);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
    if (tag == 0 || (tag & 7) == 4) {
      ctx->SetLastTag(tag);
      return ptr;
    }
    ptr = FieldParser(tag, field_parser, ptr, ctx);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
  }
  return ptr;
}
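
// A minimal sketch (hypothetical, not part of the library) of a field_parser
// usable with FieldParser/WireFormatParser above: it must provide the five
// callbacks the switch dispatches to. This one merely counts fields and skips
// payloads.
//
//   struct CountingSink {
//     int fields = 0;
//     void AddVarint(uint32_t /*num*/, uint64_t) { fields++; }
//     void AddFixed64(uint32_t, uint64_t) { fields++; }
//     void AddFixed32(uint32_t, uint32_t) { fields++; }
//     const char* ParseLengthDelimited(uint32_t, const char* ptr,
//                                      ParseContext* ctx) {
//       fields++;
//       int size = ReadSize(&ptr);
//       if (ptr == nullptr) return nullptr;
//       return ctx->Skip(ptr, size);
//     }
//     const char* ParseGroup(uint32_t num, const char* ptr,
//                            ParseContext* ctx) {
//       fields++;
//       ptr = WireFormatParser(*this, ptr, ctx);
//       if (ptr == nullptr) return nullptr;
//       // The matching end-group tag is (num << 3) | 4; ConsumeEndGroup
//       // compares against the start tag, (num << 3) | 3.
//       return ctx->ConsumeEndGroup(num * 8 + 3) ? ptr : nullptr;
//     }
//   };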

// The packed parsers parse repeated numeric primitives directly into the
// corresponding field.

// These are packed varints:
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx);

template <typename T>
PROTOBUF_NODISCARD const char* PackedEnumParser(void* object, const char* ptr,
                                                ParseContext* ctx,
                                                bool (*is_valid)(int),
                                                InternalMetadata* metadata,
                                                int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, metadata, field_num](uint64_t val) {
        if (is_valid(val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

template <typename T>
PROTOBUF_NODISCARD const char* PackedEnumParserArg(
    void* object, const char* ptr, ParseContext* ctx,
    bool (*is_valid)(const void*, int), const void* data,
    InternalMetadata* metadata, int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, data, metadata, field_num](uint64_t val) {
        if (is_valid(data, val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedBoolParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFloatParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedDoubleParser(
    void* object, const char* ptr, ParseContext* ctx);

// This is the only recursive parser.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownGroupLiteParse(
    std::string* unknown, const char* ptr, ParseContext* ctx);
// This is a helper for UnknownGroupLiteParse, but it is actually also useful
// in the generated code. It uses overloading on std::string* vs
// UnknownFieldSet* to make the generated code isomorphic between full and
// lite.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownFieldParse(
    uint32_t tag, std::string* unknown, const char* ptr, ParseContext* ctx);

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__