1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
32#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
33
34#include <cstdint>
35#include <cstring>
36#include <string>
37#include <type_traits>
38
39#include <google/protobuf/io/coded_stream.h>
40#include <google/protobuf/io/zero_copy_stream.h>
41#include <google/protobuf/arena.h>
42#include <google/protobuf/port.h>
43#include <google/protobuf/stubs/strutil.h>
44#include <google/protobuf/arenastring.h>
45#include <google/protobuf/endian.h>
46#include <google/protobuf/implicit_weak_message.h>
47#include <google/protobuf/inlined_string_field.h>
48#include <google/protobuf/metadata_lite.h>
49#include <google/protobuf/repeated_field.h>
50#include <google/protobuf/wire_format_lite.h>
51
52// Must be included last.
53#include <google/protobuf/port_def.inc>
54
55
56namespace google {
57namespace protobuf {
58
59class UnknownFieldSet;
60class DescriptorPool;
61class MessageFactory;
62
63namespace internal {
64
65// Template code below needs to know about the existence of these functions.
66PROTOBUF_EXPORT void WriteVarint(uint32_t num, uint64_t val, std::string* s);
67PROTOBUF_EXPORT void WriteLengthDelimited(uint32_t num, StringPiece val,
68 std::string* s);
69// Inline because it is just forwarding to s->WriteVarint
70inline void WriteVarint(uint32_t num, uint64_t val, UnknownFieldSet* s);
71inline void WriteLengthDelimited(uint32_t num, StringPiece val,
72 UnknownFieldSet* s);
73
74
75// The basic abstraction the parser is designed for is a slight modification
76// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
77// stream as a series of buffers that concatenate to the full stream.
78// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------] chunk 1
//                      [----------------------------] chunk 2
//                                          chunk 3 [--------------]
83//
// Where the '-' represent the bytes, which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly, with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk or, if there is
// no next chunk, that it is at least still valid to read those bytes. Again,
// pictorially, we now have
90//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                    [------------------------....] chunk 2
//                                    chunk 3 [------------------..**]
//                                                      chunk 4 [--****]
// Here '-' means the bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above, each chunk has
// 4 '.' after the chunk. In the case where these 'overflow' bytes represent
// bytes past the stream, indicated by '*' above, their values are unspecified.
// It is still legal to read them (i.e. doing so should not segfault). Reading
// past the end should be detected by the user and indicated as an error.
102//
// The reason for this, admittedly unconventional, invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important ways.
// Firstly, it removes the need to perform bounds checks if a piece of code
// is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wire format is such that reading a key/value pair
// always takes less than 16 bytes. This removes the need to change to the next
// buffer in the middle of reading primitive values. Hence there is no need to
// store and load the current position.
111
112class PROTOBUF_EXPORT EpsCopyInputStream {
113 public:
114 enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };
115
116 explicit EpsCopyInputStream(bool enable_aliasing)
117 : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
118
119 void BackUp(const char* ptr) {
120 GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
121 int count;
122 if (next_chunk_ == buffer_) {
123 count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
124 } else {
125 count = size_ + static_cast<int>(buffer_end_ - ptr);
126 }
127 if (count > 0) StreamBackUp(count);
128 }
129
130 // If return value is negative it's an error
131 PROTOBUF_NODISCARD int PushLimit(const char* ptr, int limit) {
132 GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
133 // This add is safe due to the invariant above, because
134 // ptr - buffer_end_ <= kSlopBytes.
135 limit += static_cast<int>(ptr - buffer_end_);
136 limit_end_ = buffer_end_ + (std::min)(0, limit);
137 auto old_limit = limit_;
138 limit_ = limit;
139 return old_limit - limit;
140 }
141
142 PROTOBUF_NODISCARD bool PopLimit(int delta) {
143 if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
144 limit_ = limit_ + delta;
145 // TODO(gerbens) We could remove this line and hoist the code to
146 // DoneFallback. Study the perf/bin-size effects.
147 limit_end_ = buffer_end_ + (std::min)(0, limit_);
148 return true;
149 }
150
151 PROTOBUF_NODISCARD const char* Skip(const char* ptr, int size) {
152 if (size <= buffer_end_ + kSlopBytes - ptr) {
153 return ptr + size;
154 }
155 return SkipFallback(ptr, size);
156 }
157 PROTOBUF_NODISCARD const char* ReadString(const char* ptr, int size,
158 std::string* s) {
159 if (size <= buffer_end_ + kSlopBytes - ptr) {
160 s->assign(s: ptr, n: size);
161 return ptr + size;
162 }
163 return ReadStringFallback(ptr, size, str: s);
164 }
165 PROTOBUF_NODISCARD const char* AppendString(const char* ptr, int size,
166 std::string* s) {
167 if (size <= buffer_end_ + kSlopBytes - ptr) {
168 s->append(s: ptr, n: size);
169 return ptr + size;
170 }
171 return AppendStringFallback(ptr, size, str: s);
172 }
173 // Implemented in arenastring.cc
174 PROTOBUF_NODISCARD const char* ReadArenaString(const char* ptr,
175 ArenaStringPtr* s,
176 Arena* arena);
177
178 template <typename Tag, typename T>
179 PROTOBUF_NODISCARD const char* ReadRepeatedFixed(const char* ptr,
180 Tag expected_tag,
181 RepeatedField<T>* out);
182
183 template <typename T>
184 PROTOBUF_NODISCARD const char* ReadPackedFixed(const char* ptr, int size,
185 RepeatedField<T>* out);
186 template <typename Add>
187 PROTOBUF_NODISCARD const char* ReadPackedVarint(const char* ptr, Add add);
188
189 uint32_t LastTag() const { return last_tag_minus_1_ + 1; }
190 bool ConsumeEndGroup(uint32_t start_tag) {
191 bool res = last_tag_minus_1_ == start_tag;
192 last_tag_minus_1_ = 0;
193 return res;
194 }
195 bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
196 bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
197 void SetLastTag(uint32_t tag) { last_tag_minus_1_ = tag - 1; }
198 void SetEndOfStream() { last_tag_minus_1_ = 1; }
199 bool IsExceedingLimit(const char* ptr) {
200 return ptr > limit_end_ &&
201 (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
202 }
203 bool AliasingEnabled() const { return aliasing_ != kNoAliasing; }
204 int BytesUntilLimit(const char* ptr) const {
205 return limit_ + static_cast<int>(buffer_end_ - ptr);
206 }
207 // Returns true if more data is available, if false is returned one has to
208 // call Done for further checks.
209 bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
210
211 protected:
212 // Returns true is limit (either an explicit limit or end of stream) is
213 // reached. It aligns *ptr across buffer seams.
214 // If limit is exceeded it returns true and ptr is set to null.
215 bool DoneWithCheck(const char** ptr, int d) {
216 GOOGLE_DCHECK(*ptr);
217 if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
218 int overrun = static_cast<int>(*ptr - buffer_end_);
219 GOOGLE_DCHECK_LE(overrun, kSlopBytes); // Guaranteed by parse loop.
220 if (overrun ==
221 limit_) { // No need to flip buffers if we ended on a limit.
222 // If we actually overrun the buffer and next_chunk_ is null. It means
223 // the stream ended and we passed the stream end.
224 if (overrun > 0 && next_chunk_ == nullptr) *ptr = nullptr;
225 return true;
226 }
227 auto res = DoneFallback(overrun, depth: d);
228 *ptr = res.first;
229 return res.second;
230 }
231
232 const char* InitFrom(StringPiece flat) {
233 overall_limit_ = 0;
234 if (flat.size() > kSlopBytes) {
235 limit_ = kSlopBytes;
236 limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
237 next_chunk_ = buffer_;
238 if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
239 return flat.data();
240 } else {
241 std::memcpy(dest: buffer_, src: flat.data(), n: flat.size());
242 limit_ = 0;
243 limit_end_ = buffer_end_ = buffer_ + flat.size();
244 next_chunk_ = nullptr;
245 if (aliasing_ == kOnPatch) {
246 aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
247 reinterpret_cast<std::uintptr_t>(buffer_);
248 }
249 return buffer_;
250 }
251 }
252
253 const char* InitFrom(io::ZeroCopyInputStream* zcis);
254
255 const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
256 if (limit == -1) return InitFrom(zcis);
257 overall_limit_ = limit;
258 auto res = InitFrom(zcis);
259 limit_ = limit - static_cast<int>(buffer_end_ - res);
260 limit_end_ = buffer_end_ + (std::min)(0, limit_);
261 return res;
262 }
263
264 private:
265 const char* limit_end_; // buffer_end_ + min(limit_, 0)
266 const char* buffer_end_;
267 const char* next_chunk_;
268 int size_;
269 int limit_; // relative to buffer_end_;
270 io::ZeroCopyInputStream* zcis_ = nullptr;
271 char buffer_[2 * kSlopBytes] = {};
272 enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
273 std::uintptr_t aliasing_ = kNoAliasing;
274 // This variable is used to communicate how the parse ended, in order to
275 // completely verify the parsed data. A wire-format parse can end because of
276 // one of the following conditions:
277 // 1) A parse can end on a pushed limit.
278 // 2) A parse can end on End Of Stream (EOS).
279 // 3) A parse can end on 0 tag (only valid for toplevel message).
280 // 4) A parse can end on an end-group tag.
281 // This variable should always be set to 0, which indicates case 1. If the
282 // parse terminated due to EOS (case 2), it's set to 1. In case the parse
283 // ended due to a terminating tag (case 3 and 4) it's set to (tag - 1).
284 // This var doesn't really belong in EpsCopyInputStream and should be part of
285 // the ParseContext, but case 2 is most easily and optimally implemented in
286 // DoneFallback.
287 uint32_t last_tag_minus_1_ = 0;
288 int overall_limit_ = INT_MAX; // Overall limit independent of pushed limits.
289 // Pretty random large number that seems like a safe allocation on most
290 // systems. TODO(gerbens) do we need to set this as build flag?
291 enum { kSafeStringSize = 50000000 };
292
293 // Advances to next buffer chunk returns a pointer to the same logical place
294 // in the stream as set by overrun. Overrun indicates the position in the slop
295 // region the parse was left (0 <= overrun <= kSlopBytes). Returns true if at
296 // limit, at which point the returned pointer maybe null if there was an
297 // error. The invariant of this function is that it's guaranteed that
298 // kSlopBytes bytes can be accessed from the returned ptr. This function might
299 // advance more buffers than one in the underlying ZeroCopyInputStream.
300 std::pair<const char*, bool> DoneFallback(int overrun, int depth);
301 // Advances to the next buffer, at most one call to Next() on the underlying
302 // ZeroCopyInputStream is made. This function DOES NOT match the returned
303 // pointer to where in the slop region the parse ends, hence no overrun
304 // parameter. This is useful for string operations where you always copy
305 // to the end of the buffer (including the slop region).
306 const char* Next();
307 // overrun is the location in the slop region the stream currently is
308 // (0 <= overrun <= kSlopBytes). To prevent flipping to the next buffer of
309 // the ZeroCopyInputStream in the case the parse will end in the last
310 // kSlopBytes of the current buffer. depth is the current depth of nested
311 // groups (or negative if the use case does not need careful tracking).
312 inline const char* NextBuffer(int overrun, int depth);
313 const char* SkipFallback(const char* ptr, int size);
314 const char* AppendStringFallback(const char* ptr, int size, std::string* str);
315 const char* ReadStringFallback(const char* ptr, int size, std::string* str);
316 bool StreamNext(const void** data) {
317 bool res = zcis_->Next(data, size: &size_);
318 if (res) overall_limit_ -= size_;
319 return res;
320 }
321 void StreamBackUp(int count) {
322 zcis_->BackUp(count);
323 overall_limit_ += count;
324 }
325
326 template <typename A>
327 const char* AppendSize(const char* ptr, int size, const A& append) {
328 int chunk_size = buffer_end_ + kSlopBytes - ptr;
329 do {
330 GOOGLE_DCHECK(size > chunk_size);
331 if (next_chunk_ == nullptr) return nullptr;
332 append(ptr, chunk_size);
333 ptr += chunk_size;
334 size -= chunk_size;
335 // TODO(gerbens) Next calls NextBuffer which generates buffers with
336 // overlap and thus incurs cost of copying the slop regions. This is not
337 // necessary for reading strings. We should just call Next buffers.
338 if (limit_ <= kSlopBytes) return nullptr;
339 ptr = Next();
340 if (ptr == nullptr) return nullptr; // passed the limit
341 ptr += kSlopBytes;
342 chunk_size = buffer_end_ + kSlopBytes - ptr;
343 } while (size > chunk_size);
344 append(ptr, size);
345 return ptr + size;
346 }
347
348 // AppendUntilEnd appends data until a limit (either a PushLimit or end of
349 // stream. Normal payloads are from length delimited fields which have an
350 // explicit size. Reading until limit only comes when the string takes
351 // the place of a protobuf, ie RawMessage/StringRawMessage, lazy fields and
352 // implicit weak messages. We keep these methods private and friend them.
353 template <typename A>
354 const char* AppendUntilEnd(const char* ptr, const A& append) {
355 if (ptr - buffer_end_ > limit_) return nullptr;
356 while (limit_ > kSlopBytes) {
357 size_t chunk_size = buffer_end_ + kSlopBytes - ptr;
358 append(ptr, chunk_size);
359 ptr = Next();
360 if (ptr == nullptr) return limit_end_;
361 ptr += kSlopBytes;
362 }
363 auto end = buffer_end_ + limit_;
364 GOOGLE_DCHECK(end >= ptr);
365 append(ptr, end - ptr);
366 return end;
367 }
368
369 PROTOBUF_NODISCARD const char* AppendString(const char* ptr,
370 std::string* str) {
371 return AppendUntilEnd(
372 ptr, append: [str](const char* p, ptrdiff_t s) { str->append(s: p, n: s); });
373 }
374 friend class ImplicitWeakMessage;
375};
376
377using LazyEagerVerifyFnType = const char* (*)(const char* ptr,
378 ParseContext* ctx);
379using LazyEagerVerifyFnRef = std::remove_pointer<LazyEagerVerifyFnType>::type&;
380
381// ParseContext holds all data that is global to the entire parse. Most
382// importantly it contains the input stream, but also recursion depth and also
383// stores the end group tag, in case a parser ended on a endgroup, to verify
384// matching start/end group tags.
385class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
386 public:
387 struct Data {
388 const DescriptorPool* pool = nullptr;
389 MessageFactory* factory = nullptr;
390 Arena* arena = nullptr;
391 };
392
393 template <typename... T>
394 ParseContext(int depth, bool aliasing, const char** start, T&&... args)
395 : EpsCopyInputStream(aliasing), depth_(depth) {
396 *start = InitFrom(std::forward<T>(args)...);
397 }
398
399 void TrackCorrectEnding() { group_depth_ = 0; }
400
401 bool Done(const char** ptr) { return DoneWithCheck(ptr, d: group_depth_); }
402
403 int depth() const { return depth_; }
404
405 Data& data() { return data_; }
406 const Data& data() const { return data_; }
407
408 const char* ParseMessage(MessageLite* msg, const char* ptr);
409
410 // Spawns a child parsing context that inherits key properties. New context
411 // inherits the following:
412 // --depth_, data_, check_required_fields_, lazy_parse_mode_
413 // The spawned context always disables aliasing (different input).
414 template <typename... T>
415 ParseContext Spawn(const char** start, T&&... args) {
416 ParseContext spawned(depth_, false, start, std::forward<T>(args)...);
417 // Transfer key context states.
418 spawned.data_ = data_;
419 return spawned;
420 }
421
422 // This overload supports those few cases where ParseMessage is called
423 // on a class that is not actually a proto message.
424 // TODO(jorg): Eliminate this use case.
425 template <typename T,
426 typename std::enable_if<!std::is_base_of<MessageLite, T>::value,
427 bool>::type = true>
428 PROTOBUF_NODISCARD const char* ParseMessage(T* msg, const char* ptr);
429
430 template <typename T>
431 PROTOBUF_NODISCARD PROTOBUF_NDEBUG_INLINE const char* ParseGroup(
432 T* msg, const char* ptr, uint32_t tag) {
433 if (--depth_ < 0) return nullptr;
434 group_depth_++;
435 ptr = msg->_InternalParse(ptr, this);
436 group_depth_--;
437 depth_++;
438 if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
439 return ptr;
440 }
441
442 private:
443 // Out-of-line routine to save space in ParseContext::ParseMessage<T>
444 // int old;
445 // ptr = ReadSizeAndPushLimitAndDepth(ptr, &old)
446 // is equivalent to:
447 // int size = ReadSize(&ptr);
448 // if (!ptr) return nullptr;
449 // int old = PushLimit(ptr, size);
450 // if (--depth_ < 0) return nullptr;
451 PROTOBUF_NODISCARD const char* ReadSizeAndPushLimitAndDepth(const char* ptr,
452 int* old_limit);
453
454 // The context keeps an internal stack to keep track of the recursive
455 // part of the parse state.
456 // Current depth of the active parser, depth counts down.
457 // This is used to limit recursion depth (to prevent overflow on malicious
458 // data), but is also used to index in stack_ to store the current state.
459 int depth_;
460 // Unfortunately necessary for the fringe case of ending on 0 or end-group tag
461 // in the last kSlopBytes of a ZeroCopyInputStream chunk.
462 int group_depth_ = INT_MIN;
463 Data data_;
464};
465
// Returns true if the bytes at ptr are exactly the varint encoding of `tag`.
// Only tags whose encoding is one or two bytes long (tag < 128 * 128) are
// supported; this is enforced at compile time. For tag >= 128 two bytes are
// read from ptr, otherwise one.
template <uint32_t tag>
bool ExpectTag(const char* ptr) {
  if (tag < 128) {
    return *ptr == static_cast<char>(tag);
  } else {
    static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
    // First byte: low 7 bits with the continuation bit set. Second byte: the
    // remaining high bits.
    char buf[2] = {static_cast<char>(tag | 0x80), static_cast<char>(tag >> 7)};
    return std::memcmp(ptr, buf, 2) == 0;
  }
}
476
477template <int>
478struct EndianHelper;
479
480template <>
481struct EndianHelper<1> {
482 static uint8_t Load(const void* p) { return *static_cast<const uint8_t*>(p); }
483};
484
485template <>
486struct EndianHelper<2> {
487 static uint16_t Load(const void* p) {
488 uint16_t tmp;
489 std::memcpy(dest: &tmp, src: p, n: 2);
490 return little_endian::ToHost(value: tmp);
491 }
492};
493
494template <>
495struct EndianHelper<4> {
496 static uint32_t Load(const void* p) {
497 uint32_t tmp;
498 std::memcpy(dest: &tmp, src: p, n: 4);
499 return little_endian::ToHost(value: tmp);
500 }
501};
502
503template <>
504struct EndianHelper<8> {
505 static uint64_t Load(const void* p) {
506 uint64_t tmp;
507 std::memcpy(dest: &tmp, src: p, n: 8);
508 return little_endian::ToHost(value: tmp);
509 }
510};
511
512template <typename T>
513T UnalignedLoad(const char* p) {
514 auto tmp = EndianHelper<sizeof(T)>::Load(p);
515 T res;
516 memcpy(&res, &tmp, sizeof(T));
517 return res;
518}
519
520PROTOBUF_EXPORT
521std::pair<const char*, uint32_t> VarintParseSlow32(const char* p, uint32_t res);
522PROTOBUF_EXPORT
523std::pair<const char*, uint64_t> VarintParseSlow64(const char* p, uint32_t res);
524
525inline const char* VarintParseSlow(const char* p, uint32_t res, uint32_t* out) {
526 auto tmp = VarintParseSlow32(p, res);
527 *out = tmp.second;
528 return tmp.first;
529}
530
531inline const char* VarintParseSlow(const char* p, uint32_t res, uint64_t* out) {
532 auto tmp = VarintParseSlow64(p, res);
533 *out = tmp.second;
534 return tmp.first;
535}
536
537template <typename T>
538PROTOBUF_NODISCARD const char* VarintParse(const char* p, T* out) {
539 auto ptr = reinterpret_cast<const uint8_t*>(p);
540 uint32_t res = ptr[0];
541 if (!(res & 0x80)) {
542 *out = res;
543 return p + 1;
544 }
545 uint32_t byte = ptr[1];
546 res += (byte - 1) << 7;
547 if (!(byte & 0x80)) {
548 *out = res;
549 return p + 2;
550 }
551 return VarintParseSlow(p, res, out);
552}
553
554// Used for tags, could read up to 5 bytes which must be available.
555// Caller must ensure its safe to call.
556
557PROTOBUF_EXPORT
558std::pair<const char*, uint32_t> ReadTagFallback(const char* p, uint32_t res);
559
560// Same as ParseVarint but only accept 5 bytes at most.
561inline const char* ReadTag(const char* p, uint32_t* out,
562 uint32_t /*max_tag*/ = 0) {
563 uint32_t res = static_cast<uint8_t>(p[0]);
564 if (res < 128) {
565 *out = res;
566 return p + 1;
567 }
568 uint32_t second = static_cast<uint8_t>(p[1]);
569 res += (second - 1) << 7;
570 if (second < 128) {
571 *out = res;
572 return p + 2;
573 }
574 auto tmp = ReadTagFallback(p, res);
575 *out = tmp.second;
576 return tmp.first;
577}
578
579// As above, but optimized to consume very few registers while still being fast,
580// ReadTagInlined is useful for callers that don't mind the extra code but would
581// like to avoid an extern function call causing spills into the stack.
582//
583// Two support routines for ReadTagInlined come first...
584template <class T>
585PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE constexpr T RotateLeft(
586 T x, int s) noexcept {
587 return static_cast<T>(x << (s & (std::numeric_limits<T>::digits - 1))) |
588 static_cast<T>(x >> ((-s) & (std::numeric_limits<T>::digits - 1)));
589}
590
591PROTOBUF_NODISCARD inline PROTOBUF_ALWAYS_INLINE uint64_t
592RotRight7AndReplaceLowByte(uint64_t res, const char& byte) {
593#if defined(__x86_64__) && defined(__GNUC__)
594 // This will only use one register for `res`.
595 // `byte` comes as a reference to allow the compiler to generate code like:
596 //
597 // rorq $7, %rcx
598 // movb 1(%rax), %cl
599 //
600 // which avoids loading the incoming bytes into a separate register first.
601 asm("ror $7,%0\n\t"
602 "movb %1,%b0"
603 : "+r"(res)
604 : "m"(byte));
605#else
606 res = RotateLeft(x: res, s: -7);
607 res = res & ~0xFF;
608 res |= 0xFF & byte;
609#endif
610 return res;
611};
612
613inline PROTOBUF_ALWAYS_INLINE
614const char* ReadTagInlined(const char* ptr, uint32_t* out) {
615 uint64_t res = 0xFF & ptr[0];
616 if (PROTOBUF_PREDICT_FALSE(res >= 128)) {
617 res = RotRight7AndReplaceLowByte(res, byte: ptr[1]);
618 if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
619 res = RotRight7AndReplaceLowByte(res, byte: ptr[2]);
620 if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
621 res = RotRight7AndReplaceLowByte(res, byte: ptr[3]);
622 if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
623 // Note: this wouldn't work if res were 32-bit,
624 // because then replacing the low byte would overwrite
625 // the bottom 4 bits of the result.
626 res = RotRight7AndReplaceLowByte(res, byte: ptr[4]);
627 if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
628 // The proto format does not permit longer than 5-byte encodings for
629 // tags.
630 *out = 0;
631 return nullptr;
632 }
633 *out = static_cast<uint32_t>(RotateLeft(x: res, s: 28));
634#if defined(__GNUC__)
635 // Note: this asm statement prevents the compiler from
636 // trying to share the "return ptr + constant" among all
637 // branches.
638 asm("" : "+r"(ptr));
639#endif
640 return ptr + 5;
641 }
642 *out = static_cast<uint32_t>(RotateLeft(x: res, s: 21));
643 return ptr + 4;
644 }
645 *out = static_cast<uint32_t>(RotateLeft(x: res, s: 14));
646 return ptr + 3;
647 }
648 *out = static_cast<uint32_t>(RotateLeft(x: res, s: 7));
649 return ptr + 2;
650 }
651 *out = static_cast<uint32_t>(res);
652 return ptr + 1;
653}
654
655// Decode 2 consecutive bytes of a varint and returns the value, shifted left
656// by 1. It simultaneous updates *ptr to *ptr + 1 or *ptr + 2 depending if the
657// first byte's continuation bit is set.
658// If bit 15 of return value is set (equivalent to the continuation bits of both
659// bytes being set) the varint continues, otherwise the parse is done. On x86
660// movsx eax, dil
661// and edi, eax
662// add eax, edi
663// adc [rsi], 1
664inline uint32_t DecodeTwoBytes(const char** ptr) {
665 uint32_t value = UnalignedLoad<uint16_t>(p: *ptr);
666 // Sign extend the low byte continuation bit
667 uint32_t x = static_cast<int8_t>(value);
668 value &= x; // Mask out the high byte iff no continuation
669 // This add is an amazing operation, it cancels the low byte continuation bit
670 // from y transferring it to the carry. Simultaneously it also shifts the 7
671 // LSB left by one tightly against high byte varint bits. Hence value now
672 // contains the unpacked value shifted left by 1.
673 value += x;
674 // Use the carry to update the ptr appropriately.
675 *ptr += value < x ? 2 : 1;
676 return value;
677}
678
679// More efficient varint parsing for big varints
680inline const char* ParseBigVarint(const char* p, uint64_t* out) {
681 auto pnew = p;
682 auto tmp = DecodeTwoBytes(ptr: &pnew);
683 uint64_t res = tmp >> 1;
684 if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
685 *out = res;
686 return pnew;
687 }
688 for (std::uint32_t i = 1; i < 5; i++) {
689 pnew = p + 2 * i;
690 tmp = DecodeTwoBytes(ptr: &pnew);
691 res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
692 if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= 0)) {
693 *out = res;
694 return pnew;
695 }
696 }
697 return nullptr;
698}
699
700PROTOBUF_EXPORT
701std::pair<const char*, int32_t> ReadSizeFallback(const char* p, uint32_t first);
702// Used for tags, could read up to 5 bytes which must be available. Additionally
703// it makes sure the unsigned value fits a int32_t, otherwise returns nullptr.
704// Caller must ensure its safe to call.
705inline uint32_t ReadSize(const char** pp) {
706 auto p = *pp;
707 uint32_t res = static_cast<uint8_t>(p[0]);
708 if (res < 128) {
709 *pp = p + 1;
710 return res;
711 }
712 auto x = ReadSizeFallback(p, first: res);
713 *pp = x.first;
714 return x.second;
715}
716
717// Some convenience functions to simplify the generated parse loop code.
718// Returning the value and updating the buffer pointer allows for nicer
719// function composition. We rely on the compiler to inline this.
720// Also in debug compiles having local scoped variables tend to generated
721// stack frames that scale as O(num fields).
722inline uint64_t ReadVarint64(const char** p) {
723 uint64_t tmp;
724 *p = VarintParse(p: *p, out: &tmp);
725 return tmp;
726}
727
728inline uint32_t ReadVarint32(const char** p) {
729 uint32_t tmp;
730 *p = VarintParse(p: *p, out: &tmp);
731 return tmp;
732}
733
734inline int64_t ReadVarintZigZag64(const char** p) {
735 uint64_t tmp;
736 *p = VarintParse(p: *p, out: &tmp);
737 return WireFormatLite::ZigZagDecode64(n: tmp);
738}
739
740inline int32_t ReadVarintZigZag32(const char** p) {
741 uint64_t tmp;
742 *p = VarintParse(p: *p, out: &tmp);
743 return WireFormatLite::ZigZagDecode32(n: static_cast<uint32_t>(tmp));
744}
745
746template <typename T, typename std::enable_if<
747 !std::is_base_of<MessageLite, T>::value, bool>::type>
748PROTOBUF_NODISCARD const char* ParseContext::ParseMessage(T* msg,
749 const char* ptr) {
750 int old;
751 ptr = ReadSizeAndPushLimitAndDepth(ptr, old_limit: &old);
752 ptr = ptr ? msg->_InternalParse(ptr, this) : nullptr;
753 depth_++;
754 if (!PopLimit(delta: old)) return nullptr;
755 return ptr;
756}
757
758template <typename Tag, typename T>
759const char* EpsCopyInputStream::ReadRepeatedFixed(const char* ptr,
760 Tag expected_tag,
761 RepeatedField<T>* out) {
762 do {
763 out->Add(UnalignedLoad<T>(ptr));
764 ptr += sizeof(T);
765 if (PROTOBUF_PREDICT_FALSE(ptr >= limit_end_)) return ptr;
766 } while (UnalignedLoad<Tag>(ptr) == expected_tag && (ptr += sizeof(Tag)));
767 return ptr;
768}
769
// Returns `ret` from the enclosing function when `predicate` is false.
// Uncomment any of the commented-out lines inside the macro to debug which
// parse function is failing.
// Note: this expands to a bare `if` statement, so take care when using it as
// the body of an unbraced if/else.

#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                  \
    /*  ::raise(SIGINT);  */                           \
    /*  GOOGLE_LOG(ERROR) << "Parse failure";  */      \
    return ret;                                        \
  }

// Parser flavour of the above: returns nullptr when `predicate` is false.
#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
781
782template <typename T>
783const char* EpsCopyInputStream::ReadPackedFixed(const char* ptr, int size,
784 RepeatedField<T>* out) {
785 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
786 int nbytes = buffer_end_ + kSlopBytes - ptr;
787 while (size > nbytes) {
788 int num = nbytes / sizeof(T);
789 int old_entries = out->size();
790 out->Reserve(old_entries + num);
791 int block_size = num * sizeof(T);
792 auto dst = out->AddNAlreadyReserved(num);
793#ifdef PROTOBUF_LITTLE_ENDIAN
794 std::memcpy(dest: dst, src: ptr, n: block_size);
795#else
796 for (int i = 0; i < num; i++)
797 dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
798#endif
799 size -= block_size;
800 if (limit_ <= kSlopBytes) return nullptr;
801 ptr = Next();
802 if (ptr == nullptr) return nullptr;
803 ptr += kSlopBytes - (nbytes - block_size);
804 nbytes = buffer_end_ + kSlopBytes - ptr;
805 }
806 int num = size / sizeof(T);
807 int old_entries = out->size();
808 out->Reserve(old_entries + num);
809 int block_size = num * sizeof(T);
810 auto dst = out->AddNAlreadyReserved(num);
811#ifdef PROTOBUF_LITTLE_ENDIAN
812 std::memcpy(dest: dst, src: ptr, n: block_size);
813#else
814 for (int i = 0; i < num; i++) dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
815#endif
816 ptr += block_size;
817 if (size != block_size) return nullptr;
818 return ptr;
819}
820
821template <typename Add>
822const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
823 while (ptr < end) {
824 uint64_t varint;
825 ptr = VarintParse(p: ptr, out: &varint);
826 if (ptr == nullptr) return nullptr;
827 add(varint);
828 }
829 return ptr;
830}
831
832template <typename Add>
833const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
834 int size = ReadSize(pp: &ptr);
835 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
836 int chunk_size = buffer_end_ - ptr;
837 while (size > chunk_size) {
838 ptr = ReadPackedVarintArray(ptr, buffer_end_, add);
839 if (ptr == nullptr) return nullptr;
840 int overrun = ptr - buffer_end_;
841 GOOGLE_DCHECK(overrun >= 0 && overrun <= kSlopBytes);
842 if (size - chunk_size <= kSlopBytes) {
843 // The current buffer contains all the information needed, we don't need
844 // to flip buffers. However we must parse from a buffer with enough space
845 // so we are not prone to a buffer overflow.
846 char buf[kSlopBytes + 10] = {};
847 std::memcpy(dest: buf, src: buffer_end_, n: kSlopBytes);
848 GOOGLE_CHECK_LE(size - chunk_size, kSlopBytes);
849 auto end = buf + (size - chunk_size);
850 auto res = ReadPackedVarintArray(buf + overrun, end, add);
851 if (res == nullptr || res != end) return nullptr;
852 return buffer_end_ + (res - buf);
853 }
854 size -= overrun + chunk_size;
855 GOOGLE_DCHECK_GT(size, 0);
856 // We must flip buffers
857 if (limit_ <= kSlopBytes) return nullptr;
858 ptr = Next();
859 if (ptr == nullptr) return nullptr;
860 ptr += overrun;
861 chunk_size = buffer_end_ - ptr;
862 }
863 auto end = ptr + size;
864 ptr = ReadPackedVarintArray(ptr, end, add);
865 return end == ptr ? ptr : nullptr;
866}
867
868// Helper for verification of utf8
869PROTOBUF_EXPORT
870bool VerifyUTF8(StringPiece s, const char* field_name);
871
872inline bool VerifyUTF8(const std::string* s, const char* field_name) {
873 return VerifyUTF8(s: *s, field_name);
874}
875
876// All the string parsers with or without UTF checking and for all CTypes.
877PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* InlineGreedyStringParser(
878 std::string* s, const char* ptr, ParseContext* ctx);
879
880
881template <typename T>
882PROTOBUF_NODISCARD const char* FieldParser(uint64_t tag, T& field_parser,
883 const char* ptr, ParseContext* ctx) {
884 uint32_t number = tag >> 3;
885 GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
886 using WireType = internal::WireFormatLite::WireType;
887 switch (tag & 7) {
888 case WireType::WIRETYPE_VARINT: {
889 uint64_t value;
890 ptr = VarintParse(p: ptr, out: &value);
891 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
892 field_parser.AddVarint(number, value);
893 break;
894 }
895 case WireType::WIRETYPE_FIXED64: {
896 uint64_t value = UnalignedLoad<uint64_t>(p: ptr);
897 ptr += 8;
898 field_parser.AddFixed64(number, value);
899 break;
900 }
901 case WireType::WIRETYPE_LENGTH_DELIMITED: {
902 ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
903 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
904 break;
905 }
906 case WireType::WIRETYPE_START_GROUP: {
907 ptr = field_parser.ParseGroup(number, ptr, ctx);
908 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
909 break;
910 }
911 case WireType::WIRETYPE_END_GROUP: {
912 GOOGLE_LOG(FATAL) << "Can't happen";
913 break;
914 }
915 case WireType::WIRETYPE_FIXED32: {
916 uint32_t value = UnalignedLoad<uint32_t>(p: ptr);
917 ptr += 4;
918 field_parser.AddFixed32(number, value);
919 break;
920 }
921 default:
922 return nullptr;
923 }
924 return ptr;
925}
926
927template <typename T>
928PROTOBUF_NODISCARD const char* WireFormatParser(T& field_parser,
929 const char* ptr,
930 ParseContext* ctx) {
931 while (!ctx->Done(ptr: &ptr)) {
932 uint32_t tag;
933 ptr = ReadTag(p: ptr, out: &tag);
934 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
935 if (tag == 0 || (tag & 7) == 4) {
936 ctx->SetLastTag(tag);
937 return ptr;
938 }
939 ptr = FieldParser(tag, field_parser, ptr, ctx);
940 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
941 }
942 return ptr;
943}
944
945// The packed parsers parse repeated numeric primitives directly into the
946// corresponding field
947
948// These are packed varints
949PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt32Parser(
950 void* object, const char* ptr, ParseContext* ctx);
951PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt32Parser(
952 void* object, const char* ptr, ParseContext* ctx);
953PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt64Parser(
954 void* object, const char* ptr, ParseContext* ctx);
955PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt64Parser(
956 void* object, const char* ptr, ParseContext* ctx);
957PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt32Parser(
958 void* object, const char* ptr, ParseContext* ctx);
959PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt64Parser(
960 void* object, const char* ptr, ParseContext* ctx);
961PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedEnumParser(
962 void* object, const char* ptr, ParseContext* ctx);
963
964template <typename T>
965PROTOBUF_NODISCARD const char* PackedEnumParser(void* object, const char* ptr,
966 ParseContext* ctx,
967 bool (*is_valid)(int),
968 InternalMetadata* metadata,
969 int field_num) {
970 return ctx->ReadPackedVarint(
971 ptr, [object, is_valid, metadata, field_num](uint64_t val) {
972 if (is_valid(val)) {
973 static_cast<RepeatedField<int>*>(object)->Add(value: val);
974 } else {
975 WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
976 }
977 });
978}
979
980template <typename T>
981PROTOBUF_NODISCARD const char* PackedEnumParserArg(
982 void* object, const char* ptr, ParseContext* ctx,
983 bool (*is_valid)(const void*, int), const void* data,
984 InternalMetadata* metadata, int field_num) {
985 return ctx->ReadPackedVarint(
986 ptr, [object, is_valid, data, metadata, field_num](uint64_t val) {
987 if (is_valid(data, val)) {
988 static_cast<RepeatedField<int>*>(object)->Add(value: val);
989 } else {
990 WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
991 }
992 });
993}
994
995PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedBoolParser(
996 void* object, const char* ptr, ParseContext* ctx);
997PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed32Parser(
998 void* object, const char* ptr, ParseContext* ctx);
999PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed32Parser(
1000 void* object, const char* ptr, ParseContext* ctx);
1001PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed64Parser(
1002 void* object, const char* ptr, ParseContext* ctx);
1003PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed64Parser(
1004 void* object, const char* ptr, ParseContext* ctx);
1005PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFloatParser(
1006 void* object, const char* ptr, ParseContext* ctx);
1007PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedDoubleParser(
1008 void* object, const char* ptr, ParseContext* ctx);
1009
1010// This is the only recursive parser.
1011PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownGroupLiteParse(
1012 std::string* unknown, const char* ptr, ParseContext* ctx);
1013// This is a helper to for the UnknownGroupLiteParse but is actually also
1014// useful in the generated code. It uses overload on std::string* vs
1015// UnknownFieldSet* to make the generated code isomorphic between full and lite.
1016PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownFieldParse(
1017 uint32_t tag, std::string* unknown, const char* ptr, ParseContext* ctx);
1018
1019} // namespace internal
1020} // namespace protobuf
1021} // namespace google
1022
1023#include <google/protobuf/port_undef.inc>
1024
1025#endif // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
1026