parse_context.h source code [Velox/build/_deps/protobuf-src/src/google/protobuf/parse_context.h]

1	// Protocol Buffers - Google's data interchange format
2	// Copyright 2008 Google Inc. All rights reserved.
3	// https://developers.google.com/protocol-buffers/
4	//
5	// Redistribution and use in source and binary forms, with or without
6	// modification, are permitted provided that the following conditions are
7	// met:
8	//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
18	//
19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31	#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
32	#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
33
34	#include <cstdint>
35	#include <cstring>
36	#include <string>
37	#include <type_traits>
38
39	#include <google/protobuf/io/coded_stream.h>
40	#include <google/protobuf/io/zero_copy_stream.h>
41	#include <google/protobuf/arena.h>
42	#include <google/protobuf/port.h>
43	#include <google/protobuf/stubs/strutil.h>
44	#include <google/protobuf/arenastring.h>
45	#include <google/protobuf/endian.h>
46	#include <google/protobuf/implicit_weak_message.h>
47	#include <google/protobuf/inlined_string_field.h>
48	#include <google/protobuf/metadata_lite.h>
49	#include <google/protobuf/repeated_field.h>
50	#include <google/protobuf/wire_format_lite.h>
51
52	// Must be included last.
53	#include <google/protobuf/port_def.inc>
54
55
56	namespace google {
57	namespace protobuf {
58
59	class UnknownFieldSet;
60	class DescriptorPool;
61	class MessageFactory;
62
63	namespace internal {
64
65	// Template code below needs to know about the existence of these functions.
66	PROTOBUF_EXPORT void WriteVarint(uint32_t num, uint64_t val, std::string* s);
67	PROTOBUF_EXPORT void WriteLengthDelimited(uint32_t num, StringPiece val,
68	std::string* s);
69	// Inline because it is just forwarding to s->WriteVarint
70	inline void WriteVarint(uint32_t num, uint64_t val, UnknownFieldSet* s);
71	inline void WriteLengthDelimited(uint32_t num, StringPiece val,
72	UnknownFieldSet* s);
73
74
// The basic abstraction the parser is designed for is a slight modification
// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
// stream as a series of buffers that concatenate to the full stream.
// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------] chunk 1
//                      [----------------------------] chunk 2
//                                          chunk 3 [--------------]
//
// Where the '-' represent the bytes which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly with the extra
// property that each chunk has kSlopBytes past its end that overlaps with the
// first kSlopBytes of the next chunk, or if there is no next chunk at least it
// is still valid to read those bytes. Again, pictorially, we now have
//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                    [------------------------....] chunk 2
//                                    chunk 3 [------------------..**]
//                                                      chunk 4 [--****]
// Here '-' mean the bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above each chunk has
// 4 '.' after the chunk. In the case these 'overflow' bytes represents bytes
// past the stream, indicated by '*' above, their values are unspecified. It is
// still legal to read them (ie. should not segfault). Reading past the
// end should be detected by the user and indicated as an error.
//
// The reason for this, admittedly, unconventional invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important ways.
// Firstly it alleviates having to perform bounds checks if a piece of code
// is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wireformat is such that reading a key/value pair is
// always less than 16 bytes. This removes the need to change to next buffer in
// the middle of reading primitive values. Hence there is no need to store and
// load the current position.
111
112	class PROTOBUF_EXPORT EpsCopyInputStream {
113	public:
114	enum { kSlopBytes = `16`, kMaxCordBytesToCopy = `512` };
115
116	explicit EpsCopyInputStream(bool enable_aliasing)
117	: aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
118
119	void BackUp(const char* ptr) {
120	GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
121	int count;
122	if (next_chunk_ == buffer_) {
123	count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
124	} else {
125	count = size_ + static_cast<int>(buffer_end_ - ptr);
126	}
127	if (count > `0`) StreamBackUp(count);
128	}
129
130	// If return value is negative it's an error
131	PROTOBUF_NODISCARD int PushLimit(const char* ptr, int limit) {
132	GOOGLE_DCHECK(limit >= `0` && limit <= INT_MAX - kSlopBytes);
133	// This add is safe due to the invariant above, because
134	// ptr - buffer_end_ <= kSlopBytes.
135	limit += static_cast<int>(ptr - buffer_end_);
136	limit_end_ = buffer_end_ + (std::min)(`0`, limit);
137	auto old_limit = limit_;
138	limit_ = limit;
139	return old_limit - limit;
140	}
141
142	PROTOBUF_NODISCARD bool PopLimit(int delta) {
143	if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
144	limit_ = limit_ + delta;
145	// TODO(gerbens) We could remove this line and hoist the code to
146	// DoneFallback. Study the perf/bin-size effects.
147	limit_end_ = buffer_end_ + (std::min)(`0`, limit_);
148	return true;
149	}
150
151	PROTOBUF_NODISCARD const char* Skip(const char* ptr, int size) {
152	if (size <= buffer_end_ + kSlopBytes - ptr) {
153	return ptr + size;
154	}
155	return SkipFallback(ptr, size);
156	}
157	PROTOBUF_NODISCARD const char* ReadString(const char* ptr, int size,
158	std::string* s) {
159	if (size <= buffer_end_ + kSlopBytes - ptr) {
160	s->assign(s: ptr, n: size);
161	return ptr + size;
162	}
163	return ReadStringFallback(ptr, size, str: s);
164	}
165	PROTOBUF_NODISCARD const char* AppendString(const char* ptr, int size,
166	std::string* s) {
167	if (size <= buffer_end_ + kSlopBytes - ptr) {
168	s->append(s: ptr, n: size);
169	return ptr + size;
170	}
171	return AppendStringFallback(ptr, size, str: s);
172	}
173	// Implemented in arenastring.cc
174	PROTOBUF_NODISCARD const char* ReadArenaString(const char* ptr,
175	ArenaStringPtr* s,
176	Arena* arena);
177
178	template <typename Tag, typename T>
179	PROTOBUF_NODISCARD const char* ReadRepeatedFixed(const char* ptr,
180	Tag expected_tag,
181	RepeatedField<T>* out);
182
183	template <typename T>
184	PROTOBUF_NODISCARD const char* ReadPackedFixed(const char* ptr, int size,
185	RepeatedField<T>* out);
186	template <typename Add>
187	PROTOBUF_NODISCARD const char* ReadPackedVarint(const char* ptr, Add add);
188
189	uint32_t LastTag() const { return last_tag_minus_1_ + `1`; }
190	bool ConsumeEndGroup(uint32_t start_tag) {
191	bool res = last_tag_minus_1_ == start_tag;
192	last_tag_minus_1_ = `0`;
193	return res;
194	}
195	bool EndedAtLimit() const { return last_tag_minus_1_ == `0`; }
196	bool EndedAtEndOfStream() const { return last_tag_minus_1_ == `1`; }
197	void SetLastTag(uint32_t tag) { last_tag_minus_1_ = tag - `1`; }
198	void SetEndOfStream() { last_tag_minus_1_ = `1`; }
199	bool IsExceedingLimit(const char* ptr) {
200	return ptr > limit_end_ &&
201	(next_chunk_ == nullptr \|\| ptr - buffer_end_ > limit_);
202	}
203	bool AliasingEnabled() const { return aliasing_ != kNoAliasing; }
204	int BytesUntilLimit(const char* ptr) const {
205	return limit_ + static_cast<int>(buffer_end_ - ptr);
206	}
207	// Returns true if more data is available, if false is returned one has to
208	// call Done for further checks.
209	bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
210
211	protected:
212	// Returns true is limit (either an explicit limit or end of stream) is
213	// reached. It aligns ptr across buffer seams.*
214	// If limit is exceeded it returns true and ptr is set to null.
215	bool DoneWithCheck(const char** ptr, int d) {
216	GOOGLE_DCHECK(*ptr);
217	if (PROTOBUF_PREDICT_TRUE(ptr < limit_end_)) return* false;
218	int overrun = static_cast<int>(*ptr - buffer_end_);
219	GOOGLE_DCHECK_LE(overrun, kSlopBytes); // Guaranteed by parse loop.
220	if (overrun ==
221	limit_) { // No need to flip buffers if we ended on a limit.
222	// If we actually overrun the buffer and next_chunk_ is null. It means
223	// the stream ended and we passed the stream end.
224	if (overrun > `0` && next_chunk_ == nullptr) ptr = nullptr*;
225	return true;
226	}
227	auto res = DoneFallback(overrun, depth: d);
228	*ptr = res.first;
229	return res.second;
230	}
231
232	const char* InitFrom(StringPiece flat) {
233	overall_limit_ = `0`;
234	if (flat.size() > kSlopBytes) {
235	limit_ = kSlopBytes;
236	limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
237	next_chunk_ = buffer_;
238	if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
239	return flat.data();
240	} else {
241	std::memcpy(dest: buffer_, src: flat.data(), n: flat.size());
242	limit_ = `0`;
243	limit_end_ = buffer_end_ = buffer_ + flat.size();
244	next_chunk_ = nullptr;
245	if (aliasing_ == kOnPatch) {
246	aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
247	reinterpret_cast<std::uintptr_t>(buffer_);
248	}
249	return buffer_;
250	}
251	}
252
253	const char* InitFrom(io::ZeroCopyInputStream* zcis);
254
255	const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
256	if (limit == -`1`) return InitFrom(zcis);
257	overall_limit_ = limit;
258	auto res = InitFrom(zcis);
259	limit_ = limit - static_cast<int>(buffer_end_ - res);
260	limit_end_ = buffer_end_ + (std::min)(`0`, limit_);
261	return res;
262	}
263
264	private:
265	const char* limit_end_; // buffer_end_ + min(limit_, 0)
266	const char* buffer_end_;
267	const char* next_chunk_;
268	int size_;
269	int limit_; // relative to buffer_end_;
270	io::ZeroCopyInputStream* zcis_ = nullptr;
271	char buffer_[`2` * kSlopBytes] = {};
272	enum { kNoAliasing = `0`, kOnPatch = `1`, kNoDelta = `2` };
273	std::uintptr_t aliasing_ = kNoAliasing;
274	// This variable is used to communicate how the parse ended, in order to
275	// completely verify the parsed data. A wire-format parse can end because of
276	// one of the following conditions:
277	// 1) A parse can end on a pushed limit.
278	// 2) A parse can end on End Of Stream (EOS).
279	// 3) A parse can end on 0 tag (only valid for toplevel message).
280	// 4) A parse can end on an end-group tag.
281	// This variable should always be set to 0, which indicates case 1. If the
282	// parse terminated due to EOS (case 2), it's set to 1. In case the parse
283	// ended due to a terminating tag (case 3 and 4) it's set to (tag - 1).
284	// This var doesn't really belong in EpsCopyInputStream and should be part of
285	// the ParseContext, but case 2 is most easily and optimally implemented in
286	// DoneFallback.
287	uint32_t last_tag_minus_1_ = `0`;
288	int overall_limit_ = INT_MAX; // Overall limit independent of pushed limits.
289	// Pretty random large number that seems like a safe allocation on most
290	// systems. TODO(gerbens) do we need to set this as build flag?
291	enum { kSafeStringSize = `50000000` };
292
293	// Advances to next buffer chunk returns a pointer to the same logical place
294	// in the stream as set by overrun. Overrun indicates the position in the slop
295	// region the parse was left (0 <= overrun <= kSlopBytes). Returns true if at
296	// limit, at which point the returned pointer maybe null if there was an
297	// error. The invariant of this function is that it's guaranteed that
298	// kSlopBytes bytes can be accessed from the returned ptr. This function might
299	// advance more buffers than one in the underlying ZeroCopyInputStream.
300	std::pair<const char, bool> DoneFallback(int* overrun, int depth);
301	// Advances to the next buffer, at most one call to Next() on the underlying
302	// ZeroCopyInputStream is made. This function DOES NOT match the returned
303	// pointer to where in the slop region the parse ends, hence no overrun
304	// parameter. This is useful for string operations where you always copy
305	// to the end of the buffer (including the slop region).
306	const char* Next();
307	// overrun is the location in the slop region the stream currently is
308	// (0 <= overrun <= kSlopBytes). To prevent flipping to the next buffer of
309	// the ZeroCopyInputStream in the case the parse will end in the last
310	// kSlopBytes of the current buffer. depth is the current depth of nested
311	// groups (or negative if the use case does not need careful tracking).
312	inline const char* NextBuffer(int overrun, int depth);
313	const char* SkipFallback(const char* ptr, int size);
314	const char* AppendStringFallback(const char* ptr, int size, std::string* str);
315	const char* ReadStringFallback(const char* ptr, int size, std::string* str);
316	bool StreamNext(const void** data) {
317	bool res = zcis_->Next(data, size: &size_);
318	if (res) overall_limit_ -= size_;
319	return res;
320	}
321	void StreamBackUp(int count) {
322	zcis_->BackUp(count);
323	overall_limit_ += count;
324	}
325
326	template <typename A>
327	const char* AppendSize(const char* ptr, int size, const A& append) {
328	int chunk_size = buffer_end_ + kSlopBytes - ptr;
329	do {
330	GOOGLE_DCHECK(size > chunk_size);
331	if (next_chunk_ == nullptr) return nullptr;
332	append(ptr, chunk_size);
333	ptr += chunk_size;
334	size -= chunk_size;
335	// TODO(gerbens) Next calls NextBuffer which generates buffers with
336	// overlap and thus incurs cost of copying the slop regions. This is not
337	// necessary for reading strings. We should just call Next buffers.
338	if (limit_ <= kSlopBytes) return nullptr;
339	ptr = Next();
340	if (ptr == nullptr) return nullptr; // passed the limit
341	ptr += kSlopBytes;
342	chunk_size = buffer_end_ + kSlopBytes - ptr;
343	} while (size > chunk_size);
344	append(ptr, size);
345	return ptr + size;
346	}
347
348	// AppendUntilEnd appends data until a limit (either a PushLimit or end of
349	// stream. Normal payloads are from length delimited fields which have an
350	// explicit size. Reading until limit only comes when the string takes
351	// the place of a protobuf, ie RawMessage/StringRawMessage, lazy fields and
352	// implicit weak messages. We keep these methods private and friend them.
353	template <typename A>
354	const char* AppendUntilEnd(const char* ptr, const A& append) {
355	if (ptr - buffer_end_ > limit_) return nullptr;
356	while (limit_ > kSlopBytes) {
357	size_t chunk_size = buffer_end_ + kSlopBytes - ptr;
358	append(ptr, chunk_size);
359	ptr = Next();
360	if (ptr == nullptr) return limit_end_;
361	ptr += kSlopBytes;
362	}
363	auto end = buffer_end_ + limit_;
364	GOOGLE_DCHECK(end >= ptr);
365	append(ptr, end - ptr);
366	return end;
367	}
368
369	PROTOBUF_NODISCARD const char* AppendString(const char* ptr,
370	std::string* str) {
371	return AppendUntilEnd(
372	ptr, append: [str](const char* p, ptrdiff_t s) { str->append(s: p, n: s); });
373	}
374	friend class ImplicitWeakMessage;
375	};
376
377	using LazyEagerVerifyFnType = const char* ()(const* char* ptr,
378	ParseContext* ctx);
379	using LazyEagerVerifyFnRef = std::remove_pointer<LazyEagerVerifyFnType>::type&;
380
381	// ParseContext holds all data that is global to the entire parse. Most
382	// importantly it contains the input stream, but also recursion depth and also
383	// stores the end group tag, in case a parser ended on a endgroup, to verify
384	// matching start/end group tags.
385	class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
386	public:
387	struct Data {
388	const DescriptorPool* pool = nullptr;
389	MessageFactory* factory = nullptr;
390	Arena* arena = nullptr;
391	};
392
393	template <typename... T>
394	ParseContext(int depth, bool aliasing, const char** start, T&&... args)
395	: EpsCopyInputStream(aliasing), depth_(depth) {
396	*start = InitFrom(std::forward<T>(args)...);
397	}
398
399	void TrackCorrectEnding() { group_depth_ = `0`; }
400
401	bool Done(const char ptr) { return** DoneWithCheck(ptr, d: group_depth_); }
402
403	int depth() const { return depth_; }
404
405	Data& data() { return data_; }
406	const Data& data() const { return data_; }
407
408	const char* ParseMessage(MessageLite* msg, const char* ptr);
409
410	// Spawns a child parsing context that inherits key properties. New context
411	// inherits the following:
412	// --depth_, data_, check_required_fields_, lazy_parse_mode_
413	// The spawned context always disables aliasing (different input).
414	template <typename... T>
415	ParseContext Spawn(const char** start, T&&... args) {
416	ParseContext spawned(depth_, false, start, std::forward<T>(args)...);
417	// Transfer key context states.
418	spawned.data_ = data_;
419	return spawned;
420	}
421
422	// This overload supports those few cases where ParseMessage is called
423	// on a class that is not actually a proto message.
424	// TODO(jorg): Eliminate this use case.
425	template <typename T,
426	typename std::enable_if<!std::is_base_of<MessageLite, T>::value,
427	bool>::type = true>
428	PROTOBUF_NODISCARD const char* ParseMessage(T* msg, const char* ptr);
429
430	template <typename T>
431	PROTOBUF_NODISCARD PROTOBUF_NDEBUG_INLINE const char* ParseGroup(
432	T* msg, const char* ptr, uint32_t tag) {
433	if (--depth_ < `0`) return nullptr;
434	group_depth_++;
435	ptr = msg->_InternalParse(ptr, this);
436	group_depth_--;
437	depth_++;
438	if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
439	return ptr;
440	}
441
442	private:
443	// Out-of-line routine to save space in ParseContext::ParseMessage<T>
444	// int old;
445	// ptr = ReadSizeAndPushLimitAndDepth(ptr, &old)
446	// is equivalent to:
447	// int size = ReadSize(&ptr);
448	// if (!ptr) return nullptr;
449	// int old = PushLimit(ptr, size);
450	// if (--depth_ < 0) return nullptr;
451	PROTOBUF_NODISCARD const char* ReadSizeAndPushLimitAndDepth(const char* ptr,
452	int* old_limit);
453
454	// The context keeps an internal stack to keep track of the recursive
455	// part of the parse state.
456	// Current depth of the active parser, depth counts down.
457	// This is used to limit recursion depth (to prevent overflow on malicious
458	// data), but is also used to index in stack_ to store the current state.
459	int depth_;
460	// Unfortunately necessary for the fringe case of ending on 0 or end-group tag
461	// in the last kSlopBytes of a ZeroCopyInputStream chunk.
462	int group_depth_ = INT_MIN;
463	Data data_;
464	};
465
// Returns true if the bytes at `ptr` are the varint encoding of the
// compile-time constant `tag`. Only tags that encode in one or two bytes
// (tag < 128 * 128) are supported.
template <uint32_t tag>
bool ExpectTag(const char* ptr) {
  if (tag < 128) {
    // Single-byte tag: compare the byte itself, not the pointer.
    return *ptr == static_cast<char>(tag);
  } else {
    static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
    // Two-byte varint: low 7 bits with the continuation bit set, then the
    // remaining bits.
    char buf[2] = {static_cast<char>(tag | 0x80), static_cast<char>(tag >> 7)};
    return std::memcmp(ptr, buf, 2) == 0;
  }
}
476
477	template <int>
478	struct EndianHelper;
479
480	template <>
481	struct EndianHelper<`1`> {
482	static uint8_t Load(const void* p) { return *static_cast<const uint8_t*>(p); }
483	};
484
485	template <>
486	struct EndianHelper<`2`> {
487	static uint16_t Load(const void* p) {
488	uint16_t tmp;
489	std::memcpy(dest: &tmp, src: p, n: `2`);
490	return little_endian::ToHost(value: tmp);
491	}
492	};
493
494	template <>
495	struct EndianHelper<`4`> {
496	static uint32_t Load(const void* p) {
497	uint32_t tmp;
498	std::memcpy(dest: &tmp, src: p, n: `4`);
499	return little_endian::ToHost(value: tmp);
500	}
501	};
502
503	template <>
504	struct EndianHelper<`8`> {
505	static uint64_t Load(const void* p) {
506	uint64_t tmp;
507	std::memcpy(dest: &tmp, src: p, n: `8`);
508	return little_endian::ToHost(value: tmp);
509	}
510	};
511
512	template <typename T>
513	T UnalignedLoad(const char* p) {
514	auto tmp = EndianHelper<sizeof(T)>::Load(p);
515	T res;
516	memcpy(&res, &tmp, sizeof(T));
517	return res;
518	}
519
520	PROTOBUF_EXPORT
521	std::pair<const char, uint32_t> VarintParseSlow32(const* char* p, uint32_t res);
522	PROTOBUF_EXPORT
523	std::pair<const char, uint64_t> VarintParseSlow64(const* char* p, uint32_t res);
524
525	inline const char* VarintParseSlow(const char* p, uint32_t res, uint32_t* out) {
526	auto tmp = VarintParseSlow32(p, res);
527	*out = tmp.second;
528	return tmp.first;
529	}
530
531	inline const char* VarintParseSlow(const char* p, uint32_t res, uint64_t* out) {
532	auto tmp = VarintParseSlow64(p, res);
533	*out = tmp.second;
534	return tmp.first;
535	}
536
537	template <typename T>
538	PROTOBUF_NODISCARD const char* VarintParse(const char* p, T* out) {
539	auto ptr = reinterpret_cast<const uint8_t*>(p);
540	uint32_t res = ptr[`0`];
541	if (!(res & `0x80`)) {
542	*out = res;
543	return p + `1`;
544	}
545	uint32_t byte = ptr[`1`];
546	res += (byte - `1`) << `7`;
547	if (!(byte & `0x80`)) {
548	*out = res;
549	return p + `2`;
550	}
551	return VarintParseSlow(p, res, out);
552	}
553
554	// Used for tags, could read up to 5 bytes which must be available.
555	// Caller must ensure its safe to call.
556
557	PROTOBUF_EXPORT
558	std::pair<const char, uint32_t> ReadTagFallback(const* char* p, uint32_t res);
559
560	// Same as ParseVarint but only accept 5 bytes at most.
561	inline const char* ReadTag(const char* p, uint32_t* out,
562	uint32_t /max_tag/ = `0`) {
563	uint32_t res = static_cast<uint8_t>(p[`0`]);
564	if (res < `128`) {
565	*out = res;
566	return p + `1`;
567	}
568	uint32_t second = static_cast<uint8_t>(p[`1`]);
569	res += (second - `1`) << `7`;
570	if (second < `128`) {
571	*out = res;
572	return p + `2`;
573	}
574	auto tmp = ReadTagFallback(p, res);
575	*out = tmp.second;
576	return tmp.first;
577	}
578
579	// As above, but optimized to consume very few registers while still being fast,
580	// ReadTagInlined is useful for callers that don't mind the extra code but would
581	// like to avoid an extern function call causing spills into the stack.
582	//
583	// Two support routines for ReadTagInlined come first...
584	template <class T>
585	PROTOBUF_NODISCARD PROTOBUF_ALWAYS_INLINE constexpr T RotateLeft(
586	T x, int s) noexcept {
587	return static_cast<T>(x << (s & (std::numeric_limits<T>::digits - `1`))) \|
588	static_cast<T>(x >> ((-s) & (std::numeric_limits<T>::digits - `1`)));
589	}
590
591	PROTOBUF_NODISCARD inline PROTOBUF_ALWAYS_INLINE uint64_t
592	RotRight7AndReplaceLowByte(uint64_t res, const char& byte) {
593	#if defined(__x86_64__) && defined(__GNUC__)
594	// This will only use one register for `res`.
595	// `byte` comes as a reference to allow the compiler to generate code like:
596	//
597	// rorq $7, %rcx
598	// movb 1(%rax), %cl
599	//
600	// which avoids loading the incoming bytes into a separate register first.
601	asm("ror $7,%0\n\t"
602	"movb %1,%b0"
603	: "+r"(res)
604	: "m"(byte));
605	#else
606	res = RotateLeft(x: res, s: -`7`);
607	res = res & ~`0xFF`;
608	res \|= `0xFF` & byte;
609	#endif
610	return res;
611	};
612
613	inline PROTOBUF_ALWAYS_INLINE
614	const char* ReadTagInlined(const char* ptr, uint32_t* out) {
615	uint64_t res = `0xFF` & ptr[`0`];
616	if (PROTOBUF_PREDICT_FALSE(res >= `128`)) {
617	res = RotRight7AndReplaceLowByte(res, byte: ptr[`1`]);
618	if (PROTOBUF_PREDICT_FALSE(res & `0x80`)) {
619	res = RotRight7AndReplaceLowByte(res, byte: ptr[`2`]);
620	if (PROTOBUF_PREDICT_FALSE(res & `0x80`)) {
621	res = RotRight7AndReplaceLowByte(res, byte: ptr[`3`]);
622	if (PROTOBUF_PREDICT_FALSE(res & `0x80`)) {
623	// Note: this wouldn't work if res were 32-bit,
624	// because then replacing the low byte would overwrite
625	// the bottom 4 bits of the result.
626	res = RotRight7AndReplaceLowByte(res, byte: ptr[`4`]);
627	if (PROTOBUF_PREDICT_FALSE(res & `0x80`)) {
628	// The proto format does not permit longer than 5-byte encodings for
629	// tags.
630	*out = `0`;
631	return nullptr;
632	}
633	out = static_cast*<uint32_t>(RotateLeft(x: res, s: `28`));
634	#if defined(__GNUC__)
635	// Note: this asm statement prevents the compiler from
636	// trying to share the "return ptr + constant" among all
637	// branches.
638	asm("" : "+r"(ptr));
639	#endif
640	return ptr + `5`;
641	}
642	out = static_cast*<uint32_t>(RotateLeft(x: res, s: `21`));
643	return ptr + `4`;
644	}
645	out = static_cast*<uint32_t>(RotateLeft(x: res, s: `14`));
646	return ptr + `3`;
647	}
648	out = static_cast*<uint32_t>(RotateLeft(x: res, s: `7`));
649	return ptr + `2`;
650	}
651	out = static_cast*<uint32_t>(res);
652	return ptr + `1`;
653	}
654
655	// Decode 2 consecutive bytes of a varint and returns the value, shifted left
656	// by 1. It simultaneous updates ptr to ptr + 1 or ptr + 2 depending if the*
657	// first byte's continuation bit is set.
658	// If bit 15 of return value is set (equivalent to the continuation bits of both
659	// bytes being set) the varint continues, otherwise the parse is done. On x86
660	// movsx eax, dil
661	// and edi, eax
662	// add eax, edi
663	// adc [rsi], 1
664	inline uint32_t DecodeTwoBytes(const char** ptr) {
665	uint32_t value = UnalignedLoad<uint16_t>(p: *ptr);
666	// Sign extend the low byte continuation bit
667	uint32_t x = static_cast<int8_t>(value);
668	value &= x; // Mask out the high byte iff no continuation
669	// This add is an amazing operation, it cancels the low byte continuation bit
670	// from y transferring it to the carry. Simultaneously it also shifts the 7
671	// LSB left by one tightly against high byte varint bits. Hence value now
672	// contains the unpacked value shifted left by 1.
673	value += x;
674	// Use the carry to update the ptr appropriately.
675	*ptr += value < x ? `2` : `1`;
676	return value;
677	}
678
679	// More efficient varint parsing for big varints
680	inline const char* ParseBigVarint(const char* p, uint64_t* out) {
681	auto pnew = p;
682	auto tmp = DecodeTwoBytes(ptr: &pnew);
683	uint64_t res = tmp >> `1`;
684	if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= `0`)) {
685	*out = res;
686	return pnew;
687	}
688	for (std::uint32_t i = `1`; i < `5`; i++) {
689	pnew = p + `2` * i;
690	tmp = DecodeTwoBytes(ptr: &pnew);
691	res += (static_cast<std::uint64_t>(tmp) - `2`) << (`14` * i - `1`);
692	if (PROTOBUF_PREDICT_TRUE(static_cast<std::int16_t>(tmp) >= `0`)) {
693	*out = res;
694	return pnew;
695	}
696	}
697	return nullptr;
698	}
699
700	PROTOBUF_EXPORT
701	std::pair<const char, int32_t> ReadSizeFallback(const* char* p, uint32_t first);
702	// Used for tags, could read up to 5 bytes which must be available. Additionally
703	// it makes sure the unsigned value fits a int32_t, otherwise returns nullptr.
704	// Caller must ensure its safe to call.
705	inline uint32_t ReadSize(const char** pp) {
706	auto p = *pp;
707	uint32_t res = static_cast<uint8_t>(p[`0`]);
708	if (res < `128`) {
709	*pp = p + `1`;
710	return res;
711	}
712	auto x = ReadSizeFallback(p, first: res);
713	*pp = x.first;
714	return x.second;
715	}
716
717	// Some convenience functions to simplify the generated parse loop code.
718	// Returning the value and updating the buffer pointer allows for nicer
719	// function composition. We rely on the compiler to inline this.
720	// Also in debug compiles having local scoped variables tend to generated
721	// stack frames that scale as O(num fields).
722	inline uint64_t ReadVarint64(const char** p) {
723	uint64_t tmp;
724	p = VarintParse(p: p, out: &tmp);
725	return tmp;
726	}
727
728	inline uint32_t ReadVarint32(const char** p) {
729	uint32_t tmp;
730	p = VarintParse(p: p, out: &tmp);
731	return tmp;
732	}
733
734	inline int64_t ReadVarintZigZag64(const char** p) {
735	uint64_t tmp;
736	p = VarintParse(p: p, out: &tmp);
737	return WireFormatLite::ZigZagDecode64(n: tmp);
738	}
739
740	inline int32_t ReadVarintZigZag32(const char** p) {
741	uint64_t tmp;
742	p = VarintParse(p: p, out: &tmp);
743	return WireFormatLite::ZigZagDecode32(n: static_cast<uint32_t>(tmp));
744	}
745
746	template <typename T, typename std::enable_if<
747	!std::is_base_of<MessageLite, T>::value, bool>::type>
748	PROTOBUF_NODISCARD const char* ParseContext::ParseMessage(T* msg,
749	const char* ptr) {
750	int old;
751	ptr = ReadSizeAndPushLimitAndDepth(ptr, old_limit: &old);
752	ptr = ptr ? msg->_InternalParse(ptr, this) : nullptr;
753	depth_++;
754	if (!PopLimit(delta: old)) return nullptr;
755	return ptr;
756	}
757
758	template <typename Tag, typename T>
759	const char* EpsCopyInputStream::ReadRepeatedFixed(const char* ptr,
760	Tag expected_tag,
761	RepeatedField<T>* out) {
762	do {
763	out->Add(UnalignedLoad<T>(ptr));
764	ptr += sizeof(T);
765	if (PROTOBUF_PREDICT_FALSE(ptr >= limit_end_)) return ptr;
766	} while (UnalignedLoad<Tag>(ptr) == expected_tag && (ptr += sizeof(Tag)));
767	return ptr;
768	}
769
// Add any of the following lines to debug which parse function is failing.

// Makes the enclosing function return `ret` when `predicate` is false.
#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                  \
    /*  ::raise(SIGINT);  */                           \
    /*  GOOGLE_LOG(ERROR) << "Parse failure";  */      \
    return ret;                                        \
  }

// Makes the enclosing function return nullptr when `predicate` is false.
#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
781
782	template <typename T>
783	const char* EpsCopyInputStream::ReadPackedFixed(const char* ptr, int size,
784	RepeatedField<T>* out) {
785	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
786	int nbytes = buffer_end_ + kSlopBytes - ptr;
787	while (size > nbytes) {
788	int num = nbytes / sizeof(T);
789	int old_entries = out->size();
790	out->Reserve(old_entries + num);
791	int block_size = num * sizeof(T);
792	auto dst = out->AddNAlreadyReserved(num);
793	#ifdef PROTOBUF_LITTLE_ENDIAN
794	std::memcpy(dest: dst, src: ptr, n: block_size);
795	#else
796	for (int i = `0`; i < num; i++)
797	dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
798	#endif
799	size -= block_size;
800	if (limit_ <= kSlopBytes) return nullptr;
801	ptr = Next();
802	if (ptr == nullptr) return nullptr;
803	ptr += kSlopBytes - (nbytes - block_size);
804	nbytes = buffer_end_ + kSlopBytes - ptr;
805	}
806	int num = size / sizeof(T);
807	int old_entries = out->size();
808	out->Reserve(old_entries + num);
809	int block_size = num * sizeof(T);
810	auto dst = out->AddNAlreadyReserved(num);
811	#ifdef PROTOBUF_LITTLE_ENDIAN
812	std::memcpy(dest: dst, src: ptr, n: block_size);
813	#else
814	for (int i = `0`; i < num; i++) dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
815	#endif
816	ptr += block_size;
817	if (size != block_size) return nullptr;
818	return ptr;
819	}
820
821	template <typename Add>
822	const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
823	while (ptr < end) {
824	uint64_t varint;
825	ptr = VarintParse(p: ptr, out: &varint);
826	if (ptr == nullptr) return nullptr;
827	add(varint);
828	}
829	return ptr;
830	}
831
832	template <typename Add>
833	const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
834	int size = ReadSize(pp: &ptr);
835	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
836	int chunk_size = buffer_end_ - ptr;
837	while (size > chunk_size) {
838	ptr = ReadPackedVarintArray(ptr, buffer_end_, add);
839	if (ptr == nullptr) return nullptr;
840	int overrun = ptr - buffer_end_;
841	GOOGLE_DCHECK(overrun >= `0` && overrun <= kSlopBytes);
842	if (size - chunk_size <= kSlopBytes) {
843	// The current buffer contains all the information needed, we don't need
844	// to flip buffers. However we must parse from a buffer with enough space
845	// so we are not prone to a buffer overflow.
846	char buf[kSlopBytes + `10`] = {};
847	std::memcpy(dest: buf, src: buffer_end_, n: kSlopBytes);
848	GOOGLE_CHECK_LE(size - chunk_size, kSlopBytes);
849	auto end = buf + (size - chunk_size);
850	auto res = ReadPackedVarintArray(buf + overrun, end, add);
851	if (res == nullptr \|\| res != end) return nullptr;
852	return buffer_end_ + (res - buf);
853	}
854	size -= overrun + chunk_size;
855	GOOGLE_DCHECK_GT(size, `0`);
856	// We must flip buffers
857	if (limit_ <= kSlopBytes) return nullptr;
858	ptr = Next();
859	if (ptr == nullptr) return nullptr;
860	ptr += overrun;
861	chunk_size = buffer_end_ - ptr;
862	}
863	auto end = ptr + size;
864	ptr = ReadPackedVarintArray(ptr, end, add);
865	return end == ptr ? ptr : nullptr;
866	}
867
// Helper for verification of utf8.
// Takes the bytes to check as `s` and a `field_name`; returns a bool.
// NOTE(review): `field_name` is presumably used for diagnostics when the check
// fails -- the definition is not visible from this header; confirm there.
PROTOBUF_EXPORT
bool VerifyUTF8(StringPiece s, const char* field_name);
871
872	inline bool VerifyUTF8(const std::string* s, const char* field_name) {
873	return VerifyUTF8(s: *s, field_name);
874	}
875
// All the string parsers with or without UTF checking and for all CTypes.
//
// Takes the destination string `s`, the current parse position `ptr` and the
// parse context `ctx`; returns a `const char*` whose value must not be
// discarded (PROTOBUF_NODISCARD).
// NOTE(review): by the convention of the other parsers in this header the
// result is presumably the advanced position, or nullptr on failure -- confirm
// against the definition.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* InlineGreedyStringParser(
    std::string* s, const char* ptr, ParseContext* ctx);
879
880
881	template <typename T>
882	PROTOBUF_NODISCARD const char* FieldParser(uint64_t tag, T& field_parser,
883	const char* ptr, ParseContext* ctx) {
884	uint32_t number = tag >> `3`;
885	GOOGLE_PROTOBUF_PARSER_ASSERT(number != `0`);
886	using WireType = internal::WireFormatLite::WireType;
887	switch (tag & `7`) {
888	case WireType::WIRETYPE_VARINT: {
889	uint64_t value;
890	ptr = VarintParse(p: ptr, out: &value);
891	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
892	field_parser.AddVarint(number, value);
893	break;
894	}
895	case WireType::WIRETYPE_FIXED64: {
896	uint64_t value = UnalignedLoad<uint64_t>(p: ptr);
897	ptr += `8`;
898	field_parser.AddFixed64(number, value);
899	break;
900	}
901	case WireType::WIRETYPE_LENGTH_DELIMITED: {
902	ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
903	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
904	break;
905	}
906	case WireType::WIRETYPE_START_GROUP: {
907	ptr = field_parser.ParseGroup(number, ptr, ctx);
908	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
909	break;
910	}
911	case WireType::WIRETYPE_END_GROUP: {
912	GOOGLE_LOG(FATAL) << "Can't happen";
913	break;
914	}
915	case WireType::WIRETYPE_FIXED32: {
916	uint32_t value = UnalignedLoad<uint32_t>(p: ptr);
917	ptr += `4`;
918	field_parser.AddFixed32(number, value);
919	break;
920	}
921	default:
922	return nullptr;
923	}
924	return ptr;
925	}
926
927	template <typename T>
928	PROTOBUF_NODISCARD const char* WireFormatParser(T& field_parser,
929	const char* ptr,
930	ParseContext* ctx) {
931	while (!ctx->Done(ptr: &ptr)) {
932	uint32_t tag;
933	ptr = ReadTag(p: ptr, out: &tag);
934	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
935	if (tag == `0` \|\| (tag & `7`) == `4`) {
936	ctx->SetLastTag(tag);
937	return ptr;
938	}
939	ptr = FieldParser(tag, field_parser, ptr, ctx);
940	GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
941	}
942	return ptr;
943	}
944
// The packed parsers parse repeated numeric primitives directly into the
// corresponding field

// These are packed varints.
// Each parser takes the destination as an untyped `object` pointer, the
// current parse position `ptr` and the parse context `ctx`, and returns a
// `const char*` that must not be ignored (PROTOBUF_NODISCARD).
// NOTE(review): `object` is presumably a RepeatedField of the matching element
// type, and the result presumably the advanced position or nullptr on
// failure -- the definitions are not visible here; confirm.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedUInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx);
963
964	template <typename T>
965	PROTOBUF_NODISCARD const char* PackedEnumParser(void* object, const char* ptr,
966	ParseContext* ctx,
967	bool (is_valid)(int*),
968	InternalMetadata* metadata,
969	int field_num) {
970	return ctx->ReadPackedVarint(
971	ptr, [object, is_valid, metadata, field_num](uint64_t val) {
972	if (is_valid(val)) {
973	static_cast<RepeatedField<int>*>(object)->Add(value: val);
974	} else {
975	WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
976	}
977	});
978	}
979
980	template <typename T>
981	PROTOBUF_NODISCARD const char* PackedEnumParserArg(
982	void* object, const char* ptr, ParseContext* ctx,
983	bool (is_valid)(const* void, int), const* void* data,
984	InternalMetadata* metadata, int field_num) {
985	return ctx->ReadPackedVarint(
986	ptr, [object, is_valid, data, metadata, field_num](uint64_t val) {
987	if (is_valid(data, val)) {
988	static_cast<RepeatedField<int>*>(object)->Add(value: val);
989	} else {
990	WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
991	}
992	});
993	}
994
// Packed parsers for bool and for the fixed-width numeric types. They share
// the signature of the packed varint parsers: an untyped destination `object`,
// the current parse position `ptr` and the parse context `ctx`; the returned
// `const char*` must not be ignored (PROTOBUF_NODISCARD).
// NOTE(review): `object` is presumably a RepeatedField of the matching element
// type -- the definitions are not visible here; confirm.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedBoolParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedSFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedFloatParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* PackedDoubleParser(
    void* object, const char* ptr, ParseContext* ctx);
1009
// This is the only recursive parser.
// Takes the string `unknown`, the current parse position `ptr` and the parse
// context `ctx`; the returned `const char*` must not be ignored
// (PROTOBUF_NODISCARD).
// NOTE(review): presumably appends the unknown group's contents to `unknown`
// -- the definition is not visible here; confirm.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownGroupLiteParse(
    std::string* unknown, const char* ptr, ParseContext* ctx);
// This is a helper for UnknownGroupLiteParse but is actually also useful in
// the generated code. It uses overload on std::string* vs UnknownFieldSet* to
// make the generated code isomorphic between full and lite.
// This declaration is the std::string* overload; it takes the already-read
// `tag`, the string `unknown`, the current parse position `ptr` and the parse
// context `ctx`.
PROTOBUF_NODISCARD PROTOBUF_EXPORT const char* UnknownFieldParse(
    uint32_t tag, std::string* unknown, const char* ptr, ParseContext* ctx);
1018
1019	} // namespace internal
1020	} // namespace protobuf
1021	} // namespace google
1022
1023	#include <google/protobuf/port_undef.inc>
1024
1025	#endif // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
1026

Browse the source code of Velox/build/_deps/protobuf-src/src/google/protobuf/parse_context.h