1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31#include <google/protobuf/parse_context.h>
32
33#include <google/protobuf/io/coded_stream.h>
34#include <google/protobuf/io/zero_copy_stream.h>
35#include <google/protobuf/arenastring.h>
36#include <google/protobuf/endian.h>
37#include <google/protobuf/message_lite.h>
38#include <google/protobuf/repeated_field.h>
39#include <google/protobuf/stubs/strutil.h>
40#include <google/protobuf/wire_format_lite.h>
41
42// Must be included last.
43#include <google/protobuf/port_def.inc>
44
45namespace google {
46namespace protobuf {
47namespace internal {
48
49namespace {
50
51// Only call if at start of tag.
52bool ParseEndsInSlopRegion(const char* begin, int overrun, int depth) {
53 constexpr int kSlopBytes = EpsCopyInputStream::kSlopBytes;
54 GOOGLE_DCHECK_GE(overrun, 0);
55 GOOGLE_DCHECK_LE(overrun, kSlopBytes);
56 auto ptr = begin + overrun;
57 auto end = begin + kSlopBytes;
58 while (ptr < end) {
59 uint32_t tag;
60 ptr = ReadTag(p: ptr, out: &tag);
61 if (ptr == nullptr || ptr > end) return false;
62 // ending on 0 tag is allowed and is the major reason for the necessity of
63 // this function.
64 if (tag == 0) return true;
65 switch (tag & 7) {
66 case 0: { // Varint
67 uint64_t val;
68 ptr = VarintParse(p: ptr, out: &val);
69 if (ptr == nullptr) return false;
70 break;
71 }
72 case 1: { // fixed64
73 ptr += 8;
74 break;
75 }
76 case 2: { // len delim
77 int32_t size = ReadSize(pp: &ptr);
78 if (ptr == nullptr || size > end - ptr) return false;
79 ptr += size;
80 break;
81 }
82 case 3: { // start group
83 depth++;
84 break;
85 }
86 case 4: { // end group
87 if (--depth < 0) return true; // We exit early
88 break;
89 }
90 case 5: { // fixed32
91 ptr += 4;
92 break;
93 }
94 default:
95 return false; // Unknown wireformat
96 }
97 }
98 return false;
99}
100
101} // namespace
102
103const char* EpsCopyInputStream::NextBuffer(int overrun, int depth) {
104 if (next_chunk_ == nullptr) return nullptr; // We've reached end of stream.
105 if (next_chunk_ != buffer_) {
106 GOOGLE_DCHECK(size_ > kSlopBytes);
107 // The chunk is large enough to be used directly
108 buffer_end_ = next_chunk_ + size_ - kSlopBytes;
109 auto res = next_chunk_;
110 next_chunk_ = buffer_;
111 if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
112 return res;
113 }
114 // Move the slop bytes of previous buffer to start of the patch buffer.
115 // Note we must use memmove because the previous buffer could be part of
116 // buffer_.
117 std::memmove(dest: buffer_, src: buffer_end_, n: kSlopBytes);
118 if (overall_limit_ > 0 &&
119 (depth < 0 || !ParseEndsInSlopRegion(begin: buffer_, overrun, depth))) {
120 const void* data;
121 // ZeroCopyInputStream indicates Next may return 0 size buffers. Hence
122 // we loop.
123 while (StreamNext(data: &data)) {
124 if (size_ > kSlopBytes) {
125 // We got a large chunk
126 std::memcpy(dest: buffer_ + kSlopBytes, src: data, n: kSlopBytes);
127 next_chunk_ = static_cast<const char*>(data);
128 buffer_end_ = buffer_ + kSlopBytes;
129 if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
130 return buffer_;
131 } else if (size_ > 0) {
132 std::memcpy(dest: buffer_ + kSlopBytes, src: data, n: size_);
133 next_chunk_ = buffer_;
134 buffer_end_ = buffer_ + size_;
135 if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
136 return buffer_;
137 }
138 GOOGLE_DCHECK(size_ == 0) << size_;
139 }
140 overall_limit_ = 0; // Next failed, no more needs for next
141 }
142 // End of stream or array
143 if (aliasing_ == kNoDelta) {
144 // If there is no more block and aliasing is true, the previous block
145 // is still valid and we can alias. We have users relying on string_view's
146 // obtained from protos to outlive the proto, when the parse was from an
147 // array. This guarantees string_view's are always aliased if parsed from
148 // an array.
149 aliasing_ = reinterpret_cast<std::uintptr_t>(buffer_end_) -
150 reinterpret_cast<std::uintptr_t>(buffer_);
151 }
152 next_chunk_ = nullptr;
153 buffer_end_ = buffer_ + kSlopBytes;
154 size_ = 0;
155 return buffer_;
156}
157
158const char* EpsCopyInputStream::Next() {
159 GOOGLE_DCHECK(limit_ > kSlopBytes);
160 auto p = NextBuffer(overrun: 0 /* immaterial */, depth: -1);
161 if (p == nullptr) {
162 limit_end_ = buffer_end_;
163 // Distinguish ending on a pushed limit or ending on end-of-stream.
164 SetEndOfStream();
165 return nullptr;
166 }
167 limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor
168 limit_end_ = buffer_end_ + std::min(0, limit_);
169 return p;
170}
171
172std::pair<const char*, bool> EpsCopyInputStream::DoneFallback(int overrun,
173 int depth) {
174 // Did we exceeded the limit (parse error).
175 if (PROTOBUF_PREDICT_FALSE(overrun > limit_)) return {nullptr, true};
176 GOOGLE_DCHECK(overrun != limit_); // Guaranteed by caller.
177 GOOGLE_DCHECK(overrun < limit_); // Follows from above
178 // TODO(gerbens) Instead of this dcheck we could just assign, and remove
179 // updating the limit_end from PopLimit, ie.
180 // limit_end_ = buffer_end_ + (std::min)(0, limit_);
181 // if (ptr < limit_end_) return {ptr, false};
182 GOOGLE_DCHECK(limit_end_ == buffer_end_ + (std::min)(0, limit_));
183 // At this point we know the following assertion holds.
184 GOOGLE_DCHECK_GT(limit_, 0);
185 GOOGLE_DCHECK(limit_end_ == buffer_end_); // because limit_ > 0
186 const char* p;
187 do {
188 // We are past the end of buffer_end_, in the slop region.
189 GOOGLE_DCHECK_GE(overrun, 0);
190 p = NextBuffer(overrun, depth);
191 if (p == nullptr) {
192 // We are at the end of the stream
193 if (PROTOBUF_PREDICT_FALSE(overrun != 0)) return {nullptr, true};
194 GOOGLE_DCHECK_GT(limit_, 0);
195 limit_end_ = buffer_end_;
196 // Distinguish ending on a pushed limit or ending on end-of-stream.
197 SetEndOfStream();
198 return {buffer_end_, true};
199 }
200 limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor
201 p += overrun;
202 overrun = p - buffer_end_;
203 } while (overrun >= 0);
204 limit_end_ = buffer_end_ + std::min(0, limit_);
205 return {p, false};
206}
207
208const char* EpsCopyInputStream::SkipFallback(const char* ptr, int size) {
209 return AppendSize(ptr, size, append: [](const char* /*p*/, int /*s*/) {});
210}
211
212const char* EpsCopyInputStream::ReadStringFallback(const char* ptr, int size,
213 std::string* str) {
214 str->clear();
215 if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
216 // Reserve the string up to a static safe size. If strings are bigger than
217 // this we proceed by growing the string as needed. This protects against
218 // malicious payloads making protobuf hold on to a lot of memory.
219 str->reserve(res_arg: str->size() + std::min<int>(size, kSafeStringSize));
220 }
221 return AppendSize(ptr, size,
222 append: [str](const char* p, int s) { str->append(s: p, n: s); });
223}
224
225const char* EpsCopyInputStream::AppendStringFallback(const char* ptr, int size,
226 std::string* str) {
227 if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
228 // Reserve the string up to a static safe size. If strings are bigger than
229 // this we proceed by growing the string as needed. This protects against
230 // malicious payloads making protobuf hold on to a lot of memory.
231 str->reserve(res_arg: str->size() + std::min<int>(size, kSafeStringSize));
232 }
233 return AppendSize(ptr, size,
234 append: [str](const char* p, int s) { str->append(s: p, n: s); });
235}
236
237
238const char* EpsCopyInputStream::InitFrom(io::ZeroCopyInputStream* zcis) {
239 zcis_ = zcis;
240 const void* data;
241 int size;
242 limit_ = INT_MAX;
243 if (zcis->Next(data: &data, size: &size)) {
244 overall_limit_ -= size;
245 if (size > kSlopBytes) {
246 auto ptr = static_cast<const char*>(data);
247 limit_ -= size - kSlopBytes;
248 limit_end_ = buffer_end_ = ptr + size - kSlopBytes;
249 next_chunk_ = buffer_;
250 if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
251 return ptr;
252 } else {
253 limit_end_ = buffer_end_ = buffer_ + kSlopBytes;
254 next_chunk_ = buffer_;
255 auto ptr = buffer_ + 2 * kSlopBytes - size;
256 std::memcpy(dest: ptr, src: data, n: size);
257 return ptr;
258 }
259 }
260 overall_limit_ = 0;
261 next_chunk_ = nullptr;
262 size_ = 0;
263 limit_end_ = buffer_end_ = buffer_;
264 return buffer_;
265}
266
267const char* ParseContext::ReadSizeAndPushLimitAndDepth(const char* ptr,
268 int* old_limit) {
269 int size = ReadSize(pp: &ptr);
270 if (PROTOBUF_PREDICT_FALSE(!ptr)) {
271 *old_limit = 0; // Make sure this isn't uninitialized even on error return
272 return nullptr;
273 }
274 *old_limit = PushLimit(ptr, limit: size);
275 if (--depth_ < 0) return nullptr;
276 return ptr;
277}
278
279const char* ParseContext::ParseMessage(MessageLite* msg, const char* ptr) {
280 int old;
281 ptr = ReadSizeAndPushLimitAndDepth(ptr, old_limit: &old);
282 ptr = ptr ? msg->_InternalParse(ptr, this) : nullptr;
283 depth_++;
284 if (!PopLimit(delta: old)) return nullptr;
285 return ptr;
286}
287
// Appends `val` to `*s` as a base-128 varint: seven payload bits per byte,
// least-significant group first, with the high bit set on every byte except
// the last one.
inline void WriteVarint(uint64_t val, std::string* s) {
  while (val >= 128) {
    // Low seven bits plus the continuation flag.
    const uint8_t c = static_cast<uint8_t>(val | 0x80);
    s->push_back(static_cast<char>(c));
    val >>= 7;
  }
  // Final byte: val < 128 here, so the continuation flag is clear.
  s->push_back(static_cast<char>(val));
}
296
297void WriteVarint(uint32_t num, uint64_t val, std::string* s) {
298 WriteVarint(val: num << 3, s);
299 WriteVarint(val, s);
300}
301
302void WriteLengthDelimited(uint32_t num, StringPiece val, std::string* s) {
303 WriteVarint(val: (num << 3) + 2, s);
304 WriteVarint(val: val.size(), s);
305 s->append(s: val.data(), n: val.size());
306}
307
308std::pair<const char*, uint32_t> VarintParseSlow32(const char* p,
309 uint32_t res) {
310 for (std::uint32_t i = 2; i < 5; i++) {
311 uint32_t byte = static_cast<uint8_t>(p[i]);
312 res += (byte - 1) << (7 * i);
313 if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
314 return {p + i + 1, res};
315 }
316 }
317 // Accept >5 bytes
318 for (std::uint32_t i = 5; i < 10; i++) {
319 uint32_t byte = static_cast<uint8_t>(p[i]);
320 if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
321 return {p + i + 1, res};
322 }
323 }
324 return {nullptr, 0};
325}
326
327std::pair<const char*, uint64_t> VarintParseSlow64(const char* p,
328 uint32_t res32) {
329 uint64_t res = res32;
330 for (std::uint32_t i = 2; i < 10; i++) {
331 uint64_t byte = static_cast<uint8_t>(p[i]);
332 res += (byte - 1) << (7 * i);
333 if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
334 return {p + i + 1, res};
335 }
336 }
337 return {nullptr, 0};
338}
339
340std::pair<const char*, uint32_t> ReadTagFallback(const char* p, uint32_t res) {
341 for (std::uint32_t i = 2; i < 5; i++) {
342 uint32_t byte = static_cast<uint8_t>(p[i]);
343 res += (byte - 1) << (7 * i);
344 if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
345 return {p + i + 1, res};
346 }
347 }
348 return {nullptr, 0};
349}
350
351std::pair<const char*, int32_t> ReadSizeFallback(const char* p, uint32_t res) {
352 for (std::uint32_t i = 1; i < 4; i++) {
353 uint32_t byte = static_cast<uint8_t>(p[i]);
354 res += (byte - 1) << (7 * i);
355 if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
356 return {p + i + 1, res};
357 }
358 }
359 std::uint32_t byte = static_cast<uint8_t>(p[4]);
360 if (PROTOBUF_PREDICT_FALSE(byte >= 8)) return {nullptr, 0}; // size >= 2gb
361 res += (byte - 1) << 28;
362 // Protect against sign integer overflow in PushLimit. Limits are relative
363 // to buffer ends and ptr could potential be kSlopBytes beyond a buffer end.
364 // To protect against overflow we reject limits absurdly close to INT_MAX.
365 if (PROTOBUF_PREDICT_FALSE(res > INT_MAX - ParseContext::kSlopBytes)) {
366 return {nullptr, 0};
367 }
368 return {p + 5, res};
369}
370
371const char* StringParser(const char* begin, const char* end, void* object,
372 ParseContext*) {
373 auto str = static_cast<std::string*>(object);
374 str->append(s: begin, n: end - begin);
375 return end;
376}
377
// Forward declaration only; the definition lives in wire_format_lite.cc.
// Called by VerifyUTF8 below to report a string that failed UTF-8 validation.
void PrintUTF8ErrorLog(StringPiece message_name,
                       StringPiece field_name, const char* operation_str,
                       bool emit_stacktrace);
382
383bool VerifyUTF8(StringPiece str, const char* field_name) {
384 if (!IsStructurallyValidUTF8(str)) {
385 PrintUTF8ErrorLog(message_name: "", field_name, operation_str: "parsing", emit_stacktrace: false);
386 return false;
387 }
388 return true;
389}
390
391const char* InlineGreedyStringParser(std::string* s, const char* ptr,
392 ParseContext* ctx) {
393 int size = ReadSize(pp: &ptr);
394 if (!ptr) return nullptr;
395 return ctx->ReadString(ptr, size, s);
396}
397
398
399template <typename T, bool sign>
400const char* VarintParser(void* object, const char* ptr, ParseContext* ctx) {
401 return ctx->ReadPackedVarint(ptr, [object](uint64_t varint) {
402 T val;
403 if (sign) {
404 if (sizeof(T) == 8) {
405 val = WireFormatLite::ZigZagDecode64(n: varint);
406 } else {
407 val = WireFormatLite::ZigZagDecode32(n: varint);
408 }
409 } else {
410 val = varint;
411 }
412 static_cast<RepeatedField<T>*>(object)->Add(val);
413 });
414}
415
// Packed int32 parser: forwards to VarintParser<int32_t, false>, which appends
// every varint, without zigzag decoding, to the RepeatedField<int32_t> that
// `object` points to.
const char* PackedInt32Parser(void* object, const char* ptr,
                              ParseContext* ctx) {
  return VarintParser<int32_t, false>(object, ptr, ctx);
}
// Packed uint32 parser: forwards to VarintParser<uint32_t, false>, which
// appends every varint, without zigzag decoding, to the
// RepeatedField<uint32_t> that `object` points to.
const char* PackedUInt32Parser(void* object, const char* ptr,
                               ParseContext* ctx) {
  return VarintParser<uint32_t, false>(object, ptr, ctx);
}
// Packed int64 parser: forwards to VarintParser<int64_t, false>, which appends
// every varint, without zigzag decoding, to the RepeatedField<int64_t> that
// `object` points to.
const char* PackedInt64Parser(void* object, const char* ptr,
                              ParseContext* ctx) {
  return VarintParser<int64_t, false>(object, ptr, ctx);
}
// Packed uint64 parser: forwards to VarintParser<uint64_t, false>, which
// appends every varint, without zigzag decoding, to the
// RepeatedField<uint64_t> that `object` points to.
const char* PackedUInt64Parser(void* object, const char* ptr,
                               ParseContext* ctx) {
  return VarintParser<uint64_t, false>(object, ptr, ctx);
}
// Packed sint32 parser: forwards to VarintParser<int32_t, true>, which
// zigzag-decodes every varint (32-bit decoder) before appending it to the
// RepeatedField<int32_t> that `object` points to.
const char* PackedSInt32Parser(void* object, const char* ptr,
                               ParseContext* ctx) {
  return VarintParser<int32_t, true>(object, ptr, ctx);
}
// Packed sint64 parser: forwards to VarintParser<int64_t, true>, which
// zigzag-decodes every varint (64-bit decoder) before appending it to the
// RepeatedField<int64_t> that `object` points to.
const char* PackedSInt64Parser(void* object, const char* ptr,
                               ParseContext* ctx) {
  return VarintParser<int64_t, true>(object, ptr, ctx);
}
440
// Packed enum parser: forwards to VarintParser<int, false>, so every varint is
// stored as a plain int in the RepeatedField<int> that `object` points to. No
// check of the value against an enum definition happens here.
const char* PackedEnumParser(void* object, const char* ptr, ParseContext* ctx) {
  return VarintParser<int, false>(object, ptr, ctx);
}
444
// Packed bool parser: forwards to VarintParser<bool, false>, so every varint
// is converted to bool (non-zero becomes true) and appended to the
// RepeatedField<bool> that `object` points to.
const char* PackedBoolParser(void* object, const char* ptr, ParseContext* ctx) {
  return VarintParser<bool, false>(object, ptr, ctx);
}
448
449template <typename T>
450const char* FixedParser(void* object, const char* ptr, ParseContext* ctx) {
451 int size = ReadSize(pp: &ptr);
452 return ctx->ReadPackedFixed(ptr, size,
453 static_cast<RepeatedField<T>*>(object));
454}
455
// Packed fixed32 parser: forwards to FixedParser<uint32_t>, which fills the
// RepeatedField<uint32_t> that `object` points to.
const char* PackedFixed32Parser(void* object, const char* ptr,
                                ParseContext* ctx) {
  return FixedParser<uint32_t>(object, ptr, ctx);
}
// Packed sfixed32 parser: forwards to FixedParser<int32_t>, which fills the
// RepeatedField<int32_t> that `object` points to.
const char* PackedSFixed32Parser(void* object, const char* ptr,
                                 ParseContext* ctx) {
  return FixedParser<int32_t>(object, ptr, ctx);
}
// Packed fixed64 parser: forwards to FixedParser<uint64_t>, which fills the
// RepeatedField<uint64_t> that `object` points to.
const char* PackedFixed64Parser(void* object, const char* ptr,
                                ParseContext* ctx) {
  return FixedParser<uint64_t>(object, ptr, ctx);
}
// Packed sfixed64 parser: forwards to FixedParser<int64_t>, which fills the
// RepeatedField<int64_t> that `object` points to.
const char* PackedSFixed64Parser(void* object, const char* ptr,
                                 ParseContext* ctx) {
  return FixedParser<int64_t>(object, ptr, ctx);
}
// Packed float parser: forwards to FixedParser<float>, which fills the
// RepeatedField<float> that `object` points to.
const char* PackedFloatParser(void* object, const char* ptr,
                              ParseContext* ctx) {
  return FixedParser<float>(object, ptr, ctx);
}
// Packed double parser: forwards to FixedParser<double>, which fills the
// RepeatedField<double> that `object` points to.
const char* PackedDoubleParser(void* object, const char* ptr,
                               ParseContext* ctx) {
  return FixedParser<double>(object, ptr, ctx);
}
480
481class UnknownFieldLiteParserHelper {
482 public:
483 explicit UnknownFieldLiteParserHelper(std::string* unknown)
484 : unknown_(unknown) {}
485
486 void AddVarint(uint32_t num, uint64_t value) {
487 if (unknown_ == nullptr) return;
488 WriteVarint(val: num * 8, s: unknown_);
489 WriteVarint(val: value, s: unknown_);
490 }
491 void AddFixed64(uint32_t num, uint64_t value) {
492 if (unknown_ == nullptr) return;
493 WriteVarint(val: num * 8 + 1, s: unknown_);
494 char buffer[8];
495 io::CodedOutputStream::WriteLittleEndian64ToArray(
496 value, target: reinterpret_cast<uint8_t*>(buffer));
497 unknown_->append(s: buffer, n: 8);
498 }
499 const char* ParseLengthDelimited(uint32_t num, const char* ptr,
500 ParseContext* ctx) {
501 int size = ReadSize(pp: &ptr);
502 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
503 if (unknown_ == nullptr) return ctx->Skip(ptr, size);
504 WriteVarint(val: num * 8 + 2, s: unknown_);
505 WriteVarint(val: size, s: unknown_);
506 return ctx->AppendString(ptr, size, s: unknown_);
507 }
508 const char* ParseGroup(uint32_t num, const char* ptr, ParseContext* ctx) {
509 if (unknown_) WriteVarint(val: num * 8 + 3, s: unknown_);
510 ptr = ctx->ParseGroup(msg: this, ptr, tag: num * 8 + 3);
511 GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
512 if (unknown_) WriteVarint(val: num * 8 + 4, s: unknown_);
513 return ptr;
514 }
515 void AddFixed32(uint32_t num, uint32_t value) {
516 if (unknown_ == nullptr) return;
517 WriteVarint(val: num * 8 + 5, s: unknown_);
518 char buffer[4];
519 io::CodedOutputStream::WriteLittleEndian32ToArray(
520 value, target: reinterpret_cast<uint8_t*>(buffer));
521 unknown_->append(s: buffer, n: 4);
522 }
523
524 const char* _InternalParse(const char* ptr, ParseContext* ctx) {
525 return WireFormatParser(field_parser&: *this, ptr, ctx);
526 }
527
528 private:
529 std::string* unknown_;
530};
531
532const char* UnknownGroupLiteParse(std::string* unknown, const char* ptr,
533 ParseContext* ctx) {
534 UnknownFieldLiteParserHelper field_parser(unknown);
535 return WireFormatParser(field_parser, ptr, ctx);
536}
537
538const char* UnknownFieldParse(uint32_t tag, std::string* unknown,
539 const char* ptr, ParseContext* ctx) {
540 UnknownFieldLiteParserHelper field_parser(unknown);
541 return FieldParser(tag, field_parser, ptr, ctx);
542}
543
544} // namespace internal
545} // namespace protobuf
546} // namespace google
547
548#include <google/protobuf/port_undef.inc>
549