1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31#include <cstdint>
32#include <numeric>
33
34#include <google/protobuf/extension_set.h>
35#include <google/protobuf/generated_message_tctable_decl.h>
36#include <google/protobuf/generated_message_tctable_impl.h>
37#include <google/protobuf/inlined_string_field.h>
38#include <google/protobuf/message_lite.h>
39#include <google/protobuf/parse_context.h>
40#include <google/protobuf/wire_format_lite.h>
41
42// clang-format off
43#include <google/protobuf/port_def.inc>
44// clang-format on
45
46namespace google {
47namespace protobuf {
48namespace internal {
49
50using FieldEntry = TcParseTableBase::FieldEntry;
51
52//////////////////////////////////////////////////////////////////////////////
53// Template instantiations:
54//////////////////////////////////////////////////////////////////////////////
55
56#ifndef NDEBUG
57template void AlignFail<4>(uintptr_t);
58template void AlignFail<8>(uintptr_t);
59#endif
60
61const char* TcParser::GenericFallbackLite(PROTOBUF_TC_PARAM_DECL) {
62 return GenericFallbackImpl<MessageLite, std::string>(PROTOBUF_TC_PARAM_PASS);
63}
64
65//////////////////////////////////////////////////////////////////////////////
66// Core fast parsing implementation:
67//////////////////////////////////////////////////////////////////////////////
68
69class TcParser::ScopedArenaSwap final {
70 public:
71 ScopedArenaSwap(MessageLite* msg, ParseContext* ctx)
72 : ctx_(ctx), saved_(ctx->data().arena) {
73 ctx_->data().arena = msg->GetArenaForAllocation();
74 }
75 ScopedArenaSwap(const ScopedArenaSwap&) = delete;
76 ~ScopedArenaSwap() { ctx_->data().arena = saved_; }
77
78 private:
79 ParseContext* const ctx_;
80 Arena* const saved_;
81};
82
83PROTOBUF_NOINLINE const char* TcParser::ParseLoop(
84 MessageLite* msg, const char* ptr, ParseContext* ctx,
85 const TcParseTableBase* table) {
86 ScopedArenaSwap saved(msg, ctx);
87 while (!ctx->Done(ptr: &ptr)) {
88 // Unconditionally read has bits, even if we don't have has bits.
89 // has_bits_offset will be 0 and we will just read something valid.
90 uint64_t hasbits = ReadAt<uint32_t>(x: msg, offset: table->has_bits_offset);
91 ptr = TagDispatch(msg, ptr, ctx, table, hasbits, data: {});
92 if (ptr == nullptr) break;
93 if (ctx->LastTag() != 1) break; // Ended on terminating tag
94 }
95 return ptr;
96}
97
98 // Dispatch to the designated parse function
99inline PROTOBUF_ALWAYS_INLINE const char* TcParser::TagDispatch(
100 PROTOBUF_TC_PARAM_DECL) {
101 const auto coded_tag = UnalignedLoad<uint16_t>(p: ptr);
102 const size_t idx = coded_tag & table->fast_idx_mask;
103 PROTOBUF_ASSUME((idx & 7) == 0);
104 auto* fast_entry = table->fast_entry(idx: idx >> 3);
105 data = fast_entry->bits;
106 data.data ^= coded_tag;
107 PROTOBUF_MUSTTAIL return fast_entry->target(PROTOBUF_TC_PARAM_PASS);
108}
109
110// We can only safely call from field to next field if the call is optimized
111// to a proper tail call. Otherwise we blow through stack. Clang and gcc
112// reliably do this optimization in opt mode, but do not perform this in debug
113// mode. Luckily the structure of the algorithm is such that it's always
114// possible to just return and use the enclosing parse loop as a trampoline.
115inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToTagDispatch(
116 PROTOBUF_TC_PARAM_DECL) {
117 constexpr bool always_return = !PROTOBUF_TAILCALL;
118 if (always_return || !ctx->DataAvailable(ptr)) {
119 PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
120 }
121 PROTOBUF_MUSTTAIL return TagDispatch(PROTOBUF_TC_PARAM_PASS);
122}
123
124inline PROTOBUF_ALWAYS_INLINE const char* TcParser::ToParseLoop(
125 PROTOBUF_TC_PARAM_DECL) {
126 (void)data;
127 (void)ctx;
128 SyncHasbits(msg, hasbits, table);
129 return ptr;
130}
131
132inline PROTOBUF_ALWAYS_INLINE const char* TcParser::Error(
133 PROTOBUF_TC_PARAM_DECL) {
134 (void)data;
135 (void)ctx;
136 (void)ptr;
137 SyncHasbits(msg, hasbits, table);
138 return nullptr;
139}
140
// A matching 1-byte tag on the fast path is already its own decoded value, so
// it is only widened to 32 bits.
static uint32_t FastDecodeTag(uint8_t coded_tag) {
  const uint32_t decoded = coded_tag;
  return decoded;
}
145
// A matching 2-byte tag on the fast path always has to be decoded.
//
// Adding the sign-extended low byte to the 16-bit value and halving the sum
// yields (high_byte << 7) | (low_byte & 0x7f) when the low byte has its top
// bit set.
static uint32_t FastDecodeTag(uint16_t coded_tag) {
  const auto low_byte = static_cast<int8_t>(coded_tag);
  const uint32_t sum = uint32_t{coded_tag} + static_cast<uint32_t>(low_byte);
  return sum >> 1;
}
152
153//////////////////////////////////////////////////////////////////////////////
154// Core mini parsing implementation:
155//////////////////////////////////////////////////////////////////////////////
156
157// Field lookup table layout:
158//
// Because it consists of a series of variable-length segments, the lookup
// table is organized within an array of uint16_t, and each element is either
// a uint16_t or a uint32_t stored little-endian as a pair of uint16_t.
162//
163// Its fundamental building block maps 16 contiguously ascending field numbers
164// to their locations within the field entry table:
165
// One lookup-table record covering 16 consecutive field numbers.
struct SkipEntry16 {
  // Bit i is set when the i-th of the 16 field numbers has NO field entry.
  uint16_t skipmap;
  // Field-entry index associated with the first of the 16 field numbers.
  uint16_t field_entry_offset;
};
170
171// The skipmap is a bitfield of which of those field numbers do NOT have a
172// field entry. The lowest bit of the skipmap corresponds to the lowest of
173// the 16 field numbers, so if a proto had only fields 1, 2, 3, and 7, the
174// skipmap would contain 0b11111111'10111000.
175//
176// The field lookup table begins with a single 32-bit skipmap that maps the
177// field numbers 1 through 32. This is because the majority of proto
178// messages only contain fields numbered 1 to 32.
179//
180// The rest of the lookup table is a repeated series of
181// { 32-bit field #, #SkipEntry16s, {SkipEntry16...} }
182// That is, the next thing is a pair of uint16_t that form the next
183// lowest field number that the lookup table handles. If this number is -1,
184// that is the end of the table. Then there is a uint16_t that is
185// the number of contiguous SkipEntry16 entries that follow, and then of
186// course the SkipEntry16s themselves.
187
188// Originally developed and tested at https://godbolt.org/z/vbc7enYcf
189
190// Returns the address of the field for `tag` in the table's field entries.
191// Returns nullptr if the field was not found.
192const TcParseTableBase::FieldEntry* TcParser::FindFieldEntry(
193 const TcParseTableBase* table, uint32_t field_num) {
194 const FieldEntry* const field_entries = table->field_entries_begin();
195
196 uint32_t fstart = 1;
197 uint32_t adj_fnum = field_num - fstart;
198
199 if (PROTOBUF_PREDICT_TRUE(adj_fnum < 32)) {
200 uint32_t skipmap = table->skipmap32;
201 uint32_t skipbit = 1 << adj_fnum;
202 if (PROTOBUF_PREDICT_FALSE(skipmap & skipbit)) return nullptr;
203 skipmap &= skipbit - 1;
204#if (__GNUC__ || __clang__) && __POPCNT__
205 // Note: here and below, skipmap typically has very few set bits
206 // (31 in the worst case, but usually zero) so a loop isn't that
207 // bad, and a compiler-generated popcount is typically only
208 // worthwhile if the processor itself has hardware popcount support.
209 adj_fnum -= __builtin_popcount(skipmap);
210#else
211 while (skipmap) {
212 --adj_fnum;
213 skipmap &= skipmap - 1;
214 }
215#endif
216 auto* entry = field_entries + adj_fnum;
217 PROTOBUF_ASSUME(entry != nullptr);
218 return entry;
219 }
220 const uint16_t* lookup_table = table->field_lookup_begin();
221 for (;;) {
222#ifdef PROTOBUF_LITTLE_ENDIAN
223 memcpy(dest: &fstart, src: lookup_table, n: sizeof(fstart));
224#else
225 fstart = lookup_table[0] | (lookup_table[1] << 16);
226#endif
227 lookup_table += sizeof(fstart) / sizeof(*lookup_table);
228 uint32_t num_skip_entries = *lookup_table++;
229 if (field_num < fstart) return nullptr;
230 adj_fnum = field_num - fstart;
231 uint32_t skip_num = adj_fnum / 16;
232 if (PROTOBUF_PREDICT_TRUE(skip_num < num_skip_entries)) {
233 // for each group of 16 fields we have:
234 // a bitmap of 16 bits
235 // a 16-bit field-entry offset for the first of them.
236 auto* skip_data = lookup_table + (adj_fnum / 16) * (sizeof(SkipEntry16) /
237 sizeof(uint16_t));
238 SkipEntry16 se = {.skipmap: skip_data[0], .field_entry_offset: skip_data[1]};
239 adj_fnum &= 15;
240 uint32_t skipmap = se.skipmap;
241 uint16_t skipbit = 1 << adj_fnum;
242 if (PROTOBUF_PREDICT_FALSE(skipmap & skipbit)) return nullptr;
243 skipmap &= skipbit - 1;
244 adj_fnum += se.field_entry_offset;
245#if (__GNUC__ || __clang__) && __POPCNT__
246 adj_fnum -= __builtin_popcount(skipmap);
247#else
248 while (skipmap) {
249 --adj_fnum;
250 skipmap &= skipmap - 1;
251 }
252#endif
253 auto* entry = field_entries + adj_fnum;
254 PROTOBUF_ASSUME(entry != nullptr);
255 return entry;
256 }
257 lookup_table +=
258 num_skip_entries * (sizeof(SkipEntry16) / sizeof(*lookup_table));
259 }
260}
261
// Field names are stored in a format of:
//
// 1) A table of name sizes, one byte each, from 1 to 255 per name.
//    `entries` is the size of this first table.
//    1a) padding bytes, so the table of name sizes is a multiple of
//        eight bytes in length. They are zero.
//
// 2) All the names, concatenated, with neither separation nor termination.
//
// This is designed to be compact but not particularly fast to retrieve.
// In particular, it takes O(n) to retrieve the name of the n'th field,
// which is usually fine because most protos have fewer than 10 fields.
//
// Returns a view of the `index`-th name inside `name_data`. No bounds check
// is made on `index`.
static StringPiece FindName(const char* name_data, size_t entries,
                            size_t index) {
  // The compiler unrolls these... if this isn't fast enough,
  // there's an AVX version at https://godbolt.org/z/eojrjqzfr
  // ARM-compatible version at https://godbolt.org/z/n5YT5Ee85

  // The field name sizes are padded up to a multiple of 8, so we
  // must pad them here.
  const size_t num_sizes = (entries + 7) & ~size_t{7};
  // Sizes are unsigned bytes. Reading them through `char` would sign-extend
  // lengths of 128..255 on platforms where char is signed.
  auto* uint8s = reinterpret_cast<const uint8_t*>(name_data);
  // The name starts after the size table plus all preceding names.
  const size_t pos = std::accumulate(uint8s, uint8s + index, num_sizes);
  const size_t size = uint8s[index];
  auto* start = &name_data[pos];
  return {start, size};
}
289
290StringPiece TcParser::MessageName(const TcParseTableBase* table) {
291 return FindName(name_data: table->name_data(), entries: table->num_field_entries + 1, index: 0);
292}
293
294StringPiece TcParser::FieldName(const TcParseTableBase* table,
295 const FieldEntry* field_entry) {
296 const FieldEntry* const field_entries = table->field_entries_begin();
297 auto field_index = static_cast<size_t>(field_entry - field_entries);
298 return FindName(name_data: table->name_data(), entries: table->num_field_entries + 1,
299 index: field_index + 1);
300}
301
302const char* TcParser::MiniParse(PROTOBUF_TC_PARAM_DECL) {
303 uint32_t tag;
304 ptr = ReadTagInlined(ptr, out: &tag);
305 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
306
307 auto* entry = FindFieldEntry(table, field_num: tag >> 3);
308 if (entry == nullptr) {
309 data.data = tag;
310 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
311 }
312
313 // The handler may need the tag and the entry to resolve fallback logic. Both
314 // of these are 32 bits, so pack them into (the 64-bit) `data`. Since we can't
315 // pack the entry pointer itself, just pack its offset from `table`.
316 uint64_t entry_offset = reinterpret_cast<const char*>(entry) -
317 reinterpret_cast<const char*>(table);
318 data.data = entry_offset << 32 | tag;
319
320 using field_layout::FieldKind;
321 auto field_type = entry->type_card & FieldKind::kFkMask;
322 switch (field_type) {
323 case FieldKind::kFkNone:
324 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
325 case FieldKind::kFkVarint:
326 PROTOBUF_MUSTTAIL return MpVarint(PROTOBUF_TC_PARAM_PASS);
327 case FieldKind::kFkPackedVarint:
328 PROTOBUF_MUSTTAIL return MpPackedVarint(PROTOBUF_TC_PARAM_PASS);
329 case FieldKind::kFkFixed:
330 PROTOBUF_MUSTTAIL return MpFixed(PROTOBUF_TC_PARAM_PASS);
331 case FieldKind::kFkPackedFixed:
332 PROTOBUF_MUSTTAIL return MpPackedFixed(PROTOBUF_TC_PARAM_PASS);
333 case FieldKind::kFkString:
334 PROTOBUF_MUSTTAIL return MpString(PROTOBUF_TC_PARAM_PASS);
335 case FieldKind::kFkMessage:
336 PROTOBUF_MUSTTAIL return MpMessage(PROTOBUF_TC_PARAM_PASS);
337 case FieldKind::kFkMap:
338 PROTOBUF_MUSTTAIL return MpMap(PROTOBUF_TC_PARAM_PASS);
339 default:
340 return Error(PROTOBUF_TC_PARAM_PASS);
341 }
342}
343
344namespace {
345
// Offset returns the address that lies `offset` bytes past `base`.
inline void* Offset(void* base, uint32_t offset) {
  auto* const bytes = static_cast<uint8_t*>(base);
  return bytes + offset;
}
350
351// InvertPacked changes tag bits from the given wire type to length
352// delimited. This is the difference expected between packed and non-packed
353// repeated fields.
354template <WireFormatLite::WireType Wt>
355inline PROTOBUF_ALWAYS_INLINE void InvertPacked(TcFieldData& data) {
356 data.data ^= Wt ^ WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
357}
358
359} // namespace
360
361//////////////////////////////////////////////////////////////////////////////
362// Message fields
363//////////////////////////////////////////////////////////////////////////////
364
365template <typename TagType, bool group_coding>
366inline PROTOBUF_ALWAYS_INLINE
367const char* TcParser::SingularParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL) {
368 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
369 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
370 }
371 auto saved_tag = UnalignedLoad<TagType>(ptr);
372 ptr += sizeof(TagType);
373 hasbits |= (uint64_t{1} << data.hasbit_idx());
374 SyncHasbits(msg, hasbits, table);
375 auto& field = RefAt<MessageLite*>(x: msg, offset: data.offset());
376 if (field == nullptr) {
377 const MessageLite* default_instance =
378 table->field_aux(idx: data.aux_idx())->message_default;
379 field = default_instance->New(arena: ctx->data().arena);
380 }
381 if (group_coding) {
382 return ctx->ParseGroup(field, ptr, FastDecodeTag(saved_tag));
383 }
384 return ctx->ParseMessage(msg: field, ptr);
385}
386
387const char* TcParser::FastMS1(PROTOBUF_TC_PARAM_DECL) {
388 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, false>(
389 PROTOBUF_TC_PARAM_PASS);
390}
391
392const char* TcParser::FastMS2(PROTOBUF_TC_PARAM_DECL) {
393 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, false>(
394 PROTOBUF_TC_PARAM_PASS);
395}
396
397const char* TcParser::FastGS1(PROTOBUF_TC_PARAM_DECL) {
398 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint8_t, true>(
399 PROTOBUF_TC_PARAM_PASS);
400}
401
402const char* TcParser::FastGS2(PROTOBUF_TC_PARAM_DECL) {
403 PROTOBUF_MUSTTAIL return SingularParseMessageAuxImpl<uint16_t, true>(
404 PROTOBUF_TC_PARAM_PASS);
405}
406
407template <typename TagType, bool group_coding>
408inline PROTOBUF_ALWAYS_INLINE
409const char* TcParser::RepeatedParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL) {
410 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
411 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
412 }
413 auto saved_tag = UnalignedLoad<TagType>(ptr);
414 ptr += sizeof(TagType);
415 SyncHasbits(msg, hasbits, table);
416 const MessageLite* default_instance =
417 table->field_aux(idx: data.aux_idx())->message_default;
418 auto& field = RefAt<RepeatedPtrFieldBase>(x: msg, offset: data.offset());
419 MessageLite* submsg =
420 field.Add<GenericTypeHandler<MessageLite>>(prototype: default_instance);
421 if (group_coding) {
422 return ctx->ParseGroup(submsg, ptr, FastDecodeTag(saved_tag));
423 }
424 return ctx->ParseMessage(msg: submsg, ptr);
425}
426
427const char* TcParser::FastMR1(PROTOBUF_TC_PARAM_DECL) {
428 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, false>(
429 PROTOBUF_TC_PARAM_PASS);
430}
431
432const char* TcParser::FastMR2(PROTOBUF_TC_PARAM_DECL) {
433 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, false>(
434 PROTOBUF_TC_PARAM_PASS);
435}
436
437const char* TcParser::FastGR1(PROTOBUF_TC_PARAM_DECL) {
438 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint8_t, true>(
439 PROTOBUF_TC_PARAM_PASS);
440}
441
442const char* TcParser::FastGR2(PROTOBUF_TC_PARAM_DECL) {
443 PROTOBUF_MUSTTAIL return RepeatedParseMessageAuxImpl<uint16_t, true>(
444 PROTOBUF_TC_PARAM_PASS);
445}
446
447//////////////////////////////////////////////////////////////////////////////
448// Fixed fields
449//////////////////////////////////////////////////////////////////////////////
450
451template <typename LayoutType, typename TagType>
452PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularFixed(
453 PROTOBUF_TC_PARAM_DECL) {
454 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
455 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
456 }
457 ptr += sizeof(TagType); // Consume tag
458 hasbits |= (uint64_t{1} << data.hasbit_idx());
459 RefAt<LayoutType>(msg, data.offset()) = UnalignedLoad<LayoutType>(ptr);
460 ptr += sizeof(LayoutType);
461 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
462}
463
464const char* TcParser::FastF32S1(PROTOBUF_TC_PARAM_DECL) {
465 PROTOBUF_MUSTTAIL return SingularFixed<uint32_t, uint8_t>(
466 PROTOBUF_TC_PARAM_PASS);
467}
468const char* TcParser::FastF32S2(PROTOBUF_TC_PARAM_DECL) {
469 PROTOBUF_MUSTTAIL return SingularFixed<uint32_t, uint16_t>(
470 PROTOBUF_TC_PARAM_PASS);
471}
472const char* TcParser::FastF64S1(PROTOBUF_TC_PARAM_DECL) {
473 PROTOBUF_MUSTTAIL return SingularFixed<uint64_t, uint8_t>(
474 PROTOBUF_TC_PARAM_PASS);
475}
476const char* TcParser::FastF64S2(PROTOBUF_TC_PARAM_DECL) {
477 PROTOBUF_MUSTTAIL return SingularFixed<uint64_t, uint16_t>(
478 PROTOBUF_TC_PARAM_PASS);
479}
480
481template <typename LayoutType, typename TagType>
482PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed(
483 PROTOBUF_TC_PARAM_DECL) {
484 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
485 // Check if the field can be parsed as packed repeated:
486 constexpr WireFormatLite::WireType fallback_wt =
487 sizeof(LayoutType) == 4 ? WireFormatLite::WIRETYPE_FIXED32
488 : WireFormatLite::WIRETYPE_FIXED64;
489 InvertPacked<fallback_wt>(data);
490 if (data.coded_tag<TagType>() == 0) {
491 return PackedFixed<LayoutType, TagType>(PROTOBUF_TC_PARAM_PASS);
492 } else {
493 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
494 }
495 }
496 auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
497 int idx = field.size();
498 auto elem = field.Add();
499 int space = field.Capacity() - idx;
500 idx = 0;
501 auto expected_tag = UnalignedLoad<TagType>(ptr);
502 do {
503 ptr += sizeof(TagType);
504 elem[idx++] = UnalignedLoad<LayoutType>(ptr);
505 ptr += sizeof(LayoutType);
506 if (idx >= space) break;
507 if (!ctx->DataAvailable(ptr)) break;
508 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
509 field.AddNAlreadyReserved(idx - 1);
510 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
511}
512
513const char* TcParser::FastF32R1(PROTOBUF_TC_PARAM_DECL) {
514 PROTOBUF_MUSTTAIL return RepeatedFixed<uint32_t, uint8_t>(
515 PROTOBUF_TC_PARAM_PASS);
516}
517const char* TcParser::FastF32R2(PROTOBUF_TC_PARAM_DECL) {
518 PROTOBUF_MUSTTAIL return RepeatedFixed<uint32_t, uint16_t>(
519 PROTOBUF_TC_PARAM_PASS);
520}
521const char* TcParser::FastF64R1(PROTOBUF_TC_PARAM_DECL) {
522 PROTOBUF_MUSTTAIL return RepeatedFixed<uint64_t, uint8_t>(
523 PROTOBUF_TC_PARAM_PASS);
524}
525const char* TcParser::FastF64R2(PROTOBUF_TC_PARAM_DECL) {
526 PROTOBUF_MUSTTAIL return RepeatedFixed<uint64_t, uint16_t>(
527 PROTOBUF_TC_PARAM_PASS);
528}
529
530// Note: some versions of GCC will fail with error "function not inlinable" if
531// corecursive functions are both marked with PROTOBUF_ALWAYS_INLINE (Clang
532// accepts this). We can still apply the attribute to one of the two functions,
533// just not both (so we do mark the Repeated variant as always inlined). This
534// also applies to PackedVarint, below.
535template <typename LayoutType, typename TagType>
536const char* TcParser::PackedFixed(PROTOBUF_TC_PARAM_DECL) {
537 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
538 // Try parsing as non-packed repeated:
539 constexpr WireFormatLite::WireType fallback_wt =
540 sizeof(LayoutType) == 4 ? WireFormatLite::WIRETYPE_FIXED32
541 : WireFormatLite::WIRETYPE_FIXED64;
542 InvertPacked<fallback_wt>(data);
543 if (data.coded_tag<TagType>() == 0) {
544 return RepeatedFixed<LayoutType, TagType>(PROTOBUF_TC_PARAM_PASS);
545 } else {
546 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
547 }
548 }
549 ptr += sizeof(TagType);
550 // Since ctx->ReadPackedFixed does not use TailCall<> or Return<>, sync any
551 // pending hasbits now:
552 SyncHasbits(msg, hasbits, table);
553 auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
554 int size = ReadSize(pp: &ptr);
555 // TODO(dlj): add a tailcalling variant of ReadPackedFixed.
556 return ctx->ReadPackedFixed(ptr, size,
557 static_cast<RepeatedField<LayoutType>*>(&field));
558}
559
560const char* TcParser::FastF32P1(PROTOBUF_TC_PARAM_DECL) {
561 PROTOBUF_MUSTTAIL return PackedFixed<uint32_t, uint8_t>(
562 PROTOBUF_TC_PARAM_PASS);
563}
564const char* TcParser::FastF32P2(PROTOBUF_TC_PARAM_DECL) {
565 PROTOBUF_MUSTTAIL return PackedFixed<uint32_t, uint16_t>(
566 PROTOBUF_TC_PARAM_PASS);
567}
568const char* TcParser::FastF64P1(PROTOBUF_TC_PARAM_DECL) {
569 PROTOBUF_MUSTTAIL return PackedFixed<uint64_t, uint8_t>(
570 PROTOBUF_TC_PARAM_PASS);
571}
572const char* TcParser::FastF64P2(PROTOBUF_TC_PARAM_DECL) {
573 PROTOBUF_MUSTTAIL return PackedFixed<uint64_t, uint16_t>(
574 PROTOBUF_TC_PARAM_PASS);
575}
576
577//////////////////////////////////////////////////////////////////////////////
578// Varint fields
579//////////////////////////////////////////////////////////////////////////////
580
581namespace {
582
583// Shift "byte" left by n * 7 bits, filling vacated bits with ones.
584template <int n>
585inline PROTOBUF_ALWAYS_INLINE uint64_t
586shift_left_fill_with_ones(uint64_t byte, uint64_t ones) {
587 return (byte << (n * 7)) | (ones >> (64 - (n * 7)));
588}
589
590// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and
591// put the new value in res. Return whether the result was negative.
592template <int n>
593inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative(
594 uint64_t byte, uint64_t ones, int64_t& res) {
595#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
596 // For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a
597 // substantial improvement from capturing the sign from the condition code
598 // register on x86-64.
599 bool sign_bit;
600 asm("shldq %3, %2, %1"
601 : "=@ccs"(sign_bit), "+r"(byte)
602 : "r"(ones), "i"(n * 7));
603 res = byte;
604 return sign_bit;
605#else
606 // Generic fallback:
607 res = (byte << (n * 7)) | (ones >> (64 - (n * 7)));
608 return static_cast<int64_t>(res) < 0;
609#endif
610}
611
612inline PROTOBUF_ALWAYS_INLINE std::pair<const char*, uint64_t>
613Parse64FallbackPair(const char* p, int64_t res1) {
614 auto ptr = reinterpret_cast<const int8_t*>(p);
615
616 // The algorithm relies on sign extension for each byte to set all high bits
617 // when the varint continues. It also relies on asserting all of the lower
618 // bits for each successive byte read. This allows the result to be aggregated
619 // using a bitwise AND. For example:
620 //
621 // 8 1 64 57 ... 24 17 16 9 8 1
622 // ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111 1111 1111 1aaa aaaa
623 // ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111 11bb bbbb b111 1111
624 // ptr[2] = 1ccc cccc ; res3 = 0000 0000 ... 000c cccc cc11 1111 1111 1111
625 // ---------------------------------------------
626 // res1 & res2 & res3 = 0000 0000 ... 000c cccc ccbb bbbb baaa aaaa
627 //
628 // On x86-64, a shld from a single register filled with enough 1s in the high
629 // bits can accomplish all this in one instruction. It so happens that res1
630 // has 57 high bits of ones, which is enough for the largest shift done.
631 GOOGLE_DCHECK_EQ(res1 >> 7, -1);
632 uint64_t ones = res1; // save the high 1 bits from res1 (input to SHLD)
633 int64_t res2, res3; // accumulated result chunks
634
635 if (!shift_left_fill_with_ones_was_negative<1>(byte: ptr[1], ones, res&: res2))
636 goto done2;
637 if (!shift_left_fill_with_ones_was_negative<2>(byte: ptr[2], ones, res&: res3))
638 goto done3;
639
640 // For the remainder of the chunks, check the sign of the AND result.
641 res1 &= shift_left_fill_with_ones<3>(byte: ptr[3], ones);
642 if (res1 >= 0) goto done4;
643 res2 &= shift_left_fill_with_ones<4>(byte: ptr[4], ones);
644 if (res2 >= 0) goto done5;
645 res3 &= shift_left_fill_with_ones<5>(byte: ptr[5], ones);
646 if (res3 >= 0) goto done6;
647 res1 &= shift_left_fill_with_ones<6>(byte: ptr[6], ones);
648 if (res1 >= 0) goto done7;
649 res2 &= shift_left_fill_with_ones<7>(byte: ptr[7], ones);
650 if (res2 >= 0) goto done8;
651 res3 &= shift_left_fill_with_ones<8>(byte: ptr[8], ones);
652 if (res3 >= 0) goto done9;
653
654 // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In this
655 // case, the continuation bit of ptr[8] already set the top bit of res3
656 // correctly, so all we have to do is check that the expected case is true.
657 if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10;
658
659 // A value of 0, however, represents an over-serialized varint. This case
660 // should not happen, but if does (say, due to a nonconforming serializer),
661 // deassert the continuation bit that came from ptr[8].
662 if (ptr[9] == 0) {
663#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
664 // Use a small instruction since this is an uncommon code path.
665 asm("btcq $63,%0" : "+r"(res3));
666#else
667 res3 ^= static_cast<uint64_t>(1) << 63;
668#endif
669 goto done10;
670 }
671
672 // If the 10th byte/ptr[9] itself has any other value, then it is too big to
673 // fit in 64 bits. If the continue bit is set, it is an unterminated varint.
674 return {nullptr, 0};
675
676done2:
677 return {p + 2, res1 & res2};
678done3:
679 return {p + 3, res1 & res2 & res3};
680done4:
681 return {p + 4, res1 & res2 & res3};
682done5:
683 return {p + 5, res1 & res2 & res3};
684done6:
685 return {p + 6, res1 & res2 & res3};
686done7:
687 return {p + 7, res1 & res2 & res3};
688done8:
689 return {p + 8, res1 & res2 & res3};
690done9:
691 return {p + 9, res1 & res2 & res3};
692done10:
693 return {p + 10, res1 & res2 & res3};
694}
695
696inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
697 uint64_t* value) {
698 int64_t byte = static_cast<int8_t>(*p);
699 if (PROTOBUF_PREDICT_TRUE(byte >= 0)) {
700 *value = byte;
701 return p + 1;
702 } else {
703 auto tmp = Parse64FallbackPair(p, res1: byte);
704 if (PROTOBUF_PREDICT_TRUE(tmp.first)) *value = tmp.second;
705 return tmp.first;
706 }
707}
708
// Converts a decoded varint to the field's storage type. This primary
// template performs a plain static_cast; the <int32_t, true> and
// <int64_t, true> specializations below apply zigzag decoding instead.
template <typename FieldType, bool zigzag = false>
inline FieldType ZigZagDecodeHelper(uint64_t value) {
  const auto converted = static_cast<FieldType>(value);
  return converted;
}
713
714template <>
715inline int32_t ZigZagDecodeHelper<int32_t, true>(uint64_t value) {
716 return WireFormatLite::ZigZagDecode32(n: value);
717}
718
719template <>
720inline int64_t ZigZagDecodeHelper<int64_t, true>(uint64_t value) {
721 return WireFormatLite::ZigZagDecode64(n: value);
722}
723
724bool EnumIsValidAux(int32_t val, uint16_t xform_val,
725 TcParseTableBase::FieldAux aux) {
726 if (xform_val == field_layout::kTvRange) {
727 auto lo = aux.enum_range.start;
728 return lo <= val && val < (lo + aux.enum_range.length);
729 }
730 return aux.enum_validator(val);
731}
732
733} // namespace
734
735template <typename FieldType, typename TagType, bool zigzag>
736PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularVarint(
737 PROTOBUF_TC_PARAM_DECL) {
738 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
739 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
740 }
741 ptr += sizeof(TagType); // Consume tag
742 hasbits |= (uint64_t{1} << data.hasbit_idx());
743
744 // clang isn't smart enough to be able to only conditionally save
745 // registers to the stack, so we turn the integer-greater-than-128
746 // case into a separate routine.
747 if (PROTOBUF_PREDICT_FALSE(static_cast<int8_t>(*ptr) < 0)) {
748 PROTOBUF_MUSTTAIL return SingularVarBigint<FieldType, TagType, zigzag>(
749 PROTOBUF_TC_PARAM_PASS);
750 }
751
752 RefAt<FieldType>(msg, data.offset()) =
753 ZigZagDecodeHelper<FieldType, zigzag>(static_cast<uint8_t>(*ptr++));
754 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
755}
756
757template <typename FieldType, typename TagType, bool zigzag>
758PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
759 PROTOBUF_TC_PARAM_DECL) {
760 // For some reason clang wants to save 5 registers to the stack here,
761 // but we only need four for this code, so save the data we don't need
762 // to the stack. Happily, saving them this way uses regular store
763 // instructions rather than PUSH/POP, which saves time at the cost of greater
764 // code size, but for this heavily-used piece of code, that's fine.
765 struct Spill {
766 uint64_t field_data;
767 ::google::protobuf::MessageLite* msg;
768 const ::google::protobuf::internal::TcParseTableBase* table;
769 uint64_t hasbits;
770 };
771 volatile Spill spill = {data.data, msg, table, hasbits};
772
773 uint64_t tmp;
774 PROTOBUF_ASSUME(static_cast<int8_t>(*ptr) < 0);
775 ptr = ParseVarint(p: ptr, value: &tmp);
776
777 data.data = spill.field_data;
778 msg = spill.msg;
779 table = spill.table;
780 hasbits = spill.hasbits;
781
782 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
783 return Error(PROTOBUF_TC_PARAM_PASS);
784 }
785 RefAt<FieldType>(msg, data.offset()) =
786 ZigZagDecodeHelper<FieldType, zigzag>(tmp);
787 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
788}
789
790const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
791 PROTOBUF_MUSTTAIL return SingularVarint<bool, uint8_t>(
792 PROTOBUF_TC_PARAM_PASS);
793}
794const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
795 PROTOBUF_MUSTTAIL return SingularVarint<bool, uint16_t>(
796 PROTOBUF_TC_PARAM_PASS);
797}
798const char* TcParser::FastV32S1(PROTOBUF_TC_PARAM_DECL) {
799 PROTOBUF_MUSTTAIL return SingularVarint<uint32_t, uint8_t>(
800 PROTOBUF_TC_PARAM_PASS);
801}
// Table entry point: forwards to SingularVarint<uint32_t, uint16_t>.
const char* TcParser::FastV32S2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<uint32_t, uint16_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularVarint<uint64_t, uint8_t>.
const char* TcParser::FastV64S1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<uint64_t, uint8_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularVarint<uint64_t, uint16_t>.
const char* TcParser::FastV64S2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<uint64_t, uint16_t>(
      PROTOBUF_TC_PARAM_PASS);
}
814
// Table entry point: forwards to SingularVarint<int32_t, uint8_t, true>
// (third template argument `true`, unlike the FastV* variants).
const char* TcParser::FastZ32S1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<int32_t, uint8_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularVarint<int32_t, uint16_t, true>
// (third template argument `true`, unlike the FastV* variants).
const char* TcParser::FastZ32S2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<int32_t, uint16_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularVarint<int64_t, uint8_t, true>
// (third template argument `true`, unlike the FastV* variants).
const char* TcParser::FastZ64S1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<int64_t, uint8_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularVarint<int64_t, uint16_t, true>
// (third template argument `true`, unlike the FastV* variants).
const char* TcParser::FastZ64S2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularVarint<int64_t, uint16_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
831
832template <typename FieldType, typename TagType, bool zigzag>
833PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedVarint(
834 PROTOBUF_TC_PARAM_DECL) {
835 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
836 // Try parsing as non-packed repeated:
837 InvertPacked<WireFormatLite::WIRETYPE_VARINT>(data);
838 if (data.coded_tag<TagType>() == 0) {
839 return PackedVarint<FieldType, TagType, zigzag>(PROTOBUF_TC_PARAM_PASS);
840 } else {
841 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
842 }
843 }
844 auto& field = RefAt<RepeatedField<FieldType>>(msg, data.offset());
845 auto expected_tag = UnalignedLoad<TagType>(ptr);
846 do {
847 ptr += sizeof(TagType);
848 uint64_t tmp;
849 ptr = ParseVarint(p: ptr, value: &tmp);
850 if (ptr == nullptr) {
851 return Error(PROTOBUF_TC_PARAM_PASS);
852 }
853 field.Add(ZigZagDecodeHelper<FieldType, zigzag>(tmp));
854 if (!ctx->DataAvailable(ptr)) {
855 break;
856 }
857 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
858 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
859}
860
// Table entry point: forwards to RepeatedVarint<bool, uint8_t>.
const char* TcParser::FastV8R1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<bool, uint8_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<bool, uint16_t>.
const char* TcParser::FastV8R2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<bool, uint16_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<uint32_t, uint8_t>.
const char* TcParser::FastV32R1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<uint32_t, uint8_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<uint32_t, uint16_t>.
const char* TcParser::FastV32R2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<uint32_t, uint16_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<uint64_t, uint8_t>.
const char* TcParser::FastV64R1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<uint64_t, uint8_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<uint64_t, uint16_t>.
const char* TcParser::FastV64R2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<uint64_t, uint16_t>(
      PROTOBUF_TC_PARAM_PASS);
}
885
// Table entry point: forwards to RepeatedVarint<int32_t, uint8_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ32R1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<int32_t, uint8_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<int32_t, uint16_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ32R2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<int32_t, uint16_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<int64_t, uint8_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ64R1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<int64_t, uint8_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedVarint<int64_t, uint16_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ64R2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedVarint<int64_t, uint16_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
902
903// See comment on PackedFixed for why this is not PROTOBUF_ALWAYS_INLINE.
904template <typename FieldType, typename TagType, bool zigzag>
905const char* TcParser::PackedVarint(PROTOBUF_TC_PARAM_DECL) {
906 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
907 InvertPacked<WireFormatLite::WIRETYPE_VARINT>(data);
908 if (data.coded_tag<TagType>() == 0) {
909 return RepeatedVarint<FieldType, TagType, zigzag>(PROTOBUF_TC_PARAM_PASS);
910 } else {
911 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
912 }
913 }
914 ptr += sizeof(TagType);
915 // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any
916 // pending hasbits now:
917 SyncHasbits(msg, hasbits, table);
918 auto* field = &RefAt<RepeatedField<FieldType>>(msg, data.offset());
919 return ctx->ReadPackedVarint(ptr, [field](uint64_t varint) {
920 FieldType val;
921 if (zigzag) {
922 if (sizeof(FieldType) == 8) {
923 val = WireFormatLite::ZigZagDecode64(n: varint);
924 } else {
925 val = WireFormatLite::ZigZagDecode32(n: varint);
926 }
927 } else {
928 val = varint;
929 }
930 field->Add(val);
931 });
932}
933
// Table entry point: forwards to PackedVarint<bool, uint8_t>.
const char* TcParser::FastV8P1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<bool, uint8_t>(PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<bool, uint16_t>.
const char* TcParser::FastV8P2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<bool, uint16_t>(PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<uint32_t, uint8_t>.
const char* TcParser::FastV32P1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<uint32_t, uint8_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<uint32_t, uint16_t>.
const char* TcParser::FastV32P2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<uint32_t, uint16_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<uint64_t, uint8_t>.
const char* TcParser::FastV64P1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<uint64_t, uint8_t>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<uint64_t, uint16_t>.
const char* TcParser::FastV64P2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<uint64_t, uint16_t>(
      PROTOBUF_TC_PARAM_PASS);
}
956
// Table entry point: forwards to PackedVarint<int32_t, uint8_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ32P1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<int32_t, uint8_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<int32_t, uint16_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ32P2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<int32_t, uint16_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<int64_t, uint8_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ64P1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<int64_t, uint8_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to PackedVarint<int64_t, uint16_t, true>
// (zigzag template argument enabled).
const char* TcParser::FastZ64P2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return PackedVarint<int64_t, uint16_t, true>(
      PROTOBUF_TC_PARAM_PASS);
}
973
974//////////////////////////////////////////////////////////////////////////////
975// Enum fields
976//////////////////////////////////////////////////////////////////////////////
977
978PROTOBUF_NOINLINE const char* TcParser::FastUnknownEnumFallback(
979 PROTOBUF_TC_PARAM_DECL) {
980 (void)msg;
981 (void)ctx;
982 (void)hasbits;
983
984 // If we know we want to put this field directly into the unknown field set,
985 // then we can skip the call to MiniParse and directly call table->fallback.
986 // However, we first have to update `data` to contain the decoded tag.
987 uint32_t tag;
988 ptr = ReadTag(p: ptr, out: &tag);
989 if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
990 return Error(PROTOBUF_TC_PARAM_PASS);
991 }
992 data.data = tag;
993 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
994}
995
996template <typename TagType, uint16_t xform_val>
997PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularEnum(
998 PROTOBUF_TC_PARAM_DECL) {
999 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1000 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
1001 }
1002 const char* ptr2 = ptr; // Save for unknown enum case
1003 ptr += sizeof(TagType); // Consume tag
1004 uint64_t tmp;
1005 ptr = ParseVarint(p: ptr, value: &tmp);
1006 if (ptr == nullptr) {
1007 return Error(PROTOBUF_TC_PARAM_PASS);
1008 }
1009 const TcParseTableBase::FieldAux aux = *table->field_aux(idx: data.aux_idx());
1010 if (PROTOBUF_PREDICT_FALSE(
1011 !EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux))) {
1012 ptr = ptr2;
1013 PROTOBUF_MUSTTAIL return FastUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1014 }
1015 hasbits |= (uint64_t{1} << data.hasbit_idx());
1016 RefAt<int32_t>(x: msg, offset: data.offset()) = tmp;
1017 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
1018}
1019
// Table entry point: forwards to SingularEnum with a uint8_t tag and
// field_layout::kTvRange as the validation transform.
const char* TcParser::FastErS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularEnum<uint8_t, field_layout::kTvRange>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularEnum with a uint16_t tag and
// field_layout::kTvRange as the validation transform.
const char* TcParser::FastErS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularEnum<uint16_t, field_layout::kTvRange>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularEnum with a uint8_t tag and
// field_layout::kTvEnum as the validation transform.
const char* TcParser::FastEvS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularEnum<uint8_t, field_layout::kTvEnum>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularEnum with a uint16_t tag and
// field_layout::kTvEnum as the validation transform.
const char* TcParser::FastEvS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularEnum<uint16_t, field_layout::kTvEnum>(
      PROTOBUF_TC_PARAM_PASS);
}
1036
1037template <typename TagType, uint16_t xform_val>
1038PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedEnum(
1039 PROTOBUF_TC_PARAM_DECL) {
1040 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1041 InvertPacked<WireFormatLite::WIRETYPE_VARINT>(data);
1042 if (data.coded_tag<TagType>() == 0) {
1043 // Packed parsing is handled by generated fallback.
1044 PROTOBUF_MUSTTAIL return FastUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1045 } else {
1046 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
1047 }
1048 }
1049 auto& field = RefAt<RepeatedField<int32_t>>(x: msg, offset: data.offset());
1050 auto expected_tag = UnalignedLoad<TagType>(ptr);
1051 const TcParseTableBase::FieldAux aux = *table->field_aux(idx: data.aux_idx());
1052 do {
1053 const char* ptr2 = ptr; // save for unknown enum case
1054 ptr += sizeof(TagType);
1055 uint64_t tmp;
1056 ptr = ParseVarint(p: ptr, value: &tmp);
1057 if (ptr == nullptr) {
1058 return Error(PROTOBUF_TC_PARAM_PASS);
1059 }
1060 if (PROTOBUF_PREDICT_FALSE(
1061 !EnumIsValidAux(static_cast<int32_t>(tmp), xform_val, aux))) {
1062 // We can avoid duplicate work in MiniParse by directly calling
1063 // table->fallback.
1064 ptr = ptr2;
1065 PROTOBUF_MUSTTAIL return FastUnknownEnumFallback(PROTOBUF_TC_PARAM_PASS);
1066 }
1067 field.Add(value: static_cast<int32_t>(tmp));
1068 if (!ctx->DataAvailable(ptr)) {
1069 break;
1070 }
1071 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1072 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
1073}
1074
// Table entry point: forwards to RepeatedEnum with a uint8_t tag and
// field_layout::kTvRange as the validation transform.
const char* TcParser::FastErR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedEnum<uint8_t, field_layout::kTvRange>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedEnum with a uint16_t tag and
// field_layout::kTvRange as the validation transform.
const char* TcParser::FastErR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedEnum<uint16_t, field_layout::kTvRange>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedEnum with a uint8_t tag and
// field_layout::kTvEnum as the validation transform.
const char* TcParser::FastEvR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedEnum<uint8_t, field_layout::kTvEnum>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedEnum with a uint16_t tag and
// field_layout::kTvEnum as the validation transform.
const char* TcParser::FastEvR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedEnum<uint16_t, field_layout::kTvEnum>(
      PROTOBUF_TC_PARAM_PASS);
}
1091
1092//////////////////////////////////////////////////////////////////////////////
1093// String/bytes fields
1094//////////////////////////////////////////////////////////////////////////////
1095
// Defined in wire_format_lite.cc
// Forward declaration of the UTF-8 error logger. In this file it is called
// with operation_str "parsing" and emit_stacktrace false after a string field
// fails IsStructurallyValidUTF8.
void PrintUTF8ErrorLog(StringPiece message_name,
                       StringPiece field_name, const char* operation_str,
                       bool emit_stacktrace);
1100
1101void TcParser::ReportFastUtf8Error(uint32_t decoded_tag,
1102 const TcParseTableBase* table) {
1103 uint32_t field_num = decoded_tag >> 3;
1104 const auto* entry = FindFieldEntry(table, field_num);
1105 PrintUTF8ErrorLog(message_name: MessageName(table), field_name: FieldName(table, field_entry: entry), operation_str: "parsing",
1106 emit_stacktrace: false);
1107}
1108
1109namespace {
1110
1111PROTOBUF_NOINLINE
1112const char* SingularStringParserFallback(ArenaStringPtr* s, const char* ptr,
1113 EpsCopyInputStream* stream) {
1114 int size = ReadSize(pp: &ptr);
1115 if (!ptr) return nullptr;
1116 return stream->ReadString(ptr, size, s: s->MutableNoCopy(arena: nullptr));
1117}
1118
1119} // namespace
1120
1121template <typename TagType, TcParser::Utf8Type utf8>
1122PROTOBUF_ALWAYS_INLINE const char* TcParser::SingularString(
1123 PROTOBUF_TC_PARAM_DECL) {
1124 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1125 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
1126 }
1127 auto saved_tag = UnalignedLoad<TagType>(ptr);
1128 ptr += sizeof(TagType);
1129 hasbits |= (uint64_t{1} << data.hasbit_idx());
1130 auto& field = RefAt<ArenaStringPtr>(x: msg, offset: data.offset());
1131 auto arena = ctx->data().arena;
1132 if (arena) {
1133 ptr = ctx->ReadArenaString(ptr, s: &field, arena);
1134 } else {
1135 ptr = SingularStringParserFallback(s: &field, ptr, stream: ctx);
1136 }
1137 if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS);
1138 switch (utf8) {
1139 case kNoUtf8:
1140#ifdef NDEBUG
1141 case kUtf8ValidateOnly:
1142#endif
1143 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
1144 default:
1145 if (PROTOBUF_PREDICT_TRUE(IsStructurallyValidUTF8(field.Get()))) {
1146 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
1147 }
1148 ReportFastUtf8Error(decoded_tag: FastDecodeTag(saved_tag), table);
1149 return utf8 == kUtf8 ? Error(PROTOBUF_TC_PARAM_PASS)
1150 : ToParseLoop(PROTOBUF_TC_PARAM_PASS);
1151 }
1152}
1153
// Table entry point: forwards to SingularString<uint8_t, kNoUtf8>
// (no UTF-8 validation).
const char* TcParser::FastBS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularString<uint16_t, kNoUtf8>
// (no UTF-8 validation).
const char* TcParser::FastBS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularString<uint8_t, kUtf8ValidateOnly>
// (invalid UTF-8 is reported but not fatal; skipped when NDEBUG is defined).
const char* TcParser::FastSS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularString<uint16_t, kUtf8ValidateOnly>
// (invalid UTF-8 is reported but not fatal; skipped when NDEBUG is defined).
const char* TcParser::FastSS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularString<uint8_t, kUtf8>
// (invalid UTF-8 is reported and fails the parse).
const char* TcParser::FastUS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint8_t, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to SingularString<uint16_t, kUtf8>
// (invalid UTF-8 is reported and fails the parse).
const char* TcParser::FastUS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return SingularString<uint16_t, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
1178
1179// Inlined string variants:
1180
// Inlined-string entry point: no dedicated fast path, always defers to
// MiniParse.
const char* TcParser::FastBiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
// Inlined-string entry point: no dedicated fast path, always defers to
// MiniParse.
const char* TcParser::FastBiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
// Inlined-string entry point: no dedicated fast path, always defers to
// MiniParse.
const char* TcParser::FastSiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
// Inlined-string entry point: no dedicated fast path, always defers to
// MiniParse.
const char* TcParser::FastSiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
// Inlined-string entry point: no dedicated fast path, always defers to
// MiniParse.
const char* TcParser::FastUiS1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
// Inlined-string entry point: no dedicated fast path, always defers to
// MiniParse.
const char* TcParser::FastUiS2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
}
1199
1200template <typename TagType, TcParser::Utf8Type utf8>
1201PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedString(
1202 PROTOBUF_TC_PARAM_DECL) {
1203 if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
1204 PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
1205 }
1206 auto expected_tag = UnalignedLoad<TagType>(ptr);
1207 auto& field = RefAt<RepeatedPtrField<std::string>>(x: msg, offset: data.offset());
1208 do {
1209 ptr += sizeof(TagType);
1210 std::string* str = field.Add();
1211 ptr = InlineGreedyStringParser(s: str, ptr, ctx);
1212 if (ptr == nullptr) {
1213 return Error(PROTOBUF_TC_PARAM_PASS);
1214 }
1215 switch (utf8) {
1216 case kNoUtf8:
1217#ifdef NDEBUG
1218 case kUtf8ValidateOnly:
1219#endif
1220 break;
1221 default:
1222 if (PROTOBUF_PREDICT_TRUE(IsStructurallyValidUTF8(*str))) {
1223 break;
1224 }
1225 ReportFastUtf8Error(decoded_tag: FastDecodeTag(expected_tag), table);
1226 if (utf8 == kUtf8) return Error(PROTOBUF_TC_PARAM_PASS);
1227 break;
1228 }
1229 if (!ctx->DataAvailable(ptr)) break;
1230 } while (UnalignedLoad<TagType>(ptr) == expected_tag);
1231 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
1232}
1233
// Table entry point: forwards to RepeatedString<uint8_t, kNoUtf8>
// (no UTF-8 validation).
const char* TcParser::FastBR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint8_t, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedString<uint16_t, kNoUtf8>
// (no UTF-8 validation).
const char* TcParser::FastBR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint16_t, kNoUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedString<uint8_t, kUtf8ValidateOnly>
// (invalid UTF-8 is reported but not fatal; skipped when NDEBUG is defined).
const char* TcParser::FastSR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint8_t, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedString<uint16_t, kUtf8ValidateOnly>
// (invalid UTF-8 is reported but not fatal; skipped when NDEBUG is defined).
const char* TcParser::FastSR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint16_t, kUtf8ValidateOnly>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedString<uint8_t, kUtf8>
// (invalid UTF-8 is reported and fails the parse).
const char* TcParser::FastUR1(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint8_t, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
// Table entry point: forwards to RepeatedString<uint16_t, kUtf8>
// (invalid UTF-8 is reported and fails the parse).
const char* TcParser::FastUR2(PROTOBUF_TC_PARAM_DECL) {
  PROTOBUF_MUSTTAIL return RepeatedString<uint16_t, kUtf8>(
      PROTOBUF_TC_PARAM_PASS);
}
1258
1259//////////////////////////////////////////////////////////////////////////////
1260// Mini parsing
1261//////////////////////////////////////////////////////////////////////////////
1262
1263namespace {
1264inline void SetHas(const TcParseTableBase* table, const FieldEntry& entry,
1265 MessageLite* msg, uint64_t& hasbits) {
1266 int32_t has_idx = entry.has_idx;
1267 if (has_idx < 32) {
1268 hasbits |= uint64_t{1} << has_idx;
1269 } else {
1270 auto* hasblocks = &TcParser::RefAt<uint32_t>(x: msg, offset: table->has_bits_offset);
1271#if defined(__x86_64__) && defined(__GNUC__)
1272 asm("bts %1, %0\n" : "+m"(*hasblocks) : "r"(has_idx));
1273#else
1274 auto& hasblock = hasblocks[has_idx / 32];
1275 hasblock |= uint32_t{1} << (has_idx % 32);
1276#endif
1277 }
1278}
1279} // namespace
1280
1281// Destroys any existing oneof union member (if necessary). Returns true if the
1282// caller is responsible for initializing the object, or false if the field
1283// already has the desired case.
1284bool TcParser::ChangeOneof(const TcParseTableBase* table,
1285 const TcParseTableBase::FieldEntry& entry,
1286 uint32_t field_num, ParseContext* ctx,
1287 MessageLite* msg) {
1288 // The _oneof_case_ array offset is stored in the first aux entry.
1289 uint32_t oneof_case_offset = table->field_aux(idx: 0u)->offset;
1290 // The _oneof_case_ array index is stored in the has-bit index.
1291 uint32_t* oneof_case =
1292 &TcParser::RefAt<uint32_t>(x: msg, offset: oneof_case_offset) + entry.has_idx;
1293 uint32_t current_case = *oneof_case;
1294 *oneof_case = field_num;
1295
1296 if (current_case == 0) {
1297 // If the member is empty, we don't have anything to clear. Caller is
1298 // responsible for creating a new member object.
1299 return true;
1300 }
1301 if (current_case == field_num) {
1302 // If the member is already active, then it should be merged. We're done.
1303 return false;
1304 }
1305 // Look up the value that is already stored, and dispose of it if necessary.
1306 const FieldEntry* current_entry = FindFieldEntry(table, field_num: current_case);
1307 uint16_t current_kind = current_entry->type_card & field_layout::kFkMask;
1308 uint16_t current_rep = current_entry->type_card & field_layout::kRepMask;
1309 if (current_kind == field_layout::kFkString) {
1310 switch (current_rep) {
1311 case field_layout::kRepAString: {
1312 auto& field = RefAt<ArenaStringPtr>(x: msg, offset: current_entry->offset);
1313 field.Destroy();
1314 break;
1315 }
1316 case field_layout::kRepSString:
1317 case field_layout::kRepIString:
1318 default:
1319 GOOGLE_LOG(DFATAL) << "string rep not handled: "
1320 << (current_rep >> field_layout::kRepShift);
1321 return true;
1322 }
1323 } else if (current_kind == field_layout::kFkMessage) {
1324 switch (current_rep) {
1325 case field_layout::kRepMessage:
1326 case field_layout::kRepGroup:
1327 case field_layout::kRepIWeak: {
1328 auto& field = RefAt<MessageLite*>(x: msg, offset: current_entry->offset);
1329 if (!ctx->data().arena) {
1330 delete field;
1331 }
1332 break;
1333 }
1334 default:
1335 GOOGLE_LOG(DFATAL) << "message rep not handled: "
1336 << (current_rep >> field_layout::kRepShift);
1337 break;
1338 }
1339 }
1340 return true;
1341}
1342
1343const char* TcParser::MpFixed(PROTOBUF_TC_PARAM_DECL) {
1344 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1345 const uint16_t type_card = entry.type_card;
1346 const uint16_t card = type_card & field_layout::kFcMask;
1347
1348 // Check for repeated parsing (wiretype fallback is handled there):
1349 if (card == field_layout::kFcRepeated) {
1350 PROTOBUF_MUSTTAIL return MpRepeatedFixed(PROTOBUF_TC_PARAM_PASS);
1351 }
1352 // Check for mismatched wiretype:
1353 const uint16_t rep = type_card & field_layout::kRepMask;
1354 const uint32_t decoded_wiretype = data.tag() & 7;
1355 if (rep == field_layout::kRep64Bits) {
1356 if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED64) {
1357 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1358 }
1359 } else {
1360 GOOGLE_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
1361 if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED32) {
1362 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1363 }
1364 }
1365 // Set the field present:
1366 if (card == field_layout::kFcOptional) {
1367 SetHas(table, entry, msg, hasbits);
1368 } else if (card == field_layout::kFcOneof) {
1369 ChangeOneof(table, entry, field_num: data.tag() >> 3, ctx, msg);
1370 }
1371 // Copy the value:
1372 if (rep == field_layout::kRep64Bits) {
1373 RefAt<uint64_t>(x: msg, offset: entry.offset) = UnalignedLoad<uint64_t>(p: ptr);
1374 ptr += sizeof(uint64_t);
1375 } else {
1376 RefAt<uint32_t>(x: msg, offset: entry.offset) = UnalignedLoad<uint32_t>(p: ptr);
1377 ptr += sizeof(uint32_t);
1378 }
1379 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
1380}
1381
1382const char* TcParser::MpRepeatedFixed(PROTOBUF_TC_PARAM_DECL) {
1383 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1384 const uint32_t decoded_tag = data.tag();
1385 const uint32_t decoded_wiretype = decoded_tag & 7;
1386
1387 // Check for packed repeated fallback:
1388 if (decoded_wiretype == WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1389 PROTOBUF_MUSTTAIL return MpPackedFixed(PROTOBUF_TC_PARAM_PASS);
1390 }
1391
1392 const uint16_t type_card = entry.type_card;
1393 const uint16_t rep = type_card & field_layout::kRepMask;
1394 if (rep == field_layout::kRep64Bits) {
1395 if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED64) {
1396 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1397 }
1398 auto& field = RefAt<RepeatedField<uint64_t>>(x: msg, offset: entry.offset);
1399 constexpr auto size = sizeof(uint64_t);
1400 const char* ptr2 = ptr;
1401 uint32_t next_tag;
1402 do {
1403 ptr = ptr2;
1404 *field.Add() = UnalignedLoad<uint64_t>(p: ptr);
1405 ptr += size;
1406 if (!ctx->DataAvailable(ptr)) break;
1407 ptr2 = ReadTag(p: ptr, out: &next_tag);
1408 } while (next_tag == decoded_tag);
1409 } else {
1410 GOOGLE_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
1411 if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED32) {
1412 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1413 }
1414 auto& field = RefAt<RepeatedField<uint32_t>>(x: msg, offset: entry.offset);
1415 constexpr auto size = sizeof(uint32_t);
1416 const char* ptr2 = ptr;
1417 uint32_t next_tag;
1418 do {
1419 ptr = ptr2;
1420 *field.Add() = UnalignedLoad<uint32_t>(p: ptr);
1421 ptr += size;
1422 if (!ctx->DataAvailable(ptr)) break;
1423 ptr2 = ReadTag(p: ptr, out: &next_tag);
1424 } while (next_tag == decoded_tag);
1425 }
1426
1427 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
1428}
1429
1430const char* TcParser::MpPackedFixed(PROTOBUF_TC_PARAM_DECL) {
1431 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1432 const uint16_t type_card = entry.type_card;
1433 const uint32_t decoded_wiretype = data.tag() & 7;
1434
1435 // Check for non-packed repeated fallback:
1436 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1437 PROTOBUF_MUSTTAIL return MpRepeatedFixed(PROTOBUF_TC_PARAM_PASS);
1438 }
1439
1440 // Since ctx->ReadPackedFixed does not use TailCall<> or Return<>, sync any
1441 // pending hasbits now:
1442 SyncHasbits(msg, hasbits, table);
1443
1444 int size = ReadSize(pp: &ptr);
1445 uint16_t rep = type_card & field_layout::kRepMask;
1446 if (rep == field_layout::kRep64Bits) {
1447 auto& field = RefAt<RepeatedField<uint64_t>>(x: msg, offset: entry.offset);
1448 ptr = ctx->ReadPackedFixed(ptr, size, out: &field);
1449 } else {
1450 GOOGLE_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep32Bits));
1451 auto& field = RefAt<RepeatedField<uint32_t>>(x: msg, offset: entry.offset);
1452 ptr = ctx->ReadPackedFixed(ptr, size, out: &field);
1453 }
1454
1455 if (ptr == nullptr) {
1456 return Error(PROTOBUF_TC_PARAM_PASS);
1457 }
1458 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
1459}
1460
1461const char* TcParser::MpVarint(PROTOBUF_TC_PARAM_DECL) {
1462 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1463 const uint16_t type_card = entry.type_card;
1464 const uint16_t card = type_card & field_layout::kFcMask;
1465
1466 // Check for repeated parsing:
1467 if (card == field_layout::kFcRepeated) {
1468 PROTOBUF_MUSTTAIL return MpRepeatedVarint(PROTOBUF_TC_PARAM_PASS);
1469 }
1470 // Check for wire type mismatch:
1471 if ((data.tag() & 7) != WireFormatLite::WIRETYPE_VARINT) {
1472 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1473 }
1474 const uint16_t xform_val = type_card & field_layout::kTvMask;
1475 const bool is_zigzag = xform_val == field_layout::kTvZigZag;
1476 const bool is_validated_enum = xform_val & field_layout::kTvEnum;
1477
1478 // Parse the value:
1479 const char* ptr2 = ptr; // save for unknown enum case
1480 uint64_t tmp;
1481 ptr = ParseVarint(p: ptr, value: &tmp);
1482 if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS);
1483
1484 // Transform and/or validate the value
1485 uint16_t rep = type_card & field_layout::kRepMask;
1486 if (rep == field_layout::kRep64Bits) {
1487 if (is_zigzag) {
1488 tmp = WireFormatLite::ZigZagDecode64(n: tmp);
1489 }
1490 } else if (rep == field_layout::kRep32Bits) {
1491 if (is_validated_enum) {
1492 if (!EnumIsValidAux(val: tmp, xform_val, aux: *table->field_aux(entry: &entry))) {
1493 ptr = ptr2;
1494 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1495 }
1496 } else if (is_zigzag) {
1497 tmp = WireFormatLite::ZigZagDecode32(n: static_cast<uint32_t>(tmp));
1498 }
1499 }
1500
1501 // Mark the field as present:
1502 const bool is_oneof = card == field_layout::kFcOneof;
1503 if (card == field_layout::kFcOptional) {
1504 SetHas(table, entry, msg, hasbits);
1505 } else if (is_oneof) {
1506 ChangeOneof(table, entry, field_num: data.tag() >> 3, ctx, msg);
1507 }
1508
1509 if (rep == field_layout::kRep64Bits) {
1510 RefAt<uint64_t>(x: msg, offset: entry.offset) = tmp;
1511 } else if (rep == field_layout::kRep32Bits) {
1512 RefAt<uint32_t>(x: msg, offset: entry.offset) = static_cast<uint32_t>(tmp);
1513 } else {
1514 GOOGLE_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep8Bits));
1515 RefAt<bool>(x: msg, offset: entry.offset) = static_cast<bool>(tmp);
1516 }
1517
1518 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
1519}
1520
1521const char* TcParser::MpRepeatedVarint(PROTOBUF_TC_PARAM_DECL) {
1522 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1523 auto type_card = entry.type_card;
1524 const uint32_t decoded_tag = data.tag();
1525 auto decoded_wiretype = decoded_tag & 7;
1526
1527 // Check for packed repeated fallback:
1528 if (decoded_wiretype == WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1529 PROTOBUF_MUSTTAIL return MpPackedVarint(PROTOBUF_TC_PARAM_PASS);
1530 }
1531 // Check for wire type mismatch:
1532 if (decoded_wiretype != WireFormatLite::WIRETYPE_VARINT) {
1533 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1534 }
1535 uint16_t xform_val = (type_card & field_layout::kTvMask);
1536 const bool is_zigzag = xform_val == field_layout::kTvZigZag;
1537 const bool is_validated_enum = xform_val & field_layout::kTvEnum;
1538
1539 uint16_t rep = type_card & field_layout::kRepMask;
1540 if (rep == field_layout::kRep64Bits) {
1541 auto& field = RefAt<RepeatedField<uint64_t>>(x: msg, offset: entry.offset);
1542 const char* ptr2 = ptr;
1543 uint32_t next_tag;
1544 do {
1545 uint64_t tmp;
1546 ptr = ParseVarint(p: ptr2, value: &tmp);
1547 if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS);
1548 field.Add(value: is_zigzag ? WireFormatLite::ZigZagDecode64(n: tmp) : tmp);
1549 if (!ctx->DataAvailable(ptr)) break;
1550 ptr2 = ReadTag(p: ptr, out: &next_tag);
1551 } while (next_tag == decoded_tag);
1552 } else if (rep == field_layout::kRep32Bits) {
1553 auto& field = RefAt<RepeatedField<uint32_t>>(x: msg, offset: entry.offset);
1554 const char* ptr2 = ptr;
1555 uint32_t next_tag;
1556 do {
1557 uint64_t tmp;
1558 ptr = ParseVarint(p: ptr2, value: &tmp);
1559 if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS);
1560 if (is_validated_enum) {
1561 if (!EnumIsValidAux(val: tmp, xform_val, aux: *table->field_aux(entry: &entry))) {
1562 ptr = ptr2;
1563 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1564 }
1565 } else if (is_zigzag) {
1566 tmp = WireFormatLite::ZigZagDecode32(n: tmp);
1567 }
1568 field.Add(value: tmp);
1569 if (!ctx->DataAvailable(ptr)) break;
1570 ptr2 = ReadTag(p: ptr, out: &next_tag);
1571 } while (next_tag == decoded_tag);
1572 } else {
1573 GOOGLE_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep8Bits));
1574 auto& field = RefAt<RepeatedField<bool>>(x: msg, offset: entry.offset);
1575 const char* ptr2 = ptr;
1576 uint32_t next_tag;
1577 do {
1578 uint64_t tmp;
1579 ptr = ParseVarint(p: ptr2, value: &tmp);
1580 if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS);
1581 field.Add(value: static_cast<bool>(tmp));
1582 if (!ctx->DataAvailable(ptr)) break;
1583 ptr2 = ReadTag(p: ptr, out: &next_tag);
1584 } while (next_tag == decoded_tag);
1585 }
1586
1587 PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
1588}
1589
1590const char* TcParser::MpPackedVarint(PROTOBUF_TC_PARAM_DECL) {
1591 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1592 auto type_card = entry.type_card;
1593 auto decoded_wiretype = data.tag() & 7;
1594
1595 // Check for non-packed repeated fallback:
1596 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1597 PROTOBUF_MUSTTAIL return MpRepeatedVarint(PROTOBUF_TC_PARAM_PASS);
1598 }
1599 uint16_t xform_val = (type_card & field_layout::kTvMask);
1600 const bool is_zigzag = xform_val == field_layout::kTvZigZag;
1601 const bool is_validated_enum = xform_val & field_layout::kTvEnum;
1602 if (is_validated_enum) {
1603 // TODO(b/206890171): handle enums
1604 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1605 }
1606
1607 // Since ctx->ReadPackedFixed does not use TailCall<> or Return<>, sync any
1608 // pending hasbits now:
1609 SyncHasbits(msg, hasbits, table);
1610
1611 uint16_t rep = type_card & field_layout::kRepMask;
1612 if (rep == field_layout::kRep64Bits) {
1613 auto* field = &RefAt<RepeatedField<uint64_t>>(x: msg, offset: entry.offset);
1614 return ctx->ReadPackedVarint(ptr, add: [field, is_zigzag](uint64_t value) {
1615 field->Add(value: is_zigzag ? WireFormatLite::ZigZagDecode64(n: value) : value);
1616 });
1617 } else if (rep == field_layout::kRep32Bits) {
1618 auto* field = &RefAt<RepeatedField<uint32_t>>(x: msg, offset: entry.offset);
1619 return ctx->ReadPackedVarint(ptr, add: [field, is_zigzag](uint64_t value) {
1620 field->Add(value: is_zigzag ? WireFormatLite::ZigZagDecode32(
1621 n: static_cast<uint32_t>(value))
1622 : value);
1623 });
1624 } else {
1625 GOOGLE_DCHECK_EQ(rep, static_cast<uint16_t>(field_layout::kRep8Bits));
1626 auto* field = &RefAt<RepeatedField<bool>>(x: msg, offset: entry.offset);
1627 return ctx->ReadPackedVarint(
1628 ptr, add: [field](uint64_t value) { field->Add(value); });
1629 }
1630
1631 return Error(PROTOBUF_TC_PARAM_PASS);
1632}
1633
1634bool TcParser::MpVerifyUtf8(StringPiece wire_bytes,
1635 const TcParseTableBase* table,
1636 const FieldEntry& entry, uint16_t xform_val) {
1637 if (xform_val == field_layout::kTvUtf8) {
1638 if (!IsStructurallyValidUTF8(str: wire_bytes)) {
1639 PrintUTF8ErrorLog(message_name: MessageName(table), field_name: FieldName(table, field_entry: &entry), operation_str: "parsing",
1640 emit_stacktrace: false);
1641 return false;
1642 }
1643 return true;
1644 }
1645#ifndef NDEBUG
1646 if (xform_val == field_layout::kTvUtf8Debug) {
1647 if (!IsStructurallyValidUTF8(wire_bytes)) {
1648 PrintUTF8ErrorLog(MessageName(table), FieldName(table, &entry), "parsing",
1649 false);
1650 }
1651 }
1652#endif // NDEBUG
1653 return true;
1654}
1655
1656const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) {
1657 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1658 const uint16_t type_card = entry.type_card;
1659 const uint16_t card = type_card & field_layout::kFcMask;
1660 const uint32_t decoded_wiretype = data.tag() & 7;
1661
1662 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1663 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1664 }
1665 if (card == field_layout::kFcRepeated) {
1666 PROTOBUF_MUSTTAIL return MpRepeatedString(PROTOBUF_TC_PARAM_PASS);
1667 }
1668 const uint16_t xform_val = type_card & field_layout::kTvMask;
1669 const uint16_t rep = type_card & field_layout::kRepMask;
1670 if (rep == field_layout::kRepIString) {
1671 // TODO(b/198211897): support InilnedStringField.
1672 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1673 }
1674
1675 // Mark the field as present:
1676 const bool is_oneof = card == field_layout::kFcOneof;
1677 bool need_init = false;
1678 if (card == field_layout::kFcOptional) {
1679 SetHas(table, entry, msg, hasbits);
1680 } else if (is_oneof) {
1681 need_init = ChangeOneof(table, entry, field_num: data.tag() >> 3, ctx, msg);
1682 }
1683
1684 bool is_valid = false;
1685 Arena* arena = ctx->data().arena;
1686 switch (rep) {
1687 case field_layout::kRepAString: {
1688 auto& field = RefAt<ArenaStringPtr>(x: msg, offset: entry.offset);
1689 if (need_init) field.InitDefault();
1690 if (arena) {
1691 ptr = ctx->ReadArenaString(ptr, s: &field, arena);
1692 } else {
1693 std::string* str = field.MutableNoCopy(arena: nullptr);
1694 ptr = InlineGreedyStringParser(s: str, ptr, ctx);
1695 }
1696 if (!ptr) break;
1697 is_valid = MpVerifyUtf8(wire_bytes: field.Get(), table, entry, xform_val);
1698 break;
1699 }
1700
1701 case field_layout::kRepIString: {
1702 break;
1703 }
1704 }
1705
1706 if (ptr == nullptr || !is_valid) {
1707 return Error(PROTOBUF_TC_PARAM_PASS);
1708 }
1709 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
1710}
1711
1712const char* TcParser::MpRepeatedString(PROTOBUF_TC_PARAM_DECL) {
1713 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1714 const uint16_t type_card = entry.type_card;
1715 const uint32_t decoded_tag = data.tag();
1716 const uint32_t decoded_wiretype = decoded_tag & 7;
1717
1718 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1719 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1720 }
1721
1722 const uint16_t rep = type_card & field_layout::kRepMask;
1723 const uint16_t xform_val = type_card & field_layout::kTvMask;
1724 switch (rep) {
1725 case field_layout::kRepSString: {
1726 auto& field = RefAt<RepeatedPtrField<std::string>>(x: msg, offset: entry.offset);
1727 const char* ptr2 = ptr;
1728 uint32_t next_tag;
1729 do {
1730 ptr = ptr2;
1731 std::string* str = field.Add();
1732 ptr = InlineGreedyStringParser(s: str, ptr, ctx);
1733 if (PROTOBUF_PREDICT_FALSE(
1734 ptr == nullptr ||
1735 !MpVerifyUtf8(*str, table, entry, xform_val))) {
1736 return Error(PROTOBUF_TC_PARAM_PASS);
1737 }
1738 if (!ctx->DataAvailable(ptr)) break;
1739 ptr2 = ReadTag(p: ptr, out: &next_tag);
1740 } while (next_tag == decoded_tag);
1741 break;
1742 }
1743
1744#ifndef NDEBUG
1745 default:
1746 GOOGLE_LOG(FATAL) << "Unsupported repeated string rep: " << rep;
1747 break;
1748#endif
1749 }
1750
1751 return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
1752}
1753
1754const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) {
1755 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1756 const uint16_t type_card = entry.type_card;
1757 const uint16_t card = type_card & field_layout::kFcMask;
1758
1759 // Check for repeated parsing:
1760 if (card == field_layout::kFcRepeated) {
1761 PROTOBUF_MUSTTAIL return MpRepeatedMessage(PROTOBUF_TC_PARAM_PASS);
1762 }
1763
1764 const uint32_t decoded_tag = data.tag();
1765 const uint32_t decoded_wiretype = decoded_tag & 7;
1766 const uint16_t rep = type_card & field_layout::kRepMask;
1767 const bool is_group = rep == field_layout::kRepGroup;
1768
1769 // Validate wiretype:
1770 switch (rep) {
1771 case field_layout::kRepMessage:
1772 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1773 goto fallback;
1774 }
1775 break;
1776 case field_layout::kRepGroup:
1777 if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
1778 goto fallback;
1779 }
1780 break;
1781 default: {
1782 fallback:
1783 // Lazy and implicit weak fields are handled by generated code:
1784 // TODO(b/210762816): support these.
1785 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1786 }
1787 }
1788
1789 const bool is_oneof = card == field_layout::kFcOneof;
1790 bool need_init = false;
1791 if (card == field_layout::kFcOptional) {
1792 SetHas(table, entry, msg, hasbits);
1793 } else if (is_oneof) {
1794 need_init = ChangeOneof(table, entry, field_num: data.tag() >> 3, ctx, msg);
1795 }
1796 MessageLite*& field = RefAt<MessageLite*>(x: msg, offset: entry.offset);
1797 if (need_init || field == nullptr) {
1798 const MessageLite* default_instance =
1799 table->field_aux(entry: &entry)->message_default;
1800 field = default_instance->New(arena: ctx->data().arena);
1801 }
1802 SyncHasbits(msg, hasbits, table);
1803 if (is_group) {
1804 return ctx->ParseGroup(msg: field, ptr, tag: decoded_tag);
1805 }
1806 return ctx->ParseMessage(msg: field, ptr);
1807}
1808
1809const char* TcParser::MpRepeatedMessage(PROTOBUF_TC_PARAM_DECL) {
1810 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1811 const uint16_t type_card = entry.type_card;
1812 GOOGLE_DCHECK_EQ(type_card & field_layout::kFcMask,
1813 static_cast<uint16_t>(field_layout::kFcRepeated));
1814 const uint32_t decoded_tag = data.tag();
1815 const uint32_t decoded_wiretype = decoded_tag & 7;
1816 const uint16_t rep = type_card & field_layout::kRepMask;
1817 const bool is_group = rep == field_layout::kRepGroup;
1818
1819 // Validate wiretype:
1820 switch (rep) {
1821 case field_layout::kRepMessage:
1822 if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
1823 goto fallback;
1824 }
1825 break;
1826 case field_layout::kRepGroup:
1827 if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
1828 goto fallback;
1829 }
1830 break;
1831 default: {
1832 fallback:
1833 // Lazy and implicit weak fields are handled by generated code:
1834 // TODO(b/210762816): support these.
1835 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1836 }
1837 }
1838
1839 SyncHasbits(msg, hasbits, table);
1840 const MessageLite* default_instance =
1841 table->field_aux(entry: &entry)->message_default;
1842 auto& field = RefAt<RepeatedPtrFieldBase>(x: msg, offset: entry.offset);
1843 MessageLite* value =
1844 field.Add<GenericTypeHandler<MessageLite>>(prototype: default_instance);
1845 if (is_group) {
1846 return ctx->ParseGroup(msg: value, ptr, tag: decoded_tag);
1847 }
1848 return ctx->ParseMessage(msg: value, ptr);
1849}
1850
1851const char* TcParser::MpMap(PROTOBUF_TC_PARAM_DECL) {
1852 const auto& entry = RefAt<FieldEntry>(x: table, offset: data.entry_offset());
1853 (void)entry;
1854 PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
1855}
1856
1857} // namespace internal
1858} // namespace protobuf
1859} // namespace google
1860