1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34
35#ifndef GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
36#define GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
37
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <map>
#include <string>
#include <vector>

#include <google/protobuf/compiler/scc.h>
#include <google/protobuf/compiler/code_generator.h>
#include <google/protobuf/compiler/cpp/names.h>
#include <google/protobuf/compiler/cpp/options.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/io/printer.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/port.h>
#include <google/protobuf/stubs/strutil.h>

// Must be included last.
#include <google/protobuf/port_def.inc>
56
57namespace google {
58namespace protobuf {
59namespace compiler {
60namespace cpp {
61
// Levels describing whether an arena destructor is needed. The enumerators
// have fixed values 0, 1 and 2.
// NOTE(review): the precise meaning of each level is decided by the generator
// sources that consume this enum, not by this header.
enum class ArenaDtorNeeds { kNone = 0, kOnDemand = 1, kRequired = 2 };
63
64inline std::string ProtobufNamespace(const Options& /* options */) {
65 return "PROTOBUF_NAMESPACE_ID";
66}
67
68inline std::string MacroPrefix(const Options& /* options */) {
69 return "GOOGLE_PROTOBUF";
70}
71
72inline std::string DeprecatedAttribute(const Options& /* options */,
73 const FieldDescriptor* d) {
74 return d->options().deprecated() ? "PROTOBUF_DEPRECATED " : "";
75}
76
77inline std::string DeprecatedAttribute(const Options& /* options */,
78 const EnumValueDescriptor* d) {
79 return d->options().deprecated() ? "PROTOBUF_DEPRECATED_ENUM " : "";
80}
81
// Commonly-used separator comments. Thick is a line of '=', thin is a line
// of '-'. Only declared here; the arrays are defined out of line.
extern const char kThickSeparator[];
extern const char kThinSeparator[];
86
87void SetCommonVars(const Options& options,
88 std::map<std::string, std::string>* variables);
89
90// Variables to access message data from the message scope.
91void SetCommonMessageDataVariables(
92 const Descriptor* descriptor,
93 std::map<std::string, std::string>* variables);
94
95void SetUnknownFieldsVariable(const Descriptor* descriptor,
96 const Options& options,
97 std::map<std::string, std::string>* variables);
98
99bool GetBootstrapBasename(const Options& options, const std::string& basename,
100 std::string* bootstrap_basename);
101bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
102 bool bootstrap_flag, std::string* basename);
103bool IsBootstrapProto(const Options& options, const FileDescriptor* file);
104
105// Name space of the proto file. This namespace is such that the string
106// "<namespace>::some_name" is the correct fully qualified namespace.
107// This means if the package is empty the namespace is "", and otherwise
108// the namespace is "::foo::bar::...::baz" without trailing semi-colons.
109std::string Namespace(const FileDescriptor* d, const Options& options);
110std::string Namespace(const Descriptor* d, const Options& options);
111std::string Namespace(const FieldDescriptor* d, const Options& options);
112std::string Namespace(const EnumDescriptor* d, const Options& options);
113
114// Returns true if it's safe to reset "field" to zero.
115bool CanInitializeByZeroing(const FieldDescriptor* field);
116
117std::string ClassName(const Descriptor* descriptor);
118std::string ClassName(const EnumDescriptor* enum_descriptor);
119
120std::string QualifiedClassName(const Descriptor* d, const Options& options);
121std::string QualifiedClassName(const EnumDescriptor* d, const Options& options);
122
123std::string QualifiedClassName(const Descriptor* d);
124std::string QualifiedClassName(const EnumDescriptor* d);
125
126// DEPRECATED just use ClassName or QualifiedClassName, a boolean is very
127// unreadable at the callsite.
128// Returns the non-nested type name for the given type. If "qualified" is
129// true, prefix the type with the full namespace. For example, if you had:
130// package foo.bar;
131// message Baz { message Moo {} }
132// Then the qualified ClassName for Moo would be:
133// ::foo::bar::Baz_Moo
134// While the non-qualified version would be:
135// Baz_Moo
136inline std::string ClassName(const Descriptor* descriptor, bool qualified) {
137 return qualified ? QualifiedClassName(d: descriptor, options: Options())
138 : ClassName(descriptor);
139}
140
141inline std::string ClassName(const EnumDescriptor* descriptor, bool qualified) {
142 return qualified ? QualifiedClassName(d: descriptor, options: Options())
143 : ClassName(enum_descriptor: descriptor);
144}
145
146// Returns the extension name prefixed with the class name if nested but without
147// the package name.
148std::string ExtensionName(const FieldDescriptor* d);
149
150std::string QualifiedExtensionName(const FieldDescriptor* d,
151 const Options& options);
152std::string QualifiedExtensionName(const FieldDescriptor* d);
153
154// Type name of default instance.
155std::string DefaultInstanceType(const Descriptor* descriptor,
156 const Options& options, bool split = false);
157
158// Non-qualified name of the default_instance of this message.
159std::string DefaultInstanceName(const Descriptor* descriptor,
160 const Options& options, bool split = false);
161
162// Non-qualified name of the default instance pointer. This is used only for
163// implicit weak fields, where we need an extra indirection.
164std::string DefaultInstancePtr(const Descriptor* descriptor,
165 const Options& options, bool split = false);
166
167// Fully qualified name of the default_instance of this message.
168std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
169 const Options& options,
170 bool split = false);
171
172// Fully qualified name of the default instance pointer.
173std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
174 const Options& options,
175 bool split = false);
176
177// DescriptorTable variable name.
178std::string DescriptorTableName(const FileDescriptor* file,
179 const Options& options);
180
181// When declaring symbol externs from another file, this macro will supply the
182// dllexport needed for the target file, if any.
183std::string FileDllExport(const FileDescriptor* file, const Options& options);
184
185// Name of the base class: google::protobuf::Message or google::protobuf::MessageLite.
186std::string SuperClassName(const Descriptor* descriptor,
187 const Options& options);
188
189// Adds an underscore if necessary to prevent conflicting with a keyword.
190std::string ResolveKeyword(const std::string& name);
191
192// Get the (unqualified) name that should be used for this field in C++ code.
193// The name is coerced to lower-case to emulate proto1 behavior. People
194// should be using lowercase-with-underscores style for proto field names
195// anyway, so normally this just returns field->name().
196std::string FieldName(const FieldDescriptor* field);
197
198// Returns the (unqualified) private member name for this field in C++ code.
199std::string FieldMemberName(const FieldDescriptor* field, bool split);
200
201// Returns an estimate of the compiler's alignment for the field. This
202// can't guarantee to be correct because the generated code could be compiled on
203// different systems with different alignment rules. The estimates below assume
204// 64-bit pointers.
205int EstimateAlignmentSize(const FieldDescriptor* field);
206
207// Get the unqualified name that should be used for a field's field
208// number constant.
209std::string FieldConstantName(const FieldDescriptor* field);
210
211// Returns the scope where the field was defined (for extensions, this is
212// different from the message type to which the field applies).
213inline const Descriptor* FieldScope(const FieldDescriptor* field) {
214 return field->is_extension() ? field->extension_scope()
215 : field->containing_type();
216}
217
218// Returns the fully-qualified type name field->message_type(). Usually this
219// is just ClassName(field->message_type(), true);
220std::string FieldMessageTypeName(const FieldDescriptor* field,
221 const Options& options);
222
223// Get the C++ type name for a primitive type (e.g. "double", "::google::protobuf::int32", etc.).
224const char* PrimitiveTypeName(FieldDescriptor::CppType type);
225std::string PrimitiveTypeName(const Options& options,
226 FieldDescriptor::CppType type);
227
228// Get the declared type name in CamelCase format, as is used e.g. for the
229// methods of WireFormat. For example, TYPE_INT32 becomes "Int32".
230const char* DeclaredTypeMethodName(FieldDescriptor::Type type);
231
232// Return the code that evaluates to the number when compiled.
233std::string Int32ToString(int number);
234
235// Get code that evaluates to the field's default value.
236std::string DefaultValue(const Options& options, const FieldDescriptor* field);
237
238// Compatibility function for callers outside proto2.
239std::string DefaultValue(const FieldDescriptor* field);
240
241// Convert a file name into a valid identifier.
242std::string FilenameIdentifier(const std::string& filename);
243
244// For each .proto file generates a unique name. To prevent collisions of
245// symbols in the global namespace
246std::string UniqueName(const std::string& name, const std::string& filename,
247 const Options& options);
248inline std::string UniqueName(const std::string& name, const FileDescriptor* d,
249 const Options& options) {
250 return UniqueName(name, filename: d->name(), options);
251}
252inline std::string UniqueName(const std::string& name, const Descriptor* d,
253 const Options& options) {
254 return UniqueName(name, d: d->file(), options);
255}
256inline std::string UniqueName(const std::string& name, const EnumDescriptor* d,
257 const Options& options) {
258 return UniqueName(name, d: d->file(), options);
259}
260inline std::string UniqueName(const std::string& name,
261 const ServiceDescriptor* d,
262 const Options& options) {
263 return UniqueName(name, d: d->file(), options);
264}
265
266// Versions for call sites that only support the internal runtime (like proto1
267// support).
268inline Options InternalRuntimeOptions() {
269 Options options;
270 options.opensource_runtime = false;
271 return options;
272}
273inline std::string UniqueName(const std::string& name,
274 const std::string& filename) {
275 return UniqueName(name, filename, options: InternalRuntimeOptions());
276}
277inline std::string UniqueName(const std::string& name,
278 const FileDescriptor* d) {
279 return UniqueName(name, filename: d->name(), options: InternalRuntimeOptions());
280}
281inline std::string UniqueName(const std::string& name, const Descriptor* d) {
282 return UniqueName(name, d: d->file(), options: InternalRuntimeOptions());
283}
284inline std::string UniqueName(const std::string& name,
285 const EnumDescriptor* d) {
286 return UniqueName(name, d: d->file(), options: InternalRuntimeOptions());
287}
288inline std::string UniqueName(const std::string& name,
289 const ServiceDescriptor* d) {
290 return UniqueName(name, d: d->file(), options: InternalRuntimeOptions());
291}
292
293// Return the qualified C++ name for a file level symbol.
294std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
295 const std::string& name,
296 const Options& options);
297
298// Escape C++ trigraphs by escaping question marks to \?
299std::string EscapeTrigraphs(const std::string& to_escape);
300
301// Escaped function name to eliminate naming conflict.
302std::string SafeFunctionName(const Descriptor* descriptor,
303 const FieldDescriptor* field,
304 const std::string& prefix);
305
306// Returns true if generated messages have public unknown fields accessors
307inline bool PublicUnknownFieldsAccessors(const Descriptor* message) {
308 return message->file()->syntax() != FileDescriptor::SYNTAX_PROTO3;
309}
310
311// Returns the optimize mode for <file>, respecting <options.enforce_lite>.
312FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
313 const Options& options);
314
315// Determines whether unknown fields will be stored in an UnknownFieldSet or
316// a string.
317inline bool UseUnknownFieldSet(const FileDescriptor* file,
318 const Options& options) {
319 return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
320}
321
322inline bool IsWeak(const FieldDescriptor* field, const Options& options) {
323 if (field->options().weak()) {
324 GOOGLE_CHECK(!options.opensource_runtime);
325 return true;
326 }
327 return false;
328}
329
330bool IsStringInlined(const FieldDescriptor* descriptor, const Options& options);
331
332// For a string field, returns the effective ctype. If the actual ctype is
333// not supported, returns the default of STRING.
334FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
335 const Options& options);
336
337inline bool IsCord(const FieldDescriptor* field, const Options& options) {
338 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
339 EffectiveStringCType(field, options) == FieldOptions::CORD;
340}
341
342inline bool IsString(const FieldDescriptor* field, const Options& options) {
343 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
344 EffectiveStringCType(field, options) == FieldOptions::STRING;
345}
346
347inline bool IsStringPiece(const FieldDescriptor* field,
348 const Options& options) {
349 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
350 EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
351}
352
353class MessageSCCAnalyzer;
354
355// Does the given FileDescriptor use lazy fields?
356bool HasLazyFields(const FileDescriptor* file, const Options& options,
357 MessageSCCAnalyzer* scc_analyzer);
358
359// Is the given field a supported lazy field?
360bool IsLazy(const FieldDescriptor* field, const Options& options,
361 MessageSCCAnalyzer* scc_analyzer);
362
363// Is this an explicit (non-profile driven) lazy field, as denoted by
364// lazy/unverified_lazy in the descriptor?
365inline bool IsExplicitLazy(const FieldDescriptor* field) {
366 return field->options().lazy() || field->options().unverified_lazy();
367}
368
369bool IsEagerlyVerifiedLazy(const FieldDescriptor* field, const Options& options,
370 MessageSCCAnalyzer* scc_analyzer);
371
372bool IsLazilyVerifiedLazy(const FieldDescriptor* field, const Options& options);
373
374// Is the given message being split (go/pdsplit)?
375bool ShouldSplit(const Descriptor* desc, const Options& options);
376
377// Is the given field being split out?
378bool ShouldSplit(const FieldDescriptor* field, const Options& options);
379
380inline bool IsFieldUsed(const FieldDescriptor* /* field */,
381 const Options& /* options */) {
382 return true;
383}
384
385// Returns true if "field" is stripped.
386inline bool IsFieldStripped(const FieldDescriptor* /*field*/,
387 const Options& /*options*/) {
388 return false;
389}
390
391// Does the file contain any definitions that need extension_set.h?
392bool HasExtensionsOrExtendableMessage(const FileDescriptor* file);
393
394// Does the file have any repeated fields, necessitating the file to include
395// repeated_field.h? This does not include repeated extensions, since those are
396// all stored internally in an ExtensionSet, not a separate RepeatedField*.
397bool HasRepeatedFields(const FileDescriptor* file);
398
399// Does the file have any string/bytes fields with ctype=STRING_PIECE? This
400// does not include extensions, since ctype is ignored for extensions.
401bool HasStringPieceFields(const FileDescriptor* file, const Options& options);
402
403// Does the file have any string/bytes fields with ctype=CORD? This does not
404// include extensions, since ctype is ignored for extensions.
405bool HasCordFields(const FileDescriptor* file, const Options& options);
406
407// Does the file have any map fields, necessitating the file to include
408// map_field_inl.h and map.h.
409bool HasMapFields(const FileDescriptor* file);
410
411// Does this file have any enum type definitions?
412bool HasEnumDefinitions(const FileDescriptor* file);
413
414// Does this file have generated parsing, serialization, and other
415// standard methods for which reflection-based fallback implementations exist?
416inline bool HasGeneratedMethods(const FileDescriptor* file,
417 const Options& options) {
418 return GetOptimizeFor(file, options) != FileOptions::CODE_SIZE;
419}
420
421// Do message classes in this file have descriptor and reflection methods?
422inline bool HasDescriptorMethods(const FileDescriptor* file,
423 const Options& options) {
424 return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
425}
426
427// Should we generate generic services for this file?
428inline bool HasGenericServices(const FileDescriptor* file,
429 const Options& options) {
430 return file->service_count() > 0 &&
431 GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME &&
432 file->options().cc_generic_services();
433}
434
435inline bool IsProto2MessageSet(const Descriptor* descriptor,
436 const Options& options) {
437 return !options.opensource_runtime &&
438 options.enforce_mode != EnforceOptimizeMode::kLiteRuntime &&
439 !options.lite_implicit_weak_fields &&
440 descriptor->options().message_set_wire_format() &&
441 descriptor->full_name() == "google.protobuf.bridge.MessageSet";
442}
443
444inline bool IsMapEntryMessage(const Descriptor* descriptor) {
445 return descriptor->options().map_entry();
446}
447
448// Returns true if the field's CPPTYPE is string or message.
449bool IsStringOrMessage(const FieldDescriptor* field);
450
451std::string UnderscoresToCamelCase(const std::string& input,
452 bool cap_next_letter);
453
454inline bool IsProto3(const FileDescriptor* file) {
455 return file->syntax() == FileDescriptor::SYNTAX_PROTO3;
456}
457
458inline bool HasHasbit(const FieldDescriptor* field) {
459 // This predicate includes proto3 message fields only if they have "optional".
460 // Foo submsg1 = 1; // HasHasbit() == false
461 // optional Foo submsg2 = 2; // HasHasbit() == true
462 // This is slightly odd, as adding "optional" to a singular proto3 field does
463 // not change the semantics or API. However whenever any field in a message
464 // has a hasbit, it forces reflection to include hasbit offsets for *all*
465 // fields, even if almost all of them are set to -1 (no hasbit). So to avoid
466 // causing a sudden size regression for ~all proto3 messages, we give proto3
467 // message fields a hasbit only if "optional" is present. If the user is
468 // explicitly writing "optional", it is likely they are writing it on
469 // primitive fields also.
470 return (field->has_optional_keyword() || field->is_required()) &&
471 !field->options().weak();
472}
473
474// Returns true if 'enum' semantics are such that unknown values are preserved
475// in the enum field itself, rather than going to the UnknownFieldSet.
476inline bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
477 return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
478}
479
480inline bool IsCrossFileMessage(const FieldDescriptor* field) {
481 return field->type() == FieldDescriptor::TYPE_MESSAGE &&
482 field->message_type()->file() != field->file();
483}
484
485inline std::string MakeDefaultName(const FieldDescriptor* field) {
486 return StrCat(a: "_i_give_permission_to_break_this_code_default_",
487 b: FieldName(field), c: "_");
488}
489
490// Semantically distinct from MakeDefaultName in that it gives the C++ code
491// referencing a default field from the message scope, rather than just the
492// variable name.
493// For example, declarations of default variables should always use just
494// MakeDefaultName to produce code like:
495// Type _i_give_permission_to_break_this_code_default_field_;
496//
497// Code that references these should use MakeDefaultFieldName, in case the field
498// exists at some nested level like:
499// internal_container_._i_give_permission_to_break_this_code_default_field_;
500inline std::string MakeDefaultFieldName(const FieldDescriptor* field) {
501 return StrCat(a: "Impl_::", b: MakeDefaultName(field));
502}
503
504inline std::string MakeVarintCachedSizeName(const FieldDescriptor* field) {
505 return StrCat(a: "_", b: FieldName(field), c: "_cached_byte_size_");
506}
507
508// Semantically distinct from MakeVarintCachedSizeName in that it gives the C++
509// code referencing the object from the message scope, rather than just the
510// variable name.
511// For example, declarations of default variables should always use just
512// MakeVarintCachedSizeName to produce code like:
513// Type _field_cached_byte_size_;
514//
515// Code that references these variables should use
516// MakeVarintCachedSizeFieldName, in case the field exists at some nested level
517// like:
518// internal_container_._field_cached_byte_size_;
519inline std::string MakeVarintCachedSizeFieldName(const FieldDescriptor* field,
520 bool split) {
521 return StrCat(a: "_impl_.", b: split ? "_split_->" : "", c: "_",
522 d: FieldName(field), e: "_cached_byte_size_");
523}
524
525// Note: A lot of libraries detect Any protos based on Descriptor::full_name()
526// while the two functions below use FileDescriptor::name(). In a sane world the
527// two approaches should be equivalent. But if you are dealing with descriptors
528// from untrusted sources, you might need to match semantics across libraries.
529bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options);
530bool IsAnyMessage(const Descriptor* descriptor, const Options& options);
531
532bool IsWellKnownMessage(const FileDescriptor* descriptor);
533
534inline std::string IncludeGuard(const FileDescriptor* file, bool pb_h,
535 const Options& options) {
536 // If we are generating a .pb.h file and the proto_h option is enabled, then
537 // the .pb.h gets an extra suffix.
538 std::string filename_identifier = FilenameIdentifier(
539 filename: file->name() + (pb_h && options.proto_h ? ".pb.h" : ""));
540
541 if (IsWellKnownMessage(descriptor: file)) {
542 // For well-known messages we need third_party/protobuf and net/proto2 to
543 // have distinct include guards, because some source files include both and
544 // both need to be defined (the third_party copies will be in the
545 // google::protobuf_opensource namespace).
546 return MacroPrefix(options) + "_INCLUDED_" + filename_identifier;
547 } else {
548 // Ideally this case would use distinct include guards for opensource and
549 // google3 protos also. (The behavior of "first #included wins" is not
550 // ideal). But unfortunately some legacy code includes both and depends on
551 // the identical include guards to avoid compile errors.
552 //
553 // We should clean this up so that this case can be removed.
554 return "GOOGLE_PROTOBUF_INCLUDED_" + filename_identifier;
555 }
556}
557
558// Returns the OptimizeMode for this file, furthermore it updates a status
559// bool if has_opt_codesize_extension is non-null. If this status bool is true
560// it means this file contains an extension that itself is defined as
561// optimized_for = CODE_SIZE.
562FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
563 const Options& options,
564 bool* has_opt_codesize_extension);
565inline FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
566 const Options& options) {
567 return GetOptimizeFor(file, options, has_opt_codesize_extension: nullptr);
568}
569inline bool NeedsEagerDescriptorAssignment(const FileDescriptor* file,
570 const Options& options) {
571 bool has_opt_codesize_extension;
572 if (GetOptimizeFor(file, options, has_opt_codesize_extension: &has_opt_codesize_extension) ==
573 FileOptions::CODE_SIZE &&
574 has_opt_codesize_extension) {
575 // If this filedescriptor contains an extension from another file which
576 // is optimized_for = CODE_SIZE. We need to be careful in the ordering so
577 // we eagerly build the descriptors in the dependencies before building
578 // the descriptors of this file.
579 return true;
580 } else {
581 // If we have a generated code based parser we never need eager
582 // initialization of descriptors of our deps.
583 return false;
584 }
585}
586
587// This orders the messages in a .pb.cc as it's outputted by file.cc
588void FlattenMessagesInFile(const FileDescriptor* file,
589 std::vector<const Descriptor*>* result);
590inline std::vector<const Descriptor*> FlattenMessagesInFile(
591 const FileDescriptor* file) {
592 std::vector<const Descriptor*> result;
593 FlattenMessagesInFile(file, result: &result);
594 return result;
595}
596
597template <typename F>
598void ForEachMessage(const Descriptor* descriptor, F&& func) {
599 for (int i = 0; i < descriptor->nested_type_count(); i++)
600 ForEachMessage(descriptor->nested_type(index: i), std::forward<F&&>(func));
601 func(descriptor);
602}
603
604template <typename F>
605void ForEachMessage(const FileDescriptor* descriptor, F&& func) {
606 for (int i = 0; i < descriptor->message_type_count(); i++)
607 ForEachMessage(descriptor->message_type(index: i), std::forward<F&&>(func));
608}
609
610bool HasWeakFields(const Descriptor* desc, const Options& options);
611bool HasWeakFields(const FileDescriptor* desc, const Options& options);
612
613// Returns true if the "required" restriction check should be ignored for the
614// given field.
615inline static bool ShouldIgnoreRequiredFieldCheck(const FieldDescriptor* field,
616 const Options& options) {
617 // Do not check "required" for lazily verified lazy fields.
618 return IsLazilyVerifiedLazy(field, options);
619}
620
// Result of analysing one strongly connected component of messages (see
// MessageSCCAnalyzer::GetSCCAnalysis). Every flag starts out false.
struct MessageAnalysis {
  bool is_recursive = false;
  bool contains_cord = false;
  bool contains_extension = false;
  bool contains_required = false;
  bool contains_weak = false;  // Implicit weak as well.
};
628
629// This class is used in FileGenerator, to ensure linear instead of
630// quadratic performance, if we do this per message we would get O(V*(V+E)).
631// Logically this is just only used in message.cc, but in the header for
632// FileGenerator to help share it.
633class PROTOC_EXPORT MessageSCCAnalyzer {
634 public:
635 explicit MessageSCCAnalyzer(const Options& options) : options_(options) {}
636
637 MessageAnalysis GetSCCAnalysis(const SCC* scc);
638
639 bool HasRequiredFields(const Descriptor* descriptor) {
640 MessageAnalysis result = GetSCCAnalysis(scc: GetSCC(descriptor));
641 return result.contains_required || result.contains_extension;
642 }
643 bool HasWeakField(const Descriptor* descriptor) {
644 MessageAnalysis result = GetSCCAnalysis(scc: GetSCC(descriptor));
645 return result.contains_weak;
646 }
647 const SCC* GetSCC(const Descriptor* descriptor) {
648 return analyzer_.GetSCC(descriptor);
649 }
650
651 private:
652 struct DepsGenerator {
653 std::vector<const Descriptor*> operator()(const Descriptor* desc) const {
654 std::vector<const Descriptor*> deps;
655 for (int i = 0; i < desc->field_count(); i++) {
656 if (desc->field(index: i)->message_type()) {
657 deps.push_back(x: desc->field(index: i)->message_type());
658 }
659 }
660 return deps;
661 }
662 };
663 SCCAnalyzer<DepsGenerator> analyzer_;
664 Options options_;
665 std::map<const SCC*, MessageAnalysis> analysis_cache_;
666};
667
668void ListAllFields(const Descriptor* d,
669 std::vector<const FieldDescriptor*>* fields);
670void ListAllFields(const FileDescriptor* d,
671 std::vector<const FieldDescriptor*>* fields);
672
673template <class T>
674void ForEachField(const Descriptor* d, T&& func) {
675 for (int i = 0; i < d->nested_type_count(); i++) {
676 ForEachField(d->nested_type(index: i), std::forward<T&&>(func));
677 }
678 for (int i = 0; i < d->extension_count(); i++) {
679 func(d->extension(index: i));
680 }
681 for (int i = 0; i < d->field_count(); i++) {
682 func(d->field(index: i));
683 }
684}
685
686template <class T>
687void ForEachField(const FileDescriptor* d, T&& func) {
688 for (int i = 0; i < d->message_type_count(); i++) {
689 ForEachField(d->message_type(index: i), std::forward<T&&>(func));
690 }
691 for (int i = 0; i < d->extension_count(); i++) {
692 func(d->extension(index: i));
693 }
694}
695
696void ListAllTypesForServices(const FileDescriptor* fd,
697 std::vector<const Descriptor*>* types);
698
699// Indicates whether we should use implicit weak fields for this file.
700bool UsingImplicitWeakFields(const FileDescriptor* file,
701 const Options& options);
702
703// Indicates whether to treat this field as implicitly weak.
704bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
705 MessageSCCAnalyzer* scc_analyzer);
706
707inline bool HasSimpleBaseClass(const Descriptor* desc, const Options& options) {
708 if (!HasDescriptorMethods(file: desc->file(), options)) return false;
709 if (desc->extension_range_count() != 0) return false;
710 if (desc->field_count() == 0) return true;
711 // TODO(jorg): Support additional common message types with only one
712 // or two fields
713 return false;
714}
715
716inline bool HasSimpleBaseClasses(const FileDescriptor* file,
717 const Options& options) {
718 bool v = false;
719 ForEachMessage(descriptor: file, func: [&v, &options](const Descriptor* desc) {
720 v |= HasSimpleBaseClass(desc, options);
721 });
722 return v;
723}
724
725inline std::string SimpleBaseClass(const Descriptor* desc,
726 const Options& options) {
727 if (!HasDescriptorMethods(file: desc->file(), options)) return "";
728 if (desc->extension_range_count() != 0) return "";
729 if (desc->field_count() == 0) {
730 return "ZeroFieldsBase";
731 }
732 // TODO(jorg): Support additional common message types with only one
733 // or two fields
734 return "";
735}
736
737// Returns true if this message has a _tracker_ field.
738inline bool HasTracker(const Descriptor* desc, const Options& options) {
739 return options.field_listener_options.inject_field_listener_events &&
740 desc->file()->options().optimize_for() !=
741 google::protobuf::FileOptions::LITE_RUNTIME;
742}
743
744// Returns true if this message needs an Impl_ struct for it's data.
745inline bool HasImplData(const Descriptor* desc, const Options& options) {
746 return !HasSimpleBaseClass(desc, options);
747}
748
749// Formatter is a functor class which acts as a closure around printer and
750// the variable map. It's much like printer->Print except it supports both named
751// variables that are substituted using a key value map and direct arguments. In
752// the format string $1$, $2$, etc... are substituted for the first, second, ...
753// direct argument respectively in the format call, it accepts both strings and
754// integers. The implementation verifies all arguments are used and are "first"
755// used in order of appearance in the argument list. For example,
756//
757// Format("return array[$1$];", 3) -> "return array[3];"
758// Format("array[$2$] = $1$;", "Bla", 3) -> FATAL error (wrong order)
759// Format("array[$1$] = $2$;", 3, "Bla") -> "array[3] = Bla;"
760//
761// The arguments can be used more than once like
762//
763// Format("array[$1$] = $2$; // Index = $1$", 3, "Bla") ->
764// "array[3] = Bla; // Index = 3"
765//
766// If you use more arguments use the following style to help the reader,
767//
768// Format("int $1$() {\n"
769// " array[$2$] = $3$;\n"
770// " return $4$;"
771// "}\n",
772// funname, // 1
773// idx, // 2
774// varname, // 3
775// retval); // 4
776//
777// but consider using named variables. Named variables like $foo$, with some
778// identifier foo, are looked up in the map. One additional feature is that
779// spaces are accepted between the '$' delimiters, $ foo$ will
780// substitute to " bar" if foo stands for "bar", but in case it's empty
781// will substitute to "". Hence, for example,
782//
783// Format(vars, "$dllexport $void fun();") -> "void fun();"
784// "__declspec(export) void fun();"
785//
786// which is convenient to prevent double, leading or trailing spaces.
787class PROTOC_EXPORT Formatter {
788 public:
789 explicit Formatter(io::Printer* printer) : printer_(printer) {}
790 Formatter(io::Printer* printer,
791 const std::map<std::string, std::string>& vars)
792 : printer_(printer), vars_(vars) {}
793
794 template <typename T>
795 void Set(const std::string& key, const T& value) {
796 vars_[key] = ToString(value);
797 }
798
799 void AddMap(const std::map<std::string, std::string>& vars) {
800 for (const auto& keyval : vars) vars_[keyval.first] = keyval.second;
801 }
802
803 template <typename... Args>
804 void operator()(const char* format, const Args&... args) const {
805 printer_->FormatInternal(args: {ToString(args)...}, vars: vars_, format);
806 }
807
808 void Indent() const { printer_->Indent(); }
809 void Outdent() const { printer_->Outdent(); }
810 io::Printer* printer() const { return printer_; }
811
812 class PROTOC_EXPORT ScopedIndenter {
813 public:
814 explicit ScopedIndenter(Formatter* format) : format_(format) {
815 format_->Indent();
816 }
817 ~ScopedIndenter() { format_->Outdent(); }
818
819 private:
820 Formatter* format_;
821 };
822
823 PROTOBUF_NODISCARD ScopedIndenter ScopedIndent() {
824 return ScopedIndenter(this);
825 }
826 template <typename... Args>
827 PROTOBUF_NODISCARD ScopedIndenter ScopedIndent(const char* format,
828 const Args&&... args) {
829 (*this)(format, static_cast<Args&&>(args)...);
830 return ScopedIndenter(this);
831 }
832
833 class PROTOC_EXPORT SaveState {
834 public:
835 explicit SaveState(Formatter* format)
836 : format_(format), vars_(format->vars_) {}
837 ~SaveState() { format_->vars_.swap(x&: vars_); }
838
839 private:
840 Formatter* format_;
841 std::map<std::string, std::string> vars_;
842 };
843
844 private:
845 io::Printer* printer_;
846 std::map<std::string, std::string> vars_;
847
848 // Convenience overloads to accept different types as arguments.
849 static std::string ToString(const std::string& s) { return s; }
850 template <typename I, typename = typename std::enable_if<
851 std::is_integral<I>::value>::type>
852 static std::string ToString(I x) {
853 return StrCat(x);
854 }
855 static std::string ToString(strings::Hex x) { return StrCat(a: x); }
856 static std::string ToString(const FieldDescriptor* d) { return Payload(descriptor: d); }
857 static std::string ToString(const Descriptor* d) { return Payload(descriptor: d); }
858 static std::string ToString(const EnumDescriptor* d) { return Payload(descriptor: d); }
859 static std::string ToString(const EnumValueDescriptor* d) {
860 return Payload(descriptor: d);
861 }
862 static std::string ToString(const OneofDescriptor* d) { return Payload(descriptor: d); }
863
864 template <typename Descriptor>
865 static std::string Payload(const Descriptor* descriptor) {
866 std::vector<int> path;
867 descriptor->GetLocationPath(&path);
868 GeneratedCodeInfo::Annotation annotation;
869 for (int index : path) {
870 annotation.add_path(value: index);
871 }
872 annotation.set_source_file(descriptor->file()->name());
873 return annotation.SerializeAsString();
874 }
875};
876
877template <class T>
878void PrintFieldComment(const Formatter& format, const T* field) {
879 // Print the field's (or oneof's) proto-syntax definition as a comment.
880 // We don't want to print group bodies so we cut off after the first
881 // line.
882 DebugStringOptions options;
883 options.elide_group_body = true;
884 options.elide_oneof_body = true;
885 std::string def = field->DebugStringWithOptions(options);
886 format("// $1$\n", def.substr(pos: 0, n: def.find_first_of(c: '\n')));
887}
888
889class PROTOC_EXPORT NamespaceOpener {
890 public:
891 explicit NamespaceOpener(const Formatter& format)
892 : printer_(format.printer()) {}
893 NamespaceOpener(const std::string& name, const Formatter& format)
894 : NamespaceOpener(format) {
895 ChangeTo(name);
896 }
897 ~NamespaceOpener() { ChangeTo(name: ""); }
898
899 void ChangeTo(const std::string& name) {
900 std::vector<std::string> new_stack_ =
901 Split(full: name, delim: "::", skip_empty: true);
902 size_t len = std::min(name_stack_.size(), new_stack_.size());
903 size_t common_idx = 0;
904 while (common_idx < len) {
905 if (name_stack_[common_idx] != new_stack_[common_idx]) break;
906 common_idx++;
907 }
908 for (auto it = name_stack_.crbegin();
909 it != name_stack_.crend() - common_idx; ++it) {
910 if (*it == "PROTOBUF_NAMESPACE_ID") {
911 printer_->Print(text: "PROTOBUF_NAMESPACE_CLOSE\n");
912 } else {
913 printer_->Print(text: "} // namespace $ns$\n", args: "ns", args: *it);
914 }
915 }
916 name_stack_.swap(x&: new_stack_);
917 for (size_t i = common_idx; i < name_stack_.size(); ++i) {
918 if (name_stack_[i] == "PROTOBUF_NAMESPACE_ID") {
919 printer_->Print(text: "PROTOBUF_NAMESPACE_OPEN\n");
920 } else {
921 printer_->Print(text: "namespace $ns$ {\n", args: "ns", args: name_stack_[i]);
922 }
923 }
924 }
925
926 private:
927 io::Printer* printer_;
928 std::vector<std::string> name_stack_;
929};
930
// How strictly string fields are checked for valid UTF-8.
enum class Utf8CheckMode {
  // Parsing will fail if non UTF-8 data is in string fields.
  kStrict = 0,
  // Only log an error but parsing will succeed.
  kVerify = 1,
  // No UTF-8 check.
  kNone = 2,
};
936
937Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
938 const Options& options);
939
940void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
941 const Options& options, bool for_parse,
942 const char* parameters,
943 const Formatter& format);
944
945void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
946 const Options& options, bool for_parse,
947 const char* parameters,
948 const Formatter& format);
949
950template <typename T>
951struct FieldRangeImpl {
952 struct Iterator {
953 using iterator_category = std::forward_iterator_tag;
954 using value_type = const FieldDescriptor*;
955 using difference_type = int;
956
957 value_type operator*() { return descriptor->field(idx); }
958
959 friend bool operator==(const Iterator& a, const Iterator& b) {
960 GOOGLE_DCHECK(a.descriptor == b.descriptor);
961 return a.idx == b.idx;
962 }
963 friend bool operator!=(const Iterator& a, const Iterator& b) {
964 return !(a == b);
965 }
966
967 Iterator& operator++() {
968 idx++;
969 return *this;
970 }
971
972 int idx;
973 const T* descriptor;
974 };
975
976 Iterator begin() const { return {0, descriptor}; }
977 Iterator end() const { return {descriptor->field_count(), descriptor}; }
978
979 const T* descriptor;
980};
981
982template <typename T>
983FieldRangeImpl<T> FieldRange(const T* desc) {
984 return {desc};
985}
986
987struct OneOfRangeImpl {
988 struct Iterator {
989 using iterator_category = std::forward_iterator_tag;
990 using value_type = const OneofDescriptor*;
991 using difference_type = int;
992
993 value_type operator*() { return descriptor->oneof_decl(index: idx); }
994
995 friend bool operator==(const Iterator& a, const Iterator& b) {
996 GOOGLE_DCHECK(a.descriptor == b.descriptor);
997 return a.idx == b.idx;
998 }
999 friend bool operator!=(const Iterator& a, const Iterator& b) {
1000 return !(a == b);
1001 }
1002
1003 Iterator& operator++() {
1004 idx++;
1005 return *this;
1006 }
1007
1008 int idx;
1009 const Descriptor* descriptor;
1010 };
1011
1012 Iterator begin() const { return {.idx: 0, .descriptor: descriptor}; }
1013 Iterator end() const {
1014 return {.idx: descriptor->real_oneof_decl_count(), .descriptor: descriptor};
1015 }
1016
1017 const Descriptor* descriptor;
1018};
1019
1020inline OneOfRangeImpl OneOfRange(const Descriptor* desc) { return {.descriptor: desc}; }
1021
1022PROTOC_EXPORT std::string StripProto(const std::string& filename);
1023
1024bool EnableMessageOwnedArena(const Descriptor* desc, const Options& options);
1025
1026bool EnableMessageOwnedArenaTrial(const Descriptor* desc,
1027 const Options& options);
1028
1029bool ShouldVerify(const Descriptor* descriptor, const Options& options,
1030 MessageSCCAnalyzer* scc_analyzer);
1031bool ShouldVerify(const FileDescriptor* file, const Options& options,
1032 MessageSCCAnalyzer* scc_analyzer);
1033
// Selects the kind of verify method used for a message.
//
// A "simple" message needs no special verification per field (e.g. message
// field, repeated packed, UTF8 string, etc.). For such a message all
// verification can be done based on the wire type, so one of the predefined
// methods VerifySimple or VerifySimpleAlwaysCheckInt32 is enough.
//
// Any other message needs a "custom" verify method tailored to it, i.e.
// InternalVerify, to pass along which fields need special verification.
enum class VerifySimpleType {
  // Use VerifySimple
  kSimpleInt32Never,
  // Use VerifySimpleAlwaysCheckInt32
  kSimpleInt32Always,
  // Use InternalVerify and check only for int32
  kCustom,
  // Use InternalVerify but never check for int32
  kCustomInt32Never,
  // Use InternalVerify and always check for int32
  kCustomInt32Always,
};
1049
1050// Returns VerifySimpleType if messages can be verified by predefined methods.
1051VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor);
1052
1053bool IsUtf8String(const FieldDescriptor* field);
1054
1055bool HasMessageFieldOrExtension(const Descriptor* desc);
1056
1057} // namespace cpp
1058} // namespace compiler
1059} // namespace protobuf
1060} // namespace google
1061
1062#include <google/protobuf/port_undef.inc>
1063
1064#endif // GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
1065