1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34//
35// Recursive descent FTW.
36
37#include <google/protobuf/compiler/parser.h>
38
39#include <float.h>
40
41#include <cstdint>
42#include <limits>
43#include <unordered_map>
44#include <unordered_set>
45
46#include <google/protobuf/stubs/casts.h>
47#include <google/protobuf/stubs/logging.h>
48#include <google/protobuf/stubs/common.h>
49#include <google/protobuf/stubs/strutil.h>
50#include <google/protobuf/descriptor.h>
51#include <google/protobuf/descriptor.pb.h>
52#include <google/protobuf/io/tokenizer.h>
53#include <google/protobuf/wire_format.h>
54#include <google/protobuf/stubs/map_util.h>
55#include <google/protobuf/stubs/hash.h>
56
57namespace google {
58namespace protobuf {
59namespace compiler {
60
61using internal::WireFormat;
62
63namespace {
64
65typedef std::unordered_map<std::string, FieldDescriptorProto::Type> TypeNameMap;
66
67const TypeNameMap& GetTypeNameTable() {
68 static auto* table = new auto([]() {
69 TypeNameMap result;
70
71 result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
72 result["float"] = FieldDescriptorProto::TYPE_FLOAT;
73 result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
74 result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
75 result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
76 result["bool"] = FieldDescriptorProto::TYPE_BOOL;
77 result["string"] = FieldDescriptorProto::TYPE_STRING;
78 result["group"] = FieldDescriptorProto::TYPE_GROUP;
79
80 result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
81 result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
82 result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
83 result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
84 result["int32"] = FieldDescriptorProto::TYPE_INT32;
85 result["int64"] = FieldDescriptorProto::TYPE_INT64;
86 result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
87 result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
88
89 return result;
90 }());
91 return *table;
92}
93
// Builds the name of the generated map-entry message for a map field: the
// field name is camel-cased (each '_' is dropped and the character after it,
// as well as the very first character, is upper-cased if it is an ASCII
// lower-case letter) and "Entry" is appended.
// e.g. map<KeyType, ValueType> foo_map => FooMapEntry
std::string MapEntryName(const std::string& field_name) {
  static const char kSuffix[] = "Entry";

  std::string entry_name;
  entry_name.reserve(field_name.size() + sizeof(kSuffix));

  bool capitalize = true;
  for (std::string::size_type i = 0; i < field_name.size(); ++i) {
    const char ch = field_name[i];
    if (ch == '_') {
      capitalize = true;
      continue;
    }
    // Note: ctype.h is deliberately not used because it is locale-dependent.
    const bool is_ascii_lower = 'a' <= ch && ch <= 'z';
    entry_name.push_back(capitalize && is_ascii_lower
                             ? static_cast<char>(ch - 'a' + 'A')
                             : ch);
    capitalize = false;
  }

  entry_name += kSuffix;
  return entry_name;
}
119
// True iff |c| is an ASCII upper-case letter ('A' through 'Z').
bool IsUppercase(char c) {
  return !(c < 'A' || c > 'Z');
}
121
// True iff |c| is an ASCII lower-case letter ('a' through 'z').
bool IsLowercase(char c) {
  return !(c < 'a' || c > 'z');
}
123
// True iff |c| is an ASCII decimal digit ('0' through '9').
bool IsNumber(char c) {
  return !(c < '0' || c > '9');
}
125
126bool IsUpperCamelCase(const std::string& name) {
127 if (name.empty()) {
128 return true;
129 }
130 // Name must start with an upper case character.
131 if (!IsUppercase(c: name[0])) {
132 return false;
133 }
134 // Must not contains underscore.
135 for (const char c : name) {
136 if (c == '_') {
137 return false;
138 }
139 }
140 return true;
141}
142
143bool IsUpperUnderscore(const std::string& name) {
144 for (const char c : name) {
145 if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
146 return false;
147 }
148 }
149 return true;
150}
151
152bool IsLowerUnderscore(const std::string& name) {
153 for (const char c : name) {
154 if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
155 return false;
156 }
157 }
158 return true;
159}
160
161bool IsNumberFollowUnderscore(const std::string& name) {
162 for (int i = 1; i < name.length(); i++) {
163 const char c = name[i];
164 if (IsNumber(c) && name[i - 1] == '_') {
165 return true;
166 }
167 }
168 return false;
169}
170
171} // anonymous namespace
172
// Makes code slightly more readable.  "DO(foo);" means "evaluate foo and, if
// it yields false, return false from the enclosing function".  The expansion
// is a single statement, so it is safe as the body of an unbraced if/else.
#define DO(STATEMENT)               \
  do {                              \
    if (!(STATEMENT)) return false; \
  } while (0)
180
181// ===================================================================
182
183Parser::Parser()
184 : input_(nullptr),
185 error_collector_(nullptr),
186 source_location_table_(nullptr),
187 had_errors_(false),
188 require_syntax_identifier_(false),
189 stop_after_syntax_identifier_(false) {
190}
191
192Parser::~Parser() {}
193
194// ===================================================================
195
196inline bool Parser::LookingAt(const char* text) {
197 return input_->current().text == text;
198}
199
200inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
201 return input_->current().type == token_type;
202}
203
204inline bool Parser::AtEnd() { return LookingAtType(token_type: io::Tokenizer::TYPE_END); }
205
206bool Parser::TryConsume(const char* text) {
207 if (LookingAt(text)) {
208 input_->Next();
209 return true;
210 } else {
211 return false;
212 }
213}
214
215bool Parser::Consume(const char* text, const char* error) {
216 if (TryConsume(text)) {
217 return true;
218 } else {
219 AddError(error);
220 return false;
221 }
222}
223
224bool Parser::Consume(const char* text) {
225 std::string error = "Expected \"" + std::string(text) + "\".";
226 return Consume(text, error: error.c_str());
227}
228
229bool Parser::ConsumeIdentifier(std::string* output, const char* error) {
230 if (LookingAtType(token_type: io::Tokenizer::TYPE_IDENTIFIER)) {
231 *output = input_->current().text;
232 input_->Next();
233 return true;
234 } else {
235 AddError(error);
236 return false;
237 }
238}
239
240bool Parser::ConsumeInteger(int* output, const char* error) {
241 if (LookingAtType(token_type: io::Tokenizer::TYPE_INTEGER)) {
242 uint64_t value = 0;
243 if (!io::Tokenizer::ParseInteger(text: input_->current().text,
244 max_value: std::numeric_limits<int32_t>::max(),
245 output: &value)) {
246 AddError(error: "Integer out of range.");
247 // We still return true because we did, in fact, parse an integer.
248 }
249 *output = value;
250 input_->Next();
251 return true;
252 } else {
253 AddError(error);
254 return false;
255 }
256}
257
258bool Parser::ConsumeSignedInteger(int* output, const char* error) {
259 bool is_negative = false;
260 uint64_t max_value = std::numeric_limits<int32_t>::max();
261 if (TryConsume(text: "-")) {
262 is_negative = true;
263 max_value += 1;
264 }
265 uint64_t value = 0;
266 DO(ConsumeInteger64(max_value, &value, error));
267 if (is_negative) value *= -1;
268 *output = value;
269 return true;
270}
271
272bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
273 const char* error) {
274 if (LookingAtType(token_type: io::Tokenizer::TYPE_INTEGER)) {
275 if (!io::Tokenizer::ParseInteger(text: input_->current().text, max_value,
276 output)) {
277 AddError(error: "Integer out of range.");
278 // We still return true because we did, in fact, parse an integer.
279 *output = 0;
280 }
281 input_->Next();
282 return true;
283 } else {
284 AddError(error);
285 return false;
286 }
287}
288
289bool Parser::ConsumeNumber(double* output, const char* error) {
290 if (LookingAtType(token_type: io::Tokenizer::TYPE_FLOAT)) {
291 *output = io::Tokenizer::ParseFloat(text: input_->current().text);
292 input_->Next();
293 return true;
294 } else if (LookingAtType(token_type: io::Tokenizer::TYPE_INTEGER)) {
295 // Also accept integers.
296 uint64_t value = 0;
297 if (!io::Tokenizer::ParseInteger(text: input_->current().text,
298 max_value: std::numeric_limits<uint64_t>::max(),
299 output: &value)) {
300 AddError(error: "Integer out of range.");
301 // We still return true because we did, in fact, parse a number.
302 }
303 *output = value;
304 input_->Next();
305 return true;
306 } else if (LookingAt(text: "inf")) {
307 *output = std::numeric_limits<double>::infinity();
308 input_->Next();
309 return true;
310 } else if (LookingAt(text: "nan")) {
311 *output = std::numeric_limits<double>::quiet_NaN();
312 input_->Next();
313 return true;
314 } else {
315 AddError(error);
316 return false;
317 }
318}
319
320bool Parser::ConsumeString(std::string* output, const char* error) {
321 if (LookingAtType(token_type: io::Tokenizer::TYPE_STRING)) {
322 io::Tokenizer::ParseString(text: input_->current().text, output);
323 input_->Next();
324 // Allow C++ like concatenation of adjacent string tokens.
325 while (LookingAtType(token_type: io::Tokenizer::TYPE_STRING)) {
326 io::Tokenizer::ParseStringAppend(text: input_->current().text, output);
327 input_->Next();
328 }
329 return true;
330 } else {
331 AddError(error);
332 return false;
333 }
334}
335
336bool Parser::TryConsumeEndOfDeclaration(const char* text,
337 const LocationRecorder* location) {
338 if (LookingAt(text)) {
339 std::string leading, trailing;
340 std::vector<std::string> detached;
341 input_->NextWithComments(prev_trailing_comments: &trailing, detached_comments: &detached, next_leading_comments: &leading);
342
343 // Save the leading comments for next time, and recall the leading comments
344 // from last time.
345 leading.swap(s&: upcoming_doc_comments_);
346
347 if (location != nullptr) {
348 upcoming_detached_comments_.swap(x&: detached);
349 location->AttachComments(leading: &leading, trailing: &trailing, detached_comments: &detached);
350 } else if (strcmp(s1: text, s2: "}") == 0) {
351 // If the current location is null and we are finishing the current scope,
352 // drop pending upcoming detached comments.
353 upcoming_detached_comments_.swap(x&: detached);
354 } else {
355 // Otherwise, append the new detached comments to the existing upcoming
356 // detached comments.
357 upcoming_detached_comments_.insert(position: upcoming_detached_comments_.end(),
358 first: detached.begin(), last: detached.end());
359 }
360
361 return true;
362 } else {
363 return false;
364 }
365}
366
367bool Parser::ConsumeEndOfDeclaration(const char* text,
368 const LocationRecorder* location) {
369 if (TryConsumeEndOfDeclaration(text, location)) {
370 return true;
371 } else {
372 AddError(error: "Expected \"" + std::string(text) + "\".");
373 return false;
374 }
375}
376
377// -------------------------------------------------------------------
378
379void Parser::AddError(int line, int column, const std::string& error) {
380 if (error_collector_ != nullptr) {
381 error_collector_->AddError(line, column, message: error);
382 }
383 had_errors_ = true;
384}
385
386void Parser::AddError(const std::string& error) {
387 AddError(line: input_->current().line, column: input_->current().column, error);
388}
389
390void Parser::AddWarning(const std::string& warning) {
391 if (error_collector_ != nullptr) {
392 error_collector_->AddWarning(input_->current().line,
393 input_->current().column, warning);
394 }
395}
396
397// -------------------------------------------------------------------
398
399Parser::LocationRecorder::LocationRecorder(Parser* parser)
400 : parser_(parser),
401 source_code_info_(parser->source_code_info_),
402 location_(parser_->source_code_info_->add_location()) {
403 location_->add_span(value: parser_->input_->current().line);
404 location_->add_span(value: parser_->input_->current().column);
405}
406
407Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
408 Init(parent, source_code_info: parent.source_code_info_);
409}
410
411Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
412 int path1,
413 SourceCodeInfo* source_code_info) {
414 Init(parent, source_code_info);
415 AddPath(path_component: path1);
416}
417
418Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
419 int path1) {
420 Init(parent, source_code_info: parent.source_code_info_);
421 AddPath(path_component: path1);
422}
423
424Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
425 int path1, int path2) {
426 Init(parent, source_code_info: parent.source_code_info_);
427 AddPath(path_component: path1);
428 AddPath(path_component: path2);
429}
430
431void Parser::LocationRecorder::Init(const LocationRecorder& parent,
432 SourceCodeInfo* source_code_info) {
433 parser_ = parent.parser_;
434 source_code_info_ = source_code_info;
435
436 location_ = source_code_info_->add_location();
437 location_->mutable_path()->CopyFrom(other: parent.location_->path());
438
439 location_->add_span(value: parser_->input_->current().line);
440 location_->add_span(value: parser_->input_->current().column);
441}
442
443Parser::LocationRecorder::~LocationRecorder() {
444 if (location_->span_size() <= 2) {
445 EndAt(token: parser_->input_->previous());
446 }
447}
448
449void Parser::LocationRecorder::AddPath(int path_component) {
450 location_->add_path(value: path_component);
451}
452
453void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
454 location_->set_span(index: 0, value: token.line);
455 location_->set_span(index: 1, value: token.column);
456}
457
458void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
459 location_->set_span(index: 0, value: other.location_->span(index: 0));
460 location_->set_span(index: 1, value: other.location_->span(index: 1));
461}
462
463void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
464 if (token.line != location_->span(index: 0)) {
465 location_->add_span(value: token.line);
466 }
467 location_->add_span(value: token.end_column);
468}
469
470void Parser::LocationRecorder::RecordLegacyLocation(
471 const Message* descriptor,
472 DescriptorPool::ErrorCollector::ErrorLocation location) {
473 if (parser_->source_location_table_ != nullptr) {
474 parser_->source_location_table_->Add(
475 descriptor, location, line: location_->span(index: 0), column: location_->span(index: 1));
476 }
477}
478
479void Parser::LocationRecorder::RecordLegacyImportLocation(
480 const Message* descriptor, const std::string& name) {
481 if (parser_->source_location_table_ != nullptr) {
482 parser_->source_location_table_->AddImport(
483 descriptor, name, line: location_->span(index: 0), column: location_->span(index: 1));
484 }
485}
486
487int Parser::LocationRecorder::CurrentPathSize() const {
488 return location_->path_size();
489}
490
491void Parser::LocationRecorder::AttachComments(
492 std::string* leading, std::string* trailing,
493 std::vector<std::string>* detached_comments) const {
494 GOOGLE_CHECK(!location_->has_leading_comments());
495 GOOGLE_CHECK(!location_->has_trailing_comments());
496
497 if (!leading->empty()) {
498 location_->mutable_leading_comments()->swap(s&: *leading);
499 }
500 if (!trailing->empty()) {
501 location_->mutable_trailing_comments()->swap(s&: *trailing);
502 }
503 for (int i = 0; i < detached_comments->size(); ++i) {
504 location_->add_leading_detached_comments()->swap(s&: (*detached_comments)[i]);
505 }
506 detached_comments->clear();
507}
508
509// -------------------------------------------------------------------
510
511void Parser::SkipStatement() {
512 while (true) {
513 if (AtEnd()) {
514 return;
515 } else if (LookingAtType(token_type: io::Tokenizer::TYPE_SYMBOL)) {
516 if (TryConsumeEndOfDeclaration(text: ";", location: nullptr)) {
517 return;
518 } else if (TryConsume(text: "{")) {
519 SkipRestOfBlock();
520 return;
521 } else if (LookingAt(text: "}")) {
522 return;
523 }
524 }
525 input_->Next();
526 }
527}
528
529void Parser::SkipRestOfBlock() {
530 while (true) {
531 if (AtEnd()) {
532 return;
533 } else if (LookingAtType(token_type: io::Tokenizer::TYPE_SYMBOL)) {
534 if (TryConsumeEndOfDeclaration(text: "}", location: nullptr)) {
535 return;
536 } else if (TryConsume(text: "{")) {
537 SkipRestOfBlock();
538 }
539 }
540 input_->Next();
541 }
542}
543
544// ===================================================================
545
546bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
547 bool has_allow_alias = false;
548 bool allow_alias = false;
549
550 for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
551 const UninterpretedOption option = proto->options().uninterpreted_option(index: i);
552 if (option.name_size() > 1) {
553 continue;
554 }
555 if (!option.name(index: 0).is_extension() &&
556 option.name(index: 0).name_part() == "allow_alias") {
557 has_allow_alias = true;
558 if (option.identifier_value() == "true") {
559 allow_alias = true;
560 }
561 break;
562 }
563 }
564
565 if (has_allow_alias && !allow_alias) {
566 std::string error =
567 "\"" + proto->name() +
568 "\" declares 'option allow_alias = false;' which has no effect. "
569 "Please remove the declaration.";
570 // This needlessly clutters declarations with nops.
571 AddError(error);
572 return false;
573 }
574
575 std::set<int> used_values;
576 bool has_duplicates = false;
577 for (int i = 0; i < proto->value_size(); ++i) {
578 const EnumValueDescriptorProto& enum_value = proto->value(index: i);
579 if (used_values.find(x: enum_value.number()) != used_values.end()) {
580 has_duplicates = true;
581 break;
582 } else {
583 used_values.insert(x: enum_value.number());
584 }
585 }
586 if (allow_alias && !has_duplicates) {
587 std::string error =
588 "\"" + proto->name() +
589 "\" declares support for enum aliases but no enum values share field "
590 "numbers. Please remove the unnecessary 'option allow_alias = true;' "
591 "declaration.";
592 // Generate an error if an enum declares support for duplicate enum values
593 // and does not use it protect future authors.
594 AddError(error);
595 return false;
596 }
597
598 // Enforce that enum constants must be UPPER_CASE except in case of
599 // enum_alias.
600 if (!allow_alias) {
601 for (const auto& enum_value : proto->value()) {
602 if (!IsUpperUnderscore(name: enum_value.name())) {
603 AddWarning(
604 warning: "Enum constant should be in UPPER_CASE. Found: " +
605 enum_value.name() +
606 ". See https://developers.google.com/protocol-buffers/docs/style");
607 }
608 }
609 }
610
611 return true;
612}
613
614bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
615 input_ = input;
616 had_errors_ = false;
617 syntax_identifier_.clear();
618
619 // Note that |file| could be NULL at this point if
620 // stop_after_syntax_identifier_ is true. So, we conservatively allocate
621 // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
622 // later on.
623 SourceCodeInfo source_code_info;
624 source_code_info_ = &source_code_info;
625
626 if (LookingAtType(token_type: io::Tokenizer::TYPE_START)) {
627 // Advance to first token.
628 input_->NextWithComments(prev_trailing_comments: nullptr, detached_comments: &upcoming_detached_comments_,
629 next_leading_comments: &upcoming_doc_comments_);
630 }
631
632 {
633 LocationRecorder root_location(this);
634 root_location.RecordLegacyLocation(descriptor: file,
635 location: DescriptorPool::ErrorCollector::OTHER);
636
637 if (require_syntax_identifier_ || LookingAt(text: "syntax")) {
638 if (!ParseSyntaxIdentifier(parent: root_location)) {
639 // Don't attempt to parse the file if we didn't recognize the syntax
640 // identifier.
641 return false;
642 }
643 // Store the syntax into the file.
644 if (file != nullptr) file->set_syntax(syntax_identifier_);
645 } else if (!stop_after_syntax_identifier_) {
646 GOOGLE_LOG(WARNING) << "No syntax specified for the proto file: " << file->name()
647 << ". Please use 'syntax = \"proto2\";' "
648 << "or 'syntax = \"proto3\";' to specify a syntax "
649 << "version. (Defaulted to proto2 syntax.)";
650 syntax_identifier_ = "proto2";
651 }
652
653 if (stop_after_syntax_identifier_) return !had_errors_;
654
655 // Repeatedly parse statements until we reach the end of the file.
656 while (!AtEnd()) {
657 if (!ParseTopLevelStatement(file, root_location)) {
658 // This statement failed to parse. Skip it, but keep looping to parse
659 // other statements.
660 SkipStatement();
661
662 if (LookingAt(text: "}")) {
663 AddError(error: "Unmatched \"}\".");
664 input_->NextWithComments(prev_trailing_comments: nullptr, detached_comments: &upcoming_detached_comments_,
665 next_leading_comments: &upcoming_doc_comments_);
666 }
667 }
668 }
669 }
670
671 input_ = nullptr;
672 source_code_info_ = nullptr;
673 assert(file != nullptr);
674 source_code_info.Swap(other: file->mutable_source_code_info());
675 return !had_errors_;
676}
677
678bool Parser::ParseSyntaxIdentifier(const LocationRecorder& parent) {
679 LocationRecorder syntax_location(parent,
680 FileDescriptorProto::kSyntaxFieldNumber);
681 DO(Consume(
682 "syntax",
683 "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
684 DO(Consume("="));
685 io::Tokenizer::Token syntax_token = input_->current();
686 std::string syntax;
687 DO(ConsumeString(&syntax, "Expected syntax identifier."));
688 DO(ConsumeEndOfDeclaration(";", &syntax_location));
689
690 syntax_identifier_ = syntax;
691
692 if (syntax != "proto2" && syntax != "proto3" &&
693 !stop_after_syntax_identifier_) {
694 AddError(line: syntax_token.line, column: syntax_token.column,
695 error: "Unrecognized syntax identifier \"" + syntax +
696 "\". This parser "
697 "only recognizes \"proto2\" and \"proto3\".");
698 return false;
699 }
700
701 return true;
702}
703
704bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
705 const LocationRecorder& root_location) {
706 if (TryConsumeEndOfDeclaration(text: ";", location: nullptr)) {
707 // empty statement; ignore
708 return true;
709 } else if (LookingAt(text: "message")) {
710 LocationRecorder location(root_location,
711 FileDescriptorProto::kMessageTypeFieldNumber,
712 file->message_type_size());
713 return ParseMessageDefinition(message: file->add_message_type(), message_location: location, containing_file: file);
714 } else if (LookingAt(text: "enum")) {
715 LocationRecorder location(root_location,
716 FileDescriptorProto::kEnumTypeFieldNumber,
717 file->enum_type_size());
718 return ParseEnumDefinition(enum_type: file->add_enum_type(), enum_location: location, containing_file: file);
719 } else if (LookingAt(text: "service")) {
720 LocationRecorder location(root_location,
721 FileDescriptorProto::kServiceFieldNumber,
722 file->service_size());
723 return ParseServiceDefinition(service: file->add_service(), service_location: location, containing_file: file);
724 } else if (LookingAt(text: "extend")) {
725 LocationRecorder location(root_location,
726 FileDescriptorProto::kExtensionFieldNumber);
727 return ParseExtend(
728 extensions: file->mutable_extension(), messages: file->mutable_message_type(), parent_location: root_location,
729 location_field_number_for_nested_type: FileDescriptorProto::kMessageTypeFieldNumber, extend_location: location, containing_file: file);
730 } else if (LookingAt(text: "import")) {
731 return ParseImport(dependency: file->mutable_dependency(),
732 public_dependency: file->mutable_public_dependency(),
733 weak_dependency: file->mutable_weak_dependency(), root_location, containing_file: file);
734 } else if (LookingAt(text: "package")) {
735 return ParsePackage(file, root_location, containing_file: file);
736 } else if (LookingAt(text: "option")) {
737 LocationRecorder location(root_location,
738 FileDescriptorProto::kOptionsFieldNumber);
739 return ParseOption(options: file->mutable_options(), options_location: location, containing_file: file,
740 style: OPTION_STATEMENT);
741 } else {
742 AddError(error: "Expected top-level statement (e.g. \"message\").");
743 return false;
744 }
745}
746
747// -------------------------------------------------------------------
748// Messages
749
750bool Parser::ParseMessageDefinition(
751 DescriptorProto* message, const LocationRecorder& message_location,
752 const FileDescriptorProto* containing_file) {
753 DO(Consume("message"));
754 {
755 LocationRecorder location(message_location,
756 DescriptorProto::kNameFieldNumber);
757 location.RecordLegacyLocation(descriptor: message,
758 location: DescriptorPool::ErrorCollector::NAME);
759 DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
760 if (!IsUpperCamelCase(name: message->name())) {
761 AddWarning(
762 warning: "Message name should be in UpperCamelCase. Found: " +
763 message->name() +
764 ". See https://developers.google.com/protocol-buffers/docs/style");
765 }
766 }
767 DO(ParseMessageBlock(message, message_location, containing_file));
768
769 if (syntax_identifier_ == "proto3") {
770 // Add synthetic one-field oneofs for optional fields, except messages which
771 // already have presence in proto3.
772 //
773 // We have to make sure the oneof names don't conflict with any other
774 // field or oneof.
775 std::unordered_set<std::string> names;
776 for (const auto& field : message->field()) {
777 names.insert(x: field.name());
778 }
779 for (const auto& oneof : message->oneof_decl()) {
780 names.insert(x: oneof.name());
781 }
782
783 for (auto& field : *message->mutable_field()) {
784 if (field.proto3_optional()) {
785 std::string oneof_name = field.name();
786
787 // Prepend 'XXXXX_' until we are no longer conflicting.
788 // Avoid prepending a double-underscore because such names are
789 // reserved in C++.
790 if (oneof_name.empty() || oneof_name[0] != '_') {
791 oneof_name = '_' + oneof_name;
792 }
793 while (names.count(x: oneof_name) > 0) {
794 oneof_name = 'X' + oneof_name;
795 }
796
797 names.insert(x: oneof_name);
798 field.set_oneof_index(message->oneof_decl_size());
799 OneofDescriptorProto* oneof = message->add_oneof_decl();
800 oneof->set_name(oneof_name);
801 }
802 }
803 }
804
805 return true;
806}
807
808namespace {
809
810const int kMaxRangeSentinel = -1;
811
812bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
813 const MessageOptions& options = message.options();
814 for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
815 const UninterpretedOption& uninterpreted = options.uninterpreted_option(index: i);
816 if (uninterpreted.name_size() == 1 &&
817 uninterpreted.name(index: 0).name_part() == "message_set_wire_format" &&
818 uninterpreted.identifier_value() == "true") {
819 return true;
820 }
821 }
822 return false;
823}
824
825// Modifies any extension ranges that specified 'max' as the end of the
826// extension range, and sets them to the type-specific maximum. The actual max
827// tag number can only be determined after all options have been parsed.
828void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
829 const bool is_message_set = IsMessageSetWireFormatMessage(message: *message);
830 const int max_extension_number = is_message_set
831 ? std::numeric_limits<int32_t>::max()
832 : FieldDescriptor::kMaxNumber + 1;
833 for (int i = 0; i < message->extension_range_size(); ++i) {
834 if (message->extension_range(index: i).end() == kMaxRangeSentinel) {
835 message->mutable_extension_range(index: i)->set_end(max_extension_number);
836 }
837 }
838}
839
840// Modifies any reserved ranges that specified 'max' as the end of the
841// reserved range, and sets them to the type-specific maximum. The actual max
842// tag number can only be determined after all options have been parsed.
843void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
844 const bool is_message_set = IsMessageSetWireFormatMessage(message: *message);
845 const int max_field_number = is_message_set
846 ? std::numeric_limits<int32_t>::max()
847 : FieldDescriptor::kMaxNumber + 1;
848 for (int i = 0; i < message->reserved_range_size(); ++i) {
849 if (message->reserved_range(index: i).end() == kMaxRangeSentinel) {
850 message->mutable_reserved_range(index: i)->set_end(max_field_number);
851 }
852 }
853}
854
855} // namespace
856
857bool Parser::ParseMessageBlock(DescriptorProto* message,
858 const LocationRecorder& message_location,
859 const FileDescriptorProto* containing_file) {
860 DO(ConsumeEndOfDeclaration("{", &message_location));
861
862 while (!TryConsumeEndOfDeclaration(text: "}", location: nullptr)) {
863 if (AtEnd()) {
864 AddError(error: "Reached end of input in message definition (missing '}').");
865 return false;
866 }
867
868 if (!ParseMessageStatement(message, message_location, containing_file)) {
869 // This statement failed to parse. Skip it, but keep looping to parse
870 // other statements.
871 SkipStatement();
872 }
873 }
874
875 if (message->extension_range_size() > 0) {
876 AdjustExtensionRangesWithMaxEndNumber(message);
877 }
878 if (message->reserved_range_size() > 0) {
879 AdjustReservedRangesWithMaxEndNumber(message);
880 }
881 return true;
882}
883
884bool Parser::ParseMessageStatement(DescriptorProto* message,
885 const LocationRecorder& message_location,
886 const FileDescriptorProto* containing_file) {
887 if (TryConsumeEndOfDeclaration(text: ";", location: nullptr)) {
888 // empty statement; ignore
889 return true;
890 } else if (LookingAt(text: "message")) {
891 LocationRecorder location(message_location,
892 DescriptorProto::kNestedTypeFieldNumber,
893 message->nested_type_size());
894 return ParseMessageDefinition(message: message->add_nested_type(), message_location: location,
895 containing_file);
896 } else if (LookingAt(text: "enum")) {
897 LocationRecorder location(message_location,
898 DescriptorProto::kEnumTypeFieldNumber,
899 message->enum_type_size());
900 return ParseEnumDefinition(enum_type: message->add_enum_type(), enum_location: location,
901 containing_file);
902 } else if (LookingAt(text: "extensions")) {
903 LocationRecorder location(message_location,
904 DescriptorProto::kExtensionRangeFieldNumber);
905 return ParseExtensions(message, extensions_location: location, containing_file);
906 } else if (LookingAt(text: "reserved")) {
907 return ParseReserved(message, message_location);
908 } else if (LookingAt(text: "extend")) {
909 LocationRecorder location(message_location,
910 DescriptorProto::kExtensionFieldNumber);
911 return ParseExtend(extensions: message->mutable_extension(),
912 messages: message->mutable_nested_type(), parent_location: message_location,
913 location_field_number_for_nested_type: DescriptorProto::kNestedTypeFieldNumber, extend_location: location,
914 containing_file);
915 } else if (LookingAt(text: "option")) {
916 LocationRecorder location(message_location,
917 DescriptorProto::kOptionsFieldNumber);
918 return ParseOption(options: message->mutable_options(), options_location: location, containing_file,
919 style: OPTION_STATEMENT);
920 } else if (LookingAt(text: "oneof")) {
921 int oneof_index = message->oneof_decl_size();
922 LocationRecorder oneof_location(
923 message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
924
925 return ParseOneof(oneof_decl: message->add_oneof_decl(), containing_type: message, oneof_index,
926 oneof_location, containing_type_location: message_location, containing_file);
927 } else {
928 LocationRecorder location(message_location,
929 DescriptorProto::kFieldFieldNumber,
930 message->field_size());
931 return ParseMessageField(
932 field: message->add_field(), messages: message->mutable_nested_type(), parent_location: message_location,
933 location_field_number_for_nested_type: DescriptorProto::kNestedTypeFieldNumber, field_location: location, containing_file);
934 }
935}
936
937bool Parser::ParseMessageField(FieldDescriptorProto* field,
938 RepeatedPtrField<DescriptorProto>* messages,
939 const LocationRecorder& parent_location,
940 int location_field_number_for_nested_type,
941 const LocationRecorder& field_location,
942 const FileDescriptorProto* containing_file) {
943 {
944 FieldDescriptorProto::Label label;
945 if (ParseLabel(label: &label, field_location)) {
946 field->set_label(label);
947 if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
948 syntax_identifier_ == "proto3") {
949 field->set_proto3_optional(true);
950 }
951 }
952 }
953
954 return ParseMessageFieldNoLabel(field, messages, parent_location,
955 location_field_number_for_nested_type,
956 field_location, containing_file);
957}
958
959bool Parser::ParseMessageFieldNoLabel(
960 FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
961 const LocationRecorder& parent_location,
962 int location_field_number_for_nested_type,
963 const LocationRecorder& field_location,
964 const FileDescriptorProto* containing_file) {
965 MapField map_field;
966 // Parse type.
967 {
968 LocationRecorder location(field_location); // add path later
969 location.RecordLegacyLocation(descriptor: field, location: DescriptorPool::ErrorCollector::TYPE);
970
971 bool type_parsed = false;
972 FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
973 std::string type_name;
974
975 // Special case map field. We only treat the field as a map field if the
976 // field type name starts with the word "map" with a following "<".
977 if (TryConsume(text: "map")) {
978 if (LookingAt(text: "<")) {
979 map_field.is_map_field = true;
980 DO(ParseMapType(&map_field, field, location));
981 } else {
982 // False positive
983 type_parsed = true;
984 type_name = "map";
985 }
986 }
987 if (!map_field.is_map_field) {
988 // Handle the case where no explicit label is given for a non-map field.
989 if (!field->has_label() && DefaultToOptionalFields()) {
990 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
991 }
992 if (!field->has_label()) {
993 AddError(error: "Expected \"required\", \"optional\", or \"repeated\".");
994 // We can actually reasonably recover here by just assuming the user
995 // forgot the label altogether.
996 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
997 }
998
999 // Handle the case where the actual type is a message or enum named
1000 // "map", which we already consumed in the code above.
1001 if (!type_parsed) {
1002 DO(ParseType(&type, &type_name));
1003 }
1004 if (type_name.empty()) {
1005 location.AddPath(path_component: FieldDescriptorProto::kTypeFieldNumber);
1006 field->set_type(type);
1007 } else {
1008 location.AddPath(path_component: FieldDescriptorProto::kTypeNameFieldNumber);
1009 field->set_type_name(type_name);
1010 }
1011 }
1012 }
1013
1014 // Parse name and '='.
1015 io::Tokenizer::Token name_token = input_->current();
1016 {
1017 LocationRecorder location(field_location,
1018 FieldDescriptorProto::kNameFieldNumber);
1019 location.RecordLegacyLocation(descriptor: field, location: DescriptorPool::ErrorCollector::NAME);
1020 DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
1021
1022 if (!IsLowerUnderscore(name: field->name())) {
1023 AddWarning(
1024 warning: "Field name should be lowercase. Found: " + field->name() +
1025 ". See: https://developers.google.com/protocol-buffers/docs/style");
1026 }
1027 if (IsNumberFollowUnderscore(name: field->name())) {
1028 AddWarning(
1029 warning: "Number should not come right after an underscore. Found: " +
1030 field->name() +
1031 ". See: https://developers.google.com/protocol-buffers/docs/style");
1032 }
1033 }
1034 DO(Consume("=", "Missing field number."));
1035
1036 // Parse field number.
1037 {
1038 LocationRecorder location(field_location,
1039 FieldDescriptorProto::kNumberFieldNumber);
1040 location.RecordLegacyLocation(descriptor: field,
1041 location: DescriptorPool::ErrorCollector::NUMBER);
1042 int number;
1043 DO(ConsumeInteger(&number, "Expected field number."));
1044 field->set_number(number);
1045 }
1046
1047 // Parse options.
1048 DO(ParseFieldOptions(field, field_location, containing_file));
1049
1050 // Deal with groups.
1051 if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
1052 // Awkward: Since a group declares both a message type and a field, we
1053 // have to create overlapping locations.
1054 LocationRecorder group_location(parent_location);
1055 group_location.StartAt(other: field_location);
1056 group_location.AddPath(path_component: location_field_number_for_nested_type);
1057 group_location.AddPath(path_component: messages->size());
1058
1059 DescriptorProto* group = messages->Add();
1060 group->set_name(field->name());
1061
1062 // Record name location to match the field name's location.
1063 {
1064 LocationRecorder location(group_location,
1065 DescriptorProto::kNameFieldNumber);
1066 location.StartAt(token: name_token);
1067 location.EndAt(token: name_token);
1068 location.RecordLegacyLocation(descriptor: group,
1069 location: DescriptorPool::ErrorCollector::NAME);
1070 }
1071
1072 // The field's type_name also comes from the name. Confusing!
1073 {
1074 LocationRecorder location(field_location,
1075 FieldDescriptorProto::kTypeNameFieldNumber);
1076 location.StartAt(token: name_token);
1077 location.EndAt(token: name_token);
1078 }
1079
1080 // As a hack for backwards-compatibility, we force the group name to start
1081 // with a capital letter and lower-case the field name. New code should
1082 // not use groups; it should use nested messages.
1083 if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
1084 AddError(line: name_token.line, column: name_token.column,
1085 error: "Group names must start with a capital letter.");
1086 }
1087 LowerString(s: field->mutable_name());
1088
1089 field->set_type_name(group->name());
1090 if (LookingAt(text: "{")) {
1091 DO(ParseMessageBlock(group, group_location, containing_file));
1092 } else {
1093 AddError(error: "Missing group body.");
1094 return false;
1095 }
1096 } else {
1097 DO(ConsumeEndOfDeclaration(";", &field_location));
1098 }
1099
1100 // Create a map entry type if this is a map field.
1101 if (map_field.is_map_field) {
1102 GenerateMapEntry(map_field, field, messages);
1103 }
1104
1105 return true;
1106}
1107
1108bool Parser::ParseMapType(MapField* map_field, FieldDescriptorProto* field,
1109 LocationRecorder& type_name_location) {
1110 if (field->has_oneof_index()) {
1111 AddError(error: "Map fields are not allowed in oneofs.");
1112 return false;
1113 }
1114 if (field->has_label()) {
1115 AddError(
1116 error: "Field labels (required/optional/repeated) are not allowed on "
1117 "map fields.");
1118 return false;
1119 }
1120 if (field->has_extendee()) {
1121 AddError(error: "Map fields are not allowed to be extensions.");
1122 return false;
1123 }
1124 field->set_label(FieldDescriptorProto::LABEL_REPEATED);
1125 DO(Consume("<"));
1126 DO(ParseType(&map_field->key_type, &map_field->key_type_name));
1127 DO(Consume(","));
1128 DO(ParseType(&map_field->value_type, &map_field->value_type_name));
1129 DO(Consume(">"));
1130 // Defer setting of the type name of the map field until the
1131 // field name is parsed. Add the source location though.
1132 type_name_location.AddPath(path_component: FieldDescriptorProto::kTypeNameFieldNumber);
1133 return true;
1134}
1135
1136void Parser::GenerateMapEntry(const MapField& map_field,
1137 FieldDescriptorProto* field,
1138 RepeatedPtrField<DescriptorProto>* messages) {
1139 DescriptorProto* entry = messages->Add();
1140 std::string entry_name = MapEntryName(field_name: field->name());
1141 field->set_type_name(entry_name);
1142 entry->set_name(entry_name);
1143 entry->mutable_options()->set_map_entry(true);
1144 FieldDescriptorProto* key_field = entry->add_field();
1145 key_field->set_name("key");
1146 key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1147 key_field->set_number(1);
1148 if (map_field.key_type_name.empty()) {
1149 key_field->set_type(map_field.key_type);
1150 } else {
1151 key_field->set_type_name(map_field.key_type_name);
1152 }
1153 FieldDescriptorProto* value_field = entry->add_field();
1154 value_field->set_name("value");
1155 value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1156 value_field->set_number(2);
1157 if (map_field.value_type_name.empty()) {
1158 value_field->set_type(map_field.value_type);
1159 } else {
1160 value_field->set_type_name(map_field.value_type_name);
1161 }
1162 // Propagate the "enforce_utf8" option to key and value fields if they
1163 // are strings. This helps simplify the implementation of code generators
1164 // and also reflection-based parsing code.
1165 //
1166 // The following definition:
1167 // message Foo {
1168 // map<string, string> value = 1 [enforce_utf8 = false];
1169 // }
1170 // will be interpreted as:
1171 // message Foo {
1172 // message ValueEntry {
1173 // option map_entry = true;
1174 // string key = 1 [enforce_utf8 = false];
1175 // string value = 2 [enforce_utf8 = false];
1176 // }
1177 // repeated ValueEntry value = 1 [enforce_utf8 = false];
1178 // }
1179 //
1180 // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
1181 // from protocol compiler.
1182 for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1183 const UninterpretedOption& option =
1184 field->options().uninterpreted_option(index: i);
1185 if (option.name_size() == 1 &&
1186 option.name(index: 0).name_part() == "enforce_utf8" &&
1187 !option.name(index: 0).is_extension()) {
1188 if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1189 key_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1190 from: option);
1191 }
1192 if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1193 value_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1194 from: option);
1195 }
1196 }
1197 }
1198}
1199
1200bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1201 const LocationRecorder& field_location,
1202 const FileDescriptorProto* containing_file) {
1203 if (!LookingAt(text: "[")) return true;
1204
1205 LocationRecorder location(field_location,
1206 FieldDescriptorProto::kOptionsFieldNumber);
1207
1208 DO(Consume("["));
1209
1210 // Parse field options.
1211 do {
1212 if (LookingAt(text: "default")) {
1213 // We intentionally pass field_location rather than location here, since
1214 // the default value is not actually an option.
1215 DO(ParseDefaultAssignment(field, field_location, containing_file));
1216 } else if (LookingAt(text: "json_name")) {
1217 // Like default value, this "json_name" is not an actual option.
1218 DO(ParseJsonName(field, field_location, containing_file));
1219 } else {
1220 DO(ParseOption(field->mutable_options(), location, containing_file,
1221 OPTION_ASSIGNMENT));
1222 }
1223 } while (TryConsume(text: ","));
1224
1225 DO(Consume("]"));
1226 return true;
1227}
1228
1229bool Parser::ParseDefaultAssignment(
1230 FieldDescriptorProto* field, const LocationRecorder& field_location,
1231 const FileDescriptorProto* containing_file) {
1232 if (field->has_default_value()) {
1233 AddError(error: "Already set option \"default\".");
1234 field->clear_default_value();
1235 }
1236
1237 DO(Consume("default"));
1238 DO(Consume("="));
1239
1240 LocationRecorder location(field_location,
1241 FieldDescriptorProto::kDefaultValueFieldNumber);
1242 location.RecordLegacyLocation(descriptor: field,
1243 location: DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1244 std::string* default_value = field->mutable_default_value();
1245
1246 if (!field->has_type()) {
1247 // The field has a type name, but we don't know if it is a message or an
1248 // enum yet. (If it were a primitive type, |field| would have a type set
1249 // already.) In this case, simply take the current string as the default
1250 // value; we will catch the error later if it is not a valid enum value.
1251 // (N.B. that we do not check whether the current token is an identifier:
1252 // doing so throws strange errors when the user mistypes a primitive
1253 // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1254 // = 42]". In such a case the fundamental error is really that "int" is not
1255 // a type, not that "42" is not an identifier. See b/12533582.)
1256 *default_value = input_->current().text;
1257 input_->Next();
1258 return true;
1259 }
1260
1261 switch (field->type()) {
1262 case FieldDescriptorProto::TYPE_INT32:
1263 case FieldDescriptorProto::TYPE_INT64:
1264 case FieldDescriptorProto::TYPE_SINT32:
1265 case FieldDescriptorProto::TYPE_SINT64:
1266 case FieldDescriptorProto::TYPE_SFIXED32:
1267 case FieldDescriptorProto::TYPE_SFIXED64: {
1268 uint64_t max_value = std::numeric_limits<int64_t>::max();
1269 if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1270 field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1271 field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1272 max_value = std::numeric_limits<int32_t>::max();
1273 }
1274
1275 // These types can be negative.
1276 if (TryConsume(text: "-")) {
1277 default_value->append(s: "-");
1278 // Two's complement always has one more negative value than positive.
1279 ++max_value;
1280 }
1281 // Parse the integer to verify that it is not out-of-range.
1282 uint64_t value;
1283 DO(ConsumeInteger64(max_value, &value,
1284 "Expected integer for field default value."));
1285 // And stringify it again.
1286 default_value->append(str: StrCat(a: value));
1287 break;
1288 }
1289
1290 case FieldDescriptorProto::TYPE_UINT32:
1291 case FieldDescriptorProto::TYPE_UINT64:
1292 case FieldDescriptorProto::TYPE_FIXED32:
1293 case FieldDescriptorProto::TYPE_FIXED64: {
1294 uint64_t max_value = std::numeric_limits<uint64_t>::max();
1295 if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1296 field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1297 max_value = std::numeric_limits<uint32_t>::max();
1298 }
1299
1300 // Numeric, not negative.
1301 if (TryConsume(text: "-")) {
1302 AddError(error: "Unsigned field can't have negative default value.");
1303 }
1304 // Parse the integer to verify that it is not out-of-range.
1305 uint64_t value;
1306 DO(ConsumeInteger64(max_value, &value,
1307 "Expected integer for field default value."));
1308 // And stringify it again.
1309 default_value->append(str: StrCat(a: value));
1310 break;
1311 }
1312
1313 case FieldDescriptorProto::TYPE_FLOAT:
1314 case FieldDescriptorProto::TYPE_DOUBLE:
1315 // These types can be negative.
1316 if (TryConsume(text: "-")) {
1317 default_value->append(s: "-");
1318 }
1319 // Parse the integer because we have to convert hex integers to decimal
1320 // floats.
1321 double value;
1322 DO(ConsumeNumber(&value, "Expected number."));
1323 // And stringify it again.
1324 default_value->append(str: SimpleDtoa(value));
1325 break;
1326
1327 case FieldDescriptorProto::TYPE_BOOL:
1328 if (TryConsume(text: "true")) {
1329 default_value->assign(s: "true");
1330 } else if (TryConsume(text: "false")) {
1331 default_value->assign(s: "false");
1332 } else {
1333 AddError(error: "Expected \"true\" or \"false\".");
1334 return false;
1335 }
1336 break;
1337
1338 case FieldDescriptorProto::TYPE_STRING:
1339 // Note: When file option java_string_check_utf8 is true, if a
1340 // non-string representation (eg byte[]) is later supported, it must
1341 // be checked for UTF-8-ness.
1342 DO(ConsumeString(default_value,
1343 "Expected string for field default "
1344 "value."));
1345 break;
1346
1347 case FieldDescriptorProto::TYPE_BYTES:
1348 DO(ConsumeString(default_value, "Expected string."));
1349 *default_value = CEscape(src: *default_value);
1350 break;
1351
1352 case FieldDescriptorProto::TYPE_ENUM:
1353 DO(ConsumeIdentifier(default_value,
1354 "Expected enum identifier for field "
1355 "default value."));
1356 break;
1357
1358 case FieldDescriptorProto::TYPE_MESSAGE:
1359 case FieldDescriptorProto::TYPE_GROUP:
1360 AddError(error: "Messages can't have default values.");
1361 return false;
1362 }
1363
1364 return true;
1365}
1366
1367bool Parser::ParseJsonName(FieldDescriptorProto* field,
1368 const LocationRecorder& field_location,
1369 const FileDescriptorProto* containing_file) {
1370 if (field->has_json_name()) {
1371 AddError(error: "Already set option \"json_name\".");
1372 field->clear_json_name();
1373 }
1374
1375 LocationRecorder location(field_location,
1376 FieldDescriptorProto::kJsonNameFieldNumber);
1377 location.RecordLegacyLocation(descriptor: field,
1378 location: DescriptorPool::ErrorCollector::OPTION_NAME);
1379
1380 DO(Consume("json_name"));
1381 DO(Consume("="));
1382
1383 LocationRecorder value_location(location);
1384 value_location.RecordLegacyLocation(
1385 descriptor: field, location: DescriptorPool::ErrorCollector::OPTION_VALUE);
1386
1387 DO(ConsumeString(field->mutable_json_name(),
1388 "Expected string for JSON name."));
1389 return true;
1390}
1391
1392bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1393 const LocationRecorder& part_location,
1394 const FileDescriptorProto* containing_file) {
1395 UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1396 std::string identifier; // We parse identifiers into this string.
1397 if (LookingAt(text: "(")) { // This is an extension.
1398 DO(Consume("("));
1399
1400 {
1401 LocationRecorder location(
1402 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1403 // An extension name consists of dot-separated identifiers, and may begin
1404 // with a dot.
1405 if (LookingAtType(token_type: io::Tokenizer::TYPE_IDENTIFIER)) {
1406 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1407 name->mutable_name_part()->append(str: identifier);
1408 }
1409 while (LookingAt(text: ".")) {
1410 DO(Consume("."));
1411 name->mutable_name_part()->append(s: ".");
1412 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1413 name->mutable_name_part()->append(str: identifier);
1414 }
1415 }
1416
1417 DO(Consume(")"));
1418 name->set_is_extension(true);
1419 } else { // This is a regular field.
1420 LocationRecorder location(
1421 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1422 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1423 name->mutable_name_part()->append(str: identifier);
1424 name->set_is_extension(false);
1425 }
1426 return true;
1427}
1428
1429bool Parser::ParseUninterpretedBlock(std::string* value) {
1430 // Note that enclosing braces are not added to *value.
1431 // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1432 // an expression, not a block of statements.
1433 DO(Consume("{"));
1434 int brace_depth = 1;
1435 while (!AtEnd()) {
1436 if (LookingAt(text: "{")) {
1437 brace_depth++;
1438 } else if (LookingAt(text: "}")) {
1439 brace_depth--;
1440 if (brace_depth == 0) {
1441 input_->Next();
1442 return true;
1443 }
1444 }
1445 // TODO(sanjay): Interpret line/column numbers to preserve formatting
1446 if (!value->empty()) value->push_back(c: ' ');
1447 value->append(str: input_->current().text);
1448 input_->Next();
1449 }
1450 AddError(error: "Unexpected end of stream while parsing aggregate value.");
1451 return false;
1452}
1453
1454// We don't interpret the option here. Instead we store it in an
1455// UninterpretedOption, to be interpreted later.
1456bool Parser::ParseOption(Message* options,
1457 const LocationRecorder& options_location,
1458 const FileDescriptorProto* containing_file,
1459 OptionStyle style) {
1460 // Create an entry in the uninterpreted_option field.
1461 const FieldDescriptor* uninterpreted_option_field =
1462 options->GetDescriptor()->FindFieldByName(name: "uninterpreted_option");
1463 GOOGLE_CHECK(uninterpreted_option_field != nullptr)
1464 << "No field named \"uninterpreted_option\" in the Options proto.";
1465
1466 const Reflection* reflection = options->GetReflection();
1467
1468 LocationRecorder location(
1469 options_location, uninterpreted_option_field->number(),
1470 reflection->FieldSize(message: *options, field: uninterpreted_option_field));
1471
1472 if (style == OPTION_STATEMENT) {
1473 DO(Consume("option"));
1474 }
1475
1476 UninterpretedOption* uninterpreted_option =
1477 down_cast<UninterpretedOption*>(f: options->GetReflection()->AddMessage(
1478 message: options, field: uninterpreted_option_field));
1479
1480 // Parse dot-separated name.
1481 {
1482 LocationRecorder name_location(location,
1483 UninterpretedOption::kNameFieldNumber);
1484 name_location.RecordLegacyLocation(
1485 descriptor: uninterpreted_option, location: DescriptorPool::ErrorCollector::OPTION_NAME);
1486
1487 {
1488 LocationRecorder part_location(name_location,
1489 uninterpreted_option->name_size());
1490 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1491 containing_file));
1492 }
1493
1494 while (LookingAt(text: ".")) {
1495 DO(Consume("."));
1496 LocationRecorder part_location(name_location,
1497 uninterpreted_option->name_size());
1498 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1499 containing_file));
1500 }
1501 }
1502
1503 DO(Consume("="));
1504
1505 {
1506 LocationRecorder value_location(location);
1507 value_location.RecordLegacyLocation(
1508 descriptor: uninterpreted_option, location: DescriptorPool::ErrorCollector::OPTION_VALUE);
1509
1510 // All values are a single token, except for negative numbers, which consist
1511 // of a single '-' symbol, followed by a positive number.
1512 bool is_negative = TryConsume(text: "-");
1513
1514 switch (input_->current().type) {
1515 case io::Tokenizer::TYPE_START:
1516 GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1517 return false;
1518
1519 case io::Tokenizer::TYPE_END:
1520 AddError(error: "Unexpected end of stream while parsing option value.");
1521 return false;
1522
1523 case io::Tokenizer::TYPE_WHITESPACE:
1524 case io::Tokenizer::TYPE_NEWLINE:
1525 GOOGLE_CHECK(!input_->report_whitespace() && !input_->report_newlines())
1526 << "Whitespace tokens were not requested.";
1527 GOOGLE_LOG(FATAL) << "Tokenizer reported whitespace.";
1528 return false;
1529
1530 case io::Tokenizer::TYPE_IDENTIFIER: {
1531 value_location.AddPath(
1532 path_component: UninterpretedOption::kIdentifierValueFieldNumber);
1533 if (is_negative) {
1534 AddError(error: "Invalid '-' symbol before identifier.");
1535 return false;
1536 }
1537 std::string value;
1538 DO(ConsumeIdentifier(&value, "Expected identifier."));
1539 uninterpreted_option->set_identifier_value(value);
1540 break;
1541 }
1542
1543 case io::Tokenizer::TYPE_INTEGER: {
1544 uint64_t value;
1545 uint64_t max_value =
1546 is_negative
1547 ? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
1548 : std::numeric_limits<uint64_t>::max();
1549 DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1550 if (is_negative) {
1551 value_location.AddPath(
1552 path_component: UninterpretedOption::kNegativeIntValueFieldNumber);
1553 uninterpreted_option->set_negative_int_value(
1554 static_cast<int64_t>(0 - value));
1555 } else {
1556 value_location.AddPath(
1557 path_component: UninterpretedOption::kPositiveIntValueFieldNumber);
1558 uninterpreted_option->set_positive_int_value(value);
1559 }
1560 break;
1561 }
1562
1563 case io::Tokenizer::TYPE_FLOAT: {
1564 value_location.AddPath(path_component: UninterpretedOption::kDoubleValueFieldNumber);
1565 double value;
1566 DO(ConsumeNumber(&value, "Expected number."));
1567 uninterpreted_option->set_double_value(is_negative ? -value : value);
1568 break;
1569 }
1570
1571 case io::Tokenizer::TYPE_STRING: {
1572 value_location.AddPath(path_component: UninterpretedOption::kStringValueFieldNumber);
1573 if (is_negative) {
1574 AddError(error: "Invalid '-' symbol before string.");
1575 return false;
1576 }
1577 std::string value;
1578 DO(ConsumeString(&value, "Expected string."));
1579 uninterpreted_option->set_string_value(value);
1580 break;
1581 }
1582
1583 case io::Tokenizer::TYPE_SYMBOL:
1584 if (LookingAt(text: "{")) {
1585 value_location.AddPath(
1586 path_component: UninterpretedOption::kAggregateValueFieldNumber);
1587 DO(ParseUninterpretedBlock(
1588 uninterpreted_option->mutable_aggregate_value()));
1589 } else {
1590 AddError(error: "Expected option value.");
1591 return false;
1592 }
1593 break;
1594 }
1595 }
1596
1597 if (style == OPTION_STATEMENT) {
1598 DO(ConsumeEndOfDeclaration(";", &location));
1599 }
1600
1601 return true;
1602}
1603
1604bool Parser::ParseExtensions(DescriptorProto* message,
1605 const LocationRecorder& extensions_location,
1606 const FileDescriptorProto* containing_file) {
1607 // Parse the declaration.
1608 DO(Consume("extensions"));
1609
1610 int old_range_size = message->extension_range_size();
1611
1612 do {
1613 // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1614 LocationRecorder location(extensions_location,
1615 message->extension_range_size());
1616
1617 DescriptorProto::ExtensionRange* range = message->add_extension_range();
1618 location.RecordLegacyLocation(descriptor: range,
1619 location: DescriptorPool::ErrorCollector::NUMBER);
1620
1621 int start, end;
1622 io::Tokenizer::Token start_token;
1623
1624 {
1625 LocationRecorder start_location(
1626 location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1627 start_token = input_->current();
1628 DO(ConsumeInteger(&start, "Expected field number range."));
1629 }
1630
1631 if (TryConsume(text: "to")) {
1632 LocationRecorder end_location(
1633 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1634 if (TryConsume(text: "max")) {
1635 // Set to the sentinel value - 1 since we increment the value below.
1636 // The actual value of the end of the range should be set with
1637 // AdjustExtensionRangesWithMaxEndNumber.
1638 end = kMaxRangeSentinel - 1;
1639 } else {
1640 DO(ConsumeInteger(&end, "Expected integer."));
1641 }
1642 } else {
1643 LocationRecorder end_location(
1644 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1645 end_location.StartAt(token: start_token);
1646 end_location.EndAt(token: start_token);
1647 end = start;
1648 }
1649
1650 // Users like to specify inclusive ranges, but in code we like the end
1651 // number to be exclusive.
1652 ++end;
1653
1654 range->set_start(start);
1655 range->set_end(end);
1656 } while (TryConsume(text: ","));
1657
1658 if (LookingAt(text: "[")) {
1659 int range_number_index = extensions_location.CurrentPathSize();
1660 SourceCodeInfo info;
1661
1662 // Parse extension range options in the first range.
1663 ExtensionRangeOptions* options =
1664 message->mutable_extension_range(index: old_range_size)->mutable_options();
1665
1666 {
1667 LocationRecorder index_location(
1668 extensions_location, 0 /* we fill this in w/ actual index below */,
1669 &info);
1670 LocationRecorder location(
1671 index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
1672 DO(Consume("["));
1673
1674 do {
1675 DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
1676 } while (TryConsume(text: ","));
1677
1678 DO(Consume("]"));
1679 }
1680
1681 // Then copy the extension range options to all of the other ranges we've
1682 // parsed.
1683 for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
1684 message->mutable_extension_range(index: i)->mutable_options()->CopyFrom(
1685 from: *options);
1686 }
1687 // and copy source locations to the other ranges, too
1688 for (int i = old_range_size; i < message->extension_range_size(); i++) {
1689 for (int j = 0; j < info.location_size(); j++) {
1690 if (info.location(index: j).path_size() == range_number_index + 1) {
1691 // this location's path is up to the extension range index, but
1692 // doesn't include options; so it's redundant with location above
1693 continue;
1694 }
1695 SourceCodeInfo_Location* dest = source_code_info_->add_location();
1696 *dest = info.location(index: j);
1697 dest->set_path(index: range_number_index, value: i);
1698 }
1699 }
1700 }
1701
1702 DO(ConsumeEndOfDeclaration(";", &extensions_location));
1703 return true;
1704}
1705
1706// This is similar to extension range parsing, except that it accepts field
1707// name literals.
1708bool Parser::ParseReserved(DescriptorProto* message,
1709 const LocationRecorder& message_location) {
1710 io::Tokenizer::Token start_token = input_->current();
1711 // Parse the declaration.
1712 DO(Consume("reserved"));
1713 if (LookingAtType(token_type: io::Tokenizer::TYPE_STRING)) {
1714 LocationRecorder location(message_location,
1715 DescriptorProto::kReservedNameFieldNumber);
1716 location.StartAt(token: start_token);
1717 return ParseReservedNames(message, parent_location: location);
1718 } else {
1719 LocationRecorder location(message_location,
1720 DescriptorProto::kReservedRangeFieldNumber);
1721 location.StartAt(token: start_token);
1722 return ParseReservedNumbers(message, parent_location: location);
1723 }
1724}
1725
1726bool Parser::ParseReservedNames(DescriptorProto* message,
1727 const LocationRecorder& parent_location) {
1728 do {
1729 LocationRecorder location(parent_location, message->reserved_name_size());
1730 DO(ConsumeString(message->add_reserved_name(), "Expected field name."));
1731 } while (TryConsume(text: ","));
1732 DO(ConsumeEndOfDeclaration(";", &parent_location));
1733 return true;
1734}
1735
1736bool Parser::ParseReservedNumbers(DescriptorProto* message,
1737 const LocationRecorder& parent_location) {
1738 bool first = true;
1739 do {
1740 LocationRecorder location(parent_location, message->reserved_range_size());
1741
1742 DescriptorProto::ReservedRange* range = message->add_reserved_range();
1743 int start, end;
1744 io::Tokenizer::Token start_token;
1745 {
1746 LocationRecorder start_location(
1747 location, DescriptorProto::ReservedRange::kStartFieldNumber);
1748 start_token = input_->current();
1749 DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1750 : "Expected field number range.")));
1751 }
1752
1753 if (TryConsume(text: "to")) {
1754 LocationRecorder end_location(
1755 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1756 if (TryConsume(text: "max")) {
1757 // Set to the sentinel value - 1 since we increment the value below.
1758 // The actual value of the end of the range should be set with
1759 // AdjustExtensionRangesWithMaxEndNumber.
1760 end = kMaxRangeSentinel - 1;
1761 } else {
1762 DO(ConsumeInteger(&end, "Expected integer."));
1763 }
1764 } else {
1765 LocationRecorder end_location(
1766 location, DescriptorProto::ReservedRange::kEndFieldNumber);
1767 end_location.StartAt(token: start_token);
1768 end_location.EndAt(token: start_token);
1769 end = start;
1770 }
1771
1772 // Users like to specify inclusive ranges, but in code we like the end
1773 // number to be exclusive.
1774 ++end;
1775
1776 range->set_start(start);
1777 range->set_end(end);
1778 first = false;
1779 } while (TryConsume(text: ","));
1780
1781 DO(ConsumeEndOfDeclaration(";", &parent_location));
1782 return true;
1783}
1784
1785bool Parser::ParseReserved(EnumDescriptorProto* message,
1786 const LocationRecorder& message_location) {
1787 io::Tokenizer::Token start_token = input_->current();
1788 // Parse the declaration.
1789 DO(Consume("reserved"));
1790 if (LookingAtType(token_type: io::Tokenizer::TYPE_STRING)) {
1791 LocationRecorder location(message_location,
1792 EnumDescriptorProto::kReservedNameFieldNumber);
1793 location.StartAt(token: start_token);
1794 return ParseReservedNames(message, parent_location: location);
1795 } else {
1796 LocationRecorder location(message_location,
1797 EnumDescriptorProto::kReservedRangeFieldNumber);
1798 location.StartAt(token: start_token);
1799 return ParseReservedNumbers(message, parent_location: location);
1800 }
1801}
1802
1803bool Parser::ParseReservedNames(EnumDescriptorProto* message,
1804 const LocationRecorder& parent_location) {
1805 do {
1806 LocationRecorder location(parent_location, message->reserved_name_size());
1807 DO(ConsumeString(message->add_reserved_name(), "Expected enum value."));
1808 } while (TryConsume(text: ","));
1809 DO(ConsumeEndOfDeclaration(";", &parent_location));
1810 return true;
1811}
1812
1813bool Parser::ParseReservedNumbers(EnumDescriptorProto* message,
1814 const LocationRecorder& parent_location) {
1815 bool first = true;
1816 do {
1817 LocationRecorder location(parent_location, message->reserved_range_size());
1818
1819 EnumDescriptorProto::EnumReservedRange* range =
1820 message->add_reserved_range();
1821 int start, end;
1822 io::Tokenizer::Token start_token;
1823 {
1824 LocationRecorder start_location(
1825 location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
1826 start_token = input_->current();
1827 DO(ConsumeSignedInteger(&start,
1828 (first ? "Expected enum value or number range."
1829 : "Expected enum number range.")));
1830 }
1831
1832 if (TryConsume(text: "to")) {
1833 LocationRecorder end_location(
1834 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1835 if (TryConsume(text: "max")) {
1836 // This is in the enum descriptor path, which doesn't have the message
1837 // set duality to fix up, so it doesn't integrate with the sentinel.
1838 end = INT_MAX;
1839 } else {
1840 DO(ConsumeSignedInteger(&end, "Expected integer."));
1841 }
1842 } else {
1843 LocationRecorder end_location(
1844 location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1845 end_location.StartAt(token: start_token);
1846 end_location.EndAt(token: start_token);
1847 end = start;
1848 }
1849
1850 range->set_start(start);
1851 range->set_end(end);
1852 first = false;
1853 } while (TryConsume(text: ","));
1854
1855 DO(ConsumeEndOfDeclaration(";", &parent_location));
1856 return true;
1857}
1858
1859bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1860 RepeatedPtrField<DescriptorProto>* messages,
1861 const LocationRecorder& parent_location,
1862 int location_field_number_for_nested_type,
1863 const LocationRecorder& extend_location,
1864 const FileDescriptorProto* containing_file) {
1865 DO(Consume("extend"));
1866
1867 // Parse the extendee type.
1868 io::Tokenizer::Token extendee_start = input_->current();
1869 std::string extendee;
1870 DO(ParseUserDefinedType(&extendee));
1871 io::Tokenizer::Token extendee_end = input_->previous();
1872
1873 // Parse the block.
1874 DO(ConsumeEndOfDeclaration("{", &extend_location));
1875
1876 bool is_first = true;
1877
1878 do {
1879 if (AtEnd()) {
1880 AddError(error: "Reached end of input in extend definition (missing '}').");
1881 return false;
1882 }
1883
1884 // Note that kExtensionFieldNumber was already pushed by the parent.
1885 LocationRecorder location(extend_location, extensions->size());
1886
1887 FieldDescriptorProto* field = extensions->Add();
1888
1889 {
1890 LocationRecorder extendee_location(
1891 location, FieldDescriptorProto::kExtendeeFieldNumber);
1892 extendee_location.StartAt(token: extendee_start);
1893 extendee_location.EndAt(token: extendee_end);
1894
1895 if (is_first) {
1896 extendee_location.RecordLegacyLocation(
1897 descriptor: field, location: DescriptorPool::ErrorCollector::EXTENDEE);
1898 is_first = false;
1899 }
1900 }
1901
1902 field->set_extendee(extendee);
1903
1904 if (!ParseMessageField(field, messages, parent_location,
1905 location_field_number_for_nested_type, field_location: location,
1906 containing_file)) {
1907 // This statement failed to parse. Skip it, but keep looping to parse
1908 // other statements.
1909 SkipStatement();
1910 }
1911 } while (!TryConsumeEndOfDeclaration(text: "}", location: nullptr));
1912
1913 return true;
1914}
1915
1916bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1917 DescriptorProto* containing_type, int oneof_index,
1918 const LocationRecorder& oneof_location,
1919 const LocationRecorder& containing_type_location,
1920 const FileDescriptorProto* containing_file) {
1921 DO(Consume("oneof"));
1922
1923 {
1924 LocationRecorder name_location(oneof_location,
1925 OneofDescriptorProto::kNameFieldNumber);
1926 DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1927 }
1928
1929 DO(ConsumeEndOfDeclaration("{", &oneof_location));
1930
1931 do {
1932 if (AtEnd()) {
1933 AddError(error: "Reached end of input in oneof definition (missing '}').");
1934 return false;
1935 }
1936
1937 if (LookingAt(text: "option")) {
1938 LocationRecorder option_location(
1939 oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
1940 if (!ParseOption(options: oneof_decl->mutable_options(), options_location: option_location,
1941 containing_file, style: OPTION_STATEMENT)) {
1942 return false;
1943 }
1944 continue;
1945 }
1946
1947 // Print a nice error if the user accidentally tries to place a label
1948 // on an individual member of a oneof.
1949 if (LookingAt(text: "required") || LookingAt(text: "optional") ||
1950 LookingAt(text: "repeated")) {
1951 AddError(
1952 error: "Fields in oneofs must not have labels (required / optional "
1953 "/ repeated).");
1954 // We can continue parsing here because we understand what the user
1955 // meant. The error report will still make parsing fail overall.
1956 input_->Next();
1957 }
1958
1959 LocationRecorder field_location(containing_type_location,
1960 DescriptorProto::kFieldFieldNumber,
1961 containing_type->field_size());
1962
1963 FieldDescriptorProto* field = containing_type->add_field();
1964 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1965 field->set_oneof_index(oneof_index);
1966
1967 if (!ParseMessageFieldNoLabel(field, messages: containing_type->mutable_nested_type(),
1968 parent_location: containing_type_location,
1969 location_field_number_for_nested_type: DescriptorProto::kNestedTypeFieldNumber,
1970 field_location, containing_file)) {
1971 // This statement failed to parse. Skip it, but keep looping to parse
1972 // other statements.
1973 SkipStatement();
1974 }
1975 } while (!TryConsumeEndOfDeclaration(text: "}", location: nullptr));
1976
1977 return true;
1978}
1979
1980// -------------------------------------------------------------------
1981// Enums
1982
1983bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1984 const LocationRecorder& enum_location,
1985 const FileDescriptorProto* containing_file) {
1986 DO(Consume("enum"));
1987
1988 {
1989 LocationRecorder location(enum_location,
1990 EnumDescriptorProto::kNameFieldNumber);
1991 location.RecordLegacyLocation(descriptor: enum_type,
1992 location: DescriptorPool::ErrorCollector::NAME);
1993 DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1994 }
1995
1996 DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1997
1998 DO(ValidateEnum(enum_type));
1999
2000 return true;
2001}
2002
2003bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
2004 const LocationRecorder& enum_location,
2005 const FileDescriptorProto* containing_file) {
2006 DO(ConsumeEndOfDeclaration("{", &enum_location));
2007
2008 while (!TryConsumeEndOfDeclaration(text: "}", location: nullptr)) {
2009 if (AtEnd()) {
2010 AddError(error: "Reached end of input in enum definition (missing '}').");
2011 return false;
2012 }
2013
2014 if (!ParseEnumStatement(message: enum_type, enum_location, containing_file)) {
2015 // This statement failed to parse. Skip it, but keep looping to parse
2016 // other statements.
2017 SkipStatement();
2018 }
2019 }
2020
2021 return true;
2022}
2023
2024bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
2025 const LocationRecorder& enum_location,
2026 const FileDescriptorProto* containing_file) {
2027 if (TryConsumeEndOfDeclaration(text: ";", location: nullptr)) {
2028 // empty statement; ignore
2029 return true;
2030 } else if (LookingAt(text: "option")) {
2031 LocationRecorder location(enum_location,
2032 EnumDescriptorProto::kOptionsFieldNumber);
2033 return ParseOption(options: enum_type->mutable_options(), options_location: location, containing_file,
2034 style: OPTION_STATEMENT);
2035 } else if (LookingAt(text: "reserved")) {
2036 return ParseReserved(message: enum_type, message_location: enum_location);
2037 } else {
2038 LocationRecorder location(enum_location,
2039 EnumDescriptorProto::kValueFieldNumber,
2040 enum_type->value_size());
2041 return ParseEnumConstant(enum_value: enum_type->add_value(), enum_value_location: location, containing_file);
2042 }
2043}
2044
2045bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
2046 const LocationRecorder& enum_value_location,
2047 const FileDescriptorProto* containing_file) {
2048 // Parse name.
2049 {
2050 LocationRecorder location(enum_value_location,
2051 EnumValueDescriptorProto::kNameFieldNumber);
2052 location.RecordLegacyLocation(descriptor: enum_value,
2053 location: DescriptorPool::ErrorCollector::NAME);
2054 DO(ConsumeIdentifier(enum_value->mutable_name(),
2055 "Expected enum constant name."));
2056 }
2057
2058 DO(Consume("=", "Missing numeric value for enum constant."));
2059
2060 // Parse value.
2061 {
2062 LocationRecorder location(enum_value_location,
2063 EnumValueDescriptorProto::kNumberFieldNumber);
2064 location.RecordLegacyLocation(descriptor: enum_value,
2065 location: DescriptorPool::ErrorCollector::NUMBER);
2066
2067 int number;
2068 DO(ConsumeSignedInteger(&number, "Expected integer."));
2069 enum_value->set_number(number);
2070 }
2071
2072 DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2073 containing_file));
2074
2075 DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2076
2077 return true;
2078}
2079
2080bool Parser::ParseEnumConstantOptions(
2081 EnumValueDescriptorProto* value,
2082 const LocationRecorder& enum_value_location,
2083 const FileDescriptorProto* containing_file) {
2084 if (!LookingAt(text: "[")) return true;
2085
2086 LocationRecorder location(enum_value_location,
2087 EnumValueDescriptorProto::kOptionsFieldNumber);
2088
2089 DO(Consume("["));
2090
2091 do {
2092 DO(ParseOption(value->mutable_options(), location, containing_file,
2093 OPTION_ASSIGNMENT));
2094 } while (TryConsume(text: ","));
2095
2096 DO(Consume("]"));
2097 return true;
2098}
2099
2100// -------------------------------------------------------------------
2101// Services
2102
2103bool Parser::ParseServiceDefinition(
2104 ServiceDescriptorProto* service, const LocationRecorder& service_location,
2105 const FileDescriptorProto* containing_file) {
2106 DO(Consume("service"));
2107
2108 {
2109 LocationRecorder location(service_location,
2110 ServiceDescriptorProto::kNameFieldNumber);
2111 location.RecordLegacyLocation(descriptor: service,
2112 location: DescriptorPool::ErrorCollector::NAME);
2113 DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2114 }
2115
2116 DO(ParseServiceBlock(service, service_location, containing_file));
2117 return true;
2118}
2119
2120bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2121 const LocationRecorder& service_location,
2122 const FileDescriptorProto* containing_file) {
2123 DO(ConsumeEndOfDeclaration("{", &service_location));
2124
2125 while (!TryConsumeEndOfDeclaration(text: "}", location: nullptr)) {
2126 if (AtEnd()) {
2127 AddError(error: "Reached end of input in service definition (missing '}').");
2128 return false;
2129 }
2130
2131 if (!ParseServiceStatement(message: service, service_location, containing_file)) {
2132 // This statement failed to parse. Skip it, but keep looping to parse
2133 // other statements.
2134 SkipStatement();
2135 }
2136 }
2137
2138 return true;
2139}
2140
2141bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2142 const LocationRecorder& service_location,
2143 const FileDescriptorProto* containing_file) {
2144 if (TryConsumeEndOfDeclaration(text: ";", location: nullptr)) {
2145 // empty statement; ignore
2146 return true;
2147 } else if (LookingAt(text: "option")) {
2148 LocationRecorder location(service_location,
2149 ServiceDescriptorProto::kOptionsFieldNumber);
2150 return ParseOption(options: service->mutable_options(), options_location: location, containing_file,
2151 style: OPTION_STATEMENT);
2152 } else {
2153 LocationRecorder location(service_location,
2154 ServiceDescriptorProto::kMethodFieldNumber,
2155 service->method_size());
2156 return ParseServiceMethod(method: service->add_method(), method_location: location, containing_file);
2157 }
2158}
2159
2160bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2161 const LocationRecorder& method_location,
2162 const FileDescriptorProto* containing_file) {
2163 DO(Consume("rpc"));
2164
2165 {
2166 LocationRecorder location(method_location,
2167 MethodDescriptorProto::kNameFieldNumber);
2168 location.RecordLegacyLocation(descriptor: method, location: DescriptorPool::ErrorCollector::NAME);
2169 DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2170 }
2171
2172 // Parse input type.
2173 DO(Consume("("));
2174 {
2175 if (LookingAt(text: "stream")) {
2176 LocationRecorder location(
2177 method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2178 location.RecordLegacyLocation(descriptor: method,
2179 location: DescriptorPool::ErrorCollector::OTHER);
2180 method->set_client_streaming(true);
2181 DO(Consume("stream"));
2182 }
2183 LocationRecorder location(method_location,
2184 MethodDescriptorProto::kInputTypeFieldNumber);
2185 location.RecordLegacyLocation(descriptor: method,
2186 location: DescriptorPool::ErrorCollector::INPUT_TYPE);
2187 DO(ParseUserDefinedType(method->mutable_input_type()));
2188 }
2189 DO(Consume(")"));
2190
2191 // Parse output type.
2192 DO(Consume("returns"));
2193 DO(Consume("("));
2194 {
2195 if (LookingAt(text: "stream")) {
2196 LocationRecorder location(
2197 method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2198 location.RecordLegacyLocation(descriptor: method,
2199 location: DescriptorPool::ErrorCollector::OTHER);
2200 DO(Consume("stream"));
2201 method->set_server_streaming(true);
2202 }
2203 LocationRecorder location(method_location,
2204 MethodDescriptorProto::kOutputTypeFieldNumber);
2205 location.RecordLegacyLocation(descriptor: method,
2206 location: DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2207 DO(ParseUserDefinedType(method->mutable_output_type()));
2208 }
2209 DO(Consume(")"));
2210
2211 if (LookingAt(text: "{")) {
2212 // Options!
2213 DO(ParseMethodOptions(method_location, containing_file,
2214 MethodDescriptorProto::kOptionsFieldNumber,
2215 method->mutable_options()));
2216 } else {
2217 DO(ConsumeEndOfDeclaration(";", &method_location));
2218 }
2219
2220 return true;
2221}
2222
2223bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2224 const FileDescriptorProto* containing_file,
2225 const int optionsFieldNumber,
2226 Message* mutable_options) {
2227 // Options!
2228 ConsumeEndOfDeclaration(text: "{", location: &parent_location);
2229 while (!TryConsumeEndOfDeclaration(text: "}", location: nullptr)) {
2230 if (AtEnd()) {
2231 AddError(error: "Reached end of input in method options (missing '}').");
2232 return false;
2233 }
2234
2235 if (TryConsumeEndOfDeclaration(text: ";", location: nullptr)) {
2236 // empty statement; ignore
2237 } else {
2238 LocationRecorder location(parent_location, optionsFieldNumber);
2239 if (!ParseOption(options: mutable_options, options_location: location, containing_file,
2240 style: OPTION_STATEMENT)) {
2241 // This statement failed to parse. Skip it, but keep looping to
2242 // parse other statements.
2243 SkipStatement();
2244 }
2245 }
2246 }
2247
2248 return true;
2249}
2250
2251// -------------------------------------------------------------------
2252
2253bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2254 const LocationRecorder& field_location) {
2255 if (!LookingAt(text: "optional") && !LookingAt(text: "repeated") &&
2256 !LookingAt(text: "required")) {
2257 return false;
2258 }
2259 LocationRecorder location(field_location,
2260 FieldDescriptorProto::kLabelFieldNumber);
2261 if (TryConsume(text: "optional")) {
2262 *label = FieldDescriptorProto::LABEL_OPTIONAL;
2263 } else if (TryConsume(text: "repeated")) {
2264 *label = FieldDescriptorProto::LABEL_REPEATED;
2265 } else {
2266 Consume(text: "required");
2267 *label = FieldDescriptorProto::LABEL_REQUIRED;
2268 }
2269 return true;
2270}
2271
2272bool Parser::ParseType(FieldDescriptorProto::Type* type,
2273 std::string* type_name) {
2274 const auto& type_names_table = GetTypeNameTable();
2275 auto iter = type_names_table.find(x: input_->current().text);
2276 if (iter != type_names_table.end()) {
2277 *type = iter->second;
2278 input_->Next();
2279 } else {
2280 DO(ParseUserDefinedType(type_name));
2281 }
2282 return true;
2283}
2284
2285bool Parser::ParseUserDefinedType(std::string* type_name) {
2286 type_name->clear();
2287
2288 const auto& type_names_table = GetTypeNameTable();
2289 auto iter = type_names_table.find(x: input_->current().text);
2290 if (iter != type_names_table.end()) {
2291 // Note: The only place enum types are allowed is for field types, but
2292 // if we are parsing a field type then we would not get here because
2293 // primitives are allowed there as well. So this error message doesn't
2294 // need to account for enums.
2295 AddError(error: "Expected message type.");
2296
2297 // Pretend to accept this type so that we can go on parsing.
2298 *type_name = input_->current().text;
2299 input_->Next();
2300 return true;
2301 }
2302
2303 // A leading "." means the name is fully-qualified.
2304 if (TryConsume(text: ".")) type_name->append(s: ".");
2305
2306 // Consume the first part of the name.
2307 std::string identifier;
2308 DO(ConsumeIdentifier(&identifier, "Expected type name."));
2309 type_name->append(str: identifier);
2310
2311 // Consume more parts.
2312 while (TryConsume(text: ".")) {
2313 type_name->append(s: ".");
2314 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2315 type_name->append(str: identifier);
2316 }
2317
2318 return true;
2319}
2320
2321// ===================================================================
2322
2323bool Parser::ParsePackage(FileDescriptorProto* file,
2324 const LocationRecorder& root_location,
2325 const FileDescriptorProto* containing_file) {
2326 if (file->has_package()) {
2327 AddError(error: "Multiple package definitions.");
2328 // Don't append the new package to the old one. Just replace it. Not
2329 // that it really matters since this is an error anyway.
2330 file->clear_package();
2331 }
2332
2333 LocationRecorder location(root_location,
2334 FileDescriptorProto::kPackageFieldNumber);
2335 location.RecordLegacyLocation(descriptor: file, location: DescriptorPool::ErrorCollector::NAME);
2336
2337 DO(Consume("package"));
2338
2339 while (true) {
2340 std::string identifier;
2341 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2342 file->mutable_package()->append(str: identifier);
2343 if (!TryConsume(text: ".")) break;
2344 file->mutable_package()->append(s: ".");
2345 }
2346
2347 DO(ConsumeEndOfDeclaration(";", &location));
2348
2349 return true;
2350}
2351
2352bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2353 RepeatedField<int32_t>* public_dependency,
2354 RepeatedField<int32_t>* weak_dependency,
2355 const LocationRecorder& root_location,
2356 const FileDescriptorProto* containing_file) {
2357 LocationRecorder location(root_location,
2358 FileDescriptorProto::kDependencyFieldNumber,
2359 dependency->size());
2360
2361 DO(Consume("import"));
2362
2363 if (LookingAt(text: "public")) {
2364 LocationRecorder public_location(
2365 root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2366 public_dependency->size());
2367 DO(Consume("public"));
2368 *public_dependency->Add() = dependency->size();
2369 } else if (LookingAt(text: "weak")) {
2370 LocationRecorder weak_location(
2371 root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2372 weak_dependency->size());
2373 weak_location.RecordLegacyImportLocation(descriptor: containing_file, name: "weak");
2374 DO(Consume("weak"));
2375 *weak_dependency->Add() = dependency->size();
2376 }
2377
2378 std::string import_file;
2379 DO(ConsumeString(&import_file,
2380 "Expected a string naming the file to import."));
2381 *dependency->Add() = import_file;
2382 location.RecordLegacyImportLocation(descriptor: containing_file, name: import_file);
2383
2384 DO(ConsumeEndOfDeclaration(";", &location));
2385
2386 return true;
2387}
2388
2389// ===================================================================
2390
2391SourceLocationTable::SourceLocationTable() {}
2392SourceLocationTable::~SourceLocationTable() {}
2393
2394bool SourceLocationTable::Find(
2395 const Message* descriptor,
2396 DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2397 int* column) const {
2398 const std::pair<int, int>* result =
2399 FindOrNull(collection: location_map_, key: std::make_pair(x&: descriptor, y&: location));
2400 if (result == nullptr) {
2401 *line = -1;
2402 *column = 0;
2403 return false;
2404 } else {
2405 *line = result->first;
2406 *column = result->second;
2407 return true;
2408 }
2409}
2410
2411bool SourceLocationTable::FindImport(const Message* descriptor,
2412 const std::string& name, int* line,
2413 int* column) const {
2414 const std::pair<int, int>* result =
2415 FindOrNull(collection: import_location_map_, key: std::make_pair(x&: descriptor, y: name));
2416 if (result == nullptr) {
2417 *line = -1;
2418 *column = 0;
2419 return false;
2420 } else {
2421 *line = result->first;
2422 *column = result->second;
2423 return true;
2424 }
2425}
2426
2427void SourceLocationTable::Add(
2428 const Message* descriptor,
2429 DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2430 int column) {
2431 location_map_[std::make_pair(x&: descriptor, y&: location)] =
2432 std::make_pair(x&: line, y&: column);
2433}
2434
2435void SourceLocationTable::AddImport(const Message* descriptor,
2436 const std::string& name, int line,
2437 int column) {
2438 import_location_map_[std::make_pair(x&: descriptor, y: name)] =
2439 std::make_pair(x&: line, y&: column);
2440}
2441
2442void SourceLocationTable::Clear() { location_map_.clear(); }
2443
2444} // namespace compiler
2445} // namespace protobuf
2446} // namespace google
2447