1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34
35#ifdef _MSC_VER
36#include <direct.h>
37#else
38#include <unistd.h>
39#endif
40#include <errno.h>
41#include <fcntl.h>
42#include <sys/stat.h>
43#include <sys/types.h>
44
45#include <algorithm>
46#include <memory>
47
48#include <google/protobuf/compiler/importer.h>
49#include <google/protobuf/compiler/parser.h>
50#include <google/protobuf/io/tokenizer.h>
51#include <google/protobuf/io/zero_copy_stream_impl.h>
52#include <google/protobuf/stubs/strutil.h>
53#include <google/protobuf/io/io_win32.h>
54
55#ifdef _WIN32
56#include <ctype.h>
57#endif
58
59namespace google {
60namespace protobuf {
61namespace compiler {
62
63#ifdef _WIN32
64// DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
65// them like we do below.
66using google::protobuf::io::win32::access;
67using google::protobuf::io::win32::open;
68#endif
69
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example:  "C:\foo".  When built for anything other
// than Windows or Cygwin this always returns false.
//
// A match requires at least three characters: an alphabetic drive letter, a
// ':' at index 1 that is also the last ':' in the string, and a '/' or '\'
// at index 2.
//
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() is only defined for EOF and values representable as unsigned
  // char, so a (possibly negative) plain char must be converted first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') && text.find_last_of(':') == 1;
#else
  return false;
#endif
}
81
82MultiFileErrorCollector::~MultiFileErrorCollector() {}
83
84// This class serves two purposes:
85// - It implements the ErrorCollector interface (used by Tokenizer and Parser)
86// in terms of MultiFileErrorCollector, using a particular filename.
87// - It lets us check if any errors have occurred.
88class SourceTreeDescriptorDatabase::SingleFileErrorCollector
89 : public io::ErrorCollector {
90 public:
91 SingleFileErrorCollector(const std::string& filename,
92 MultiFileErrorCollector* multi_file_error_collector)
93 : filename_(filename),
94 multi_file_error_collector_(multi_file_error_collector),
95 had_errors_(false) {}
96 ~SingleFileErrorCollector() override {}
97
98 bool had_errors() { return had_errors_; }
99
100 // implements ErrorCollector ---------------------------------------
101 void AddError(int line, int column, const std::string& message) override {
102 if (multi_file_error_collector_ != nullptr) {
103 multi_file_error_collector_->AddError(filename: filename_, line, column, message);
104 }
105 had_errors_ = true;
106 }
107
108 private:
109 std::string filename_;
110 MultiFileErrorCollector* multi_file_error_collector_;
111 bool had_errors_;
112};
113
114// ===================================================================
115
116SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
117 SourceTree* source_tree)
118 : source_tree_(source_tree),
119 fallback_database_(nullptr),
120 error_collector_(nullptr),
121 using_validation_error_collector_(false),
122 validation_error_collector_(this) {}
123
124SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
125 SourceTree* source_tree, DescriptorDatabase* fallback_database)
126 : source_tree_(source_tree),
127 fallback_database_(fallback_database),
128 error_collector_(nullptr),
129 using_validation_error_collector_(false),
130 validation_error_collector_(this) {}
131
132SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
133
134bool SourceTreeDescriptorDatabase::FindFileByName(const std::string& filename,
135 FileDescriptorProto* output) {
136 std::unique_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
137 if (input == nullptr) {
138 if (fallback_database_ != nullptr &&
139 fallback_database_->FindFileByName(filename, output)) {
140 return true;
141 }
142 if (error_collector_ != nullptr) {
143 error_collector_->AddError(filename, line: -1, column: 0,
144 message: source_tree_->GetLastErrorMessage());
145 }
146 return false;
147 }
148
149 // Set up the tokenizer and parser.
150 SingleFileErrorCollector file_error_collector(filename, error_collector_);
151 io::Tokenizer tokenizer(input.get(), &file_error_collector);
152
153 Parser parser;
154 if (error_collector_ != nullptr) {
155 parser.RecordErrorsTo(error_collector: &file_error_collector);
156 }
157 if (using_validation_error_collector_) {
158 parser.RecordSourceLocationsTo(location_table: &source_locations_);
159 }
160
161 // Parse it.
162 output->set_name(filename);
163 return parser.Parse(input: &tokenizer, file: output) && !file_error_collector.had_errors();
164}
165
166bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
167 const std::string& symbol_name, FileDescriptorProto* output) {
168 return false;
169}
170
171bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
172 const std::string& containing_type, int field_number,
173 FileDescriptorProto* output) {
174 return false;
175}
176
177// -------------------------------------------------------------------
178
179SourceTreeDescriptorDatabase::ValidationErrorCollector::
180 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
181 : owner_(owner) {}
182
183SourceTreeDescriptorDatabase::ValidationErrorCollector::
184 ~ValidationErrorCollector() {}
185
186void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
187 const std::string& filename, const std::string& element_name,
188 const Message* descriptor, ErrorLocation location,
189 const std::string& message) {
190 if (owner_->error_collector_ == nullptr) return;
191
192 int line, column;
193 if (location == DescriptorPool::ErrorCollector::IMPORT) {
194 owner_->source_locations_.FindImport(descriptor, name: element_name, line: &line,
195 column: &column);
196 } else {
197 owner_->source_locations_.Find(descriptor, location, line: &line, column: &column);
198 }
199 owner_->error_collector_->AddError(filename, line, column, message);
200}
201
202void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddWarning(
203 const std::string& filename, const std::string& element_name,
204 const Message* descriptor, ErrorLocation location,
205 const std::string& message) {
206 if (owner_->error_collector_ == nullptr) return;
207
208 int line, column;
209 if (location == DescriptorPool::ErrorCollector::IMPORT) {
210 owner_->source_locations_.FindImport(descriptor, name: element_name, line: &line,
211 column: &column);
212 } else {
213 owner_->source_locations_.Find(descriptor, location, line: &line, column: &column);
214 }
215 owner_->error_collector_->AddWarning(filename, line, column, message);
216}
217
218// ===================================================================
219
220Importer::Importer(SourceTree* source_tree,
221 MultiFileErrorCollector* error_collector)
222 : database_(source_tree),
223 pool_(&database_, database_.GetValidationErrorCollector()) {
224 pool_.EnforceWeakDependencies(enforce: true);
225 database_.RecordErrorsTo(error_collector);
226}
227
228Importer::~Importer() {}
229
230const FileDescriptor* Importer::Import(const std::string& filename) {
231 return pool_.FindFileByName(name: filename);
232}
233
234void Importer::AddUnusedImportTrackFile(const std::string& file_name,
235 bool is_error) {
236 pool_.AddUnusedImportTrackFile(file_name, is_error);
237}
238
239void Importer::ClearUnusedImportTrackFiles() {
240 pool_.ClearUnusedImportTrackFiles();
241}
242
243
244// ===================================================================
245
246SourceTree::~SourceTree() {}
247
248std::string SourceTree::GetLastErrorMessage() { return "File not found."; }
249
250DiskSourceTree::DiskSourceTree() {}
251
252DiskSourceTree::~DiskSourceTree() {}
253
// Returns the final character of `str`, or '\0' when `str` is empty (indexing
// an empty string at size() - 1 would be out of range).
static inline char LastChar(const std::string& str) {
  return str.empty() ? '\0' : str.back();
}
257
258// Given a path, returns an equivalent path with these changes:
259// - On Windows, any backslashes are replaced with forward slashes.
260// - Any instances of the directory "." are removed.
261// - Any consecutive '/'s are collapsed into a single slash.
262// Note that the resulting string may be empty.
263//
264// TODO(kenton): It would be nice to handle "..", e.g. so that we can figure
265// out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a
266// symlink or doesn't exist, then things get complicated, and we can't
267// actually determine this without investigating the filesystem, probably
268// in non-portable ways. So, we punt.
269//
270// TODO(kenton): It would be nice to use realpath() here except that it
271// resolves symbolic links. This could cause problems if people place
272// symbolic links in their source tree. For example, if you executed:
273// protoc --proto_path=foo foo/bar/baz.proto
274// then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
275// to a path which does not appear to be under foo, and thus the compiler
276// will complain that baz.proto is not inside the --proto_path.
277static std::string CanonicalizePath(std::string path) {
278#ifdef _WIN32
279 // The Win32 API accepts forward slashes as a path delimiter even though
280 // backslashes are standard. Let's avoid confusion and use only forward
281 // slashes.
282 if (HasPrefixString(path, "\\\\")) {
283 // Avoid converting two leading backslashes.
284 path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
285 } else {
286 path = StringReplace(path, "\\", "/", true);
287 }
288#endif
289
290 std::vector<std::string> canonical_parts;
291 std::vector<std::string> parts = Split(
292 full: path, delim: "/", skip_empty: true); // Note: Removes empty parts.
293 for (const std::string& part : parts) {
294 if (part == ".") {
295 // Ignore.
296 } else {
297 canonical_parts.push_back(x: part);
298 }
299 }
300 std::string result = Join(components: canonical_parts, delim: "/");
301 if (!path.empty() && path[0] == '/') {
302 // Restore leading slash.
303 result = '/' + result;
304 }
305 if (!path.empty() && LastChar(str: path) == '/' && !result.empty() &&
306 LastChar(str: result) != '/') {
307 // Restore trailing slash.
308 result += '/';
309 }
310 return result;
311}
312
// Returns true if `path` has ".." as a complete '/'-separated component:
// the whole path is "..", it starts with "../", it ends with "/..", or it
// contains "/../" somewhere.  Names that merely contain dots, such as
// "..foo" or "foo..", do not count.
static inline bool ContainsParentReference(const std::string& path) {
  if (path == "..") {
    return true;
  }
  // compare() clamps the length to the string's size, so a path shorter than
  // three characters simply compares unequal here.
  if (path.compare(0, 3, "../") == 0) {
    return true;
  }
  if (path.size() >= 3 && path.compare(path.size() - 3, 3, "/..") == 0) {
    return true;
  }
  return path.find("/../") != std::string::npos;
}
317
318// Maps a file from an old location to a new one. Typically, old_prefix is
319// a virtual path and new_prefix is its corresponding disk path. Returns
320// false if the filename did not start with old_prefix, otherwise replaces
321// old_prefix with new_prefix and stores the result in *result. Examples:
322// string result;
323// assert(ApplyMapping("foo/bar", "", "baz", &result));
324// assert(result == "baz/foo/bar");
325//
326// assert(ApplyMapping("foo/bar", "foo", "baz", &result));
327// assert(result == "baz/bar");
328//
329// assert(ApplyMapping("foo", "foo", "bar", &result));
330// assert(result == "bar");
331//
332// assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
333// assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
334// assert(!ApplyMapping("foobar", "foo", "baz", &result));
335static bool ApplyMapping(const std::string& filename,
336 const std::string& old_prefix,
337 const std::string& new_prefix, std::string* result) {
338 if (old_prefix.empty()) {
339 // old_prefix matches any relative path.
340 if (ContainsParentReference(path: filename)) {
341 // We do not allow the file name to use "..".
342 return false;
343 }
344 if (HasPrefixString(str: filename, prefix: "/") || IsWindowsAbsolutePath(text: filename)) {
345 // This is an absolute path, so it isn't matched by the empty string.
346 return false;
347 }
348 result->assign(str: new_prefix);
349 if (!result->empty()) result->push_back(c: '/');
350 result->append(str: filename);
351 return true;
352 } else if (HasPrefixString(str: filename, prefix: old_prefix)) {
353 // old_prefix is a prefix of the filename. Is it the whole filename?
354 if (filename.size() == old_prefix.size()) {
355 // Yep, it's an exact match.
356 *result = new_prefix;
357 return true;
358 } else {
359 // Not an exact match. Is the next character a '/'? Otherwise,
360 // this isn't actually a match at all. E.g. the prefix "foo/bar"
361 // does not match the filename "foo/barbaz".
362 int after_prefix_start = -1;
363 if (filename[old_prefix.size()] == '/') {
364 after_prefix_start = old_prefix.size() + 1;
365 } else if (filename[old_prefix.size() - 1] == '/') {
366 // old_prefix is never empty, and canonicalized paths never have
367 // consecutive '/' characters.
368 after_prefix_start = old_prefix.size();
369 }
370 if (after_prefix_start != -1) {
371 // Yep. So the prefixes are directories and the filename is a file
372 // inside them.
373 std::string after_prefix = filename.substr(pos: after_prefix_start);
374 if (ContainsParentReference(path: after_prefix)) {
375 // We do not allow the file name to use "..".
376 return false;
377 }
378 result->assign(str: new_prefix);
379 if (!result->empty()) result->push_back(c: '/');
380 result->append(str: after_prefix);
381 return true;
382 }
383 }
384 }
385
386 return false;
387}
388
389void DiskSourceTree::MapPath(const std::string& virtual_path,
390 const std::string& disk_path) {
391 mappings_.push_back(x: Mapping(virtual_path, CanonicalizePath(path: disk_path)));
392}
393
394DiskSourceTree::DiskFileToVirtualFileResult
395DiskSourceTree::DiskFileToVirtualFile(const std::string& disk_file,
396 std::string* virtual_file,
397 std::string* shadowing_disk_file) {
398 int mapping_index = -1;
399 std::string canonical_disk_file = CanonicalizePath(path: disk_file);
400
401 for (int i = 0; i < mappings_.size(); i++) {
402 // Apply the mapping in reverse.
403 if (ApplyMapping(filename: canonical_disk_file, old_prefix: mappings_[i].disk_path,
404 new_prefix: mappings_[i].virtual_path, result: virtual_file)) {
405 // Success.
406 mapping_index = i;
407 break;
408 }
409 }
410
411 if (mapping_index == -1) {
412 return NO_MAPPING;
413 }
414
415 // Iterate through all mappings with higher precedence and verify that none
416 // of them map this file to some other existing file.
417 for (int i = 0; i < mapping_index; i++) {
418 if (ApplyMapping(filename: *virtual_file, old_prefix: mappings_[i].virtual_path,
419 new_prefix: mappings_[i].disk_path, result: shadowing_disk_file)) {
420 if (access(name: shadowing_disk_file->c_str(), F_OK) >= 0) {
421 // File exists.
422 return SHADOWED;
423 }
424 }
425 }
426 shadowing_disk_file->clear();
427
428 // Verify that we can open the file. Note that this also has the side-effect
429 // of verifying that we are not canonicalizing away any non-existent
430 // directories.
431 std::unique_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(filename: disk_file));
432 if (stream == nullptr) {
433 return CANNOT_OPEN;
434 }
435
436 return SUCCESS;
437}
438
439bool DiskSourceTree::VirtualFileToDiskFile(const std::string& virtual_file,
440 std::string* disk_file) {
441 std::unique_ptr<io::ZeroCopyInputStream> stream(
442 OpenVirtualFile(virtual_file, disk_file));
443 return stream != nullptr;
444}
445
446io::ZeroCopyInputStream* DiskSourceTree::Open(const std::string& filename) {
447 return OpenVirtualFile(virtual_file: filename, disk_file: nullptr);
448}
449
450std::string DiskSourceTree::GetLastErrorMessage() {
451 return last_error_message_;
452}
453
454io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
455 const std::string& virtual_file, std::string* disk_file) {
456 if (virtual_file != CanonicalizePath(path: virtual_file) ||
457 ContainsParentReference(path: virtual_file)) {
458 // We do not allow importing of paths containing things like ".." or
459 // consecutive slashes since the compiler expects files to be uniquely
460 // identified by file name.
461 last_error_message_ =
462 "Backslashes, consecutive slashes, \".\", or \"..\" "
463 "are not allowed in the virtual path";
464 return nullptr;
465 }
466
467 for (const auto& mapping : mappings_) {
468 std::string temp_disk_file;
469 if (ApplyMapping(filename: virtual_file, old_prefix: mapping.virtual_path, new_prefix: mapping.disk_path,
470 result: &temp_disk_file)) {
471 io::ZeroCopyInputStream* stream = OpenDiskFile(filename: temp_disk_file);
472 if (stream != nullptr) {
473 if (disk_file != nullptr) {
474 *disk_file = temp_disk_file;
475 }
476 return stream;
477 }
478
479 if (errno == EACCES) {
480 // The file exists but is not readable.
481 last_error_message_ =
482 "Read access is denied for file: " + temp_disk_file;
483 return nullptr;
484 }
485 }
486 }
487 last_error_message_ = "File not found.";
488 return nullptr;
489}
490
491io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
492 const std::string& filename) {
493 struct stat sb;
494 int ret = 0;
495 do {
496 ret = stat(file: filename.c_str(), buf: &sb);
497 } while (ret != 0 && errno == EINTR);
498#if defined(_WIN32)
499 if (ret == 0 && sb.st_mode & S_IFDIR) {
500 last_error_message_ = "Input file is a directory.";
501 return nullptr;
502 }
503#else
504 if (ret == 0 && S_ISDIR(sb.st_mode)) {
505 last_error_message_ = "Input file is a directory.";
506 return nullptr;
507 }
508#endif
509 int file_descriptor;
510 do {
511 file_descriptor = open(file: filename.c_str(), O_RDONLY);
512 } while (file_descriptor < 0 && errno == EINTR);
513 if (file_descriptor >= 0) {
514 io::FileInputStream* result = new io::FileInputStream(file_descriptor);
515 result->SetCloseOnDelete(true);
516 return result;
517 } else {
518 return nullptr;
519 }
520}
521
522} // namespace compiler
523} // namespace protobuf
524} // namespace google
525