1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2008 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // Author: kenton@google.com (Kenton Varda) |
32 | // Based on original Protocol Buffers design by |
33 | // Sanjay Ghemawat, Jeff Dean, and others. |
34 | |
35 | #ifdef _MSC_VER |
36 | #include <direct.h> |
37 | #else |
38 | #include <unistd.h> |
39 | #endif |
40 | #include <errno.h> |
41 | #include <fcntl.h> |
42 | #include <sys/stat.h> |
43 | #include <sys/types.h> |
44 | |
45 | #include <algorithm> |
46 | #include <memory> |
47 | |
48 | #include <google/protobuf/compiler/importer.h> |
49 | #include <google/protobuf/compiler/parser.h> |
50 | #include <google/protobuf/io/tokenizer.h> |
51 | #include <google/protobuf/io/zero_copy_stream_impl.h> |
52 | #include <google/protobuf/stubs/strutil.h> |
53 | #include <google/protobuf/io/io_win32.h> |
54 | |
55 | #ifdef _WIN32 |
56 | #include <ctype.h> |
57 | #endif |
58 | |
59 | namespace google { |
60 | namespace protobuf { |
61 | namespace compiler { |
62 | |
63 | #ifdef _WIN32 |
64 | // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import |
65 | // them like we do below. |
66 | using google::protobuf::io::win32::access; |
67 | using google::protobuf::io::win32::open; |
68 | #endif |
69 | |
// Returns true if `text` looks like a Windows-style absolute path: a drive
// letter, then ':', then '/' or '\', with no other ':' anywhere in the string.
// Example: "C:\foo".  When built for anything other than Windows or Cygwin
// this always returns false.
//
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() has undefined behavior when handed a negative value other than
  // EOF, and plain char may be signed, so convert to unsigned char first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') && text.find_last_of(':') == 1;
#else
  return false;
#endif
}
81 | |
82 | MultiFileErrorCollector::~MultiFileErrorCollector() {} |
83 | |
84 | // This class serves two purposes: |
85 | // - It implements the ErrorCollector interface (used by Tokenizer and Parser) |
86 | // in terms of MultiFileErrorCollector, using a particular filename. |
87 | // - It lets us check if any errors have occurred. |
88 | class SourceTreeDescriptorDatabase::SingleFileErrorCollector |
89 | : public io::ErrorCollector { |
90 | public: |
91 | SingleFileErrorCollector(const std::string& filename, |
92 | MultiFileErrorCollector* multi_file_error_collector) |
93 | : filename_(filename), |
94 | multi_file_error_collector_(multi_file_error_collector), |
95 | had_errors_(false) {} |
96 | ~SingleFileErrorCollector() override {} |
97 | |
98 | bool had_errors() { return had_errors_; } |
99 | |
100 | // implements ErrorCollector --------------------------------------- |
101 | void AddError(int line, int column, const std::string& message) override { |
102 | if (multi_file_error_collector_ != nullptr) { |
103 | multi_file_error_collector_->AddError(filename: filename_, line, column, message); |
104 | } |
105 | had_errors_ = true; |
106 | } |
107 | |
108 | private: |
109 | std::string filename_; |
110 | MultiFileErrorCollector* multi_file_error_collector_; |
111 | bool had_errors_; |
112 | }; |
113 | |
114 | // =================================================================== |
115 | |
116 | SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase( |
117 | SourceTree* source_tree) |
118 | : source_tree_(source_tree), |
119 | fallback_database_(nullptr), |
120 | error_collector_(nullptr), |
121 | using_validation_error_collector_(false), |
122 | validation_error_collector_(this) {} |
123 | |
124 | SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase( |
125 | SourceTree* source_tree, DescriptorDatabase* fallback_database) |
126 | : source_tree_(source_tree), |
127 | fallback_database_(fallback_database), |
128 | error_collector_(nullptr), |
129 | using_validation_error_collector_(false), |
130 | validation_error_collector_(this) {} |
131 | |
132 | SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {} |
133 | |
134 | bool SourceTreeDescriptorDatabase::FindFileByName(const std::string& filename, |
135 | FileDescriptorProto* output) { |
136 | std::unique_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename)); |
137 | if (input == nullptr) { |
138 | if (fallback_database_ != nullptr && |
139 | fallback_database_->FindFileByName(filename, output)) { |
140 | return true; |
141 | } |
142 | if (error_collector_ != nullptr) { |
143 | error_collector_->AddError(filename, line: -1, column: 0, |
144 | message: source_tree_->GetLastErrorMessage()); |
145 | } |
146 | return false; |
147 | } |
148 | |
149 | // Set up the tokenizer and parser. |
150 | SingleFileErrorCollector file_error_collector(filename, error_collector_); |
151 | io::Tokenizer tokenizer(input.get(), &file_error_collector); |
152 | |
153 | Parser parser; |
154 | if (error_collector_ != nullptr) { |
155 | parser.RecordErrorsTo(error_collector: &file_error_collector); |
156 | } |
157 | if (using_validation_error_collector_) { |
158 | parser.RecordSourceLocationsTo(location_table: &source_locations_); |
159 | } |
160 | |
161 | // Parse it. |
162 | output->set_name(filename); |
163 | return parser.Parse(input: &tokenizer, file: output) && !file_error_collector.had_errors(); |
164 | } |
165 | |
166 | bool SourceTreeDescriptorDatabase::FindFileContainingSymbol( |
167 | const std::string& symbol_name, FileDescriptorProto* output) { |
168 | return false; |
169 | } |
170 | |
171 | bool SourceTreeDescriptorDatabase::FindFileContainingExtension( |
172 | const std::string& containing_type, int field_number, |
173 | FileDescriptorProto* output) { |
174 | return false; |
175 | } |
176 | |
177 | // ------------------------------------------------------------------- |
178 | |
179 | SourceTreeDescriptorDatabase::ValidationErrorCollector:: |
180 | ValidationErrorCollector(SourceTreeDescriptorDatabase* owner) |
181 | : owner_(owner) {} |
182 | |
183 | SourceTreeDescriptorDatabase::ValidationErrorCollector:: |
184 | ~ValidationErrorCollector() {} |
185 | |
186 | void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError( |
187 | const std::string& filename, const std::string& element_name, |
188 | const Message* descriptor, ErrorLocation location, |
189 | const std::string& message) { |
190 | if (owner_->error_collector_ == nullptr) return; |
191 | |
192 | int line, column; |
193 | if (location == DescriptorPool::ErrorCollector::IMPORT) { |
194 | owner_->source_locations_.FindImport(descriptor, name: element_name, line: &line, |
195 | column: &column); |
196 | } else { |
197 | owner_->source_locations_.Find(descriptor, location, line: &line, column: &column); |
198 | } |
199 | owner_->error_collector_->AddError(filename, line, column, message); |
200 | } |
201 | |
202 | void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddWarning( |
203 | const std::string& filename, const std::string& element_name, |
204 | const Message* descriptor, ErrorLocation location, |
205 | const std::string& message) { |
206 | if (owner_->error_collector_ == nullptr) return; |
207 | |
208 | int line, column; |
209 | if (location == DescriptorPool::ErrorCollector::IMPORT) { |
210 | owner_->source_locations_.FindImport(descriptor, name: element_name, line: &line, |
211 | column: &column); |
212 | } else { |
213 | owner_->source_locations_.Find(descriptor, location, line: &line, column: &column); |
214 | } |
215 | owner_->error_collector_->AddWarning(filename, line, column, message); |
216 | } |
217 | |
218 | // =================================================================== |
219 | |
220 | Importer::Importer(SourceTree* source_tree, |
221 | MultiFileErrorCollector* error_collector) |
222 | : database_(source_tree), |
223 | pool_(&database_, database_.GetValidationErrorCollector()) { |
224 | pool_.EnforceWeakDependencies(enforce: true); |
225 | database_.RecordErrorsTo(error_collector); |
226 | } |
227 | |
228 | Importer::~Importer() {} |
229 | |
230 | const FileDescriptor* Importer::Import(const std::string& filename) { |
231 | return pool_.FindFileByName(name: filename); |
232 | } |
233 | |
234 | void Importer::AddUnusedImportTrackFile(const std::string& file_name, |
235 | bool is_error) { |
236 | pool_.AddUnusedImportTrackFile(file_name, is_error); |
237 | } |
238 | |
239 | void Importer::ClearUnusedImportTrackFiles() { |
240 | pool_.ClearUnusedImportTrackFiles(); |
241 | } |
242 | |
243 | |
244 | // =================================================================== |
245 | |
246 | SourceTree::~SourceTree() {} |
247 | |
248 | std::string SourceTree::GetLastErrorMessage() { return "File not found." ; } |
249 | |
250 | DiskSourceTree::DiskSourceTree() {} |
251 | |
252 | DiskSourceTree::~DiskSourceTree() {} |
253 | |
// Returns the final character of `str`, or '\0' when `str` is empty (indexing
// size() - 1 on an empty string would read out of range).
static inline char LastChar(const std::string& str) {
  return str.empty() ? '\0' : str.back();
}
257 | |
258 | // Given a path, returns an equivalent path with these changes: |
259 | // - On Windows, any backslashes are replaced with forward slashes. |
260 | // - Any instances of the directory "." are removed. |
261 | // - Any consecutive '/'s are collapsed into a single slash. |
262 | // Note that the resulting string may be empty. |
263 | // |
264 | // TODO(kenton): It would be nice to handle "..", e.g. so that we can figure |
265 | // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a |
266 | // symlink or doesn't exist, then things get complicated, and we can't |
267 | // actually determine this without investigating the filesystem, probably |
268 | // in non-portable ways. So, we punt. |
269 | // |
270 | // TODO(kenton): It would be nice to use realpath() here except that it |
271 | // resolves symbolic links. This could cause problems if people place |
272 | // symbolic links in their source tree. For example, if you executed: |
273 | // protoc --proto_path=foo foo/bar/baz.proto |
274 | // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize |
275 | // to a path which does not appear to be under foo, and thus the compiler |
276 | // will complain that baz.proto is not inside the --proto_path. |
277 | static std::string CanonicalizePath(std::string path) { |
278 | #ifdef _WIN32 |
279 | // The Win32 API accepts forward slashes as a path delimiter even though |
280 | // backslashes are standard. Let's avoid confusion and use only forward |
281 | // slashes. |
282 | if (HasPrefixString(path, "\\\\" )) { |
283 | // Avoid converting two leading backslashes. |
284 | path = "\\\\" + StringReplace(path.substr(2), "\\" , "/" , true); |
285 | } else { |
286 | path = StringReplace(path, "\\" , "/" , true); |
287 | } |
288 | #endif |
289 | |
290 | std::vector<std::string> canonical_parts; |
291 | std::vector<std::string> parts = Split( |
292 | full: path, delim: "/" , skip_empty: true); // Note: Removes empty parts. |
293 | for (const std::string& part : parts) { |
294 | if (part == "." ) { |
295 | // Ignore. |
296 | } else { |
297 | canonical_parts.push_back(x: part); |
298 | } |
299 | } |
300 | std::string result = Join(components: canonical_parts, delim: "/" ); |
301 | if (!path.empty() && path[0] == '/') { |
302 | // Restore leading slash. |
303 | result = '/' + result; |
304 | } |
305 | if (!path.empty() && LastChar(str: path) == '/' && !result.empty() && |
306 | LastChar(str: result) != '/') { |
307 | // Restore trailing slash. |
308 | result += '/'; |
309 | } |
310 | return result; |
311 | } |
312 | |
313 | static inline bool ContainsParentReference(const std::string& path) { |
314 | return path == ".." || HasPrefixString(str: path, prefix: "../" ) || |
315 | HasSuffixString(str: path, suffix: "/.." ) || path.find(s: "/../" ) != std::string::npos; |
316 | } |
317 | |
318 | // Maps a file from an old location to a new one. Typically, old_prefix is |
319 | // a virtual path and new_prefix is its corresponding disk path. Returns |
320 | // false if the filename did not start with old_prefix, otherwise replaces |
321 | // old_prefix with new_prefix and stores the result in *result. Examples: |
322 | // string result; |
323 | // assert(ApplyMapping("foo/bar", "", "baz", &result)); |
324 | // assert(result == "baz/foo/bar"); |
325 | // |
326 | // assert(ApplyMapping("foo/bar", "foo", "baz", &result)); |
327 | // assert(result == "baz/bar"); |
328 | // |
329 | // assert(ApplyMapping("foo", "foo", "bar", &result)); |
330 | // assert(result == "bar"); |
331 | // |
332 | // assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); |
333 | // assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); |
334 | // assert(!ApplyMapping("foobar", "foo", "baz", &result)); |
335 | static bool ApplyMapping(const std::string& filename, |
336 | const std::string& old_prefix, |
337 | const std::string& new_prefix, std::string* result) { |
338 | if (old_prefix.empty()) { |
339 | // old_prefix matches any relative path. |
340 | if (ContainsParentReference(path: filename)) { |
341 | // We do not allow the file name to use "..". |
342 | return false; |
343 | } |
344 | if (HasPrefixString(str: filename, prefix: "/" ) || IsWindowsAbsolutePath(text: filename)) { |
345 | // This is an absolute path, so it isn't matched by the empty string. |
346 | return false; |
347 | } |
348 | result->assign(str: new_prefix); |
349 | if (!result->empty()) result->push_back(c: '/'); |
350 | result->append(str: filename); |
351 | return true; |
352 | } else if (HasPrefixString(str: filename, prefix: old_prefix)) { |
353 | // old_prefix is a prefix of the filename. Is it the whole filename? |
354 | if (filename.size() == old_prefix.size()) { |
355 | // Yep, it's an exact match. |
356 | *result = new_prefix; |
357 | return true; |
358 | } else { |
359 | // Not an exact match. Is the next character a '/'? Otherwise, |
360 | // this isn't actually a match at all. E.g. the prefix "foo/bar" |
361 | // does not match the filename "foo/barbaz". |
362 | int after_prefix_start = -1; |
363 | if (filename[old_prefix.size()] == '/') { |
364 | after_prefix_start = old_prefix.size() + 1; |
365 | } else if (filename[old_prefix.size() - 1] == '/') { |
366 | // old_prefix is never empty, and canonicalized paths never have |
367 | // consecutive '/' characters. |
368 | after_prefix_start = old_prefix.size(); |
369 | } |
370 | if (after_prefix_start != -1) { |
371 | // Yep. So the prefixes are directories and the filename is a file |
372 | // inside them. |
373 | std::string after_prefix = filename.substr(pos: after_prefix_start); |
374 | if (ContainsParentReference(path: after_prefix)) { |
375 | // We do not allow the file name to use "..". |
376 | return false; |
377 | } |
378 | result->assign(str: new_prefix); |
379 | if (!result->empty()) result->push_back(c: '/'); |
380 | result->append(str: after_prefix); |
381 | return true; |
382 | } |
383 | } |
384 | } |
385 | |
386 | return false; |
387 | } |
388 | |
389 | void DiskSourceTree::MapPath(const std::string& virtual_path, |
390 | const std::string& disk_path) { |
391 | mappings_.push_back(x: Mapping(virtual_path, CanonicalizePath(path: disk_path))); |
392 | } |
393 | |
394 | DiskSourceTree::DiskFileToVirtualFileResult |
395 | DiskSourceTree::DiskFileToVirtualFile(const std::string& disk_file, |
396 | std::string* virtual_file, |
397 | std::string* shadowing_disk_file) { |
398 | int mapping_index = -1; |
399 | std::string canonical_disk_file = CanonicalizePath(path: disk_file); |
400 | |
401 | for (int i = 0; i < mappings_.size(); i++) { |
402 | // Apply the mapping in reverse. |
403 | if (ApplyMapping(filename: canonical_disk_file, old_prefix: mappings_[i].disk_path, |
404 | new_prefix: mappings_[i].virtual_path, result: virtual_file)) { |
405 | // Success. |
406 | mapping_index = i; |
407 | break; |
408 | } |
409 | } |
410 | |
411 | if (mapping_index == -1) { |
412 | return NO_MAPPING; |
413 | } |
414 | |
415 | // Iterate through all mappings with higher precedence and verify that none |
416 | // of them map this file to some other existing file. |
417 | for (int i = 0; i < mapping_index; i++) { |
418 | if (ApplyMapping(filename: *virtual_file, old_prefix: mappings_[i].virtual_path, |
419 | new_prefix: mappings_[i].disk_path, result: shadowing_disk_file)) { |
420 | if (access(name: shadowing_disk_file->c_str(), F_OK) >= 0) { |
421 | // File exists. |
422 | return SHADOWED; |
423 | } |
424 | } |
425 | } |
426 | shadowing_disk_file->clear(); |
427 | |
428 | // Verify that we can open the file. Note that this also has the side-effect |
429 | // of verifying that we are not canonicalizing away any non-existent |
430 | // directories. |
431 | std::unique_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(filename: disk_file)); |
432 | if (stream == nullptr) { |
433 | return CANNOT_OPEN; |
434 | } |
435 | |
436 | return SUCCESS; |
437 | } |
438 | |
439 | bool DiskSourceTree::VirtualFileToDiskFile(const std::string& virtual_file, |
440 | std::string* disk_file) { |
441 | std::unique_ptr<io::ZeroCopyInputStream> stream( |
442 | OpenVirtualFile(virtual_file, disk_file)); |
443 | return stream != nullptr; |
444 | } |
445 | |
446 | io::ZeroCopyInputStream* DiskSourceTree::Open(const std::string& filename) { |
447 | return OpenVirtualFile(virtual_file: filename, disk_file: nullptr); |
448 | } |
449 | |
450 | std::string DiskSourceTree::GetLastErrorMessage() { |
451 | return last_error_message_; |
452 | } |
453 | |
454 | io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile( |
455 | const std::string& virtual_file, std::string* disk_file) { |
456 | if (virtual_file != CanonicalizePath(path: virtual_file) || |
457 | ContainsParentReference(path: virtual_file)) { |
458 | // We do not allow importing of paths containing things like ".." or |
459 | // consecutive slashes since the compiler expects files to be uniquely |
460 | // identified by file name. |
461 | last_error_message_ = |
462 | "Backslashes, consecutive slashes, \".\", or \"..\" " |
463 | "are not allowed in the virtual path" ; |
464 | return nullptr; |
465 | } |
466 | |
467 | for (const auto& mapping : mappings_) { |
468 | std::string temp_disk_file; |
469 | if (ApplyMapping(filename: virtual_file, old_prefix: mapping.virtual_path, new_prefix: mapping.disk_path, |
470 | result: &temp_disk_file)) { |
471 | io::ZeroCopyInputStream* stream = OpenDiskFile(filename: temp_disk_file); |
472 | if (stream != nullptr) { |
473 | if (disk_file != nullptr) { |
474 | *disk_file = temp_disk_file; |
475 | } |
476 | return stream; |
477 | } |
478 | |
479 | if (errno == EACCES) { |
480 | // The file exists but is not readable. |
481 | last_error_message_ = |
482 | "Read access is denied for file: " + temp_disk_file; |
483 | return nullptr; |
484 | } |
485 | } |
486 | } |
487 | last_error_message_ = "File not found." ; |
488 | return nullptr; |
489 | } |
490 | |
491 | io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile( |
492 | const std::string& filename) { |
493 | struct stat sb; |
494 | int ret = 0; |
495 | do { |
496 | ret = stat(file: filename.c_str(), buf: &sb); |
497 | } while (ret != 0 && errno == EINTR); |
498 | #if defined(_WIN32) |
499 | if (ret == 0 && sb.st_mode & S_IFDIR) { |
500 | last_error_message_ = "Input file is a directory." ; |
501 | return nullptr; |
502 | } |
503 | #else |
504 | if (ret == 0 && S_ISDIR(sb.st_mode)) { |
505 | last_error_message_ = "Input file is a directory." ; |
506 | return nullptr; |
507 | } |
508 | #endif |
509 | int file_descriptor; |
510 | do { |
511 | file_descriptor = open(file: filename.c_str(), O_RDONLY); |
512 | } while (file_descriptor < 0 && errno == EINTR); |
513 | if (file_descriptor >= 0) { |
514 | io::FileInputStream* result = new io::FileInputStream(file_descriptor); |
515 | result->SetCloseOnDelete(true); |
516 | return result; |
517 | } else { |
518 | return nullptr; |
519 | } |
520 | } |
521 | |
522 | } // namespace compiler |
523 | } // namespace protobuf |
524 | } // namespace google |
525 | |