1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34//
35// Interface for manipulating databases of descriptors.
36
37#ifndef GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__
38#define GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__
39
40
41#include <map>
42#include <string>
43#include <utility>
44#include <vector>
45
46#include <google/protobuf/stubs/common.h>
47#include <google/protobuf/descriptor.h>
48
49// Must be included last.
50#include <google/protobuf/port_def.inc>
51
52#ifdef SWIG
53#error "You cannot SWIG proto headers"
54#endif
55
56namespace google {
57namespace protobuf {
58
// Forward declarations of every class this header defines: the abstract
// interface first, followed by its concrete implementations.
class DescriptorDatabase;
class DescriptorPoolDatabase;
class EncodedDescriptorDatabase;
class MergedDescriptorDatabase;
class SimpleDescriptorDatabase;
65
66// Abstract interface for a database of descriptors.
67//
68// This is useful if you want to create a DescriptorPool which loads
69// descriptors on-demand from some sort of large database. If the database
70// is large, it may be inefficient to enumerate every .proto file inside it
71// calling DescriptorPool::BuildFile() for each one. Instead, a DescriptorPool
72// can be created which wraps a DescriptorDatabase and only builds particular
73// descriptors when they are needed.
74class PROTOBUF_EXPORT DescriptorDatabase {
75 public:
76 inline DescriptorDatabase() {}
77 virtual ~DescriptorDatabase();
78
79 // Find a file by file name. Fills in in *output and returns true if found.
80 // Otherwise, returns false, leaving the contents of *output undefined.
81 virtual bool FindFileByName(const std::string& filename,
82 FileDescriptorProto* output) = 0;
83
84 // Find the file that declares the given fully-qualified symbol name.
85 // If found, fills in *output and returns true, otherwise returns false
86 // and leaves *output undefined.
87 virtual bool FindFileContainingSymbol(const std::string& symbol_name,
88 FileDescriptorProto* output) = 0;
89
90 // Find the file which defines an extension extending the given message type
91 // with the given field number. If found, fills in *output and returns true,
92 // otherwise returns false and leaves *output undefined. containing_type
93 // must be a fully-qualified type name.
94 virtual bool FindFileContainingExtension(const std::string& containing_type,
95 int field_number,
96 FileDescriptorProto* output) = 0;
97
98 // Finds the tag numbers used by all known extensions of
99 // extendee_type, and appends them to output in an undefined
100 // order. This method is best-effort: it's not guaranteed that the
101 // database will find all extensions, and it's not guaranteed that
102 // FindFileContainingExtension will return true on all of the found
103 // numbers. Returns true if the search was successful, otherwise
104 // returns false and leaves output unchanged.
105 //
106 // This method has a default implementation that always returns
107 // false.
108 virtual bool FindAllExtensionNumbers(const std::string& /* extendee_type */,
109 std::vector<int>* /* output */) {
110 return false;
111 }
112
113
114 // Finds the file names and appends them to the output in an
115 // undefined order. This method is best-effort: it's not guaranteed that the
116 // database will find all files. Returns true if the database supports
117 // searching all file names, otherwise returns false and leaves output
118 // unchanged.
119 //
120 // This method has a default implementation that always returns
121 // false.
122 virtual bool FindAllFileNames(std::vector<std::string>* /*output*/) {
123 return false;
124 }
125
126 // Finds the package names and appends them to the output in an
127 // undefined order. This method is best-effort: it's not guaranteed that the
128 // database will find all packages. Returns true if the database supports
129 // searching all package names, otherwise returns false and leaves output
130 // unchanged.
131 bool FindAllPackageNames(std::vector<std::string>* output);
132
133 // Finds the message names and appends them to the output in an
134 // undefined order. This method is best-effort: it's not guaranteed that the
135 // database will find all messages. Returns true if the database supports
136 // searching all message names, otherwise returns false and leaves output
137 // unchanged.
138 bool FindAllMessageNames(std::vector<std::string>* output);
139
140 private:
141 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorDatabase);
142};
143
144// A DescriptorDatabase into which you can insert files manually.
145//
146// FindFileContainingSymbol() is fully-implemented. When you add a file, its
147// symbols will be indexed for this purpose. Note that the implementation
148// may return false positives, but only if it isn't possible for the symbol
149// to be defined in any other file. In particular, if a file defines a symbol
150// "Foo", then searching for "Foo.[anything]" will match that file. This way,
151// the database does not need to aggressively index all children of a symbol.
152//
153// FindFileContainingExtension() is mostly-implemented. It works if and only
154// if the original FieldDescriptorProto defining the extension has a
155// fully-qualified type name in its "extendee" field (i.e. starts with a '.').
156// If the extendee is a relative name, SimpleDescriptorDatabase will not
157// attempt to resolve the type, so it will not know what type the extension is
158// extending. Therefore, calling FindFileContainingExtension() with the
159// extension's containing type will never actually find that extension. Note
160// that this is an unlikely problem, as all FileDescriptorProtos created by the
161// protocol compiler (as well as ones created by calling
162// FileDescriptor::CopyTo()) will always use fully-qualified names for all
163// types. You only need to worry if you are constructing FileDescriptorProtos
164// yourself, or are calling compiler::Parser directly.
165class PROTOBUF_EXPORT SimpleDescriptorDatabase : public DescriptorDatabase {
166 public:
167 SimpleDescriptorDatabase();
168 ~SimpleDescriptorDatabase() override;
169
170 // Adds the FileDescriptorProto to the database, making a copy. The object
171 // can be deleted after Add() returns. Returns false if the file conflicted
172 // with a file already in the database, in which case an error will have
173 // been written to GOOGLE_LOG(ERROR).
174 bool Add(const FileDescriptorProto& file);
175
176 // Adds the FileDescriptorProto to the database and takes ownership of it.
177 bool AddAndOwn(const FileDescriptorProto* file);
178
179 // implements DescriptorDatabase -----------------------------------
180 bool FindFileByName(const std::string& filename,
181 FileDescriptorProto* output) override;
182 bool FindFileContainingSymbol(const std::string& symbol_name,
183 FileDescriptorProto* output) override;
184 bool FindFileContainingExtension(const std::string& containing_type,
185 int field_number,
186 FileDescriptorProto* output) override;
187 bool FindAllExtensionNumbers(const std::string& extendee_type,
188 std::vector<int>* output) override;
189
190 bool FindAllFileNames(std::vector<std::string>* output) override;
191
192 private:
193 // An index mapping file names, symbol names, and extension numbers to
194 // some sort of values.
195 template <typename Value>
196 class DescriptorIndex {
197 public:
198 // Helpers to recursively add particular descriptors and all their contents
199 // to the index.
200 bool AddFile(const FileDescriptorProto& file, Value value);
201 bool AddSymbol(const std::string& name, Value value);
202 bool AddNestedExtensions(const std::string& filename,
203 const DescriptorProto& message_type, Value value);
204 bool AddExtension(const std::string& filename,
205 const FieldDescriptorProto& field, Value value);
206
207 Value FindFile(const std::string& filename);
208 Value FindSymbol(const std::string& name);
209 Value FindExtension(const std::string& containing_type, int field_number);
210 bool FindAllExtensionNumbers(const std::string& containing_type,
211 std::vector<int>* output);
212 void FindAllFileNames(std::vector<std::string>* output);
213
214 private:
215 std::map<std::string, Value> by_name_;
216 std::map<std::string, Value> by_symbol_;
217 std::map<std::pair<std::string, int>, Value> by_extension_;
218
219 // Invariant: The by_symbol_ map does not contain any symbols which are
220 // prefixes of other symbols in the map. For example, "foo.bar" is a
221 // prefix of "foo.bar.baz" (but is not a prefix of "foo.barbaz").
222 //
223 // This invariant is important because it means that given a symbol name,
224 // we can find a key in the map which is a prefix of the symbol in O(lg n)
225 // time, and we know that there is at most one such key.
226 //
227 // The prefix lookup algorithm works like so:
228 // 1) Find the last key in the map which is less than or equal to the
229 // search key.
230 // 2) If the found key is a prefix of the search key, then return it.
231 // Otherwise, there is no match.
232 //
233 // I am sure this algorithm has been described elsewhere, but since I
234 // wasn't able to find it quickly I will instead prove that it works
235 // myself. The key to the algorithm is that if a match exists, step (1)
236 // will find it. Proof:
237 // 1) Define the "search key" to be the key we are looking for, the "found
238 // key" to be the key found in step (1), and the "match key" to be the
239 // key which actually matches the search key (i.e. the key we're trying
240 // to find).
241 // 2) The found key must be less than or equal to the search key by
242 // definition.
243 // 3) The match key must also be less than or equal to the search key
244 // (because it is a prefix).
245 // 4) The match key cannot be greater than the found key, because if it
246 // were, then step (1) of the algorithm would have returned the match
247 // key instead (since it finds the *greatest* key which is less than or
248 // equal to the search key).
249 // 5) Therefore, the found key must be between the match key and the search
250 // key, inclusive.
251 // 6) Since the search key must be a sub-symbol of the match key, if it is
252 // not equal to the match key, then search_key[match_key.size()] must
253 // be '.'.
254 // 7) Since '.' sorts before any other character that is valid in a symbol
255 // name, then if the found key is not equal to the match key, then
256 // found_key[match_key.size()] must also be '.', because any other value
257 // would make it sort after the search key.
258 // 8) Therefore, if the found key is not equal to the match key, then the
259 // found key must be a sub-symbol of the match key. However, this would
260 // contradict our map invariant which says that no symbol in the map is
261 // a sub-symbol of any other.
262 // 9) Therefore, the found key must match the match key.
263 //
264 // The above proof assumes the match key exists. In the case that the
265 // match key does not exist, then step (1) will return some other symbol.
266 // That symbol cannot be a super-symbol of the search key since if it were,
267 // then it would be a match, and we're assuming the match key doesn't exist.
268 // Therefore, step 2 will correctly return no match.
269 };
270
271 DescriptorIndex<const FileDescriptorProto*> index_;
272 std::vector<std::unique_ptr<const FileDescriptorProto>> files_to_delete_;
273
274 // If file is non-nullptr, copy it into *output and return true, otherwise
275 // return false.
276 bool MaybeCopy(const FileDescriptorProto* file, FileDescriptorProto* output);
277
278 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(SimpleDescriptorDatabase);
279};
280
281// Very similar to SimpleDescriptorDatabase, but stores all the descriptors
282// as raw bytes and generally tries to use as little memory as possible.
283//
284// The same caveats regarding FindFileContainingExtension() apply as with
285// SimpleDescriptorDatabase.
286class PROTOBUF_EXPORT EncodedDescriptorDatabase : public DescriptorDatabase {
287 public:
288 EncodedDescriptorDatabase();
289 ~EncodedDescriptorDatabase() override;
290
291 // Adds the FileDescriptorProto to the database. The descriptor is provided
292 // in encoded form. The database does not make a copy of the bytes, nor
293 // does it take ownership; it's up to the caller to make sure the bytes
294 // remain valid for the life of the database. Returns false and logs an error
295 // if the bytes are not a valid FileDescriptorProto or if the file conflicted
296 // with a file already in the database.
297 bool Add(const void* encoded_file_descriptor, int size);
298
299 // Like Add(), but makes a copy of the data, so that the caller does not
300 // need to keep it around.
301 bool AddCopy(const void* encoded_file_descriptor, int size);
302
303 // Like FindFileContainingSymbol but returns only the name of the file.
304 bool FindNameOfFileContainingSymbol(const std::string& symbol_name,
305 std::string* output);
306
307 // implements DescriptorDatabase -----------------------------------
308 bool FindFileByName(const std::string& filename,
309 FileDescriptorProto* output) override;
310 bool FindFileContainingSymbol(const std::string& symbol_name,
311 FileDescriptorProto* output) override;
312 bool FindFileContainingExtension(const std::string& containing_type,
313 int field_number,
314 FileDescriptorProto* output) override;
315 bool FindAllExtensionNumbers(const std::string& extendee_type,
316 std::vector<int>* output) override;
317 bool FindAllFileNames(std::vector<std::string>* output) override;
318
319 private:
320 class DescriptorIndex;
321 // Keep DescriptorIndex by pointer to hide the implementation to keep a
322 // cleaner header.
323 std::unique_ptr<DescriptorIndex> index_;
324 std::vector<void*> files_to_delete_;
325
326 // If encoded_file.first is non-nullptr, parse the data into *output and
327 // return true, otherwise return false.
328 bool MaybeParse(std::pair<const void*, int> encoded_file,
329 FileDescriptorProto* output);
330
331 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EncodedDescriptorDatabase);
332};
333
334// A DescriptorDatabase that fetches files from a given pool.
335class PROTOBUF_EXPORT DescriptorPoolDatabase : public DescriptorDatabase {
336 public:
337 explicit DescriptorPoolDatabase(const DescriptorPool& pool);
338 ~DescriptorPoolDatabase() override;
339
340 // implements DescriptorDatabase -----------------------------------
341 bool FindFileByName(const std::string& filename,
342 FileDescriptorProto* output) override;
343 bool FindFileContainingSymbol(const std::string& symbol_name,
344 FileDescriptorProto* output) override;
345 bool FindFileContainingExtension(const std::string& containing_type,
346 int field_number,
347 FileDescriptorProto* output) override;
348 bool FindAllExtensionNumbers(const std::string& extendee_type,
349 std::vector<int>* output) override;
350
351 private:
352 const DescriptorPool& pool_;
353 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorPoolDatabase);
354};
355
356// A DescriptorDatabase that wraps two or more others. It first searches the
357// first database and, if that fails, tries the second, and so on.
358class PROTOBUF_EXPORT MergedDescriptorDatabase : public DescriptorDatabase {
359 public:
360 // Merge just two databases. The sources remain property of the caller.
361 MergedDescriptorDatabase(DescriptorDatabase* source1,
362 DescriptorDatabase* source2);
363 // Merge more than two databases. The sources remain property of the caller.
364 // The vector may be deleted after the constructor returns but the
365 // DescriptorDatabases need to stick around.
366 explicit MergedDescriptorDatabase(
367 const std::vector<DescriptorDatabase*>& sources);
368 ~MergedDescriptorDatabase() override;
369
370 // implements DescriptorDatabase -----------------------------------
371 bool FindFileByName(const std::string& filename,
372 FileDescriptorProto* output) override;
373 bool FindFileContainingSymbol(const std::string& symbol_name,
374 FileDescriptorProto* output) override;
375 bool FindFileContainingExtension(const std::string& containing_type,
376 int field_number,
377 FileDescriptorProto* output) override;
378 // Merges the results of calling all databases. Returns true iff any
379 // of the databases returned true.
380 bool FindAllExtensionNumbers(const std::string& extendee_type,
381 std::vector<int>* output) override;
382
383
384 // This function is best-effort. Returns true if at least one underlying
385 // DescriptorDatabase returns true.
386 bool FindAllFileNames(std::vector<std::string>* output) override;
387
388 private:
389 std::vector<DescriptorDatabase*> sources_;
390 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MergedDescriptorDatabase);
391};
392
393} // namespace protobuf
394} // namespace google
395
396#include <google/protobuf/port_undef.inc>
397
398#endif // GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__
399