1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34//
35// This file defines static methods and classes for comparing Protocol
36// Messages.
37//
38// Aug. 2008: Added Unknown Fields Comparison for messages.
39// Aug. 2009: Added different options to compare repeated fields.
40// Apr. 2010: Moved field comparison to FieldComparator
41// Sep. 2020: Added option to output map keys in path
42
43#ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
44#define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
45
46
47#include <functional>
48#include <map>
49#include <memory>
50#include <set>
51#include <string>
52#include <vector>
53
54#include <google/protobuf/descriptor.h> // FieldDescriptor
55#include <google/protobuf/message.h> // Message
56#include <google/protobuf/unknown_field_set.h>
57#include <google/protobuf/util/field_comparator.h>
58
59// Always include as last one, otherwise it can break compilation
60#include <google/protobuf/port_def.inc>
61
62namespace google {
63namespace protobuf {
64
// Forward declarations.
class FieldDescriptor;
class DynamicMessageFactory;

namespace io {
class Printer;
class ZeroCopyOutputStream;
}  // namespace io
72
73namespace util {
74
class FieldContext;  // declared below MessageDifferencer
class DefaultFieldComparator;
77
78// Defines a collection of field descriptors.
79// In case of internal google codebase we are using absl::FixedArray instead
80// of vector. It significantly speeds up proto comparison (by ~30%) by
81// reducing the number of malloc/free operations
82typedef std::vector<const FieldDescriptor*> FieldDescriptorArray;
83
84// A basic differencer that can be used to determine
85// the differences between two specified Protocol Messages. If any differences
86// are found, the Compare method will return false, and any differencer reporter
87// specified via ReportDifferencesTo will have its reporting methods called (see
88// below for implementation of the report). Based off of the original
89// ProtocolDifferencer implementation in //net/proto/protocol-differencer.h
90// (Thanks Todd!).
91//
92// MessageDifferencer REQUIRES that compared messages be the same type, defined
93// as messages that share the same descriptor. If not, the behavior of this
94// class is undefined.
95//
96// People disagree on what MessageDifferencer should do when asked to compare
97// messages with different descriptors. Some people think it should always
98// return false. Others expect it to try to look for similar fields and
99// compare them anyway -- especially if the descriptors happen to be identical.
100// If we chose either of these behaviors, some set of people would find it
101// surprising, and could end up writing code expecting the other behavior
102// without realizing their error. Therefore, we forbid that usage.
103//
104// This class is implemented based on the proto2 reflection. The performance
105// should be good enough for normal usages. However, for places where the
106// performance is extremely sensitive, there are several alternatives:
107// - Comparing serialized string
108// Downside: false negatives (there are messages that are the same but their
109// serialized strings are different).
110// - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin)
111// Downside: more generated code; maintenance overhead for the additional rule
112// (must be in sync with the original proto_library).
113//
114// Note on handling of google.protobuf.Any: MessageDifferencer automatically
115// unpacks Any::value into a Message and compares its individual fields.
116// Messages encoded in a repeated Any cannot be compared using TreatAsMap.
117//
118// Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to
119// guard it with a lock to use the same MessageDifferencer instance from
120// multiple threads. Note that it's fine to call static comparison methods
121// (like MessageDifferencer::Equals) concurrently, but it's not recommended for
122// performance critical code as it leads to extra allocations.
123class PROTOBUF_EXPORT MessageDifferencer {
124 public:
125 // Determines whether the supplied messages are equal. Equality is defined as
126 // all fields within the two messages being set to the same value. Primitive
127 // fields and strings are compared by value while embedded messages/groups
128 // are compared as if via a recursive call. Use Compare() with IgnoreField()
129 // if some fields should be ignored in the comparison. Use Compare() with
130 // TreatAsSet() if there are repeated fields where ordering does not matter.
131 //
132 // This method REQUIRES that the two messages have the same
133 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
134 static bool Equals(const Message& message1, const Message& message2);
135
136 // Determines whether the supplied messages are equivalent. Equivalency is
137 // defined as all fields within the two messages having the same value. This
138 // differs from the Equals method above in that fields with default values
139 // are considered set to said value automatically. For details on how default
140 // values are defined for each field type, see:
141 // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional.
142 // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare()
143 // if some fields should be ignored in the comparison.
144 //
145 // This method REQUIRES that the two messages have the same
146 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
147 static bool Equivalent(const Message& message1, const Message& message2);
148
149 // Determines whether the supplied messages are approximately equal.
150 // Approximate equality is defined as all fields within the two messages
151 // being approximately equal. Primitive (non-float) fields and strings are
152 // compared by value, floats are compared using MathUtil::AlmostEquals() and
153 // embedded messages/groups are compared as if via a recursive call. Use
154 // IgnoreField() and Compare() if some fields should be ignored in the
155 // comparison.
156 //
157 // This method REQUIRES that the two messages have the same
158 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
159 static bool ApproximatelyEquals(const Message& message1,
160 const Message& message2);
161
162 // Determines whether the supplied messages are approximately equivalent.
163 // Approximate equivalency is defined as all fields within the two messages
164 // being approximately equivalent. As in
165 // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and
166 // strings are compared by value, floats are compared using
167 // MathUtil::AlmostEquals() and embedded messages/groups are compared as if
168 // via a recursive call. However, fields with default values are considered
169 // set to said value, as per MessageDifferencer::Equivalent. Use IgnoreField()
170 // and Compare() if some fields should be ignored in the comparison.
171 //
172 // This method REQUIRES that the two messages have the same
173 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
174 static bool ApproximatelyEquivalent(const Message& message1,
175 const Message& message2);
176
177 // Identifies an individual field in a message instance. Used for field_path,
178 // below.
179 struct SpecificField {
180 // For known fields, "field" is filled in and "unknown_field_number" is -1.
181 // For unknown fields, "field" is NULL, "unknown_field_number" is the field
182 // number, and "unknown_field_type" is its type.
183 const FieldDescriptor* field = nullptr;
184 int unknown_field_number = -1;
185 UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT;
186
187 // If this a repeated field, "index" is the index within it. For unknown
188 // fields, this is the index of the field among all unknown fields of the
189 // same field number and type.
190 int index = -1;
191
192 // If "field" is a repeated field which is being treated as a map or
193 // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates
194 // the index the position to which the element has moved. If the element
195 // has not moved, "new_index" will have the same value as "index".
196 int new_index = -1;
197
198 // If "field" is a map field, point to the map entry.
199 const Message* map_entry1 = nullptr;
200 const Message* map_entry2 = nullptr;
201
202 // For unknown fields, these are the pointers to the UnknownFieldSet
203 // containing the unknown fields. In certain cases (e.g. proto1's
204 // MessageSet, or nested groups of unknown fields), these may differ from
205 // the messages' internal UnknownFieldSets.
206 const UnknownFieldSet* unknown_field_set1 = nullptr;
207 const UnknownFieldSet* unknown_field_set2 = nullptr;
208
209 // For unknown fields, these are the index of the field within the
210 // UnknownFieldSets. One or the other will be -1 when
211 // reporting an addition or deletion.
212 int unknown_field_index1 = -1;
213 int unknown_field_index2 = -1;
214 };
215
216 // Abstract base class from which all MessageDifferencer
217 // reporters derive. The five Report* methods below will be called when
218 // a field has been added, deleted, modified, moved, or matched. The third
219 // argument is a vector of FieldDescriptor pointers which describes the chain
220 // of fields that was taken to find the current field. For example, for a
221 // field found in an embedded message, the vector will contain two
222 // FieldDescriptors. The first will be the field of the embedded message
223 // itself and the second will be the actual field in the embedded message
224 // that was added/deleted/modified.
225 // Fields will be reported in PostTraversalOrder.
226 // For example, given following proto, if both baz and mooo are changed.
227 // foo {
228 // bar {
229 // baz: 1
230 // mooo: 2
231 // }
232 // }
233 // ReportModified will be invoked with following order:
234 // 1. foo.bar.baz or foo.bar.mooo
235 // 2. foo.bar.mooo or foo.bar.baz
236 // 2. foo.bar
237 // 3. foo
238 class PROTOBUF_EXPORT Reporter {
239 public:
240 Reporter();
241 virtual ~Reporter();
242
243 // Reports that a field has been added into Message2.
244 virtual void ReportAdded(const Message& message1, const Message& message2,
245 const std::vector<SpecificField>& field_path) = 0;
246
247 // Reports that a field has been deleted from Message1.
248 virtual void ReportDeleted(
249 const Message& message1, const Message& message2,
250 const std::vector<SpecificField>& field_path) = 0;
251
252 // Reports that the value of a field has been modified.
253 virtual void ReportModified(
254 const Message& message1, const Message& message2,
255 const std::vector<SpecificField>& field_path) = 0;
256
257 // Reports that a repeated field has been moved to another location. This
258 // only applies when using TreatAsSet or TreatAsMap() -- see below. Also
259 // note that for any given field, ReportModified and ReportMoved are
260 // mutually exclusive. If a field has been both moved and modified, then
261 // only ReportModified will be called.
262 virtual void ReportMoved(
263 const Message& /* message1 */, const Message& /* message2 */,
264 const std::vector<SpecificField>& /* field_path */) {}
265
266 // Reports that two fields match. Useful for doing side-by-side diffs.
267 // This function is mutually exclusive with ReportModified and ReportMoved.
268 // Note that you must call set_report_matches(true) before calling Compare
269 // to make use of this function.
270 virtual void ReportMatched(
271 const Message& /* message1 */, const Message& /* message2 */,
272 const std::vector<SpecificField>& /* field_path */) {}
273
274 // Reports that two fields would have been compared, but the
275 // comparison has been skipped because the field was marked as
276 // 'ignored' using IgnoreField(). This function is mutually
277 // exclusive with all the other Report() functions.
278 //
279 // The contract of ReportIgnored is slightly different than the
280 // other Report() functions, in that |field_path.back().index| is
281 // always equal to -1, even if the last field is repeated. This is
282 // because while the other Report() functions indicate where in a
283 // repeated field the action (Addition, Deletion, etc...)
284 // happened, when a repeated field is 'ignored', the differencer
285 // simply calls ReportIgnored on the repeated field as a whole and
286 // moves on without looking at its individual elements.
287 //
288 // Furthermore, ReportIgnored() does not indicate whether the
289 // fields were in fact equal or not, as Compare() does not inspect
290 // these fields at all. It is up to the Reporter to decide whether
291 // the fields are equal or not (perhaps with a second call to
292 // Compare()), if it cares.
293 virtual void ReportIgnored(
294 const Message& /* message1 */, const Message& /* message2 */,
295 const std::vector<SpecificField>& /* field_path */) {}
296
297 // Report that an unknown field is ignored. (see comment above).
298 // Note this is a different function since the last SpecificField in field
299 // path has a null field. This could break existing Reporter.
300 virtual void ReportUnknownFieldIgnored(
301 const Message& /* message1 */, const Message& /* message2 */,
302 const std::vector<SpecificField>& /* field_path */) {}
303
304 private:
305 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Reporter);
306 };
307
308 // MapKeyComparator is used to determine if two elements have the same key
309 // when comparing elements of a repeated field as a map.
310 class PROTOBUF_EXPORT MapKeyComparator {
311 public:
312 MapKeyComparator();
313 virtual ~MapKeyComparator();
314
315 virtual bool IsMatch(
316 const Message& /* message1 */, const Message& /* message2 */,
317 const std::vector<SpecificField>& /* parent_fields */) const {
318 GOOGLE_CHECK(false) << "IsMatch() is not implemented.";
319 return false;
320 }
321
322 private:
323 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapKeyComparator);
324 };
325
326 // Abstract base class from which all IgnoreCriteria derive.
327 // By adding IgnoreCriteria more complex ignore logic can be implemented.
328 // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared
329 // field IsIgnored is called on each added IgnoreCriteria until one returns
330 // true or all return false.
331 // IsIgnored is called for fields where at least one side has a value.
332 class PROTOBUF_EXPORT IgnoreCriteria {
333 public:
334 IgnoreCriteria();
335 virtual ~IgnoreCriteria();
336
337 // Returns true if the field should be ignored.
338 virtual bool IsIgnored(
339 const Message& /* message1 */, const Message& /* message2 */,
340 const FieldDescriptor* /* field */,
341 const std::vector<SpecificField>& /* parent_fields */) = 0;
342
343 // Returns true if the unknown field should be ignored.
344 // Note: This will be called for unknown fields as well in which case
345 // field.field will be null.
346 virtual bool IsUnknownFieldIgnored(
347 const Message& /* message1 */, const Message& /* message2 */,
348 const SpecificField& /* field */,
349 const std::vector<SpecificField>& /* parent_fields */) {
350 return false;
351 }
352 };
353
354 // To add a Reporter, construct default here, then use ReportDifferencesTo or
355 // ReportDifferencesToString.
356 explicit MessageDifferencer();
357
358 ~MessageDifferencer();
359
enum MessageFieldComparison {
  // Fields must be present in both messages for the messages to be
  // considered the same.
  EQUAL,
  // Fields with default values are considered set for comparison purposes
  // even if not explicitly set in the messages themselves. Unknown fields
  // are ignored.
  EQUIVALENT,
};
368
enum Scope {
  // All fields of both messages are considered in the comparison.
  FULL,
  // Only fields present in the first message are considered; fields set
  // only in the second message will be skipped during comparison.
  PARTIAL
};
375
// DEPRECATED. Use FieldComparator::FloatComparison instead.
enum FloatComparison {
  // Floats and doubles are compared exactly.
  EXACT,
  // Floats and doubles are compared using the MathUtil::AlmostEquals method.
  APPROXIMATE
};
382
enum RepeatedFieldComparison {
  // Repeated fields are compared in order. Differing values at the same
  // index are reported using ReportModified(). If the repeated fields have
  // different numbers of elements, the unpaired elements are reported using
  // ReportAdded() or ReportDeleted().
  AS_LIST,
  // Treat all the repeated fields as sets. See TreatAsSet(), as below.
  AS_SET,
  // Similar to AS_SET, but preserve the order and find the longest matching
  // sequence from the first matching element. To use an optimal solution,
  // call SetMatchIndicesForSmartListCallback() to pass it in.
  AS_SMART_LIST,
  // Similar to AS_SET, but match elements with fewest diffs.
  AS_SMART_SET,
};
397
398 // The elements of the given repeated field will be treated as a set for
399 // diffing purposes, so different orderings of the same elements will be
400 // considered equal. Elements which are present on both sides of the
401 // comparison but which have changed position will be reported with
402 // ReportMoved(). Elements which only exist on one side or the other are
403 // reported with ReportAdded() and ReportDeleted() regardless of their
404 // positions. ReportModified() is never used for this repeated field. If
405 // the only difference between the compared messages is that some fields
406 // have been moved, then the comparison returns true.
407 //
408 // Note that despite the name of this method, this is really
409 // comparison as multisets: if one side of the comparison has a duplicate
410 // in the repeated field but the other side doesn't, this will count as
411 // a mismatch.
412 //
413 // If the scope of comparison is set to PARTIAL, then in addition to what's
414 // above, extra values added to repeated fields of the second message will
415 // not cause the comparison to fail.
416 //
417 // Note that set comparison is currently O(k * n^2) (where n is the total
418 // number of elements, and k is the average size of each element). In theory
419 // it could be made O(n * k) with a more complex hashing implementation. Feel
420 // free to contribute one if the current implementation is too slow for you.
421 // If partial matching is also enabled, the time complexity will be O(k * n^2
422 // + n^3) in which n^3 is the time complexity of the maximum matching
423 // algorithm.
424 //
425 // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
426 void TreatAsSet(const FieldDescriptor* field);
427 void TreatAsSmartSet(const FieldDescriptor* field);
428
429 // The elements of the given repeated field will be treated as a list for
430 // diffing purposes, so different orderings of the same elements will NOT be
431 // considered equal.
432 //
433 // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
434 void TreatAsList(const FieldDescriptor* field);
435 // Note that the complexity is similar to treating as SET.
436 void TreatAsSmartList(const FieldDescriptor* field);
437
438 // The elements of the given repeated field will be treated as a map for
439 // diffing purposes, with |key| being the map key. Thus, elements with the
440 // same key will be compared even if they do not appear at the same index.
441 // Differences are reported similarly to TreatAsSet(), except that
442 // ReportModified() is used to report elements with the same key but
443 // different values. Note that if an element is both moved and modified,
444 // only ReportModified() will be called. As with TreatAsSet, if the only
445 // difference between the compared messages is that some fields have been
446 // moved, then the comparison returns true. See TreatAsSet for notes on
447 // performance.
448 //
449 // REQUIRES: field->is_repeated()
450 // REQUIRES: field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
451 // REQUIRES: key->containing_type() == field->message_type()
452 void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key);
453 // Same as TreatAsMap except that this method will use multiple fields as
454 // the key in comparison. All specified fields in 'key_fields' should be
455 // present in the compared elements. Two elements will be treated as having
456 // the same key iff they have the same value for every specified field. There
457 // are two steps in the comparison process. The first one is key matching.
458 // Every element from one message will be compared to every element from
459 // the other message. Only fields in 'key_fields' are compared in this step
460 // to decide if two elements have the same key. The second step is value
461 // comparison. Those pairs of elements with the same key (with equal value
462 // for every field in 'key_fields') will be compared in this step.
463 // Time complexity of the first step is O(s * m * n ^ 2) where s is the
464 // average size of the fields specified in 'key_fields', m is the number of
465 // fields in 'key_fields' and n is the number of elements. If partial
466 // matching is enabled, an extra O(n^3) will be incurred by the maximum
467 // matching algorithm. The second step is O(k * n) where k is the average
468 // size of each element.
469 void TreatAsMapWithMultipleFieldsAsKey(
470 const FieldDescriptor* field,
471 const std::vector<const FieldDescriptor*>& key_fields);
472 // Same as TreatAsMapWithMultipleFieldsAsKey, except that the key fields
473 // do not necessarily need to be direct subfields. Each element in
474 // key_field_paths indicates a path from the message being compared, listing
475 // the successive subfields to follow to reach the key field.
476 //
477 // REQUIRES:
478 // for key_field_path in key_field_paths:
479 // key_field_path[0]->containing_type() == field->message_type()
480 // for i in [0, key_field_path.size() - 1):
481 // key_field_path[i+1]->containing_type() ==
482 // key_field_path[i]->message_type()
483 // key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
484 // !key_field_path[i]->is_repeated()
485 void TreatAsMapWithMultipleFieldPathsAsKey(
486 const FieldDescriptor* field,
487 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
488
489 // Uses a custom MapKeyComparator to determine if two elements have the same
490 // key when comparing a repeated field as a map.
491 // The caller is responsible for deleting the key_comparator.
492 // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the
493 // first key matching step. Rather than comparing some specified fields, it
494 // will invoke the IsMatch method of the given 'key_comparator' to decide if
495 // two elements have the same key.
496 void TreatAsMapUsingKeyComparator(const FieldDescriptor* field,
497 const MapKeyComparator* key_comparator);
498
499 // Initiates and returns a new instance of MultipleFieldsMapKeyComparator.
500 MapKeyComparator* CreateMultipleFieldsMapKeyComparator(
501 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
502
503 // Add a custom ignore criteria that is evaluated in addition to the
504 // ignored fields added with IgnoreField.
505 // Takes ownership of ignore_criteria.
506 void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria);
507
508 // Indicates that any field with the given descriptor should be
509 // ignored for the purposes of comparing two messages. This applies
510 // to fields nested in the message structure as well as top level
511 // ones. When the MessageDifferencer encounters an ignored field,
512 // ReportIgnored is called on the reporter, if one is specified.
513 //
514 // The only place where the field's 'ignored' status is not applied is when
515 // it is being used as a key in a field passed to TreatAsMap or is one of
516 // the fields passed to TreatAsMapWithMultipleFieldsAsKey.
517 // In this case it is compared in key matching but after that it's ignored
518 // in value comparison.
519 void IgnoreField(const FieldDescriptor* field);
520
521 // Sets the field comparator used to determine differences between protocol
522 // buffer fields. By default it's set to a DefaultFieldComparator instance.
523 // MessageDifferencer doesn't take ownership over the passed object.
524 // Note that this method must be called before Compare for the comparator to
525 // be used.
526 void set_field_comparator(FieldComparator* comparator);
527#ifdef PROTOBUF_FUTURE_BREAKING_CHANGES
528 void set_field_comparator(DefaultFieldComparator* comparator);
529#endif // PROTOBUF_FUTURE_BREAKING_CHANGES
530
531 // DEPRECATED. Pass a DefaultFieldComparator instance instead.
532 // Sets the fraction and margin for the float comparison of a given field.
533 // Uses MathUtil::WithinFractionOrMargin to compare the values.
534 // NOTE: this method does nothing if differencer's field comparator has been
535 // set to a custom object.
536 //
537 // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or
538 // field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT
539 // REQUIRES: float_comparison_ == APPROXIMATE
540 void SetFractionAndMargin(const FieldDescriptor* field, double fraction,
541 double margin);
542
543 // Sets the type of comparison (as defined in the MessageFieldComparison
544 // enumeration above) that is used by this differencer when determining how
545 // to compare fields in messages.
546 void set_message_field_comparison(MessageFieldComparison comparison);
547
548 // Returns the current message field comparison used in this differencer.
549 MessageFieldComparison message_field_comparison() const;
550
551 // Tells the differencer whether or not to report matches. This method must
552 // be called before Compare. The default for a new differencer is false.
553 void set_report_matches(bool report_matches) {
554 report_matches_ = report_matches;
555 }
556
557 // Tells the differencer whether or not to report moves (in a set or map
558 // repeated field). This method must be called before Compare. The default for
559 // a new differencer is true.
560 void set_report_moves(bool report_moves) { report_moves_ = report_moves; }
561
562 // Tells the differencer whether or not to report ignored values. This method
563 // must be called before Compare. The default for a new differencer is true.
564 void set_report_ignores(bool report_ignores) {
565 report_ignores_ = report_ignores;
566 }
567
568 // Sets the scope of the comparison (as defined in the Scope enumeration
569 // above) that is used by this differencer when determining which fields to
570 // compare between the messages.
571 void set_scope(Scope scope);
572
573 // Returns the current scope used by this differencer.
574 Scope scope() const;
575
576 // DEPRECATED. Pass a DefaultFieldComparator instance instead.
577 // Sets the type of comparison (as defined in the FloatComparison enumeration
578 // above) that is used by this differencer when comparing float (and double)
579 // fields in messages.
580 // NOTE: this method does nothing if differencer's field comparator has been
581 // set to a custom object.
582 void set_float_comparison(FloatComparison comparison);
583
584 // Sets the type of comparison for repeated field (as defined in the
585 // RepeatedFieldComparison enumeration above) that is used by this
586 // differencer when comparing repeated fields in messages.
587 void set_repeated_field_comparison(RepeatedFieldComparison comparison);
588
589 // Returns the current repeated field comparison used by this differencer.
590 RepeatedFieldComparison repeated_field_comparison() const;
591
592 // Compares the two specified messages, returning true if they are the same,
593 // false otherwise. If this method returns false, any changes between the
594 // two messages will be reported if a Reporter was specified via
595 // ReportDifferencesTo (see also ReportDifferencesToString).
596 //
597 // This method REQUIRES that the two messages have the same
598 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
599 bool Compare(const Message& message1, const Message& message2);
600
601 // Same as above, except comparing only the list of fields specified by the
602 // two vectors of FieldDescriptors.
603 bool CompareWithFields(
604 const Message& message1, const Message& message2,
605 const std::vector<const FieldDescriptor*>& message1_fields,
606 const std::vector<const FieldDescriptor*>& message2_fields);
607
608 // Automatically creates a reporter that will output the differences
609 // found (if any) to the specified output string pointer. Note that this
610 // method must be called before Compare.
611 void ReportDifferencesToString(std::string* output);
612
613 // Tells the MessageDifferencer to report differences via the specified
614 // reporter. Note that this method must be called before Compare for
615 // the reporter to be used. It is the responsibility of the caller to delete
616 // this object.
617 // If the provided pointer equals NULL, the MessageDifferencer stops reporting
618 // differences to any previously set reporters or output strings.
619 void ReportDifferencesTo(Reporter* reporter);
620
621 private:
622 // Class for processing Any deserialization. This logic is used by both the
623 // MessageDifferencer and StreamReporter classes.
624 class UnpackAnyField {
625 private:
626 std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_;
627
628 public:
629 UnpackAnyField() = default;
630 ~UnpackAnyField() = default;
631 // If "any" is of type google.protobuf.Any, extract its payload using
632 // DynamicMessageFactory and store in "data".
633 bool UnpackAny(const Message& any, std::unique_ptr<Message>* data);
634 };
635
636 public:
637 // An implementation of the MessageDifferencer Reporter that outputs
638 // any differences found in human-readable form to the supplied
639 // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter
640 // *must* be '$'.
641 //
642 // WARNING: this reporter does not necessarily flush its output until it is
643 // destroyed. As a result, it is not safe to assume the output is valid or
644 // complete until after you destroy the reporter. For example, if you use a
645 // StreamReporter to write to a StringOutputStream, the target string may
646 // contain uninitialized data until the reporter is destroyed.
647 class PROTOBUF_EXPORT StreamReporter : public Reporter {
648 public:
649 explicit StreamReporter(io::ZeroCopyOutputStream* output);
650 explicit StreamReporter(io::Printer* printer); // delimiter '$'
651 ~StreamReporter() override;
652
653 // When set to true, the stream reporter will also output aggregates nodes
654 // (i.e. messages and groups) whose subfields have been modified. When
655 // false, will only report the individual subfields. Defaults to false.
656 void set_report_modified_aggregates(bool report) {
657 report_modified_aggregates_ = report;
658 }
659
660 // The following are implementations of the methods described above.
661
662 void ReportAdded(const Message& message1, const Message& message2,
663 const std::vector<SpecificField>& field_path) override;
664
665 void ReportDeleted(const Message& message1, const Message& message2,
666 const std::vector<SpecificField>& field_path) override;
667
668 void ReportModified(const Message& message1, const Message& message2,
669 const std::vector<SpecificField>& field_path) override;
670
671 void ReportMoved(const Message& message1, const Message& message2,
672 const std::vector<SpecificField>& field_path) override;
673
674 void ReportMatched(const Message& message1, const Message& message2,
675 const std::vector<SpecificField>& field_path) override;
676
677 void ReportIgnored(const Message& message1, const Message& message2,
678 const std::vector<SpecificField>& field_path) override;
679
680 void ReportUnknownFieldIgnored(
681 const Message& message1, const Message& message2,
682 const std::vector<SpecificField>& field_path) override;
683
684 // Messages that are being compared must be provided to StreamReporter prior
685 // to processing
686 void SetMessages(const Message& message1, const Message& message2);
687
688 protected:
689 // Prints the specified path of fields to the buffer.
690 virtual void PrintPath(const std::vector<SpecificField>& field_path,
691 bool left_side);
692
693 // Prints the value of fields to the buffer. left_side is true if the
694 // given message is from the left side of the comparison, false if it
695 // was the right. This is relevant only to decide whether to follow
696 // unknown_field_index1 or unknown_field_index2 when an unknown field
697 // is encountered in field_path.
698 virtual void PrintValue(const Message& message,
699 const std::vector<SpecificField>& field_path,
700 bool left_side);
701
702 // Prints the specified path of unknown fields to the buffer.
703 virtual void PrintUnknownFieldValue(const UnknownField* unknown_field);
704
705 // Just print a string
706 void Print(const std::string& str);
707
708 private:
709 // helper function for PrintPath that contains logic for printing maps
710 void PrintMapKey(bool left_side, const SpecificField& specific_field);
711
712 io::Printer* printer_;
713 bool delete_printer_;
714 bool report_modified_aggregates_;
715 const Message* message1_;
716 const Message* message2_;
717 MessageDifferencer::UnpackAnyField unpack_any_field_;
718 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StreamReporter);
719 };
720
721 private:
722 friend class SimpleFieldComparator;
723
724 // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator.
725 // Implementation of this class needs to do field value comparison which
726 // relies on some private methods of MessageDifferencer. That's why this
727 // class is declared as a nested class of MessageDifferencer.
728 class MultipleFieldsMapKeyComparator;
729
730 // A MapKeyComparator for use with map_entries.
731 class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator {
732 public:
733 explicit MapEntryKeyComparator(MessageDifferencer* message_differencer);
734 bool IsMatch(
735 const Message& message1, const Message& message2,
736 const std::vector<SpecificField>& parent_fields) const override;
737
738 private:
739 MessageDifferencer* message_differencer_;
740 };
741
742 // Returns true if field1's number() is less than field2's.
743 static bool FieldBefore(const FieldDescriptor* field1,
744 const FieldDescriptor* field2);
745
746 // Retrieve all the set fields, including extensions.
747 FieldDescriptorArray RetrieveFields(const Message& message,
748 bool base_message);
749
750 // Combine the two lists of fields into the combined_fields output vector.
751 // All fields present in both lists will always be included in the combined
752 // list. Fields only present in one of the lists will only appear in the
753 // combined list if the corresponding fields_scope option is set to FULL.
754 FieldDescriptorArray CombineFields(const FieldDescriptorArray& fields1,
755 Scope fields1_scope,
756 const FieldDescriptorArray& fields2,
757 Scope fields2_scope);
758
759 // Internal version of the Compare method which performs the actual
760 // comparison. The parent_fields vector is a vector containing field
761 // descriptors of all fields accessed to get to this comparison operation
762 // (i.e. if the current message is an embedded message, the parent_fields
763 // vector will contain the field that has this embedded message).
764 bool Compare(const Message& message1, const Message& message2,
765 std::vector<SpecificField>* parent_fields);
766
767 // Compares all the unknown fields in two messages.
768 bool CompareUnknownFields(const Message& message1, const Message& message2,
769 const UnknownFieldSet&, const UnknownFieldSet&,
770 std::vector<SpecificField>* parent_fields);
771
772 // Compares the specified messages for the requested field lists. The field
773 // lists are modified depending on comparison settings, and then passed to
774 // CompareWithFieldsInternal.
775 bool CompareRequestedFieldsUsingSettings(
776 const Message& message1, const Message& message2,
777 const FieldDescriptorArray& message1_fields,
778 const FieldDescriptorArray& message2_fields,
779 std::vector<SpecificField>* parent_fields);
780
781 // Compares the specified messages with the specified field lists.
782 bool CompareWithFieldsInternal(const Message& message1,
783 const Message& message2,
784 const FieldDescriptorArray& message1_fields,
785 const FieldDescriptorArray& message2_fields,
786 std::vector<SpecificField>* parent_fields);
787
788 // Compares the repeated fields, and report the error.
789 bool CompareRepeatedField(const Message& message1, const Message& message2,
790 const FieldDescriptor* field,
791 std::vector<SpecificField>* parent_fields);
792
793 // Compares map fields, and report the error.
794 bool CompareMapField(const Message& message1, const Message& message2,
795 const FieldDescriptor* field,
796 std::vector<SpecificField>* parent_fields);
797
798 // Helper for CompareRepeatedField and CompareMapField: compares and reports
799 // differences element-wise. This is the implementation for non-map fields,
800 // and can also compare map fields by using the underlying representation.
801 bool CompareRepeatedRep(const Message& message1, const Message& message2,
802 const FieldDescriptor* field,
803 std::vector<SpecificField>* parent_fields);
804
805 // Helper for CompareMapField: compare the map fields using map reflection
806 // instead of sync to repeated.
807 bool CompareMapFieldByMapReflection(const Message& message1,
808 const Message& message2,
809 const FieldDescriptor* field,
810 std::vector<SpecificField>* parent_fields,
811 DefaultFieldComparator* comparator);
812
813 // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields.
814 bool CompareFieldValue(const Message& message1, const Message& message2,
815 const FieldDescriptor* field, int index1, int index2);
816
817 // Compares the specified field on the two messages, returning
818 // true if they are the same, false otherwise. For repeated fields,
819 // this method only compares the value in the specified index. This method
820 // uses Compare functions to recurse into submessages.
821 // The parent_fields vector is used in calls to a Reporter instance calls.
822 // It can be NULL, in which case the MessageDifferencer will create new
823 // list of parent messages if it needs to recursively compare the given field.
824 // To avoid confusing users you should not set it to NULL unless you modified
825 // Reporter to handle the change of parent_fields correctly.
826 bool CompareFieldValueUsingParentFields(
827 const Message& message1, const Message& message2,
828 const FieldDescriptor* field, int index1, int index2,
829 std::vector<SpecificField>* parent_fields);
830
831 // Compares the specified field on the two messages, returning comparison
832 // result, as returned by appropriate FieldComparator.
833 FieldComparator::ComparisonResult GetFieldComparisonResult(
834 const Message& message1, const Message& message2,
835 const FieldDescriptor* field, int index1, int index2,
836 const FieldContext* field_context);
837
838 // Check if the two elements in the repeated field are match to each other.
839 // if the key_comprator is NULL, this function returns true when the two
840 // elements are equal.
841 bool IsMatch(const FieldDescriptor* repeated_field,
842 const MapKeyComparator* key_comparator, const Message* message1,
843 const Message* message2,
844 const std::vector<SpecificField>& parent_fields,
845 Reporter* reporter, int index1, int index2);
846
847 // Returns true when this repeated field has been configured to be treated
848 // as a Set / SmartSet / SmartList.
849 bool IsTreatedAsSet(const FieldDescriptor* field);
850 bool IsTreatedAsSmartSet(const FieldDescriptor* field);
851
852 bool IsTreatedAsSmartList(const FieldDescriptor* field);
853 // When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList
854 // by default to find the longest matching sequence from the first matching
855 // element. The callback takes two vectors showing the matching indices from
856 // the other vector, where -1 means an unmatch.
857 void SetMatchIndicesForSmartListCallback(
858 std::function<void(std::vector<int>*, std::vector<int>*)> callback);
859
860 // Returns true when this repeated field is to be compared as a subset, ie.
861 // has been configured to be treated as a set or map and scope is set to
862 // PARTIAL.
863 bool IsTreatedAsSubset(const FieldDescriptor* field);
864
865 // Returns true if this field is to be ignored when this
866 // MessageDifferencer compares messages.
867 bool IsIgnored(const Message& message1, const Message& message2,
868 const FieldDescriptor* field,
869 const std::vector<SpecificField>& parent_fields);
870
871 // Returns true if this unknown field is to be ignored when this
872 // MessageDifferencer compares messages.
873 bool IsUnknownFieldIgnored(const Message& message1, const Message& message2,
874 const SpecificField& field,
875 const std::vector<SpecificField>& parent_fields);
876
877 // Returns MapKeyComparator* when this field has been configured to be treated
878 // as a map or its is_map() return true. If not, returns NULL.
879 const MapKeyComparator* GetMapKeyComparator(
880 const FieldDescriptor* field) const;
881
882 // Attempts to match indices of a repeated field, so that the contained values
883 // match. Clears output vectors and sets their values to indices of paired
884 // messages, ie. if message1[0] matches message2[1], then match_list1[0] == 1
885 // and match_list2[1] == 0. The unmatched indices are indicated by -1.
886 // Assumes the repeated field is not treated as a simple list.
887 // This method returns false if the match failed. However, it doesn't mean
888 // that the comparison succeeds when this method returns true (you need to
889 // double-check in this case).
890 bool MatchRepeatedFieldIndices(
891 const Message& message1, const Message& message2,
892 const FieldDescriptor* repeated_field,
893 const MapKeyComparator* key_comparator,
894 const std::vector<SpecificField>& parent_fields,
895 std::vector<int>* match_list1, std::vector<int>* match_list2);
896
897 // Checks if index is equal to new_index in all the specific fields.
898 static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields);
899
900 // CHECKs that the given repeated field can be compared according to
901 // new_comparison.
902 void CheckRepeatedFieldComparisons(
903 const FieldDescriptor* field,
904 const RepeatedFieldComparison& new_comparison);
905
906 // Defines a map between field descriptors and their MapKeyComparators.
907 // Used for repeated fields when they are configured as TreatAsMap.
908 typedef std::map<const FieldDescriptor*, const MapKeyComparator*>
909 FieldKeyComparatorMap;
910
911 // Defines a set to store field descriptors. Used for repeated fields when
912 // they are configured as TreatAsSet.
913 typedef std::set<const FieldDescriptor*> FieldSet;
914 typedef std::map<const FieldDescriptor*, RepeatedFieldComparison> FieldMap;
915
916 Reporter* reporter_;
917 DefaultFieldComparator default_field_comparator_;
918 MessageFieldComparison message_field_comparison_;
919 Scope scope_;
920 RepeatedFieldComparison repeated_field_comparison_;
921
922 FieldMap repeated_field_comparisons_;
923 // Keeps track of MapKeyComparators that are created within
924 // MessageDifferencer. These MapKeyComparators should be deleted
925 // before MessageDifferencer is destroyed.
926 // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't
927 // store the supplied FieldDescriptors directly. Instead, a new
928 // MapKeyComparator is created for comparison purpose.
929 std::vector<MapKeyComparator*> owned_key_comparators_;
930 FieldKeyComparatorMap map_field_key_comparator_;
931 MapEntryKeyComparator map_entry_key_comparator_;
932 std::vector<IgnoreCriteria*> ignore_criteria_;
933 // Reused multiple times in RetrieveFields to avoid extra allocations
934 std::vector<const FieldDescriptor*> tmp_message_fields_;
935
936 FieldSet ignored_fields_;
937
938 union {
939 DefaultFieldComparator* default_impl;
940 FieldComparator* base;
941 } field_comparator_ = {.default_impl: &default_field_comparator_};
942 enum { kFCDefault, kFCBase } field_comparator_kind_ = kFCDefault;
943
944 bool report_matches_;
945 bool report_moves_;
946 bool report_ignores_;
947
948 std::string* output_string_;
949
950 // Callback to post-process the matched indices to support SMART_LIST.
951 std::function<void(std::vector<int>*, std::vector<int>*)>
952 match_indices_for_smart_list_callback_;
953
954 MessageDifferencer::UnpackAnyField unpack_any_field_;
955 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageDifferencer);
956};
957
958// This class provides extra information to the FieldComparator::Compare
959// function.
960class PROTOBUF_EXPORT FieldContext {
961 public:
962 explicit FieldContext(
963 std::vector<MessageDifferencer::SpecificField>* parent_fields)
964 : parent_fields_(parent_fields) {}
965
966 std::vector<MessageDifferencer::SpecificField>* parent_fields() const {
967 return parent_fields_;
968 }
969
970 private:
971 std::vector<MessageDifferencer::SpecificField>* parent_fields_;
972};
973
974} // namespace util
975} // namespace protobuf
976} // namespace google
977
978#include <google/protobuf/port_undef.inc>
979
980#endif // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
981