1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2008 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | #include <google/protobuf/compiler/cpp/padding_optimizer.h> |
32 | |
33 | #include <google/protobuf/compiler/cpp/helpers.h> |
34 | |
35 | namespace google { |
36 | namespace protobuf { |
37 | namespace compiler { |
38 | namespace cpp { |
39 | |
40 | namespace { |
41 | |
42 | // FieldGroup is just a helper for PaddingOptimizer below. It holds a vector of |
43 | // fields that are grouped together because they have compatible alignment, and |
44 | // a preferred location in the final field ordering. |
45 | class FieldGroup { |
46 | public: |
47 | FieldGroup() : preferred_location_(0) {} |
48 | |
49 | // A group with a single field. |
50 | FieldGroup(double preferred_location, const FieldDescriptor* field) |
51 | : preferred_location_(preferred_location), fields_(1, field) {} |
52 | |
53 | // Append the fields in 'other' to this group. |
54 | void Append(const FieldGroup& other) { |
55 | if (other.fields_.empty()) { |
56 | return; |
57 | } |
58 | // Preferred location is the average among all the fields, so we weight by |
59 | // the number of fields on each FieldGroup object. |
60 | preferred_location_ = (preferred_location_ * fields_.size() + |
61 | (other.preferred_location_ * other.fields_.size())) / |
62 | (fields_.size() + other.fields_.size()); |
63 | fields_.insert(position: fields_.end(), first: other.fields_.begin(), last: other.fields_.end()); |
64 | } |
65 | |
66 | void SetPreferredLocation(double location) { preferred_location_ = location; } |
67 | const std::vector<const FieldDescriptor*>& fields() const { return fields_; } |
68 | |
69 | // FieldGroup objects sort by their preferred location. |
70 | bool operator<(const FieldGroup& other) const { |
71 | return preferred_location_ < other.preferred_location_; |
72 | } |
73 | |
74 | private: |
75 | // "preferred_location_" is an estimate of where this group should go in the |
76 | // final list of fields. We compute this by taking the average index of each |
77 | // field in this group in the original ordering of fields. This is very |
78 | // approximate, but should put this group close to where its member fields |
79 | // originally went. |
80 | double preferred_location_; |
81 | std::vector<const FieldDescriptor*> fields_; |
82 | // We rely on the default copy constructor and operator= so this type can be |
83 | // used in a vector. |
84 | }; |
85 | |
86 | } // namespace |
87 | |
88 | // Reorder 'fields' so that if the fields are output into a c++ class in the new |
89 | // order, fields of similar family (see below) are together and within each |
90 | // family, alignment padding is minimized. |
91 | // |
92 | // We try to do this while keeping each field as close as possible to its field |
93 | // number order so that we don't reduce cache locality much for function that |
94 | // access each field in order. Originally, OptimizePadding used declaration |
95 | // order for its decisions, but generated code minus the serializer/parsers uses |
96 | // the output of OptimizePadding as well (stored in |
97 | // MessageGenerator::optimized_order_). Since the serializers use field number |
98 | // order, we use that as a tie-breaker. |
99 | // |
100 | // We classify each field into a particular "family" of fields, that we perform |
101 | // the same operation on in our generated functions. |
102 | // |
103 | // REPEATED is placed first, as the C++ compiler automatically initializes |
104 | // these fields in layout order. |
105 | // |
106 | // STRING is grouped next, as our Clear/SharedCtor/SharedDtor walks it and |
107 | // calls ArenaStringPtr::Destroy on each. |
108 | // |
109 | // LAZY_MESSAGE is grouped next, as it interferes with the ability to memset |
110 | // non-repeated fields otherwise. |
111 | // |
112 | // MESSAGE is grouped next, as our Clear/SharedDtor code walks it and calls |
113 | // delete on each. We initialize these fields with a NULL pointer (see |
114 | // MessageFieldGenerator::GenerateConstructorCode), which allows them to be |
115 | // memset. |
116 | // |
117 | // ZERO_INITIALIZABLE is memset in Clear/SharedCtor |
118 | // |
119 | // OTHER these fields are initialized one-by-one. |
120 | void PaddingOptimizer::OptimizeLayout( |
121 | std::vector<const FieldDescriptor*>* fields, const Options& options, |
122 | MessageSCCAnalyzer* scc_analyzer) { |
123 | // The sorted numeric order of Family determines the declaration order in the |
124 | // memory layout. |
125 | enum Family { |
126 | REPEATED = 0, |
127 | STRING = 1, |
128 | // Laying out LAZY_MESSAGE before MESSAGE allows a single memset to zero |
129 | // MESSAGE and ZERO_INITIALIZABLE fields together. |
130 | LAZY_MESSAGE = 2, |
131 | MESSAGE = 3, |
132 | ZERO_INITIALIZABLE = 4, |
133 | OTHER = 5, |
134 | kMaxFamily |
135 | }; |
136 | |
137 | // First divide fields into those that align to 1 byte, 4 bytes or 8 bytes. |
138 | std::vector<FieldGroup> aligned_to_1[kMaxFamily]; |
139 | std::vector<FieldGroup> aligned_to_4[kMaxFamily]; |
140 | std::vector<FieldGroup> aligned_to_8[kMaxFamily]; |
141 | for (int i = 0; i < fields->size(); ++i) { |
142 | const FieldDescriptor* field = (*fields)[i]; |
143 | |
144 | Family f = OTHER; |
145 | if (field->is_repeated()) { |
146 | f = REPEATED; |
147 | } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { |
148 | f = STRING; |
149 | } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { |
150 | f = MESSAGE; |
151 | if (IsLazy(field, options, scc_analyzer)) { |
152 | f = LAZY_MESSAGE; |
153 | } |
154 | } else if (CanInitializeByZeroing(field)) { |
155 | f = ZERO_INITIALIZABLE; |
156 | } |
157 | |
158 | const int j = field->number(); |
159 | switch (EstimateAlignmentSize(field)) { |
160 | case 1: |
161 | aligned_to_1[f].push_back(x: FieldGroup(j, field)); |
162 | break; |
163 | case 4: |
164 | aligned_to_4[f].push_back(x: FieldGroup(j, field)); |
165 | break; |
166 | case 8: |
167 | aligned_to_8[f].push_back(x: FieldGroup(j, field)); |
168 | break; |
169 | default: |
170 | GOOGLE_LOG(FATAL) << "Unknown alignment size " << EstimateAlignmentSize(field) |
171 | << "for a field " << field->full_name() << "." ; |
172 | } |
173 | } |
174 | |
175 | // For each family, group fields to optimize padding. |
176 | for (int f = 0; f < kMaxFamily; f++) { |
177 | // Now group fields aligned to 1 byte into sets of 4, and treat those like a |
178 | // single field aligned to 4 bytes. |
179 | for (int i = 0; i < aligned_to_1[f].size(); i += 4) { |
180 | FieldGroup field_group; |
181 | for (int j = i; j < aligned_to_1[f].size() && j < i + 4; ++j) { |
182 | field_group.Append(other: aligned_to_1[f][j]); |
183 | } |
184 | aligned_to_4[f].push_back(x: field_group); |
185 | } |
186 | // Sort by preferred location to keep fields as close to their field number |
187 | // order as possible. Using stable_sort ensures that the output is |
188 | // consistent across runs. |
189 | std::stable_sort(first: aligned_to_4[f].begin(), last: aligned_to_4[f].end()); |
190 | |
191 | // Now group fields aligned to 4 bytes (or the 4-field groups created above) |
192 | // into pairs, and treat those like a single field aligned to 8 bytes. |
193 | for (int i = 0; i < aligned_to_4[f].size(); i += 2) { |
194 | FieldGroup field_group; |
195 | for (int j = i; j < aligned_to_4[f].size() && j < i + 2; ++j) { |
196 | field_group.Append(other: aligned_to_4[f][j]); |
197 | } |
198 | if (i == aligned_to_4[f].size() - 1) { |
199 | if (f == OTHER) { |
200 | // Move incomplete 4-byte block to the beginning. This is done to |
201 | // pair with the (possible) leftover blocks from the |
202 | // ZERO_INITIALIZABLE family. |
203 | field_group.SetPreferredLocation(-1); |
204 | } else { |
205 | // Move incomplete 4-byte block to the end. |
206 | field_group.SetPreferredLocation(double{FieldDescriptor::kMaxNumber}); |
207 | } |
208 | } |
209 | aligned_to_8[f].push_back(x: field_group); |
210 | } |
211 | // Sort by preferred location. |
212 | std::stable_sort(first: aligned_to_8[f].begin(), last: aligned_to_8[f].end()); |
213 | } |
214 | |
215 | // Now pull out all the FieldDescriptors in order. |
216 | fields->clear(); |
217 | for (int f = 0; f < kMaxFamily; ++f) { |
218 | for (int i = 0; i < aligned_to_8[f].size(); ++i) { |
219 | fields->insert(position: fields->end(), first: aligned_to_8[f][i].fields().begin(), |
220 | last: aligned_to_8[f][i].fields().end()); |
221 | } |
222 | } |
223 | } |
224 | |
225 | } // namespace cpp |
226 | } // namespace compiler |
227 | } // namespace protobuf |
228 | } // namespace google |
229 | |