/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Multibit: build code (for sparse iterators and scatter plans)
 */
#include "multibit.h"
#include "multibit_build.h"
#include "scatter.h"
#include "ue2common.h"
#include "rose/rose_build_scatter.h"
#include "util/compile_error.h"

#include <cassert>
#include <cstring> // for memset
#include <map>
#include <queue>
#include <vector>

using namespace std;

namespace ue2 {

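/** \brief Return the size in bytes of a multibit able to store \a total_bits
 * bits; throws ResourceLimitError if the requested size exceeds
 * MMB_MAX_BITS. */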
u32 mmbit_size(u32 total_bits) {
    if (total_bits > MMB_MAX_BITS) {
        throw ResourceLimitError();
    }

    // Flat model multibit structures are just stored as a bit vector.
    if (total_bits <= MMB_FLAT_MAX_BITS) {
        return ROUNDUP_N(total_bits, 8) / 8;
    }

    u64a current_level = 1; // Number of blocks on current level.
    u64a total = 0;         // Total number of blocks.
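    // Pyramidal model: count the summary blocks on each level above the leaf
    // level, then add the leaf level itself below.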
    while (current_level * MMB_KEY_BITS < total_bits) {
        total += current_level;
        current_level <<= MMB_KEY_SHIFT;
    }

    // Last level is a one-for-one bit vector. It needs room for total_bits
    // elements, rounded up to the nearest block.
    u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;
    total += last_level;

    assert(total * sizeof(MMB_TYPE) <= UINT32_MAX);
    return (u32)(total * sizeof(MMB_TYPE));
}

namespace {
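/** \brief Intermediate tree built from the caller's keys: one node per
 * summary block, holding the mask of child bits set at that block. */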
struct TreeNode {
    MMB_TYPE mask = 0;
    u32 depth = 0;
    map<u32, TreeNode> children; // keyed by rkey
};
} // namespace

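/** \brief Add \a key to the tree, setting one bit per level on the path from
 * the root down to the leaf block containing the key. */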
static
void addNode(TreeNode &tree, u32 depth, u32 key, s32 ks, u32 rkey) {
    u32 bit = (key >> ks) & MMB_KEY_MASK;
    DEBUG_PRINTF("depth=%u, key=%u, ks=%d, rkey=%u, bit=%u\n", depth, key, ks,
                 rkey, bit);
    mmb_set(&tree.mask, bit); // add bit to this level
    tree.depth = depth;       // record depth
    // next level
    rkey = (rkey << MMB_KEY_SHIFT) + bit;
    ks -= MMB_KEY_SHIFT;
    depth++;
    if (ks >= 0) {
        addNode(tree.children[rkey], depth, key, ks, rkey);
    }
}

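/** \brief Flatten the tree into an array of iterator records via a
 * breadth-first walk, then fill in each record's \a val field: for interior
 * records it is the index of the record's first child in the array, while
 * for leaf-level records it is the cumulative popcount of the preceding leaf
 * masks (i.e. the rank of the record's first key).
 *
 * For example (a sketch, assuming a two-level multibit), keys {0, 64, 128}
 * produce a root record with mask 0x7 and val 1, followed by three leaf
 * records with mask 0x1 and val 0, 1 and 2 respectively. */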
static
void bfs(vector<mmbit_sparse_iter> &out, const TreeNode &tree) {
    queue<const TreeNode *> q;
    q.push(&tree);

    vector<u32> levels;
    u32 depth = 0;

    DEBUG_PRINTF("walking q\n");

    while (!q.empty()) {
        const TreeNode *t = q.front();
        q.pop();

        if (depth != t->depth) {
            depth = t->depth;
            levels.push_back(out.size());
        }

        DEBUG_PRINTF("pop: mask=0x%08llx, depth=%u, children.size()=%zu\n",
                     t->mask, t->depth, t->children.size());

        out.push_back(mmbit_sparse_iter());
        memset(&out.back(), 0, sizeof(mmbit_sparse_iter));
        mmbit_sparse_iter &record = out.back();
        record.mask = t->mask;
        record.val = 0;

        for (auto &e : t->children) {
            q.push(&e.second);
        }
    }

    // val for records in non-last levels is the iterator array start offset
    // for that iterator record's children
    u32 start = 0;
    for (size_t i = 0; i < levels.size(); i++) {
        u32 start_next = levels[i];
        u32 population = 0;
        DEBUG_PRINTF("next level starts at %u\n", start_next);
        for (u32 j = start; j < start_next; j++) {
            out[j].val = start_next + population;
            DEBUG_PRINTF("children of %u start at %u\n", j, out[j].val);
            population += mmb_popcount(out[j].mask);
        }
        start = start_next;
    }

    // val for records in the last level is the cumulative popcount
    u32 population = 0;
    for (size_t i = start; i < out.size(); i++) {
        DEBUG_PRINTF("last level: i=%zu, population=%u\n", i, population);
        out[i].val = population;
        population += mmb_popcount(out[i].mask);
    }
}

/** \brief Construct a sparse iterator over the values in \a bits for a
 * multibit of size \a total_bits. */
vector<mmbit_sparse_iter> mmbBuildSparseIterator(const vector<u32> &bits,
                                                 u32 total_bits) {
    vector<mmbit_sparse_iter> out;
    assert(!bits.empty());
    assert(total_bits > 0);
    assert(total_bits <= MMB_MAX_BITS);

    DEBUG_PRINTF("building sparse iter for %zu of %u bits\n",
                 bits.size(), total_bits);

    s32 ks = (total_bits > 1 ? mmbit_keyshift(total_bits) : 0);

    // Construct an intermediate tree
    TreeNode tree;
    for (const auto &bit : bits) {
        assert(bit < total_bits);
        addNode(tree, 0, bit, ks, 0);
    }

    // From our intermediate tree, lay the data out with a breadth-first walk
    bfs(out, tree);
    assert(!out.empty());

#ifdef DEBUG
    DEBUG_PRINTF("dump of iterator tree:\n");
    for (size_t i = 0; i < out.size(); ++i) {
        printf("  %zu:\tmask=0x%08llx, val=%u\n", i, out[i].mask, out[i].val);
    }
#endif

    DEBUG_PRINTF("iter has %zu records\n", out.size());
    return out;
}

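/** \brief Append a single scatter unit that writes \a mask at byte offset
 * \a offset to the given plan list. */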
template<typename T>
static
void add_scatter(vector<T> *out, u32 offset, u64a mask) {
    T su;
    memset(&su, 0, sizeof(su));
    su.offset = offset;
    su.val = mask;
    out->push_back(su);
    DEBUG_PRINTF("add %llu at offset %u\n", mask, offset);
}

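/** \brief Byte offset of the first block of the given level within the
 * multibit structure. */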
static
u32 mmbit_get_level_root_offset(u32 level) {
    return mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
}

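/** \brief Construct a scatter plan that initialises a multibit of
 * \a total_bits bits with the keys in the range [\a begin, \a end) set and
 * all other keys clear. */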
void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end,
                           scatter_plan_raw *out) {
    DEBUG_PRINTF("building scatter plan for [%u, %u]/%u\n", begin, end,
                 total_bits);
    if (!total_bits) {
        return;
    }

    if (total_bits <= MMB_FLAT_MAX_BITS) {
        // Handle flat model cases: first a bunch of 64-bit full-sized blocks,
        // then a single runt block at the end.
        u32 dest = 0; // dest offset
        u32 bits = total_bits;
        u32 base = 0;
        for (; bits > 64; bits -= 64, base += 64, dest += 8) {
            MMB_TYPE mask = get_flat_masks(base, begin, end);
            add_scatter(&out->p_u64a, dest, mask);
        }

        // Last chunk.
        assert(bits > 0 && bits <= 64);

        MMB_TYPE mask = get_flat_masks(base, begin, end);
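        // Write the runt block with the narrowest combination of scatter
        // units that fits, so the plan never writes past the end of the
        // byte-rounded flat bit vector.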
        if (bits <= 8) {
            add_scatter(&out->p_u8, dest + 0, mask);
        } else if (bits <= 16) {
            add_scatter(&out->p_u16, dest + 0, mask);
        } else if (bits <= 24) {
            add_scatter(&out->p_u16, dest + 0, mask);
            add_scatter(&out->p_u8, dest + 2, mask >> 16);
        } else if (bits <= 32) {
            add_scatter(&out->p_u32, dest + 0, mask);
        } else if (bits <= 40) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u8, dest + 4, mask >> 32);
        } else if (bits <= 48) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u16, dest + 4, mask >> 32);
        } else if (bits <= 56) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u16, dest + 4, mask >> 32);
            add_scatter(&out->p_u8, dest + 6, mask >> 48);
        } else {
            add_scatter(&out->p_u64a, dest + 0, mask);
        }
        return;
    }

    /* handle the multilevel case */
    s32 ks = mmbit_keyshift(total_bits);
    u32 level = 0;
    assert(sizeof(MMB_TYPE) == sizeof(u64a));

    if (begin == end) {
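        // An empty range just zeroes the root block: the runtime never
        // descends into levels whose summary bits are clear.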
        add_scatter(&out->p_u64a, 0, 0);
        return;
    }

    for (;;) {
        u32 block_offset = mmbit_get_level_root_offset(level);
        u32 k1 = begin >> ks, k2 = end >> ks;

        // Summary blocks need to account for the runt block on the end.
        if ((k2 << ks) != end) {
            k2++;
        }

        // Partial block to deal with beginning.
        block_offset += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE);
        if (k1 % MMB_KEY_BITS) {
            u32 idx = k1 / MMB_KEY_BITS;
            u32 block_end = (idx + 1) * MMB_KEY_BITS;

            // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge cases
            // here (see the branch in mmb_mask_zero_to).
            MMB_TYPE mask = (-MMB_ONE) << (k1 % MMB_KEY_BITS);

            if (k2 < block_end) {
                assert(k2 % MMB_KEY_BITS);
                mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
                add_scatter(&out->p_u64a, block_offset, mask);
                goto next_level;
            } else {
                add_scatter(&out->p_u64a, block_offset, mask);
                k1 = block_end;
                block_offset += sizeof(MMB_TYPE);
            }
        }

        // Write blocks filled with ones until we get to the last block.
        for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
            add_scatter(&out->p_u64a, block_offset, -MMB_ONE);
            block_offset += sizeof(MMB_TYPE);
        }

        // Final block.
        if (likely(k1 < k2)) {
            // Again, if k2 was at a block boundary, it would have been handled
            // by the previous loop, so we know k2 % MMB_KEY_BITS != 0 and can
            // avoid the branch in mmb_mask_zero_to here.
            assert(k2 % MMB_KEY_BITS);
            MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);

            add_scatter(&out->p_u64a, block_offset, mask);
        }

    next_level:
        if (ks == 0) {
            break; // Last level is done, finished.
        }

        ks -= MMB_KEY_SHIFT;
        level++;
    }
}

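/** \brief Construct a scatter plan that clears every key in a multibit of
 * \a total_bits bits. */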
void mmbBuildClearPlan(u32 total_bits, scatter_plan_raw *out) {
    return mmbBuildInitRangePlan(total_bits, 0, 0, out);
}

} // namespace ue2