/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Multibit: build code (for sparse iterators)
 */
#include "multibit.h"
#include "multibit_build.h"
#include "scatter.h"
#include "ue2common.h"
#include "rose/rose_build_scatter.h"
#include "util/compile_error.h"

#include <cassert>
#include <cstring> // for memset
#include <map>
#include <queue>
#include <vector>

using namespace std;

namespace ue2 {

u32 mmbit_size(u32 total_bits) {
    if (total_bits > MMB_MAX_BITS) {
        throw ResourceLimitError();
    }

    // Flat model multibit structures are just stored as a bit vector.
    if (total_bits <= MMB_FLAT_MAX_BITS) {
        return ROUNDUP_N(total_bits, 8) / 8;
    }

    u64a current_level = 1; // Number of blocks on current level.
    u64a total = 0;         // Total number of blocks.
    while (current_level * MMB_KEY_BITS < total_bits) {
        total += current_level;
        current_level <<= MMB_KEY_SHIFT;
    }

    // Last level is a one-for-one bit vector. It needs room for total_bits
    // elements, rounded up to the nearest block.
    u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;
    total += last_level;

    assert(total * sizeof(MMB_TYPE) <= UINT32_MAX);
    return (u32)(total * sizeof(MMB_TYPE));
}
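
// Worked example (illustrative, assuming the usual 64-bit block, i.e.
// MMB_KEY_BITS == 64 and MMB_KEY_SHIFT == 6): for total_bits == 65536, the
// loop above counts one root block plus 64 second-level summary blocks
// (1 + 64 = 65), and the last level needs 65536 / 64 == 1024 blocks, so
// mmbit_size() returns (65 + 1024) * sizeof(MMB_TYPE) == 1089 * 8 == 8712
// bytes.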

namespace {
struct TreeNode {
    MMB_TYPE mask = 0;
    u32 depth = 0;
    map<u32, TreeNode> children; // keyed by rkey
};
} // namespace
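
// Each TreeNode stands in for one block of the eventual multibit: mask
// records which summary bits (or, at the last level, which keys) are
// present, and children maps each set bit's rkey to the node for the
// corresponding block one level down.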

static
void addNode(TreeNode &tree, u32 depth, u32 key, s32 ks, u32 rkey) {
    u32 bit = (key >> ks) & MMB_KEY_MASK;
    DEBUG_PRINTF("depth=%u, key=%u, ks=%d, rkey=%u, bit=%u\n", depth, key, ks,
                 rkey, bit);
    mmb_set(&tree.mask, bit); // add bit to this level
    tree.depth = depth;       // record depth
    // next level
    rkey = (rkey << MMB_KEY_SHIFT) + bit;
    ks -= MMB_KEY_SHIFT;
    depth++;
    if (ks >= 0) {
        addNode(tree.children[rkey], depth, key, ks, rkey);
    }
}
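
// For example (again assuming MMB_KEY_SHIFT == 6): adding key 300 with an
// initial keyshift of 6 sets bit 300 >> 6 == 4 in the root mask, then
// recurses once with ks == 0 and rkey == 4 to set bit 300 & 63 == 44 in the
// second-level node for that subtree.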

static
void bfs(vector<mmbit_sparse_iter> &out, const TreeNode &tree) {
    queue<const TreeNode *> q;
    q.push(&tree);

    vector<u32> levels;
    u32 depth = 0;

    DEBUG_PRINTF("walking q\n");

    while (!q.empty()) {
        const TreeNode *t = q.front();
        q.pop();

        if (depth != t->depth) {
            depth = t->depth;
            levels.push_back(out.size());
        }

        DEBUG_PRINTF("pop: mask=0x%08llx, depth=%u, children.size()=%zu\n",
                     t->mask, t->depth, t->children.size());

        out.push_back(mmbit_sparse_iter());
        memset(&out.back(), 0, sizeof(mmbit_sparse_iter));
        mmbit_sparse_iter &record = out.back();
        record.mask = t->mask;
        record.val = 0;

        for (auto &e : t->children) {
            q.push(&e.second);
        }
    }

    // val for records in non-last levels is the iterator array start offset
    // for that iterator record's children
    u32 start = 0;
    for (size_t i = 0; i < levels.size(); i++) {
        u32 start_next = levels[i];
        u32 population = 0;
        DEBUG_PRINTF("next level starts at %u\n", start_next);
        for (u32 j = start; j < start_next; j++) {
            out[j].val = start_next + population;
            DEBUG_PRINTF("  children of %u start at %u\n", j, out[j].val);
            population += mmb_popcount(out[j].mask);
        }
        start = start_next;
    }

    // val for records in the last level is the cumulative popcount
    u32 population = 0;
    for (size_t i = start; i < out.size(); i++) {
        DEBUG_PRINTF("last level: i=%zu, population=%u\n", i, population);
        out[i].val = population;
        population += mmb_popcount(out[i].mask);
    }
}
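
// The result is a level-ordered record array. As an illustrative sketch
// (assuming 64-bit blocks): an iterator over keys {4, 100} in a two-level
// 128-bit multibit comes out as a root record whose mask has bits 0 and 1
// set and whose val == 1 (its children start at index 1), followed by two
// leaf records with val == 0 and val == 1, the cumulative popcounts giving
// each key's dense output position.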

/** \brief Construct a sparse iterator over the values in \a bits for a
 * multibit of size \a total_bits. */
vector<mmbit_sparse_iter> mmbBuildSparseIterator(const vector<u32> &bits,
                                                 u32 total_bits) {
    vector<mmbit_sparse_iter> out;
    assert(!bits.empty());
    assert(total_bits > 0);
    assert(total_bits <= MMB_MAX_BITS);

    DEBUG_PRINTF("building sparse iter for %zu of %u bits\n",
                 bits.size(), total_bits);

    s32 ks = (total_bits > 1 ? mmbit_keyshift(total_bits) : 0);

    // Construct an intermediate tree
    TreeNode tree;
    for (const auto &bit : bits) {
        assert(bit < total_bits);
        addNode(tree, 0, bit, ks, 0);
    }

    // From our intermediate tree, lay the data out with a breadth-first walk
    bfs(out, tree);
    assert(!out.empty());

#ifdef DEBUG
    DEBUG_PRINTF("dump of iterator tree:\n");
    for (size_t i = 0; i < out.size(); ++i) {
        printf("  %zu:\tmask=0x%08llx, val=%u\n", i, out[i].mask, out[i].val);
    }
#endif

    DEBUG_PRINTF("iter has %zu records\n", out.size());
    return out;
}
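
// At scan time, the record array built here is walked by the sparse iterator
// runtime in multibit.h (mmbit_sparse_iter_begin() and
// mmbit_sparse_iter_next()): each record's mask is intersected with the live
// multibit block, and val is used either to locate the child records (inner
// levels) or to report a key's dense index (last level).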

template<typename T>
static
void add_scatter(vector<T> *out, u32 offset, u64a mask) {
    T su;
    memset(&su, 0, sizeof(su));
    su.offset = offset;
    su.val = mask;
    out->push_back(su);
    DEBUG_PRINTF("add %llu at offset %u\n", mask, offset);
}

static
u32 mmbit_get_level_root_offset(u32 level) {
    return mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
}
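
// mmbit_root_offset_from_level[] (declared alongside the multibit runtime)
// holds the cumulative block count of all levels above the given one, so
// level 0 starts at block 0, level 1 at block 1, level 2 at block 1 + 64,
// and so on (assuming MMB_KEY_BITS == 64).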

void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end,
                           scatter_plan_raw *out) {
    DEBUG_PRINTF("building scatter plan for [%u, %u]/%u\n", begin, end,
                 total_bits);
    if (!total_bits) {
        return;
    }

    if (total_bits <= MMB_FLAT_MAX_BITS) {
        // Handle flat model cases: first a bunch of 64-bit full-sized blocks,
        // then a single runt block at the end.
        u32 dest = 0; // dest offset
        u32 bits = total_bits;
        u32 base = 0;
        for (; bits > 64; bits -= 64, base += 64, dest += 8) {
            MMB_TYPE mask = get_flat_masks(base, begin, end);
            add_scatter(&out->p_u64a, dest, mask);
        }

        // Last chunk.
        assert(bits > 0 && bits <= 64);

        MMB_TYPE mask = get_flat_masks(base, begin, end);
        if (bits <= 8) {
            add_scatter(&out->p_u8, dest + 0, mask);
        } else if (bits <= 16) {
            add_scatter(&out->p_u16, dest + 0, mask);
        } else if (bits <= 24) {
            add_scatter(&out->p_u16, dest + 0, mask);
            add_scatter(&out->p_u8, dest + 2, mask >> 16);
        } else if (bits <= 32) {
            add_scatter(&out->p_u32, dest + 0, mask);
        } else if (bits <= 40) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u8, dest + 4, mask >> 32);
        } else if (bits <= 48) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u16, dest + 4, mask >> 32);
        } else if (bits <= 56) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u16, dest + 4, mask >> 32);
            add_scatter(&out->p_u8, dest + 6, mask >> 48);
        } else {
            add_scatter(&out->p_u64a, dest + 0, mask);
        }
        return;
    }
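
    // Flat-model example (illustrative values, assuming 24 bits is within
    // the flat-model limit): for total_bits == 24 and the range [0, 20), the
    // mask is 0xfffff, so the plan above is one u16 write of 0xffff at
    // offset 0 plus one u8 write of 0xf at offset 2, covering exactly the
    // 3-byte flat bit vector.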

    /* handle the multilevel case */
    s32 ks = mmbit_keyshift(total_bits);
    u32 level = 0;
    assert(sizeof(MMB_TYPE) == sizeof(u64a));

    if (begin == end) {
        add_scatter(&out->p_u64a, 0, 0);
        return;
    }

    for (;;) {
        u32 block_offset = mmbit_get_level_root_offset(level);
        u32 k1 = begin >> ks, k2 = end >> ks;

        // Summary blocks need to account for the runt block on the end.
        if ((k2 << ks) != end) {
            k2++;
        }

        // Partial block to deal with beginning.
        block_offset += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE);
        if (k1 % MMB_KEY_BITS) {
            u32 idx = k1 / MMB_KEY_BITS;
            u32 block_end = (idx + 1) * MMB_KEY_BITS;

            // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge
            // cases here (see the branch in mmb_mask_zero_to).
            MMB_TYPE mask = (-MMB_ONE) << (k1 % MMB_KEY_BITS);

            if (k2 < block_end) {
                assert(k2 % MMB_KEY_BITS);
                mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
                add_scatter(&out->p_u64a, block_offset, mask);
                goto next_level;
            } else {
                add_scatter(&out->p_u64a, block_offset, mask);
                k1 = block_end;
                block_offset += sizeof(MMB_TYPE);
            }
        }

        // Write blocks filled with ones until we get to the last block.
        for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
            add_scatter(&out->p_u64a, block_offset, -MMB_ONE);
            block_offset += sizeof(MMB_TYPE);
        }

        // Final block.
        if (likely(k1 < k2)) {
            // Again, if k2 was at a block boundary, it would have been
            // handled by the previous loop, so we know k2 % MMB_KEY_BITS != 0
            // and can avoid the branch in mmb_mask_zero_to here.
            assert(k2 % MMB_KEY_BITS);
            MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);

            add_scatter(&out->p_u64a, block_offset, mask);
        }

    next_level:
        if (ks == 0) {
            break; // Last level is done, finished.
        }

        ks -= MMB_KEY_SHIFT;
        level++;
    }
}
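
// Multilevel example (illustrative, assuming 64-bit blocks and a two-level
// layout for 128 bits: one root summary block, then two leaf blocks): for
// the range [2, 70), mmbBuildInitRangePlan() emits three u64a writes:
//   offset  0: 0x0000000000000003 (root: both leaf blocks are touched)
//   offset  8: 0xfffffffffffffffc (leaf block 0: keys 2..63)
//   offset 16: 0x000000000000003f (leaf block 1: keys 64..69)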

void mmbBuildClearPlan(u32 total_bits, scatter_plan_raw *out) {
    return mmbBuildInitRangePlan(total_bits, 0, 0, out);
}
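
// Note that for the multilevel model, the empty range makes
// mmbBuildInitRangePlan() emit a single zero write to the root block; that
// is enough for the whole structure to read as empty, since runtime queries
// consult the root summary before touching lower levels. The flat model gets
// explicit zero writes over the whole bit vector.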

} // namespace ue2