/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Multibit: build code (for sparse iterators)
 */
#include "multibit.h"
#include "multibit_build.h"
#include "scatter.h"
#include "ue2common.h"
#include "rose/rose_build_scatter.h"
#include "util/compile_error.h"

#include <cassert>
#include <cstring> // for memset
#include <map>
#include <queue>
#include <vector>

using namespace std;

namespace ue2 {

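/** \brief Return the size in bytes of the multibit structure required to
 * store \a total_bits keys.
 *
 * Worked example (illustrative; assumes 64-bit blocks, i.e. MMB_KEY_BITS ==
 * 64, and that 1024 exceeds MMB_FLAT_MAX_BITS so the multilevel path below
 * is taken): total_bits == 1024 needs one root summary block plus
 * 1024/64 == 16 leaf blocks, so mmbit_size(1024) == 17 * 8 == 136 bytes.
 */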
u32 mmbit_size(u32 total_bits) {
    if (total_bits > MMB_MAX_BITS) {
        throw ResourceLimitError();
    }

    // Flat model multibit structures are just stored as a bit vector.
    if (total_bits <= MMB_FLAT_MAX_BITS) {
        return ROUNDUP_N(total_bits, 8) / 8;
    }

    u64a current_level = 1; // Number of blocks on current level.
    u64a total = 0;         // Total number of blocks.
    while (current_level * MMB_KEY_BITS < total_bits) {
        total += current_level;
        current_level <<= MMB_KEY_SHIFT;
    }

    // Last level is a one-for-one bit vector. It needs room for total_bits
    // elements, rounded up to the nearest block.
    u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS;
    total += last_level;

    assert(total * sizeof(MMB_TYPE) <= UINT32_MAX);
    return (u32)(total * sizeof(MMB_TYPE));
}

namespace {
struct TreeNode {
    MMB_TYPE mask = 0;
    u32 depth = 0;
    map<u32, TreeNode> children; // keyed by rkey
};
} // namespace

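/** \brief Add the bit for \a key to the intermediate tree, one level at a
 * time.
 *
 * \a ks is the shift that extracts the current level's bits from \a key, and
 * \a rkey is the accumulated relative key identifying this node's child on
 * the next level; recursion stops once the leaf level has been marked.
 */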
static
void addNode(TreeNode &tree, u32 depth, u32 key, s32 ks, u32 rkey) {
    u32 bit = (key >> ks) & MMB_KEY_MASK;
    DEBUG_PRINTF("depth=%u, key=%u, ks=%d, rkey=%u, bit=%u\n", depth, key,
                 ks, rkey, bit);
    mmb_set(&tree.mask, bit); // add bit to this level
    tree.depth = depth; // record depth
    // next level
    rkey = (rkey << MMB_KEY_SHIFT) + bit;
    ks -= MMB_KEY_SHIFT;
    depth++;
    if (ks >= 0) {
        addNode(tree.children[rkey], depth, key, ks, rkey);
    }
}

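/** \brief Lay the intermediate tree out into \a out in breadth-first order,
 * then fix up each record's \c val field.
 *
 * For a record on a non-last level, \c val is the index in \a out at which
 * its children begin; for a record on the last level, \c val is the
 * cumulative popcount of the preceding last-level masks.
 */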
static
void bfs(vector<mmbit_sparse_iter> &out, const TreeNode &tree) {
    queue<const TreeNode *> q;
    q.push(&tree);

    vector<u32> levels;
    u32 depth = 0;

    DEBUG_PRINTF("walking q\n");

    while (!q.empty()) {
        const TreeNode *t = q.front();
        q.pop();

        if (depth != t->depth) {
            depth = t->depth;
            levels.push_back(out.size());
        }

        DEBUG_PRINTF("pop: mask=0x%08llx, depth=%u, children.size()=%zu\n",
                     t->mask, t->depth, t->children.size());

        out.push_back(mmbit_sparse_iter());
        memset(&out.back(), 0, sizeof(mmbit_sparse_iter));
        mmbit_sparse_iter &record = out.back();
        record.mask = t->mask;
        record.val = 0;

        for (auto &e : t->children) {
            q.push(&e.second);
        }
    }

    // val for records in non-last levels is the iterator array start offset
    // for that iterator record's children
    u32 start = 0;
    for (size_t i = 0; i < levels.size(); i++) {
        u32 start_next = levels[i];
        u32 population = 0;
        DEBUG_PRINTF("next level starts at %u\n", start_next);
        for (u32 j = start; j < start_next; j++) {
            out[j].val = start_next + population;
            DEBUG_PRINTF("  children of %u start at %u\n", j, out[j].val);
            population += mmb_popcount(out[j].mask);
        }
        start = start_next;
    }

    // val for records in the last level is the cumulative popcount
    u32 population = 0;
    for (size_t i = start; i < out.size(); i++) {
        DEBUG_PRINTF("last level: i=%zu, population=%u\n", i, population);
        out[i].val = population;
        population += mmb_popcount(out[i].mask);
    }
}

/** \brief Construct a sparse iterator over the values in \a bits for a
 * multibit of size \a total_bits. */
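/* Usage sketch (values illustrative only):
 *
 *     std::vector<u32> bits = {1, 5, 27};
 *     auto iter = mmbBuildSparseIterator(bits, 100); // 100-bit multibit
 *
 * The returned records are laid out level by level, ready to be stored for
 * use by the runtime's sparse-iterator routines.
 */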
vector<mmbit_sparse_iter> mmbBuildSparseIterator(const vector<u32> &bits,
                                                 u32 total_bits) {
    vector<mmbit_sparse_iter> out;
    assert(!bits.empty());
    assert(total_bits > 0);
    assert(total_bits <= MMB_MAX_BITS);

    DEBUG_PRINTF("building sparse iter for %zu of %u bits\n", bits.size(),
                 total_bits);

    s32 ks = (total_bits > 1 ? mmbit_keyshift(total_bits) : 0);

    // Construct an intermediate tree
    TreeNode tree;
    for (const auto &bit : bits) {
        assert(bit < total_bits);
        addNode(tree, 0, bit, ks, 0);
    }

    // From our intermediate tree, lay the data out with a breadth-first walk
    bfs(out, tree);
    assert(!out.empty());

#ifdef DEBUG
    DEBUG_PRINTF("dump of iterator tree:\n");
    for (size_t i = 0; i < out.size(); ++i) {
        printf("    %zu:\tmask=0x%08llx, val=%u\n", i, out[i].mask,
               out[i].val);
    }
#endif

    DEBUG_PRINTF("iter has %zu records\n", out.size());
    return out;
}

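/** \brief Append one scatter write to \a out: \a mask, truncated to the
 * width of the scatter unit type \a T, written at byte offset \a offset. */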
template<typename T>
static
void add_scatter(vector<T> *out, u32 offset, u64a mask) {
    T su;
    memset(&su, 0, sizeof(su));
    su.offset = offset;
    su.val = mask;
    out->push_back(su);
    DEBUG_PRINTF("add %llu at offset %u\n", mask, offset);
}

static
u32 mmbit_get_level_root_offset(u32 level) {
    return mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
}

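/** \brief Build a scatter plan that initialises a multibit of size
 * \a total_bits so that exactly the keys from \a begin up to but not
 * including \a end are on (begin == end yields an all-clear plan).
 *
 * Flat-model multibits are covered with direct writes; multilevel multibits
 * are written one level at a time, from the root summary level down.
 */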
void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end,
                           scatter_plan_raw *out) {
    DEBUG_PRINTF("building scatter plan for [%u, %u]/%u\n", begin, end,
                 total_bits);
    if (!total_bits) {
        return;
    }

    if (total_bits <= MMB_FLAT_MAX_BITS) {
        // Handle flat model cases: first a bunch of 64-bit full-sized blocks,
        // then a single runt block at the end.
        u32 dest = 0; // dest offset
        u32 bits = total_bits;
        u32 base = 0;
        for (; bits > 64; bits -= 64, base += 64, dest += 8) {
            MMB_TYPE mask = get_flat_masks(base, begin, end);
            add_scatter(&out->p_u64a, dest, mask);
        }

        // Last chunk.
        assert(bits > 0 && bits <= 64);

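        // The ladder below covers the runt block with the narrowest writes
        // available. Illustrative example: with bits == 40, the u32 write
        // covers mask bits 0..31 at dest and the u8 write covers mask bits
        // 32..39 at dest + 4.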
        MMB_TYPE mask = get_flat_masks(base, begin, end);
        if (bits <= 8) {
            add_scatter(&out->p_u8, dest + 0, mask);
        } else if (bits <= 16) {
            add_scatter(&out->p_u16, dest + 0, mask);
        } else if (bits <= 24) {
            add_scatter(&out->p_u16, dest + 0, mask);
            add_scatter(&out->p_u8, dest + 2, mask >> 16);
        } else if (bits <= 32) {
            add_scatter(&out->p_u32, dest + 0, mask);
        } else if (bits <= 40) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u8, dest + 4, mask >> 32);
        } else if (bits <= 48) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u16, dest + 4, mask >> 32);
        } else if (bits <= 56) {
            add_scatter(&out->p_u32, dest + 0, mask);
            add_scatter(&out->p_u16, dest + 4, mask >> 32);
            add_scatter(&out->p_u8, dest + 6, mask >> 48);
        } else {
            add_scatter(&out->p_u64a, dest + 0, mask);
        }
        return;
    }

    /* handle the multilevel case */
    s32 ks = mmbit_keyshift(total_bits);
    u32 level = 0;
    assert(sizeof(MMB_TYPE) == sizeof(u64a));

    if (begin == end) {
        add_scatter(&out->p_u64a, 0, 0);
        return;
    }

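    // Walk the levels from the root down. At each level, k1 and k2 are the
    // begin/end keys for that level: partial blocks at either end get partial
    // masks, and every full block in between is written as all-ones.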
    for (;;) {
        u32 block_offset = mmbit_get_level_root_offset(level);
        u32 k1 = begin >> ks, k2 = end >> ks;

        // Summary blocks need to account for the runt block on the end.
        if ((k2 << ks) != end) {
            k2++;
        }

        // Partial block to deal with beginning.
        block_offset += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE);
        if (k1 % MMB_KEY_BITS) {
            u32 idx = k1 / MMB_KEY_BITS;
            u32 block_end = (idx + 1) * MMB_KEY_BITS;

            // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge
            // cases here (see the branch in mmb_mask_zero_to).
            MMB_TYPE mask = (-MMB_ONE) << (k1 % MMB_KEY_BITS);

            if (k2 < block_end) {
                assert(k2 % MMB_KEY_BITS);
                mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
                add_scatter(&out->p_u64a, block_offset, mask);
                goto next_level;
            } else {
                add_scatter(&out->p_u64a, block_offset, mask);
                k1 = block_end;
                block_offset += sizeof(MMB_TYPE);
            }
        }

        // Write blocks filled with ones until we get to the last block.
        for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
            add_scatter(&out->p_u64a, block_offset, -MMB_ONE);
            block_offset += sizeof(MMB_TYPE);
        }

        // Final block.
        if (likely(k1 < k2)) {
            // Again, if k2 was at a block boundary, it would have been
            // handled by the previous loop, so we know k2 % MMB_KEY_BITS != 0
            // and can avoid the branch in mmb_mask_zero_to here.
            assert(k2 % MMB_KEY_BITS);
            MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);

            add_scatter(&out->p_u64a, block_offset, mask);
        }

    next_level:
        if (ks == 0) {
            break; // Last level is done, finished.
        }

        ks -= MMB_KEY_SHIFT;
        level++;
    }
}

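/** \brief Build a scatter plan that zeroes a multibit of size \a total_bits:
 * an init plan over the empty range from 0 to 0. */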
void mmbBuildClearPlan(u32 total_bits, scatter_plan_raw *out) {
    return mmbBuildInitRangePlan(total_bits, 0, 0, out);
}

} // namespace ue2