| 1 | // SPDX-License-Identifier: Apache-2.0 |
| 2 | // ---------------------------------------------------------------------------- |
| 3 | // Copyright 2011-2023 Arm Limited |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| 6 | // use this file except in compliance with the License. You may obtain a copy |
| 7 | // of the License at: |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | // License for the specific language governing permissions and limitations |
| 15 | // under the License. |
| 16 | // ---------------------------------------------------------------------------- |
| 17 | |
| 18 | /** |
| 19 | * @brief Functions for generating partition tables on demand. |
| 20 | */ |
| 21 | |
| 22 | #include "astcenc_internal.h" |
| 23 | |
| 24 | /** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */ |
| 25 | #define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64) |
| 26 | |
| 27 | /** |
| 28 | * @brief Generate a canonical representation of a partition pattern. |
| 29 | * |
| 30 | * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store |
| 31 | * the remapped texel index. Remapping ensures that we only match on the partition pattern, |
| 32 | * independent of the partition order generated by the hash. |
| 33 | * |
| 34 | * @param texel_count The number of texels in the block. |
| 35 | * @param partition_of_texel The partition assignments, in hash order. |
| 36 | * @param[out] bit_pattern The output bit pattern representation. |
| 37 | */ |
| 38 | static void generate_canonical_partitioning( |
| 39 | unsigned int texel_count, |
| 40 | const uint8_t* partition_of_texel, |
| 41 | uint64_t bit_pattern[BIT_PATTERN_WORDS] |
| 42 | ) { |
| 43 | // Clear the pattern |
| 44 | for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++) |
| 45 | { |
| 46 | bit_pattern[i] = 0; |
| 47 | } |
| 48 | |
| 49 | // Store a mapping to reorder the raw partitions so that the partitions are ordered such |
| 50 | // that the lowest texel index in partition N is smaller than the lowest texel index in |
| 51 | // partition N + 1. |
| 52 | int mapped_index[BLOCK_MAX_PARTITIONS]; |
| 53 | int map_weight_count = 0; |
| 54 | |
| 55 | for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) |
| 56 | { |
| 57 | mapped_index[i] = -1; |
| 58 | } |
| 59 | |
| 60 | for (unsigned int i = 0; i < texel_count; i++) |
| 61 | { |
| 62 | int index = partition_of_texel[i]; |
| 63 | if (mapped_index[index] < 0) |
| 64 | { |
| 65 | mapped_index[index] = map_weight_count++; |
| 66 | } |
| 67 | |
| 68 | uint64_t xlat_index = mapped_index[index]; |
| 69 | bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F)); |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | /** |
| 74 | * @brief Compare two canonical patterns to see if they are the same. |
| 75 | * |
| 76 | * @param part1 The first canonical bit pattern to check. |
| 77 | * @param part2 The second canonical bit pattern to check. |
| 78 | * |
| 79 | * @return @c true if the patterns are the same, @c false otherwise. |
| 80 | */ |
| 81 | static bool compare_canonical_partitionings( |
| 82 | const uint64_t part1[BIT_PATTERN_WORDS], |
| 83 | const uint64_t part2[BIT_PATTERN_WORDS] |
| 84 | ) { |
| 85 | return (part1[0] == part2[0]) |
| 86 | #if BIT_PATTERN_WORDS > 1 |
| 87 | && (part1[1] == part2[1]) |
| 88 | #endif |
| 89 | #if BIT_PATTERN_WORDS > 2 |
| 90 | && (part1[2] == part2[2]) |
| 91 | #endif |
| 92 | #if BIT_PATTERN_WORDS > 3 |
| 93 | && (part1[3] == part2[3]) |
| 94 | #endif |
| 95 | #if BIT_PATTERN_WORDS > 4 |
| 96 | && (part1[4] == part2[4]) |
| 97 | #endif |
| 98 | #if BIT_PATTERN_WORDS > 5 |
| 99 | && (part1[5] == part2[5]) |
| 100 | #endif |
| 101 | #if BIT_PATTERN_WORDS > 6 |
| 102 | && (part1[6] == part2[6]) |
| 103 | #endif |
| 104 | ; |
| 105 | } |
| 106 | |
| 107 | /** |
| 108 | * @brief Hash function used for procedural partition assignment. |
| 109 | * |
| 110 | * @param inp The hash seed. |
| 111 | * |
| 112 | * @return The hashed value. |
| 113 | */ |
| 114 | static uint32_t hash52( |
| 115 | uint32_t inp |
| 116 | ) { |
| 117 | inp ^= inp >> 15; |
| 118 | |
| 119 | // (2^4 + 1) * (2^7 + 1) * (2^17 - 1) |
| 120 | inp *= 0xEEDE0891; |
| 121 | inp ^= inp >> 5; |
| 122 | inp += inp << 16; |
| 123 | inp ^= inp >> 7; |
| 124 | inp ^= inp >> 3; |
| 125 | inp ^= inp << 6; |
| 126 | inp ^= inp >> 17; |
| 127 | return inp; |
| 128 | } |
| 129 | |
| 130 | /** |
| 131 | * @brief Select texel assignment for a single coordinate. |
| 132 | * |
| 133 | * @param seed The seed - the partition index from the block. |
| 134 | * @param x The texel X coordinate in the block. |
| 135 | * @param y The texel Y coordinate in the block. |
| 136 | * @param z The texel Z coordinate in the block. |
| 137 | * @param partition_count The total partition count of this encoding. |
| 138 | * @param small_block @c true if the block has fewer than 32 texels. |
| 139 | * |
| 140 | * @return The assigned partition index for this texel. |
| 141 | */ |
| 142 | static uint8_t select_partition( |
| 143 | int seed, |
| 144 | int x, |
| 145 | int y, |
| 146 | int z, |
| 147 | int partition_count, |
| 148 | bool small_block |
| 149 | ) { |
| 150 | // For small blocks bias the coordinates to get better distribution |
| 151 | if (small_block) |
| 152 | { |
| 153 | x <<= 1; |
| 154 | y <<= 1; |
| 155 | z <<= 1; |
| 156 | } |
| 157 | |
| 158 | seed += (partition_count - 1) * 1024; |
| 159 | |
| 160 | uint32_t rnum = hash52(seed); |
| 161 | |
| 162 | uint8_t seed1 = rnum & 0xF; |
| 163 | uint8_t seed2 = (rnum >> 4) & 0xF; |
| 164 | uint8_t seed3 = (rnum >> 8) & 0xF; |
| 165 | uint8_t seed4 = (rnum >> 12) & 0xF; |
| 166 | uint8_t seed5 = (rnum >> 16) & 0xF; |
| 167 | uint8_t seed6 = (rnum >> 20) & 0xF; |
| 168 | uint8_t seed7 = (rnum >> 24) & 0xF; |
| 169 | uint8_t seed8 = (rnum >> 28) & 0xF; |
| 170 | uint8_t seed9 = (rnum >> 18) & 0xF; |
| 171 | uint8_t seed10 = (rnum >> 22) & 0xF; |
| 172 | uint8_t seed11 = (rnum >> 26) & 0xF; |
| 173 | uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; |
| 174 | |
| 175 | // Squaring all the seeds in order to bias their distribution towards lower values. |
| 176 | seed1 *= seed1; |
| 177 | seed2 *= seed2; |
| 178 | seed3 *= seed3; |
| 179 | seed4 *= seed4; |
| 180 | seed5 *= seed5; |
| 181 | seed6 *= seed6; |
| 182 | seed7 *= seed7; |
| 183 | seed8 *= seed8; |
| 184 | seed9 *= seed9; |
| 185 | seed10 *= seed10; |
| 186 | seed11 *= seed11; |
| 187 | seed12 *= seed12; |
| 188 | |
| 189 | int sh1, sh2; |
| 190 | if (seed & 1) |
| 191 | { |
| 192 | sh1 = (seed & 2 ? 4 : 5); |
| 193 | sh2 = (partition_count == 3 ? 6 : 5); |
| 194 | } |
| 195 | else |
| 196 | { |
| 197 | sh1 = (partition_count == 3 ? 6 : 5); |
| 198 | sh2 = (seed & 2 ? 4 : 5); |
| 199 | } |
| 200 | |
| 201 | int sh3 = (seed & 0x10) ? sh1 : sh2; |
| 202 | |
| 203 | seed1 >>= sh1; |
| 204 | seed2 >>= sh2; |
| 205 | seed3 >>= sh1; |
| 206 | seed4 >>= sh2; |
| 207 | seed5 >>= sh1; |
| 208 | seed6 >>= sh2; |
| 209 | seed7 >>= sh1; |
| 210 | seed8 >>= sh2; |
| 211 | |
| 212 | seed9 >>= sh3; |
| 213 | seed10 >>= sh3; |
| 214 | seed11 >>= sh3; |
| 215 | seed12 >>= sh3; |
| 216 | |
| 217 | int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); |
| 218 | int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); |
| 219 | int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); |
| 220 | int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); |
| 221 | |
| 222 | // Apply the saw |
| 223 | a &= 0x3F; |
| 224 | b &= 0x3F; |
| 225 | c &= 0x3F; |
| 226 | d &= 0x3F; |
| 227 | |
| 228 | // Remove some of the components if we are to output < 4 partitions. |
| 229 | if (partition_count <= 3) |
| 230 | { |
| 231 | d = 0; |
| 232 | } |
| 233 | |
| 234 | if (partition_count <= 2) |
| 235 | { |
| 236 | c = 0; |
| 237 | } |
| 238 | |
| 239 | if (partition_count <= 1) |
| 240 | { |
| 241 | b = 0; |
| 242 | } |
| 243 | |
| 244 | uint8_t partition; |
| 245 | if (a >= b && a >= c && a >= d) |
| 246 | { |
| 247 | partition = 0; |
| 248 | } |
| 249 | else if (b >= c && b >= d) |
| 250 | { |
| 251 | partition = 1; |
| 252 | } |
| 253 | else if (c >= d) |
| 254 | { |
| 255 | partition = 2; |
| 256 | } |
| 257 | else |
| 258 | { |
| 259 | partition = 3; |
| 260 | } |
| 261 | |
| 262 | return partition; |
| 263 | } |
| 264 | |
| 265 | /** |
| 266 | * @brief Generate a single partition info structure. |
| 267 | * |
| 268 | * @param[out] bsd The block size information. |
| 269 | * @param partition_count The partition count of this partitioning. |
| 270 | * @param partition_index The partition index / seed of this partitioning. |
| 271 | * @param partition_remap_index The remapped partition index of this partitioning. |
| 272 | * @param[out] pi The partition info structure to populate. |
| 273 | * |
| 274 | * @return True if this is a useful partition index, False if we can skip it. |
| 275 | */ |
| 276 | static bool generate_one_partition_info_entry( |
| 277 | block_size_descriptor& bsd, |
| 278 | unsigned int partition_count, |
| 279 | unsigned int partition_index, |
| 280 | unsigned int partition_remap_index, |
| 281 | partition_info& pi |
| 282 | ) { |
| 283 | int texels_per_block = bsd.texel_count; |
| 284 | bool small_block = texels_per_block < 32; |
| 285 | |
| 286 | uint8_t *partition_of_texel = pi.partition_of_texel; |
| 287 | |
| 288 | // Assign texels to partitions |
| 289 | int texel_idx = 0; |
| 290 | int counts[BLOCK_MAX_PARTITIONS] { 0 }; |
| 291 | for (unsigned int z = 0; z < bsd.zdim; z++) |
| 292 | { |
| 293 | for (unsigned int y = 0; y < bsd.ydim; y++) |
| 294 | { |
| 295 | for (unsigned int x = 0; x < bsd.xdim; x++) |
| 296 | { |
| 297 | uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); |
| 298 | pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++); |
| 299 | *partition_of_texel++ = part; |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | // Fill loop tail so we can overfetch later |
| 305 | for (unsigned int i = 0; i < partition_count; i++) |
| 306 | { |
| 307 | int ptex_count = counts[i]; |
| 308 | int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count); |
| 309 | for (int j = ptex_count; j < ptex_count_simd; j++) |
| 310 | { |
| 311 | pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1]; |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | // Populate the actual procedural partition count |
| 316 | if (counts[0] == 0) |
| 317 | { |
| 318 | pi.partition_count = 0; |
| 319 | } |
| 320 | else if (counts[1] == 0) |
| 321 | { |
| 322 | pi.partition_count = 1; |
| 323 | } |
| 324 | else if (counts[2] == 0) |
| 325 | { |
| 326 | pi.partition_count = 2; |
| 327 | } |
| 328 | else if (counts[3] == 0) |
| 329 | { |
| 330 | pi.partition_count = 3; |
| 331 | } |
| 332 | else |
| 333 | { |
| 334 | pi.partition_count = 4; |
| 335 | } |
| 336 | |
| 337 | // Populate the partition index |
| 338 | pi.partition_index = static_cast<uint16_t>(partition_index); |
| 339 | |
| 340 | // Populate the coverage bitmaps for 2/3/4 partitions |
| 341 | uint64_t* bitmaps { nullptr }; |
| 342 | if (partition_count == 2) |
| 343 | { |
| 344 | bitmaps = bsd.coverage_bitmaps_2[partition_remap_index]; |
| 345 | } |
| 346 | else if (partition_count == 3) |
| 347 | { |
| 348 | bitmaps = bsd.coverage_bitmaps_3[partition_remap_index]; |
| 349 | } |
| 350 | else if (partition_count == 4) |
| 351 | { |
| 352 | bitmaps = bsd.coverage_bitmaps_4[partition_remap_index]; |
| 353 | } |
| 354 | |
| 355 | for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) |
| 356 | { |
| 357 | pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]); |
| 358 | } |
| 359 | |
| 360 | // Valid partitionings have texels in all of the requested partitions |
| 361 | bool valid = pi.partition_count == partition_count; |
| 362 | |
| 363 | if (bitmaps) |
| 364 | { |
| 365 | // Populate the partition coverage bitmap |
| 366 | for (unsigned int i = 0; i < partition_count; i++) |
| 367 | { |
| 368 | bitmaps[i] = 0ULL; |
| 369 | } |
| 370 | |
| 371 | unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); |
| 372 | for (unsigned int i = 0; i < texels_to_process; i++) |
| 373 | { |
| 374 | unsigned int idx = bsd.kmeans_texels[i]; |
| 375 | bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i; |
| 376 | } |
| 377 | } |
| 378 | |
| 379 | return valid; |
| 380 | } |
| 381 | |
| 382 | static void build_partition_table_for_one_partition_count( |
| 383 | block_size_descriptor& bsd, |
| 384 | bool can_omit_partitionings, |
| 385 | unsigned int partition_count_cutoff, |
| 386 | unsigned int partition_count, |
| 387 | partition_info* ptab, |
| 388 | uint64_t* canonical_patterns |
| 389 | ) { |
| 390 | unsigned int next_index = 0; |
| 391 | bsd.partitioning_count_selected[partition_count - 1] = 0; |
| 392 | bsd.partitioning_count_all[partition_count - 1] = 0; |
| 393 | |
| 394 | // Skip tables larger than config max partition count if we can omit modes |
| 395 | if (can_omit_partitionings && (partition_count > partition_count_cutoff)) |
| 396 | { |
| 397 | return; |
| 398 | } |
| 399 | |
| 400 | // Iterate through twice |
| 401 | // - Pass 0: Keep selected partitionings |
| 402 | // - Pass 1: Keep non-selected partitionings (skip if in omit mode) |
| 403 | unsigned int max_iter = can_omit_partitionings ? 1 : 2; |
| 404 | |
| 405 | // Tracker for things we built in the first iteration |
| 406 | uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 }; |
| 407 | for (unsigned int x = 0; x < max_iter; x++) |
| 408 | { |
| 409 | for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) |
| 410 | { |
| 411 | // Don't include things we built in the first pass |
| 412 | if ((x == 1) && build[i]) |
| 413 | { |
| 414 | continue; |
| 415 | } |
| 416 | |
| 417 | bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]); |
| 418 | if ((x == 0) && !keep_useful) |
| 419 | { |
| 420 | continue; |
| 421 | } |
| 422 | |
| 423 | generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS); |
| 424 | bool keep_canonical = true; |
| 425 | for (unsigned int j = 0; j < next_index; j++) |
| 426 | { |
| 427 | bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS); |
| 428 | if (match) |
| 429 | { |
| 430 | keep_canonical = false; |
| 431 | break; |
| 432 | } |
| 433 | } |
| 434 | |
| 435 | if (keep_useful && keep_canonical) |
| 436 | { |
| 437 | if (x == 0) |
| 438 | { |
| 439 | bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); |
| 440 | bsd.partitioning_count_selected[partition_count - 1]++; |
| 441 | bsd.partitioning_count_all[partition_count - 1]++; |
| 442 | build[i] = 1; |
| 443 | next_index++; |
| 444 | } |
| 445 | } |
| 446 | else |
| 447 | { |
| 448 | if (x == 1) |
| 449 | { |
| 450 | bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); |
| 451 | bsd.partitioning_count_all[partition_count - 1]++; |
| 452 | next_index++; |
| 453 | } |
| 454 | } |
| 455 | } |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | /* See header for documentation. */ |
| 460 | void init_partition_tables( |
| 461 | block_size_descriptor& bsd, |
| 462 | bool can_omit_partitionings, |
| 463 | unsigned int partition_count_cutoff |
| 464 | ) { |
| 465 | partition_info* par_tab2 = bsd.partitionings; |
| 466 | partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS; |
| 467 | partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS; |
| 468 | partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS; |
| 469 | |
| 470 | generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1); |
| 471 | bsd.partitioning_count_selected[0] = 1; |
| 472 | bsd.partitioning_count_all[0] = 1; |
| 473 | |
| 474 | uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS]; |
| 475 | |
| 476 | build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns); |
| 477 | build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns); |
| 478 | build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns); |
| 479 | |
| 480 | delete[] canonical_patterns; |
| 481 | } |
| 482 | |