| 1 | // SPDX-License-Identifier: Apache-2.0 |
| 2 | // ---------------------------------------------------------------------------- |
| 3 | // Copyright 2011-2023 Arm Limited |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| 6 | // use this file except in compliance with the License. You may obtain a copy |
| 7 | // of the License at: |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | // License for the specific language governing permissions and limitations |
| 15 | // under the License. |
| 16 | // ---------------------------------------------------------------------------- |
| 17 | |
| 18 | /** |
| 19 | * @brief Functions to generate block size descriptor and decimation tables. |
| 20 | */ |
| 21 | |
| 22 | #include "astcenc_internal.h" |
| 23 | |
| 24 | /** |
| 25 | * @brief Decode the properties of an encoded 2D block mode. |
| 26 | * |
| 27 | * @param block_mode The encoded block mode. |
| 28 | * @param[out] x_weights The number of weights in the X dimension. |
| 29 | * @param[out] y_weights The number of weights in the Y dimension. |
| 30 | * @param[out] is_dual_plane True if this block mode has two weight planes. |
| 31 | * @param[out] quant_mode The quantization level for the weights. |
| 32 | * @param[out] weight_bits The storage bit count for the weights. |
| 33 | * |
| 34 | * @return Returns true if a valid mode, false otherwise. |
| 35 | */ |
| 36 | static bool decode_block_mode_2d( |
| 37 | unsigned int block_mode, |
| 38 | unsigned int& x_weights, |
| 39 | unsigned int& y_weights, |
| 40 | bool& is_dual_plane, |
| 41 | unsigned int& quant_mode, |
| 42 | unsigned int& weight_bits |
| 43 | ) { |
| 44 | unsigned int base_quant_mode = (block_mode >> 4) & 1; |
| 45 | unsigned int H = (block_mode >> 9) & 1; |
| 46 | unsigned int D = (block_mode >> 10) & 1; |
| 47 | unsigned int A = (block_mode >> 5) & 0x3; |
| 48 | |
| 49 | x_weights = 0; |
| 50 | y_weights = 0; |
| 51 | |
| 52 | if ((block_mode & 3) != 0) |
| 53 | { |
| 54 | base_quant_mode |= (block_mode & 3) << 1; |
| 55 | unsigned int B = (block_mode >> 7) & 3; |
| 56 | switch ((block_mode >> 2) & 3) |
| 57 | { |
| 58 | case 0: |
| 59 | x_weights = B + 4; |
| 60 | y_weights = A + 2; |
| 61 | break; |
| 62 | case 1: |
| 63 | x_weights = B + 8; |
| 64 | y_weights = A + 2; |
| 65 | break; |
| 66 | case 2: |
| 67 | x_weights = A + 2; |
| 68 | y_weights = B + 8; |
| 69 | break; |
| 70 | case 3: |
| 71 | B &= 1; |
| 72 | if (block_mode & 0x100) |
| 73 | { |
| 74 | x_weights = B + 2; |
| 75 | y_weights = A + 2; |
| 76 | } |
| 77 | else |
| 78 | { |
| 79 | x_weights = A + 2; |
| 80 | y_weights = B + 6; |
| 81 | } |
| 82 | break; |
| 83 | } |
| 84 | } |
| 85 | else |
| 86 | { |
| 87 | base_quant_mode |= ((block_mode >> 2) & 3) << 1; |
| 88 | if (((block_mode >> 2) & 3) == 0) |
| 89 | { |
| 90 | return false; |
| 91 | } |
| 92 | |
| 93 | unsigned int B = (block_mode >> 9) & 3; |
| 94 | switch ((block_mode >> 7) & 3) |
| 95 | { |
| 96 | case 0: |
| 97 | x_weights = 12; |
| 98 | y_weights = A + 2; |
| 99 | break; |
| 100 | case 1: |
| 101 | x_weights = A + 2; |
| 102 | y_weights = 12; |
| 103 | break; |
| 104 | case 2: |
| 105 | x_weights = A + 6; |
| 106 | y_weights = B + 6; |
| 107 | D = 0; |
| 108 | H = 0; |
| 109 | break; |
| 110 | case 3: |
| 111 | switch ((block_mode >> 5) & 3) |
| 112 | { |
| 113 | case 0: |
| 114 | x_weights = 6; |
| 115 | y_weights = 10; |
| 116 | break; |
| 117 | case 1: |
| 118 | x_weights = 10; |
| 119 | y_weights = 6; |
| 120 | break; |
| 121 | case 2: |
| 122 | case 3: |
| 123 | return false; |
| 124 | } |
| 125 | break; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | unsigned int weight_count = x_weights * y_weights * (D + 1); |
| 130 | quant_mode = (base_quant_mode - 2) + 6 * H; |
| 131 | is_dual_plane = D != 0; |
| 132 | |
| 133 | weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode)); |
| 134 | return (weight_count <= BLOCK_MAX_WEIGHTS && |
| 135 | weight_bits >= BLOCK_MIN_WEIGHT_BITS && |
| 136 | weight_bits <= BLOCK_MAX_WEIGHT_BITS); |
| 137 | } |
| 138 | |
| 139 | /** |
| 140 | * @brief Decode the properties of an encoded 3D block mode. |
| 141 | * |
| 142 | * @param block_mode The encoded block mode. |
| 143 | * @param[out] x_weights The number of weights in the X dimension. |
| 144 | * @param[out] y_weights The number of weights in the Y dimension. |
| 145 | * @param[out] z_weights The number of weights in the Z dimension. |
| 146 | * @param[out] is_dual_plane True if this block mode has two weight planes. |
| 147 | * @param[out] quant_mode The quantization level for the weights. |
| 148 | * @param[out] weight_bits The storage bit count for the weights. |
| 149 | * |
| 150 | * @return Returns true if a valid mode, false otherwise. |
| 151 | */ |
| 152 | static bool decode_block_mode_3d( |
| 153 | unsigned int block_mode, |
| 154 | unsigned int& x_weights, |
| 155 | unsigned int& y_weights, |
| 156 | unsigned int& z_weights, |
| 157 | bool& is_dual_plane, |
| 158 | unsigned int& quant_mode, |
| 159 | unsigned int& weight_bits |
| 160 | ) { |
| 161 | unsigned int base_quant_mode = (block_mode >> 4) & 1; |
| 162 | unsigned int H = (block_mode >> 9) & 1; |
| 163 | unsigned int D = (block_mode >> 10) & 1; |
| 164 | unsigned int A = (block_mode >> 5) & 0x3; |
| 165 | |
| 166 | x_weights = 0; |
| 167 | y_weights = 0; |
| 168 | z_weights = 0; |
| 169 | |
| 170 | if ((block_mode & 3) != 0) |
| 171 | { |
| 172 | base_quant_mode |= (block_mode & 3) << 1; |
| 173 | unsigned int B = (block_mode >> 7) & 3; |
| 174 | unsigned int C = (block_mode >> 2) & 0x3; |
| 175 | x_weights = A + 2; |
| 176 | y_weights = B + 2; |
| 177 | z_weights = C + 2; |
| 178 | } |
| 179 | else |
| 180 | { |
| 181 | base_quant_mode |= ((block_mode >> 2) & 3) << 1; |
| 182 | if (((block_mode >> 2) & 3) == 0) |
| 183 | { |
| 184 | return false; |
| 185 | } |
| 186 | |
| 187 | int B = (block_mode >> 9) & 3; |
| 188 | if (((block_mode >> 7) & 3) != 3) |
| 189 | { |
| 190 | D = 0; |
| 191 | H = 0; |
| 192 | } |
| 193 | switch ((block_mode >> 7) & 3) |
| 194 | { |
| 195 | case 0: |
| 196 | x_weights = 6; |
| 197 | y_weights = B + 2; |
| 198 | z_weights = A + 2; |
| 199 | break; |
| 200 | case 1: |
| 201 | x_weights = A + 2; |
| 202 | y_weights = 6; |
| 203 | z_weights = B + 2; |
| 204 | break; |
| 205 | case 2: |
| 206 | x_weights = A + 2; |
| 207 | y_weights = B + 2; |
| 208 | z_weights = 6; |
| 209 | break; |
| 210 | case 3: |
| 211 | x_weights = 2; |
| 212 | y_weights = 2; |
| 213 | z_weights = 2; |
| 214 | switch ((block_mode >> 5) & 3) |
| 215 | { |
| 216 | case 0: |
| 217 | x_weights = 6; |
| 218 | break; |
| 219 | case 1: |
| 220 | y_weights = 6; |
| 221 | break; |
| 222 | case 2: |
| 223 | z_weights = 6; |
| 224 | break; |
| 225 | case 3: |
| 226 | return false; |
| 227 | } |
| 228 | break; |
| 229 | } |
| 230 | } |
| 231 | |
| 232 | unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1); |
| 233 | quant_mode = (base_quant_mode - 2) + 6 * H; |
| 234 | is_dual_plane = D != 0; |
| 235 | |
| 236 | weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode)); |
| 237 | return (weight_count <= BLOCK_MAX_WEIGHTS && |
| 238 | weight_bits >= BLOCK_MIN_WEIGHT_BITS && |
| 239 | weight_bits <= BLOCK_MAX_WEIGHT_BITS); |
| 240 | } |
| 241 | |
| 242 | /** |
| 243 | * @brief Create a 2D decimation entry for a block-size and weight-decimation pair. |
| 244 | * |
| 245 | * @param x_texels The number of texels in the X dimension. |
| 246 | * @param y_texels The number of texels in the Y dimension. |
| 247 | * @param x_weights The number of weights in the X dimension. |
| 248 | * @param y_weights The number of weights in the Y dimension. |
| 249 | * @param[out] di The decimation info structure to populate. |
| 250 | * @param[out] wb The decimation table init scratch working buffers. |
| 251 | */ |
| 252 | static void init_decimation_info_2d( |
| 253 | unsigned int x_texels, |
| 254 | unsigned int y_texels, |
| 255 | unsigned int x_weights, |
| 256 | unsigned int y_weights, |
| 257 | decimation_info& di, |
| 258 | dt_init_working_buffers& wb |
| 259 | ) { |
| 260 | unsigned int texels_per_block = x_texels * y_texels; |
| 261 | unsigned int weights_per_block = x_weights * y_weights; |
| 262 | |
| 263 | uint8_t max_texel_count_of_weight = 0; |
| 264 | |
| 265 | promise(weights_per_block > 0); |
| 266 | promise(texels_per_block > 0); |
| 267 | promise(x_texels > 0); |
| 268 | promise(y_texels > 0); |
| 269 | |
| 270 | for (unsigned int i = 0; i < weights_per_block; i++) |
| 271 | { |
| 272 | wb.texel_count_of_weight[i] = 0; |
| 273 | } |
| 274 | |
| 275 | for (unsigned int i = 0; i < texels_per_block; i++) |
| 276 | { |
| 277 | wb.weight_count_of_texel[i] = 0; |
| 278 | } |
| 279 | |
| 280 | for (unsigned int y = 0; y < y_texels; y++) |
| 281 | { |
| 282 | for (unsigned int x = 0; x < x_texels; x++) |
| 283 | { |
| 284 | unsigned int texel = y * x_texels + x; |
| 285 | |
| 286 | unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; |
| 287 | unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; |
| 288 | |
| 289 | unsigned int x_weight_frac = x_weight & 0xF; |
| 290 | unsigned int y_weight_frac = y_weight & 0xF; |
| 291 | unsigned int x_weight_int = x_weight >> 4; |
| 292 | unsigned int y_weight_int = y_weight >> 4; |
| 293 | |
| 294 | unsigned int qweight[4]; |
| 295 | qweight[0] = x_weight_int + y_weight_int * x_weights; |
| 296 | qweight[1] = qweight[0] + 1; |
| 297 | qweight[2] = qweight[0] + x_weights; |
| 298 | qweight[3] = qweight[2] + 1; |
| 299 | |
| 300 | // Truncated-precision bilinear interpolation |
| 301 | unsigned int prod = x_weight_frac * y_weight_frac; |
| 302 | |
| 303 | unsigned int weight[4]; |
| 304 | weight[3] = (prod + 8) >> 4; |
| 305 | weight[1] = x_weight_frac - weight[3]; |
| 306 | weight[2] = y_weight_frac - weight[3]; |
| 307 | weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3]; |
| 308 | |
| 309 | for (unsigned int i = 0; i < 4; i++) |
| 310 | { |
| 311 | if (weight[i] != 0) |
| 312 | { |
| 313 | wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]); |
| 314 | wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]); |
| 315 | wb.weight_count_of_texel[texel]++; |
| 316 | wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel); |
| 317 | wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]); |
| 318 | wb.texel_count_of_weight[qweight[i]]++; |
| 319 | max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]); |
| 320 | } |
| 321 | } |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | uint8_t max_texel_weight_count = 0; |
| 326 | for (unsigned int i = 0; i < texels_per_block; i++) |
| 327 | { |
| 328 | di.texel_weight_count[i] = wb.weight_count_of_texel[i]; |
| 329 | max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); |
| 330 | |
| 331 | for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) |
| 332 | { |
| 333 | di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; |
| 334 | di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); |
| 335 | di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j]; |
| 336 | } |
| 337 | |
| 338 | // Init all 4 entries so we can rely on zeros for vectorization |
| 339 | for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++) |
| 340 | { |
| 341 | di.texel_weight_contribs_int_tr[j][i] = 0; |
| 342 | di.texel_weight_contribs_float_tr[j][i] = 0.0f; |
| 343 | di.texel_weights_tr[j][i] = 0; |
| 344 | } |
| 345 | } |
| 346 | |
| 347 | di.max_texel_weight_count = max_texel_weight_count; |
| 348 | |
| 349 | for (unsigned int i = 0; i < weights_per_block; i++) |
| 350 | { |
| 351 | unsigned int texel_count_wt = wb.texel_count_of_weight[i]; |
| 352 | di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt); |
| 353 | |
| 354 | for (unsigned int j = 0; j < texel_count_wt; j++) |
| 355 | { |
| 356 | uint8_t texel = wb.texels_of_weight[i][j]; |
| 357 | |
| 358 | // Create transposed versions of these for better vectorization |
| 359 | di.weight_texels_tr[j][i] = texel; |
| 360 | di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]); |
| 361 | |
| 362 | // Store the per-texel contribution of this weight for each texel it contributes to |
| 363 | di.texel_contrib_for_weight[j][i] = 0.0f; |
| 364 | for (unsigned int k = 0; k < 4; k++) |
| 365 | { |
| 366 | uint8_t dttw = di.texel_weights_tr[k][texel]; |
| 367 | float dttwf = di.texel_weight_contribs_float_tr[k][texel]; |
| 368 | if (dttw == i && dttwf != 0.0f) |
| 369 | { |
| 370 | di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel]; |
| 371 | break; |
| 372 | } |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails |
| 377 | // Match last texel in active lane in SIMD group, for better gathers |
| 378 | uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; |
| 379 | for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) |
| 380 | { |
| 381 | di.weight_texels_tr[j][i] = last_texel; |
| 382 | di.weights_texel_contribs_tr[j][i] = 0.0f; |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails |
| 387 | unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); |
| 388 | for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) |
| 389 | { |
| 390 | di.texel_weight_count[i] = 0; |
| 391 | |
| 392 | for (unsigned int j = 0; j < 4; j++) |
| 393 | { |
| 394 | di.texel_weight_contribs_float_tr[j][i] = 0; |
| 395 | di.texel_weights_tr[j][i] = 0; |
| 396 | di.texel_weight_contribs_int_tr[j][i] = 0; |
| 397 | } |
| 398 | } |
| 399 | |
| 400 | // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails |
| 401 | // Match last texel in active lane in SIMD group, for better gathers |
| 402 | unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; |
| 403 | uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; |
| 404 | |
| 405 | unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); |
| 406 | for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) |
| 407 | { |
| 408 | di.weight_texel_count[i] = 0; |
| 409 | |
| 410 | for (unsigned int j = 0; j < max_texel_count_of_weight; j++) |
| 411 | { |
| 412 | di.weight_texels_tr[j][i] = last_texel; |
| 413 | di.weights_texel_contribs_tr[j][i] = 0.0f; |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | di.texel_count = static_cast<uint8_t>(texels_per_block); |
| 418 | di.weight_count = static_cast<uint8_t>(weights_per_block); |
| 419 | di.weight_x = static_cast<uint8_t>(x_weights); |
| 420 | di.weight_y = static_cast<uint8_t>(y_weights); |
| 421 | di.weight_z = 1; |
| 422 | } |
| 423 | |
| 424 | /** |
| 425 | * @brief Create a 3D decimation entry for a block-size and weight-decimation pair. |
| 426 | * |
| 427 | * @param x_texels The number of texels in the X dimension. |
| 428 | * @param y_texels The number of texels in the Y dimension. |
| 429 | * @param z_texels The number of texels in the Z dimension. |
| 430 | * @param x_weights The number of weights in the X dimension. |
| 431 | * @param y_weights The number of weights in the Y dimension. |
| 432 | * @param z_weights The number of weights in the Z dimension. |
| 433 | * @param[out] di The decimation info structure to populate. |
| 434 | @param[out] wb The decimation table init scratch working buffers. |
| 435 | */ |
| 436 | static void init_decimation_info_3d( |
| 437 | unsigned int x_texels, |
| 438 | unsigned int y_texels, |
| 439 | unsigned int z_texels, |
| 440 | unsigned int x_weights, |
| 441 | unsigned int y_weights, |
| 442 | unsigned int z_weights, |
| 443 | decimation_info& di, |
| 444 | dt_init_working_buffers& wb |
| 445 | ) { |
| 446 | unsigned int texels_per_block = x_texels * y_texels * z_texels; |
| 447 | unsigned int weights_per_block = x_weights * y_weights * z_weights; |
| 448 | |
| 449 | uint8_t max_texel_count_of_weight = 0; |
| 450 | |
| 451 | promise(weights_per_block > 0); |
| 452 | promise(texels_per_block > 0); |
| 453 | |
| 454 | for (unsigned int i = 0; i < weights_per_block; i++) |
| 455 | { |
| 456 | wb.texel_count_of_weight[i] = 0; |
| 457 | } |
| 458 | |
| 459 | for (unsigned int i = 0; i < texels_per_block; i++) |
| 460 | { |
| 461 | wb.weight_count_of_texel[i] = 0; |
| 462 | } |
| 463 | |
| 464 | for (unsigned int z = 0; z < z_texels; z++) |
| 465 | { |
| 466 | for (unsigned int y = 0; y < y_texels; y++) |
| 467 | { |
| 468 | for (unsigned int x = 0; x < x_texels; x++) |
| 469 | { |
| 470 | int texel = (z * y_texels + y) * x_texels + x; |
| 471 | |
| 472 | int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6; |
| 473 | int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6; |
| 474 | int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6; |
| 475 | |
| 476 | int x_weight_frac = x_weight & 0xF; |
| 477 | int y_weight_frac = y_weight & 0xF; |
| 478 | int z_weight_frac = z_weight & 0xF; |
| 479 | int x_weight_int = x_weight >> 4; |
| 480 | int y_weight_int = y_weight >> 4; |
| 481 | int z_weight_int = z_weight >> 4; |
| 482 | int qweight[4]; |
| 483 | int weight[4]; |
| 484 | qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int; |
| 485 | qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1); |
| 486 | |
| 487 | // simplex interpolation |
| 488 | int fs = x_weight_frac; |
| 489 | int ft = y_weight_frac; |
| 490 | int fp = z_weight_frac; |
| 491 | |
| 492 | int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp)); |
| 493 | int N = x_weights; |
| 494 | int NM = x_weights * y_weights; |
| 495 | |
| 496 | int s1, s2, w0, w1, w2, w3; |
| 497 | switch (cas) |
| 498 | { |
| 499 | case 7: |
| 500 | s1 = 1; |
| 501 | s2 = N; |
| 502 | w0 = 16 - fs; |
| 503 | w1 = fs - ft; |
| 504 | w2 = ft - fp; |
| 505 | w3 = fp; |
| 506 | break; |
| 507 | case 3: |
| 508 | s1 = N; |
| 509 | s2 = 1; |
| 510 | w0 = 16 - ft; |
| 511 | w1 = ft - fs; |
| 512 | w2 = fs - fp; |
| 513 | w3 = fp; |
| 514 | break; |
| 515 | case 5: |
| 516 | s1 = 1; |
| 517 | s2 = NM; |
| 518 | w0 = 16 - fs; |
| 519 | w1 = fs - fp; |
| 520 | w2 = fp - ft; |
| 521 | w3 = ft; |
| 522 | break; |
| 523 | case 4: |
| 524 | s1 = NM; |
| 525 | s2 = 1; |
| 526 | w0 = 16 - fp; |
| 527 | w1 = fp - fs; |
| 528 | w2 = fs - ft; |
| 529 | w3 = ft; |
| 530 | break; |
| 531 | case 2: |
| 532 | s1 = N; |
| 533 | s2 = NM; |
| 534 | w0 = 16 - ft; |
| 535 | w1 = ft - fp; |
| 536 | w2 = fp - fs; |
| 537 | w3 = fs; |
| 538 | break; |
| 539 | case 0: |
| 540 | s1 = NM; |
| 541 | s2 = N; |
| 542 | w0 = 16 - fp; |
| 543 | w1 = fp - ft; |
| 544 | w2 = ft - fs; |
| 545 | w3 = fs; |
| 546 | break; |
| 547 | default: |
| 548 | s1 = NM; |
| 549 | s2 = N; |
| 550 | w0 = 16 - fp; |
| 551 | w1 = fp - ft; |
| 552 | w2 = ft - fs; |
| 553 | w3 = fs; |
| 554 | break; |
| 555 | } |
| 556 | |
| 557 | qweight[1] = qweight[0] + s1; |
| 558 | qweight[2] = qweight[1] + s2; |
| 559 | weight[0] = w0; |
| 560 | weight[1] = w1; |
| 561 | weight[2] = w2; |
| 562 | weight[3] = w3; |
| 563 | |
| 564 | for (unsigned int i = 0; i < 4; i++) |
| 565 | { |
| 566 | if (weight[i] != 0) |
| 567 | { |
| 568 | wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]); |
| 569 | wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]); |
| 570 | wb.weight_count_of_texel[texel]++; |
| 571 | wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel); |
| 572 | wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]); |
| 573 | wb.texel_count_of_weight[qweight[i]]++; |
| 574 | max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]); |
| 575 | } |
| 576 | } |
| 577 | } |
| 578 | } |
| 579 | } |
| 580 | |
| 581 | uint8_t max_texel_weight_count = 0; |
| 582 | for (unsigned int i = 0; i < texels_per_block; i++) |
| 583 | { |
| 584 | di.texel_weight_count[i] = wb.weight_count_of_texel[i]; |
| 585 | max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]); |
| 586 | |
| 587 | // Init all 4 entries so we can rely on zeros for vectorization |
| 588 | for (unsigned int j = 0; j < 4; j++) |
| 589 | { |
| 590 | di.texel_weight_contribs_int_tr[j][i] = 0; |
| 591 | di.texel_weight_contribs_float_tr[j][i] = 0.0f; |
| 592 | di.texel_weights_tr[j][i] = 0; |
| 593 | } |
| 594 | |
| 595 | for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++) |
| 596 | { |
| 597 | di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j]; |
| 598 | di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM); |
| 599 | di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j]; |
| 600 | } |
| 601 | } |
| 602 | |
| 603 | di.max_texel_weight_count = max_texel_weight_count; |
| 604 | |
| 605 | for (unsigned int i = 0; i < weights_per_block; i++) |
| 606 | { |
| 607 | unsigned int texel_count_wt = wb.texel_count_of_weight[i]; |
| 608 | di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt); |
| 609 | |
| 610 | for (unsigned int j = 0; j < texel_count_wt; j++) |
| 611 | { |
| 612 | unsigned int texel = wb.texels_of_weight[i][j]; |
| 613 | |
| 614 | // Create transposed versions of these for better vectorization |
| 615 | di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel); |
| 616 | di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]); |
| 617 | |
| 618 | // Store the per-texel contribution of this weight for each texel it contributes to |
| 619 | di.texel_contrib_for_weight[j][i] = 0.0f; |
| 620 | for (unsigned int k = 0; k < 4; k++) |
| 621 | { |
| 622 | uint8_t dttw = di.texel_weights_tr[k][texel]; |
| 623 | float dttwf = di.texel_weight_contribs_float_tr[k][texel]; |
| 624 | if (dttw == i && dttwf != 0.0f) |
| 625 | { |
| 626 | di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel]; |
| 627 | break; |
| 628 | } |
| 629 | } |
| 630 | } |
| 631 | |
| 632 | // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails |
| 633 | // Match last texel in active lane in SIMD group, for better gathers |
| 634 | uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i]; |
| 635 | for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++) |
| 636 | { |
| 637 | di.weight_texels_tr[j][i] = last_texel; |
| 638 | di.weights_texel_contribs_tr[j][i] = 0.0f; |
| 639 | } |
| 640 | } |
| 641 | |
| 642 | // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails |
| 643 | unsigned int texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block); |
| 644 | for (unsigned int i = texels_per_block; i < texels_per_block_simd; i++) |
| 645 | { |
| 646 | di.texel_weight_count[i] = 0; |
| 647 | |
| 648 | for (unsigned int j = 0; j < 4; j++) |
| 649 | { |
| 650 | di.texel_weight_contribs_float_tr[j][i] = 0; |
| 651 | di.texel_weights_tr[j][i] = 0; |
| 652 | di.texel_weight_contribs_int_tr[j][i] = 0; |
| 653 | } |
| 654 | } |
| 655 | |
| 656 | // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails |
| 657 | // Match last texel in active lane in SIMD group, for better gathers |
| 658 | int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1]; |
| 659 | uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1]; |
| 660 | |
| 661 | unsigned int weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block); |
| 662 | for (unsigned int i = weights_per_block; i < weights_per_block_simd; i++) |
| 663 | { |
| 664 | di.weight_texel_count[i] = 0; |
| 665 | |
| 666 | for (int j = 0; j < max_texel_count_of_weight; j++) |
| 667 | { |
| 668 | di.weight_texels_tr[j][i] = last_texel; |
| 669 | di.weights_texel_contribs_tr[j][i] = 0.0f; |
| 670 | } |
| 671 | } |
| 672 | |
| 673 | di.texel_count = static_cast<uint8_t>(texels_per_block); |
| 674 | di.weight_count = static_cast<uint8_t>(weights_per_block); |
| 675 | di.weight_x = static_cast<uint8_t>(x_weights); |
| 676 | di.weight_y = static_cast<uint8_t>(y_weights); |
| 677 | di.weight_z = static_cast<uint8_t>(z_weights); |
| 678 | } |
| 679 | |
| 680 | /** |
| 681 | * @brief Assign the texels to use for kmeans clustering. |
| 682 | * |
| 683 | * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used. |
| 684 | * The @c bsd.texel_count is an input and must be populated beforehand. |
| 685 | * |
| 686 | * @param[in,out] bsd The block size descriptor to populate. |
| 687 | */ |
| 688 | static void assign_kmeans_texels( |
| 689 | block_size_descriptor& bsd |
| 690 | ) { |
| 691 | // Use all texels for kmeans on a small block |
| 692 | if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS) |
| 693 | { |
| 694 | for (uint8_t i = 0; i < bsd.texel_count; i++) |
| 695 | { |
| 696 | bsd.kmeans_texels[i] = i; |
| 697 | } |
| 698 | |
| 699 | return; |
| 700 | } |
| 701 | |
| 702 | // Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block |
| 703 | uint64_t rng_state[2]; |
| 704 | astc::rand_init(rng_state); |
| 705 | |
| 706 | // Initialize array used for tracking used indices |
| 707 | bool seen[BLOCK_MAX_TEXELS]; |
| 708 | for (uint8_t i = 0; i < bsd.texel_count; i++) |
| 709 | { |
| 710 | seen[i] = false; |
| 711 | } |
| 712 | |
| 713 | // Assign 64 random indices, retrying if we see repeats |
| 714 | unsigned int arr_elements_set = 0; |
| 715 | while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS) |
| 716 | { |
| 717 | uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state)); |
| 718 | texel = texel % bsd.texel_count; |
| 719 | if (!seen[texel]) |
| 720 | { |
| 721 | bsd.kmeans_texels[arr_elements_set++] = texel; |
| 722 | seen[texel] = true; |
| 723 | } |
| 724 | } |
| 725 | } |
| 726 | |
| 727 | /** |
| 728 | * @brief Allocate a single 2D decimation table entry. |
| 729 | * |
| 730 | * @param x_texels The number of texels in the X dimension. |
| 731 | * @param y_texels The number of texels in the Y dimension. |
| 732 | * @param x_weights The number of weights in the X dimension. |
| 733 | * @param y_weights The number of weights in the Y dimension. |
| 734 | * @param bsd The block size descriptor we are populating. |
| 735 | * @param wb The decimation table init scratch working buffers. |
| 736 | * @param index The packed array index to populate. |
| 737 | */ |
| 738 | static void construct_dt_entry_2d( |
| 739 | unsigned int x_texels, |
| 740 | unsigned int y_texels, |
| 741 | unsigned int x_weights, |
| 742 | unsigned int y_weights, |
| 743 | block_size_descriptor& bsd, |
| 744 | dt_init_working_buffers& wb, |
| 745 | unsigned int index |
| 746 | ) { |
| 747 | unsigned int weight_count = x_weights * y_weights; |
| 748 | assert(weight_count <= BLOCK_MAX_WEIGHTS); |
| 749 | |
| 750 | bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS; |
| 751 | |
| 752 | decimation_info& di = bsd.decimation_tables[index]; |
| 753 | init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb); |
| 754 | |
| 755 | int maxprec_1plane = -1; |
| 756 | int maxprec_2planes = -1; |
| 757 | for (int i = 0; i < 12; i++) |
| 758 | { |
| 759 | unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i)); |
| 760 | if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) |
| 761 | { |
| 762 | maxprec_1plane = i; |
| 763 | } |
| 764 | |
| 765 | if (try_2planes) |
| 766 | { |
| 767 | unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i)); |
| 768 | if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) |
| 769 | { |
| 770 | maxprec_2planes = i; |
| 771 | } |
| 772 | } |
| 773 | } |
| 774 | |
| 775 | // At least one of the two should be valid ... |
| 776 | assert(maxprec_1plane >= 0 || maxprec_2planes >= 0); |
| 777 | bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane); |
| 778 | bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes); |
| 779 | bsd.decimation_modes[index].refprec_1plane = 0; |
| 780 | bsd.decimation_modes[index].refprec_2planes = 0; |
| 781 | } |
| 782 | |
| 783 | /** |
| 784 | * @brief Allocate block modes and decimation tables for a single 2D block size. |
| 785 | * |
| 786 | * @param x_texels The number of texels in the X dimension. |
| 787 | * @param y_texels The number of texels in the Y dimension. |
| 788 | * @param can_omit_modes Can we discard modes that astcenc won't use, even if legal? |
| 789 | * @param mode_cutoff Percentile cutoff in range [0,1]. Low values more likely to be used. |
| 790 | * @param[out] bsd The block size descriptor to populate. |
| 791 | */ |
| 792 | static void construct_block_size_descriptor_2d( |
| 793 | unsigned int x_texels, |
| 794 | unsigned int y_texels, |
| 795 | bool can_omit_modes, |
| 796 | float mode_cutoff, |
| 797 | block_size_descriptor& bsd |
| 798 | ) { |
| 799 | // Store a remap table for storing packed decimation modes. |
| 800 | // Indexing uses [Y * 16 + X] and max size for each axis is 12. |
| 801 | static const unsigned int MAX_DMI = 12 * 16 + 12; |
| 802 | int decimation_mode_index[MAX_DMI]; |
| 803 | |
| 804 | dt_init_working_buffers* wb = new dt_init_working_buffers; |
| 805 | |
| 806 | bsd.xdim = static_cast<uint8_t>(x_texels); |
| 807 | bsd.ydim = static_cast<uint8_t>(y_texels); |
| 808 | bsd.zdim = 1; |
| 809 | bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels); |
| 810 | |
| 811 | for (unsigned int i = 0; i < MAX_DMI; i++) |
| 812 | { |
| 813 | decimation_mode_index[i] = -1; |
| 814 | } |
| 815 | |
| 816 | // Gather all the decimation grids that can be used with the current block |
| 817 | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
| 818 | const float *percentiles = get_2d_percentile_table(x_texels, y_texels); |
| 819 | float always_cutoff = 0.0f; |
| 820 | #else |
| 821 | // Unused in decompress-only builds |
| 822 | (void)can_omit_modes; |
| 823 | (void)mode_cutoff; |
| 824 | #endif |
| 825 | |
| 826 | // Construct the list of block formats referencing the decimation tables |
| 827 | unsigned int packed_bm_idx = 0; |
| 828 | unsigned int packed_dm_idx = 0; |
| 829 | |
| 830 | // Trackers |
| 831 | unsigned int bm_counts[4] { 0 }; |
| 832 | unsigned int dm_counts[4] { 0 }; |
| 833 | |
| 834 | // Clear the list to a known-bad value |
| 835 | for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) |
| 836 | { |
| 837 | bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; |
| 838 | } |
| 839 | |
| 840 | // Iterate four times to build a usefully ordered list: |
| 841 | // - Pass 0 - keep selected single plane "always" block modes |
| 842 | // - Pass 1 - keep selected single plane "non-always" block modes |
| 843 | // - Pass 2 - keep select dual plane block modes |
| 844 | // - Pass 3 - keep everything else that's legal |
| 845 | unsigned int limit = can_omit_modes ? 3 : 4; |
| 846 | for (unsigned int j = 0; j < limit; j ++) |
| 847 | { |
| 848 | for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) |
| 849 | { |
| 850 | // Skip modes we've already included in a previous pass |
| 851 | if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) |
| 852 | { |
| 853 | continue; |
| 854 | } |
| 855 | |
| 856 | // Decode parameters |
| 857 | unsigned int x_weights; |
| 858 | unsigned int y_weights; |
| 859 | bool is_dual_plane; |
| 860 | unsigned int quant_mode; |
| 861 | unsigned int weight_bits; |
| 862 | bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits); |
| 863 | |
| 864 | // Always skip invalid encodings for the current block size |
| 865 | if (!valid || (x_weights > x_texels) || (y_weights > y_texels)) |
| 866 | { |
| 867 | continue; |
| 868 | } |
| 869 | |
| 870 | // Selectively skip dual plane encodings |
| 871 | if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane)) |
| 872 | { |
| 873 | continue; |
| 874 | } |
| 875 | |
| 876 | // Always skip encodings we can't physically encode based on |
| 877 | // generic encoding bit availability |
| 878 | if (is_dual_plane) |
| 879 | { |
| 880 | // This is the only check we need as only support 1 partition |
| 881 | if ((109 - weight_bits) <= 0) |
| 882 | { |
| 883 | continue; |
| 884 | } |
| 885 | } |
| 886 | else |
| 887 | { |
| 888 | // This is conservative - fewer bits may be available for > 1 partition |
| 889 | if ((111 - weight_bits) <= 0) |
| 890 | { |
| 891 | continue; |
| 892 | } |
| 893 | } |
| 894 | |
| 895 | // Selectively skip encodings based on percentile |
| 896 | bool percentile_hit = false; |
| 897 | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
| 898 | if (j == 0) |
| 899 | { |
| 900 | percentile_hit = percentiles[i] <= always_cutoff; |
| 901 | } |
| 902 | else |
| 903 | { |
| 904 | percentile_hit = percentiles[i] <= mode_cutoff; |
| 905 | } |
| 906 | #endif |
| 907 | |
| 908 | if (j != 3 && !percentile_hit) |
| 909 | { |
| 910 | continue; |
| 911 | } |
| 912 | |
| 913 | // Allocate and initialize the decimation table entry if we've not used it yet |
| 914 | int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights]; |
| 915 | if (decimation_mode < 0) |
| 916 | { |
| 917 | construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx); |
| 918 | decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx; |
| 919 | decimation_mode = packed_dm_idx; |
| 920 | |
| 921 | dm_counts[j]++; |
| 922 | packed_dm_idx++; |
| 923 | } |
| 924 | |
| 925 | auto& bm = bsd.block_modes[packed_bm_idx]; |
| 926 | |
| 927 | bm.decimation_mode = static_cast<uint8_t>(decimation_mode); |
| 928 | bm.quant_mode = static_cast<uint8_t>(quant_mode); |
| 929 | bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane); |
| 930 | bm.weight_bits = static_cast<uint8_t>(weight_bits); |
| 931 | bm.mode_index = static_cast<uint16_t>(i); |
| 932 | |
| 933 | auto& dm = bsd.decimation_modes[decimation_mode]; |
| 934 | |
| 935 | if (is_dual_plane) |
| 936 | { |
| 937 | dm.set_ref_2plane(bm.get_weight_quant_mode()); |
| 938 | } |
| 939 | else |
| 940 | { |
| 941 | dm.set_ref_1plane(bm.get_weight_quant_mode()); |
| 942 | } |
| 943 | |
| 944 | bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx); |
| 945 | |
| 946 | packed_bm_idx++; |
| 947 | bm_counts[j]++; |
| 948 | } |
| 949 | } |
| 950 | |
| 951 | bsd.block_mode_count_1plane_always = bm_counts[0]; |
| 952 | bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1]; |
| 953 | bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2]; |
| 954 | bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3]; |
| 955 | |
| 956 | bsd.decimation_mode_count_always = dm_counts[0]; |
| 957 | bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2]; |
| 958 | bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3]; |
| 959 | |
| 960 | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
| 961 | assert(bsd.block_mode_count_1plane_always > 0); |
| 962 | assert(bsd.decimation_mode_count_always > 0); |
| 963 | |
| 964 | delete[] percentiles; |
| 965 | #endif |
| 966 | |
| 967 | // Ensure the end of the array contains valid data (should never get read) |
| 968 | for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++) |
| 969 | { |
| 970 | bsd.decimation_modes[i].maxprec_1plane = -1; |
| 971 | bsd.decimation_modes[i].maxprec_2planes = -1; |
| 972 | bsd.decimation_modes[i].refprec_1plane = 0; |
| 973 | bsd.decimation_modes[i].refprec_2planes = 0; |
| 974 | } |
| 975 | |
| 976 | // Determine the texels to use for kmeans clustering. |
| 977 | assign_kmeans_texels(bsd); |
| 978 | |
| 979 | delete wb; |
| 980 | } |
| 981 | |
| 982 | /** |
| 983 | * @brief Allocate block modes and decimation tables for a single 3D block size. |
| 984 | * |
| 985 | * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as |
| 986 | * the percentile mode cutoffs. If 3D becomes more widely used we should look at this. |
| 987 | * |
| 988 | * @param x_texels The number of texels in the X dimension. |
| 989 | * @param y_texels The number of texels in the Y dimension. |
| 990 | * @param z_texels The number of texels in the Z dimension. |
| 991 | * @param[out] bsd The block size descriptor to populate. |
| 992 | */ |
| 993 | static void construct_block_size_descriptor_3d( |
| 994 | unsigned int x_texels, |
| 995 | unsigned int y_texels, |
| 996 | unsigned int z_texels, |
| 997 | block_size_descriptor& bsd |
| 998 | ) { |
| 999 | // Store a remap table for storing packed decimation modes. |
| 1000 | // Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6. |
| 1001 | static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6; |
| 1002 | int decimation_mode_index[MAX_DMI]; |
| 1003 | unsigned int decimation_mode_count = 0; |
| 1004 | |
| 1005 | dt_init_working_buffers* wb = new dt_init_working_buffers; |
| 1006 | |
| 1007 | bsd.xdim = static_cast<uint8_t>(x_texels); |
| 1008 | bsd.ydim = static_cast<uint8_t>(y_texels); |
| 1009 | bsd.zdim = static_cast<uint8_t>(z_texels); |
| 1010 | bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels); |
| 1011 | |
| 1012 | for (unsigned int i = 0; i < MAX_DMI; i++) |
| 1013 | { |
| 1014 | decimation_mode_index[i] = -1; |
| 1015 | } |
| 1016 | |
| 1017 | // gather all the infill-modes that can be used with the current block size |
| 1018 | for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++) |
| 1019 | { |
| 1020 | for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++) |
| 1021 | { |
| 1022 | for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++) |
| 1023 | { |
| 1024 | unsigned int weight_count = x_weights * y_weights * z_weights; |
| 1025 | if (weight_count > BLOCK_MAX_WEIGHTS) |
| 1026 | { |
| 1027 | continue; |
| 1028 | } |
| 1029 | |
| 1030 | decimation_info& di = bsd.decimation_tables[decimation_mode_count]; |
| 1031 | decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count; |
| 1032 | init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb); |
| 1033 | |
| 1034 | int maxprec_1plane = -1; |
| 1035 | int maxprec_2planes = -1; |
| 1036 | for (unsigned int i = 0; i < 12; i++) |
| 1037 | { |
| 1038 | unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i)); |
| 1039 | if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS) |
| 1040 | { |
| 1041 | maxprec_1plane = i; |
| 1042 | } |
| 1043 | |
| 1044 | unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i)); |
| 1045 | if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS) |
| 1046 | { |
| 1047 | maxprec_2planes = i; |
| 1048 | } |
| 1049 | } |
| 1050 | |
| 1051 | if ((2 * weight_count) > BLOCK_MAX_WEIGHTS) |
| 1052 | { |
| 1053 | maxprec_2planes = -1; |
| 1054 | } |
| 1055 | |
| 1056 | bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane); |
| 1057 | bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes); |
| 1058 | bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF; |
| 1059 | bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF; |
| 1060 | decimation_mode_count++; |
| 1061 | } |
| 1062 | } |
| 1063 | } |
| 1064 | |
| 1065 | // Ensure the end of the array contains valid data (should never get read) |
| 1066 | for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++) |
| 1067 | { |
| 1068 | bsd.decimation_modes[i].maxprec_1plane = -1; |
| 1069 | bsd.decimation_modes[i].maxprec_2planes = -1; |
| 1070 | bsd.decimation_modes[i].refprec_1plane = 0; |
| 1071 | bsd.decimation_modes[i].refprec_2planes = 0; |
| 1072 | } |
| 1073 | |
| 1074 | bsd.decimation_mode_count_always = 0; // Skipped for 3D modes |
| 1075 | bsd.decimation_mode_count_selected = decimation_mode_count; |
| 1076 | bsd.decimation_mode_count_all = decimation_mode_count; |
| 1077 | |
| 1078 | // Construct the list of block formats referencing the decimation tables |
| 1079 | |
| 1080 | // Clear the list to a known-bad value |
| 1081 | for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) |
| 1082 | { |
| 1083 | bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE; |
| 1084 | } |
| 1085 | |
| 1086 | unsigned int packed_idx = 0; |
| 1087 | unsigned int bm_counts[2] { 0 }; |
| 1088 | |
| 1089 | // Iterate two times to build a usefully ordered list: |
| 1090 | // - Pass 0 - keep valid single plane block modes |
| 1091 | // - Pass 1 - keep valid dual plane block modes |
| 1092 | for (unsigned int j = 0; j < 2; j++) |
| 1093 | { |
| 1094 | for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) |
| 1095 | { |
| 1096 | // Skip modes we've already included in a previous pass |
| 1097 | if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) |
| 1098 | { |
| 1099 | continue; |
| 1100 | } |
| 1101 | |
| 1102 | unsigned int x_weights; |
| 1103 | unsigned int y_weights; |
| 1104 | unsigned int z_weights; |
| 1105 | bool is_dual_plane; |
| 1106 | unsigned int quant_mode; |
| 1107 | unsigned int weight_bits; |
| 1108 | |
| 1109 | bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits); |
| 1110 | // Skip invalid encodings |
| 1111 | if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels) |
| 1112 | { |
| 1113 | continue; |
| 1114 | } |
| 1115 | |
| 1116 | // Skip encodings in the wrong iteration |
| 1117 | if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane)) |
| 1118 | { |
| 1119 | continue; |
| 1120 | } |
| 1121 | |
| 1122 | // Always skip encodings we can't physically encode based on bit availability |
| 1123 | if (is_dual_plane) |
| 1124 | { |
| 1125 | // This is the only check we need as only support 1 partition |
| 1126 | if ((109 - weight_bits) <= 0) |
| 1127 | { |
| 1128 | continue; |
| 1129 | } |
| 1130 | } |
| 1131 | else |
| 1132 | { |
| 1133 | // This is conservative - fewer bits may be available for > 1 partition |
| 1134 | if ((111 - weight_bits) <= 0) |
| 1135 | { |
| 1136 | continue; |
| 1137 | } |
| 1138 | } |
| 1139 | |
| 1140 | int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights]; |
| 1141 | bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode); |
| 1142 | bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode); |
| 1143 | bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits); |
| 1144 | bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane); |
| 1145 | bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i); |
| 1146 | |
| 1147 | bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx); |
| 1148 | bm_counts[j]++; |
| 1149 | packed_idx++; |
| 1150 | } |
| 1151 | } |
| 1152 | |
| 1153 | bsd.block_mode_count_1plane_always = 0; // Skipped for 3D modes |
| 1154 | bsd.block_mode_count_1plane_selected = bm_counts[0]; |
| 1155 | bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1]; |
| 1156 | bsd.block_mode_count_all = bm_counts[0] + bm_counts[1]; |
| 1157 | |
| 1158 | // Determine the texels to use for kmeans clustering. |
| 1159 | assign_kmeans_texels(bsd); |
| 1160 | |
| 1161 | delete wb; |
| 1162 | } |
| 1163 | |
| 1164 | /* See header for documentation. */ |
| 1165 | void init_block_size_descriptor( |
| 1166 | unsigned int x_texels, |
| 1167 | unsigned int y_texels, |
| 1168 | unsigned int z_texels, |
| 1169 | bool can_omit_modes, |
| 1170 | unsigned int partition_count_cutoff, |
| 1171 | float mode_cutoff, |
| 1172 | block_size_descriptor& bsd |
| 1173 | ) { |
| 1174 | if (z_texels > 1) |
| 1175 | { |
| 1176 | construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd); |
| 1177 | } |
| 1178 | else |
| 1179 | { |
| 1180 | construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd); |
| 1181 | } |
| 1182 | |
| 1183 | init_partition_tables(bsd, can_omit_modes, partition_count_cutoff); |
| 1184 | } |
| 1185 | |