1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2023 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief Functions for generating partition tables on demand.
20 */
21
22#include "astcenc_internal.h"
23
24/** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */
25#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
26
27/**
28 * @brief Generate a canonical representation of a partition pattern.
29 *
30 * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
31 * the remapped texel index. Remapping ensures that we only match on the partition pattern,
32 * independent of the partition order generated by the hash.
33 *
34 * @param texel_count The number of texels in the block.
35 * @param partition_of_texel The partition assignments, in hash order.
36 * @param[out] bit_pattern The output bit pattern representation.
37 */
38static void generate_canonical_partitioning(
39 unsigned int texel_count,
40 const uint8_t* partition_of_texel,
41 uint64_t bit_pattern[BIT_PATTERN_WORDS]
42) {
43 // Clear the pattern
44 for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
45 {
46 bit_pattern[i] = 0;
47 }
48
49 // Store a mapping to reorder the raw partitions so that the partitions are ordered such
50 // that the lowest texel index in partition N is smaller than the lowest texel index in
51 // partition N + 1.
52 int mapped_index[BLOCK_MAX_PARTITIONS];
53 int map_weight_count = 0;
54
55 for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
56 {
57 mapped_index[i] = -1;
58 }
59
60 for (unsigned int i = 0; i < texel_count; i++)
61 {
62 int index = partition_of_texel[i];
63 if (mapped_index[index] < 0)
64 {
65 mapped_index[index] = map_weight_count++;
66 }
67
68 uint64_t xlat_index = mapped_index[index];
69 bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
70 }
71}
72
73/**
74 * @brief Compare two canonical patterns to see if they are the same.
75 *
76 * @param part1 The first canonical bit pattern to check.
77 * @param part2 The second canonical bit pattern to check.
78 *
79 * @return @c true if the patterns are the same, @c false otherwise.
80 */
81static bool compare_canonical_partitionings(
82 const uint64_t part1[BIT_PATTERN_WORDS],
83 const uint64_t part2[BIT_PATTERN_WORDS]
84) {
85 return (part1[0] == part2[0])
86#if BIT_PATTERN_WORDS > 1
87 && (part1[1] == part2[1])
88#endif
89#if BIT_PATTERN_WORDS > 2
90 && (part1[2] == part2[2])
91#endif
92#if BIT_PATTERN_WORDS > 3
93 && (part1[3] == part2[3])
94#endif
95#if BIT_PATTERN_WORDS > 4
96 && (part1[4] == part2[4])
97#endif
98#if BIT_PATTERN_WORDS > 5
99 && (part1[5] == part2[5])
100#endif
101#if BIT_PATTERN_WORDS > 6
102 && (part1[6] == part2[6])
103#endif
104 ;
105}
106
107/**
108 * @brief Hash function used for procedural partition assignment.
109 *
110 * @param inp The hash seed.
111 *
112 * @return The hashed value.
113 */
114static uint32_t hash52(
115 uint32_t inp
116) {
117 inp ^= inp >> 15;
118
119 // (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
120 inp *= 0xEEDE0891;
121 inp ^= inp >> 5;
122 inp += inp << 16;
123 inp ^= inp >> 7;
124 inp ^= inp >> 3;
125 inp ^= inp << 6;
126 inp ^= inp >> 17;
127 return inp;
128}
129
130/**
131 * @brief Select texel assignment for a single coordinate.
132 *
133 * @param seed The seed - the partition index from the block.
134 * @param x The texel X coordinate in the block.
135 * @param y The texel Y coordinate in the block.
136 * @param z The texel Z coordinate in the block.
137 * @param partition_count The total partition count of this encoding.
138 * @param small_block @c true if the block has fewer than 32 texels.
139 *
140 * @return The assigned partition index for this texel.
141 */
142static uint8_t select_partition(
143 int seed,
144 int x,
145 int y,
146 int z,
147 int partition_count,
148 bool small_block
149) {
150 // For small blocks bias the coordinates to get better distribution
151 if (small_block)
152 {
153 x <<= 1;
154 y <<= 1;
155 z <<= 1;
156 }
157
158 seed += (partition_count - 1) * 1024;
159
160 uint32_t rnum = hash52(seed);
161
162 uint8_t seed1 = rnum & 0xF;
163 uint8_t seed2 = (rnum >> 4) & 0xF;
164 uint8_t seed3 = (rnum >> 8) & 0xF;
165 uint8_t seed4 = (rnum >> 12) & 0xF;
166 uint8_t seed5 = (rnum >> 16) & 0xF;
167 uint8_t seed6 = (rnum >> 20) & 0xF;
168 uint8_t seed7 = (rnum >> 24) & 0xF;
169 uint8_t seed8 = (rnum >> 28) & 0xF;
170 uint8_t seed9 = (rnum >> 18) & 0xF;
171 uint8_t seed10 = (rnum >> 22) & 0xF;
172 uint8_t seed11 = (rnum >> 26) & 0xF;
173 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
174
175 // Squaring all the seeds in order to bias their distribution towards lower values.
176 seed1 *= seed1;
177 seed2 *= seed2;
178 seed3 *= seed3;
179 seed4 *= seed4;
180 seed5 *= seed5;
181 seed6 *= seed6;
182 seed7 *= seed7;
183 seed8 *= seed8;
184 seed9 *= seed9;
185 seed10 *= seed10;
186 seed11 *= seed11;
187 seed12 *= seed12;
188
189 int sh1, sh2;
190 if (seed & 1)
191 {
192 sh1 = (seed & 2 ? 4 : 5);
193 sh2 = (partition_count == 3 ? 6 : 5);
194 }
195 else
196 {
197 sh1 = (partition_count == 3 ? 6 : 5);
198 sh2 = (seed & 2 ? 4 : 5);
199 }
200
201 int sh3 = (seed & 0x10) ? sh1 : sh2;
202
203 seed1 >>= sh1;
204 seed2 >>= sh2;
205 seed3 >>= sh1;
206 seed4 >>= sh2;
207 seed5 >>= sh1;
208 seed6 >>= sh2;
209 seed7 >>= sh1;
210 seed8 >>= sh2;
211
212 seed9 >>= sh3;
213 seed10 >>= sh3;
214 seed11 >>= sh3;
215 seed12 >>= sh3;
216
217 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
218 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
219 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
220 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
221
222 // Apply the saw
223 a &= 0x3F;
224 b &= 0x3F;
225 c &= 0x3F;
226 d &= 0x3F;
227
228 // Remove some of the components if we are to output < 4 partitions.
229 if (partition_count <= 3)
230 {
231 d = 0;
232 }
233
234 if (partition_count <= 2)
235 {
236 c = 0;
237 }
238
239 if (partition_count <= 1)
240 {
241 b = 0;
242 }
243
244 uint8_t partition;
245 if (a >= b && a >= c && a >= d)
246 {
247 partition = 0;
248 }
249 else if (b >= c && b >= d)
250 {
251 partition = 1;
252 }
253 else if (c >= d)
254 {
255 partition = 2;
256 }
257 else
258 {
259 partition = 3;
260 }
261
262 return partition;
263}
264
265/**
266 * @brief Generate a single partition info structure.
267 *
268 * @param[out] bsd The block size information.
269 * @param partition_count The partition count of this partitioning.
270 * @param partition_index The partition index / seed of this partitioning.
271 * @param partition_remap_index The remapped partition index of this partitioning.
272 * @param[out] pi The partition info structure to populate.
273 *
274 * @return True if this is a useful partition index, False if we can skip it.
275 */
276static bool generate_one_partition_info_entry(
277 block_size_descriptor& bsd,
278 unsigned int partition_count,
279 unsigned int partition_index,
280 unsigned int partition_remap_index,
281 partition_info& pi
282) {
283 int texels_per_block = bsd.texel_count;
284 bool small_block = texels_per_block < 32;
285
286 uint8_t *partition_of_texel = pi.partition_of_texel;
287
288 // Assign texels to partitions
289 int texel_idx = 0;
290 int counts[BLOCK_MAX_PARTITIONS] { 0 };
291 for (unsigned int z = 0; z < bsd.zdim; z++)
292 {
293 for (unsigned int y = 0; y < bsd.ydim; y++)
294 {
295 for (unsigned int x = 0; x < bsd.xdim; x++)
296 {
297 uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
298 pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
299 *partition_of_texel++ = part;
300 }
301 }
302 }
303
304 // Fill loop tail so we can overfetch later
305 for (unsigned int i = 0; i < partition_count; i++)
306 {
307 int ptex_count = counts[i];
308 int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
309 for (int j = ptex_count; j < ptex_count_simd; j++)
310 {
311 pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
312 }
313 }
314
315 // Populate the actual procedural partition count
316 if (counts[0] == 0)
317 {
318 pi.partition_count = 0;
319 }
320 else if (counts[1] == 0)
321 {
322 pi.partition_count = 1;
323 }
324 else if (counts[2] == 0)
325 {
326 pi.partition_count = 2;
327 }
328 else if (counts[3] == 0)
329 {
330 pi.partition_count = 3;
331 }
332 else
333 {
334 pi.partition_count = 4;
335 }
336
337 // Populate the partition index
338 pi.partition_index = static_cast<uint16_t>(partition_index);
339
340 // Populate the coverage bitmaps for 2/3/4 partitions
341 uint64_t* bitmaps { nullptr };
342 if (partition_count == 2)
343 {
344 bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
345 }
346 else if (partition_count == 3)
347 {
348 bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
349 }
350 else if (partition_count == 4)
351 {
352 bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
353 }
354
355 for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
356 {
357 pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
358 }
359
360 // Valid partitionings have texels in all of the requested partitions
361 bool valid = pi.partition_count == partition_count;
362
363 if (bitmaps)
364 {
365 // Populate the partition coverage bitmap
366 for (unsigned int i = 0; i < partition_count; i++)
367 {
368 bitmaps[i] = 0ULL;
369 }
370
371 unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
372 for (unsigned int i = 0; i < texels_to_process; i++)
373 {
374 unsigned int idx = bsd.kmeans_texels[i];
375 bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
376 }
377 }
378
379 return valid;
380}
381
382static void build_partition_table_for_one_partition_count(
383 block_size_descriptor& bsd,
384 bool can_omit_partitionings,
385 unsigned int partition_count_cutoff,
386 unsigned int partition_count,
387 partition_info* ptab,
388 uint64_t* canonical_patterns
389) {
390 unsigned int next_index = 0;
391 bsd.partitioning_count_selected[partition_count - 1] = 0;
392 bsd.partitioning_count_all[partition_count - 1] = 0;
393
394 // Skip tables larger than config max partition count if we can omit modes
395 if (can_omit_partitionings && (partition_count > partition_count_cutoff))
396 {
397 return;
398 }
399
400 // Iterate through twice
401 // - Pass 0: Keep selected partitionings
402 // - Pass 1: Keep non-selected partitionings (skip if in omit mode)
403 unsigned int max_iter = can_omit_partitionings ? 1 : 2;
404
405 // Tracker for things we built in the first iteration
406 uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
407 for (unsigned int x = 0; x < max_iter; x++)
408 {
409 for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
410 {
411 // Don't include things we built in the first pass
412 if ((x == 1) && build[i])
413 {
414 continue;
415 }
416
417 bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
418 if ((x == 0) && !keep_useful)
419 {
420 continue;
421 }
422
423 generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
424 bool keep_canonical = true;
425 for (unsigned int j = 0; j < next_index; j++)
426 {
427 bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS);
428 if (match)
429 {
430 keep_canonical = false;
431 break;
432 }
433 }
434
435 if (keep_useful && keep_canonical)
436 {
437 if (x == 0)
438 {
439 bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
440 bsd.partitioning_count_selected[partition_count - 1]++;
441 bsd.partitioning_count_all[partition_count - 1]++;
442 build[i] = 1;
443 next_index++;
444 }
445 }
446 else
447 {
448 if (x == 1)
449 {
450 bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
451 bsd.partitioning_count_all[partition_count - 1]++;
452 next_index++;
453 }
454 }
455 }
456 }
457}
458
459/* See header for documentation. */
460void init_partition_tables(
461 block_size_descriptor& bsd,
462 bool can_omit_partitionings,
463 unsigned int partition_count_cutoff
464) {
465 partition_info* par_tab2 = bsd.partitionings;
466 partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
467 partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
468 partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
469
470 generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
471 bsd.partitioning_count_selected[0] = 1;
472 bsd.partitioning_count_all[0] = 1;
473
474 uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
475
476 build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
477 build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
478 build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
479
480 delete[] canonical_patterns;
481}
482