1// basisu_frontend.cpp
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// TODO:
17// This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here.
18// Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this.
19//
20#include "../transcoder/basisu.h"
21#include "basisu_frontend.h"
22#include "basisu_opencl.h"
23#include <unordered_set>
24#include <unordered_map>
25
26#if BASISU_SUPPORT_SSE
27#define CPPSPMD_NAME(a) a##_sse41
28#include "basisu_kernels_declares.h"
29#endif
30
// Internal invariant check for the frontend. When condition `c` evaluates to false,
// handle_verify_failure() is invoked with the current source line (it logs the line and aborts).
// The do/while(0) wrapper makes the macro usable as a single statement.
#define BASISU_FRONTEND_VERIFY(c) do { if (!(c)) handle_verify_failure(__LINE__); } while(0)
32
33namespace basisu
34{
	// Upper bound on the number of threads requested for codebook creation (see generate_endpoint_clusters()).
	const uint32_t cMaxCodebookCreationThreads = 8;

	// Base codebook refinement iteration count. init() uses this value (or twice this value) for both the
	// endpoint and the selector codebook iteration counts at compression levels 4 and above.
	const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 3;
	//const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3;

	// Parent codebook size requested for hierarchical endpoint codebooks. generate_endpoint_clusters() only
	// requests a parent codebook when at least 256 endpoint clusters are asked for.
	const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16;
	// NOTE(review): presumably the selector parent codebook sizes for compression levels 0/1 vs. all other
	// levels - their use is not visible in this part of the file, confirm against the selector clustering code.
	const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32;
	const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16;
43
	// Failure handler behind BASISU_FRONTEND_VERIFY: reports the source line of the failed check
	// through error_printf() and then terminates the process with abort(). Never returns.
	// TODO - How to handle internal verifies in the basisu lib
	static inline void handle_verify_failure(int line)
	{
		error_printf("basisu_frontend: verify check failed at line %i!\n", line);
		abort();
	}
50
51 bool basisu_frontend::init(const params &p)
52 {
53 debug_printf("basisu_frontend::init: Multithreaded: %u, Job pool total threads: %u, NumEndpointClusters: %u, NumSelectorClusters: %u, Perceptual: %u, CompressionLevel: %u\n",
54 p.m_multithreaded, p.m_pJob_pool ? p.m_pJob_pool->get_total_threads() : 0,
55 p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_perceptual, p.m_compression_level);
56
57 if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters))
58 return false;
59 if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters))
60 return false;
61
62 m_source_blocks.resize(0);
63 append_vector(m_source_blocks, p.m_pSource_blocks, p.m_num_source_blocks);
64
65 m_params = p;
66
67 if (m_params.m_pOpenCL_context)
68 {
69 BASISU_ASSUME(sizeof(cl_pixel_block) == sizeof(pixel_block));
70
71 // Upload the RGBA pixel blocks a single time.
72 if (!opencl_set_pixel_blocks(m_params.m_pOpenCL_context, m_source_blocks.size(), (cl_pixel_block*)m_source_blocks.data()))
73 {
74 // This is not fatal, we just won't use OpenCL.
75 error_printf("basisu_frontend::init: opencl_set_pixel_blocks() failed\n");
76 m_params.m_pOpenCL_context = nullptr;
77 m_opencl_failed = true;
78 }
79 }
80
81 m_encoded_blocks.resize(m_params.m_num_source_blocks);
82 memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0]));
83
84 m_num_endpoint_codebook_iterations = 1;
85 m_num_selector_codebook_iterations = 1;
86
87 switch (p.m_compression_level)
88 {
89 case 0:
90 {
91 m_endpoint_refinement = false;
92 m_use_hierarchical_endpoint_codebooks = true;
93 m_use_hierarchical_selector_codebooks = true;
94 break;
95 }
96 case 1:
97 {
98 m_endpoint_refinement = true;
99 m_use_hierarchical_endpoint_codebooks = true;
100 m_use_hierarchical_selector_codebooks = true;
101
102 break;
103 }
104 case 2:
105 {
106 m_endpoint_refinement = true;
107 m_use_hierarchical_endpoint_codebooks = true;
108 m_use_hierarchical_selector_codebooks = true;
109
110 break;
111 }
112 case 3:
113 {
114 m_endpoint_refinement = true;
115 m_use_hierarchical_endpoint_codebooks = false;
116 m_use_hierarchical_selector_codebooks = false;
117 break;
118 }
119 case 4:
120 {
121 m_endpoint_refinement = true;
122 m_use_hierarchical_endpoint_codebooks = true;
123 m_use_hierarchical_selector_codebooks = true;
124 m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
125 m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
126 break;
127 }
128 case 5:
129 {
130 m_endpoint_refinement = true;
131 m_use_hierarchical_endpoint_codebooks = false;
132 m_use_hierarchical_selector_codebooks = false;
133 m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
134 m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
135 break;
136 }
137 case 6:
138 default:
139 {
140 m_endpoint_refinement = true;
141 m_use_hierarchical_endpoint_codebooks = false;
142 m_use_hierarchical_selector_codebooks = false;
143 m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
144 m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
145 break;
146 }
147
148 }
149
150 if (m_params.m_disable_hierarchical_endpoint_codebooks)
151 m_use_hierarchical_endpoint_codebooks = false;
152
153 debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n",
154 m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations);
155
156 return true;
157 }
158
	// Runs the whole frontend over the source blocks handed to init().
	//
	// Steps, in order:
	//  1. Encode every block to an initial ETC1S block (init_etc1_images()).
	//  2. Either adopt caller-supplied global codebooks (init_global_codebooks()), or
	//     build the endpoint codebook (with optional refinement iterations) and then the selector codebook.
	//  3. finalize() and, when m_params.m_validate is set, validate_output().
	//
	// Returns false only when validation is enabled and validate_output() fails; internal
	// consistency failures go through BASISU_FRONTEND_VERIFY (which aborts). Returns true otherwise.
	bool basisu_frontend::compress()
	{
		debug_printf("basisu_frontend::compress\n");

		m_total_blocks = m_params.m_num_source_blocks;
		m_total_pixels = m_total_blocks * cPixelBlockTotalPixels;

		// Encode the initial high quality ETC1S texture

		init_etc1_images();

		// First quantize the ETC1S endpoints

		if (m_params.m_pGlobal_codebooks)
		{
			// Codebooks were supplied by the caller: map every block onto them instead of training new ones.
			init_global_codebooks();
		}
		else
		{
			init_endpoint_training_vectors();

			generate_endpoint_clusters();

			// Each iteration rebuilds the endpoint codebook from the current clusterization and
			// (optionally) refines which cluster each block belongs to.
			for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++)
			{
				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(check_etc1s_constraints());

					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}

				// New clusters are only introduced from the second iteration onwards.
				if (refine_endpoint_step)
				{
					introduce_new_endpoint_clusters();
				}

				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}

				generate_endpoint_codebook(refine_endpoint_step);

				if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
				{
					char buf[256];
					snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step);
					dump_endpoint_clusterization_visualization(buf, false);
				}

				// Set when refinement reports false; the loop then stops after this iteration's cleanup.
				bool early_out = false;

				if (m_endpoint_refinement)
				{
					//dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png");

					if (!refine_endpoint_clusterization())
						early_out = true;

					// Video frames with a single codebook iteration: clean up the clusters and regenerate the
					// codebook once more (step argument is always 1 here because refine_endpoint_step is 0).
					if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1))
					{
						eliminate_redundant_or_empty_endpoint_clusters();
						generate_endpoint_codebook(basisu::maximum(1U, refine_endpoint_step));
					}

					if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
					{
						char buf[256];
						snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step);

						dump_endpoint_clusterization_visualization(buf, false);
						snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step);

						dump_endpoint_clusterization_visualization(buf, true);
					}
				}

				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}

				eliminate_redundant_or_empty_endpoint_clusters();

				if (m_params.m_validate)
				{
					BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
				}

				if (m_params.m_debug_stats)
					debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size());

				if (early_out)
					break;
			}

			if (m_params.m_validate)
			{
				BASISU_FRONTEND_VERIFY(check_etc1s_constraints());
			}

			generate_block_endpoint_clusters();

			create_initial_packed_texture();

			// Now quantize the ETC1S selectors

			generate_selector_clusters();

			if (m_use_hierarchical_selector_codebooks)
				compute_selector_clusters_within_each_parent_cluster();

			if (m_params.m_compression_level == 0)
			{
				// Level 0: a single selector codebook pass, never followed by endpoint re-refinement.
				create_optimized_selector_codebook(0);

				find_optimal_selector_clusters_for_each_block();

				introduce_special_selector_clusters();
			}
			else
			{
				const uint32_t num_refine_selector_steps = m_num_selector_codebook_iterations;
				for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++)
				{
					create_optimized_selector_codebook(refine_selector_steps);

					find_optimal_selector_clusters_for_each_block();

					introduce_special_selector_clusters();

					// At level 4+ (or for video) re-fit the block endpoints to the chosen selectors;
					// stop iterating as soon as that step reports false.
					if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames))
					{
						if (!refine_block_endpoints_given_selectors())
							break;
					}
				}
			}

			optimize_selector_codebook();

			if (m_params.m_debug_stats)
				debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size());
		}

		finalize();

		if (m_params.m_validate)
		{
			if (!validate_output())
				return false;
		}

		debug_printf("basisu_frontend::compress: Done\n");

		return true;
	}
317
318 bool basisu_frontend::init_global_codebooks()
319 {
320 const basist::basisu_lowlevel_etc1s_transcoder* pTranscoder = m_params.m_pGlobal_codebooks;
321
322 const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints();
323 const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors();
324
325 m_endpoint_cluster_etc_params.resize(endpoints.size());
326 for (uint32_t i = 0; i < endpoints.size(); i++)
327 {
328 m_endpoint_cluster_etc_params[i].m_inten_table[0] = endpoints[i].m_inten5;
329 m_endpoint_cluster_etc_params[i].m_inten_table[1] = endpoints[i].m_inten5;
330
331 m_endpoint_cluster_etc_params[i].m_color_unscaled[0].set(endpoints[i].m_color5.r, endpoints[i].m_color5.g, endpoints[i].m_color5.b, 255);
332 m_endpoint_cluster_etc_params[i].m_color_used[0] = true;
333 m_endpoint_cluster_etc_params[i].m_valid = true;
334 }
335
336 m_optimized_cluster_selectors.resize(selectors.size());
337 for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
338 {
339 for (uint32_t y = 0; y < 4; y++)
340 for (uint32_t x = 0; x < 4; x++)
341 m_optimized_cluster_selectors[i].set_selector(x, y, selectors[i].get_selector(x, y));
342 }
343
344 m_block_endpoint_clusters_indices.resize(m_total_blocks);
345
346 m_orig_encoded_blocks.resize(m_total_blocks);
347
348 m_block_selector_cluster_index.resize(m_total_blocks);
349
350#if 0
351 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
352 {
353 const uint32_t first_index = block_index_iter;
354 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
355
356#ifndef __EMSCRIPTEN__
357 m_params.m_pJob_pool->add_job([this, first_index, last_index] {
358#endif
359
360 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
361 {
362 const etc_block& blk = m_etc1_blocks_etc1s[block_index];
363
364 const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
365
366 etc_block trial_blk;
367 trial_blk.set_block_color5_etc1s(blk.m_color_unscaled[0]);
368 trial_blk.set_flip_bit(true);
369
370 uint64_t best_err = UINT64_MAX;
371 uint32_t best_index = 0;
372
373 for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
374 {
375 trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());
376
377 const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
378 if (cur_err < best_err)
379 {
380 best_err = cur_err;
381 best_index = i;
382 if (!cur_err)
383 break;
384 }
385
386 } // block_index
387
388 m_block_selector_cluster_index[block_index] = best_index;
389 }
390
391#ifndef __EMSCRIPTEN__
392 });
393#endif
394
395 }
396
397#ifndef __EMSCRIPTEN__
398 m_params.m_pJob_pool->wait_for_all();
399#endif
400
401 m_encoded_blocks.resize(m_total_blocks);
402 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
403 {
404 const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
405 const uint32_t selector_index = m_block_selector_cluster_index[block_index];
406
407 etc_block& blk = m_encoded_blocks[block_index];
408
409 blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
410 blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
411 blk.set_flip_bit(true);
412 blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
413 }
414#endif
415
416 // HACK HACK
417 const uint32_t NUM_PASSES = 3;
418 for (uint32_t pass = 0; pass < NUM_PASSES; pass++)
419 {
420 debug_printf("init_global_codebooks: pass %u\n", pass);
421
422 const uint32_t N = 128;
423 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
424 {
425 const uint32_t first_index = block_index_iter;
426 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
427
428#ifndef __EMSCRIPTEN__
429 m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] {
430#endif
431
432 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
433 {
434 const etc_block& blk = pass ? m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index];
435 const uint32_t blk_raw_selector_bits = blk.get_raw_selector_bits();
436
437 etc_block trial_blk(blk);
438 trial_blk.set_raw_selector_bits(blk_raw_selector_bits);
439 trial_blk.set_flip_bit(true);
440
441 uint64_t best_err = UINT64_MAX;
442 uint32_t best_index = 0;
443 etc_block best_block(trial_blk);
444
445 for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++)
446 {
447 if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0))
448 continue;
449
450 trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[i].m_color_unscaled[0]);
451 trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[i].m_inten_table[0]);
452
453 const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();
454 uint64_t cur_err;
455 if (!pass)
456 cur_err = trial_blk.determine_selectors(pSource_pixels, m_params.m_perceptual);
457 else
458 cur_err = trial_blk.evaluate_etc1_error(pSource_pixels, m_params.m_perceptual);
459
460 if (cur_err < best_err)
461 {
462 best_err = cur_err;
463 best_index = i;
464 best_block = trial_blk;
465
466 if (!cur_err)
467 break;
468 }
469 }
470
471 m_block_endpoint_clusters_indices[block_index][0] = best_index;
472 m_block_endpoint_clusters_indices[block_index][1] = best_index;
473
474 m_orig_encoded_blocks[block_index] = best_block;
475
476 } // block_index
477
478#ifndef __EMSCRIPTEN__
479 });
480#endif
481
482 }
483
484#ifndef __EMSCRIPTEN__
485 m_params.m_pJob_pool->wait_for_all();
486#endif
487
488 m_endpoint_clusters.resize(0);
489 m_endpoint_clusters.resize(endpoints.size());
490 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
491 {
492 const uint32_t endpoint_cluster_index = m_block_endpoint_clusters_indices[block_index][0];
493 m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2);
494 m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2 + 1);
495 }
496
497 m_block_selector_cluster_index.resize(m_total_blocks);
498
499 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
500 {
501 const uint32_t first_index = block_index_iter;
502 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
503
504#ifndef __EMSCRIPTEN__
505 m_params.m_pJob_pool->add_job([this, first_index, last_index] {
506#endif
507
508 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
509 {
510 const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
511
512 etc_block trial_blk;
513 trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]);
514 trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]);
515 trial_blk.set_flip_bit(true);
516
517 uint64_t best_err = UINT64_MAX;
518 uint32_t best_index = 0;
519
520 for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
521 {
522 trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());
523
524 const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
525 if (cur_err < best_err)
526 {
527 best_err = cur_err;
528 best_index = i;
529 if (!cur_err)
530 break;
531 }
532
533 } // block_index
534
535 m_block_selector_cluster_index[block_index] = best_index;
536 }
537
538#ifndef __EMSCRIPTEN__
539 });
540#endif
541
542 }
543
544#ifndef __EMSCRIPTEN__
545 m_params.m_pJob_pool->wait_for_all();
546#endif
547
548 m_encoded_blocks.resize(m_total_blocks);
549 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
550 {
551 const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
552 const uint32_t selector_index = m_block_selector_cluster_index[block_index];
553
554 etc_block& blk = m_encoded_blocks[block_index];
555
556 blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
557 blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
558 blk.set_flip_bit(true);
559 blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
560 }
561
562 } // pass
563
564 m_selector_cluster_block_indices.resize(selectors.size());
565 for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++)
566 m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index);
567
568 return true;
569 }
570
571 void basisu_frontend::introduce_special_selector_clusters()
572 {
573 debug_printf("introduce_special_selector_clusters\n");
574
575 uint32_t total_blocks_relocated = 0;
576 const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();
577
578 bool_vec block_relocated_flags(m_total_blocks);
579
580 // Make sure the selector codebook always has pure flat blocks for each possible selector, to avoid obvious artifacts.
581 // optimize_selector_codebook() will clean up any redundant clusters we create here.
582 for (uint32_t sel = 0; sel < 4; sel++)
583 {
584 etc_block blk;
585 clear_obj(blk);
586 for (uint32_t j = 0; j < 16; j++)
587 blk.set_selector(j & 3, j >> 2, sel);
588
589 int k;
590 for (k = 0; k < (int)m_optimized_cluster_selectors.size(); k++)
591 if (m_optimized_cluster_selectors[k].get_raw_selector_bits() == blk.get_raw_selector_bits())
592 break;
593 if (k < (int)m_optimized_cluster_selectors.size())
594 continue;
595
596 debug_printf("Introducing sel %u\n", sel);
597
598 const uint32_t new_selector_cluster_index = (uint32_t)m_optimized_cluster_selectors.size();
599
600 m_optimized_cluster_selectors.push_back(blk);
601
602 vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index);
603
604 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
605 {
606 if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits())
607 continue;
608
609 // See if using flat selectors actually decreases the block's error.
610 const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index];
611
612 etc_block cur_blk;
613 const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);
614 cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
615 cur_blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
616 cur_blk.set_raw_selector_bits(get_selector_cluster_selector_bits(old_selector_cluster_index).get_raw_selector_bits());
617 cur_blk.set_flip_bit(true);
618
619 const uint64_t cur_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
620
621 cur_blk.set_raw_selector_bits(blk.get_raw_selector_bits());
622
623 const uint64_t new_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
624
625 if (new_err >= cur_err)
626 continue;
627
628 // Change the block to use the new cluster
629 m_block_selector_cluster_index[block_index] = new_selector_cluster_index;
630
631 m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index);
632
633 block_relocated_flags[block_index] = true;
634
635#if 0
636 int j = vector_find(m_selector_cluster_block_indices[old_selector_cluster_index], block_index);
637 if (j >= 0)
638 m_selector_cluster_block_indices[old_selector_cluster_index].erase(m_selector_cluster_block_indices[old_selector_cluster_index].begin() + j);
639#endif
640
641 total_blocks_relocated++;
642
643 m_encoded_blocks[block_index].set_raw_selector_bits(blk.get_raw_selector_bits());
644
645 } // block_index
646
647 } // sel
648
649 if (total_blocks_relocated)
650 {
651 debug_printf("Fixing selector codebook\n");
652
653 for (int selector_cluster_index = 0; selector_cluster_index < (int)initial_selector_clusters; selector_cluster_index++)
654 {
655 uint_vec& block_indices = m_selector_cluster_block_indices[selector_cluster_index];
656
657 uint32_t dst_ofs = 0;
658
659 for (uint32_t i = 0; i < block_indices.size(); i++)
660 {
661 const uint32_t block_index = block_indices[i];
662 if (!block_relocated_flags[block_index])
663 block_indices[dst_ofs++] = block_index;
664 }
665
666 block_indices.resize(dst_ofs);
667 }
668 }
669
670 debug_printf("Total blocks relocated to new flat selector clusters: %u\n", total_blocks_relocated);
671 }
672
673 // This method will change the number and ordering of the selector codebook clusters.
674 void basisu_frontend::optimize_selector_codebook()
675 {
676 debug_printf("optimize_selector_codebook\n");
677
678 const uint32_t orig_total_selector_clusters = (uint32_t)m_optimized_cluster_selectors.size();
679
680 bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size());
681 for (uint32_t i = 0; i < m_total_blocks; i++)
682 selector_cluster_was_used[m_block_selector_cluster_index[i]] = true;
683
684 int_vec old_to_new(m_optimized_cluster_selectors.size());
685 int_vec new_to_old;
686 uint32_t total_new_entries = 0;
687
688 std::unordered_map<uint32_t, uint32_t> selector_hashmap;
689
690 for (int i = 0; i < static_cast<int>(m_optimized_cluster_selectors.size()); i++)
691 {
692 if (!selector_cluster_was_used[i])
693 {
694 old_to_new[i] = -1;
695 continue;
696 }
697
698 const uint32_t raw_selector_bits = m_optimized_cluster_selectors[i].get_raw_selector_bits();
699
700 auto find_res = selector_hashmap.insert(std::make_pair(raw_selector_bits, total_new_entries));
701 if (!find_res.second)
702 {
703 old_to_new[i] = (find_res.first)->second;
704 continue;
705 }
706
707 old_to_new[i] = total_new_entries++;
708 new_to_old.push_back(i);
709 }
710
711 debug_printf("Original selector clusters: %u, new cluster selectors: %u\n", orig_total_selector_clusters, total_new_entries);
712
713 for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
714 {
715 BASISU_FRONTEND_VERIFY((old_to_new[m_block_selector_cluster_index[i]] >= 0) && (old_to_new[m_block_selector_cluster_index[i]] < (int)total_new_entries));
716 m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]];
717 }
718
719 basisu::vector<etc_block> new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0);
720 basisu::vector<uint_vec> new_selector_cluster_indices(m_selector_cluster_block_indices.size() ? total_new_entries : 0);
721
722 for (uint32_t i = 0; i < total_new_entries; i++)
723 {
724 if (m_optimized_cluster_selectors.size())
725 new_optimized_cluster_selectors[i] = m_optimized_cluster_selectors[new_to_old[i]];
726
727 //if (m_selector_cluster_block_indices.size())
728 // new_selector_cluster_indices[i] = m_selector_cluster_block_indices[new_to_old[i]];
729 }
730
731 for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
732 {
733 new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i);
734 }
735
736 m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors);
737 m_selector_cluster_block_indices.swap(new_selector_cluster_indices);
738
739 // This isn't strictly necessary - doing it for completeness/future sanity.
740 if (m_selector_clusters_within_each_parent_cluster.size())
741 {
742 for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
743 for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
744 m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]];
745 }
746
747 debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries);
748 }
749
750 void basisu_frontend::init_etc1_images()
751 {
752 debug_printf("basisu_frontend::init_etc1_images\n");
753
754 interval_timer tm;
755 tm.start();
756
757 m_etc1_blocks_etc1s.resize(m_total_blocks);
758
759 bool use_cpu = true;
760
761 if (m_params.m_pOpenCL_context)
762 {
763 uint32_t total_perms = 64;
764 if (m_params.m_compression_level == 0)
765 total_perms = 4;
766 else if (m_params.m_compression_level == 1)
767 total_perms = 16;
768 else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
769 total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;
770
771 bool status = opencl_encode_etc1s_blocks(m_params.m_pOpenCL_context, m_etc1_blocks_etc1s.data(), m_params.m_perceptual, total_perms);
772 if (status)
773 use_cpu = false;
774 else
775 {
776 error_printf("basisu_frontend::init_etc1_images: opencl_encode_etc1s_blocks() failed! Using CPU.\n");
777 m_params.m_pOpenCL_context = nullptr;
778 m_opencl_failed = true;
779 }
780 }
781
782 if (use_cpu)
783 {
784 const uint32_t N = 4096;
785 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
786 {
787 const uint32_t first_index = block_index_iter;
788 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
789
790#ifndef __EMSCRIPTEN__
791 m_params.m_pJob_pool->add_job([this, first_index, last_index] {
792#endif
793
794 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
795 {
796 const pixel_block& source_blk = get_source_pixel_block(block_index);
797
798 etc1_optimizer optimizer;
799 etc1_optimizer::params optimizer_params;
800 etc1_optimizer::results optimizer_results;
801
802 if (m_params.m_compression_level == 0)
803 optimizer_params.m_quality = cETCQualityFast;
804 else if (m_params.m_compression_level == 1)
805 optimizer_params.m_quality = cETCQualityMedium;
806 else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
807 optimizer_params.m_quality = cETCQualityUber;
808
809 optimizer_params.m_num_src_pixels = 16;
810 optimizer_params.m_pSrc_pixels = source_blk.get_ptr();
811 optimizer_params.m_perceptual = m_params.m_perceptual;
812
813 uint8_t selectors[16];
814 optimizer_results.m_pSelectors = selectors;
815 optimizer_results.m_n = 16;
816
817 optimizer.init(optimizer_params, optimizer_results);
818 if (!optimizer.compute())
819 BASISU_FRONTEND_VERIFY(false);
820
821 etc_block& blk = m_etc1_blocks_etc1s[block_index];
822
823 memset(&blk, 0, sizeof(blk));
824 blk.set_block_color5_etc1s(optimizer_results.m_block_color_unscaled);
825 blk.set_inten_tables_etc1s(optimizer_results.m_block_inten_table);
826 blk.set_flip_bit(true);
827
828 for (uint32_t y = 0; y < 4; y++)
829 for (uint32_t x = 0; x < 4; x++)
830 blk.set_selector(x, y, selectors[x + y * 4]);
831 }
832
833#ifndef __EMSCRIPTEN__
834 });
835#endif
836
837 }
838
839#ifndef __EMSCRIPTEN__
840 m_params.m_pJob_pool->wait_for_all();
841#endif
842
843 } // use_cpu
844
845 debug_printf("init_etc1_images: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
846 }
847
848 void basisu_frontend::init_endpoint_training_vectors()
849 {
850 debug_printf("init_endpoint_training_vectors\n");
851
852 vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs();
853
854 training_vecs.resize(m_total_blocks * 2);
855
856 const uint32_t N = 16384;
857 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
858 {
859 const uint32_t first_index = block_index_iter;
860 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
861
862#ifndef __EMSCRIPTEN__
863 m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
864#endif
865
866 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
867 {
868 const etc_block &blk = m_etc1_blocks_etc1s[block_index];
869
870 color_rgba block_colors[2];
871 blk.get_block_low_high_colors(block_colors, 0);
872
873 vec6F v;
874 v[0] = block_colors[0].r * (1.0f / 255.0f);
875 v[1] = block_colors[0].g * (1.0f / 255.0f);
876 v[2] = block_colors[0].b * (1.0f / 255.0f);
877 v[3] = block_colors[1].r * (1.0f / 255.0f);
878 v[4] = block_colors[1].g * (1.0f / 255.0f);
879 v[5] = block_colors[1].b * (1.0f / 255.0f);
880
881 training_vecs[block_index * 2 + 0] = std::make_pair(v, 1);
882 training_vecs[block_index * 2 + 1] = std::make_pair(v, 1);
883
884 } // block_index;
885
886#ifndef __EMSCRIPTEN__
887 } );
888#endif
889
890 } // block_index_iter
891
892#ifndef __EMSCRIPTEN__
893 m_params.m_pJob_pool->wait_for_all();
894#endif
895 }
896
897 void basisu_frontend::generate_endpoint_clusters()
898 {
899 debug_printf("Begin endpoint quantization\n");
900
901 const uint32_t parent_codebook_size = (m_params.m_max_endpoint_clusters >= 256) ? BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE : 0;
902 uint32_t max_threads = 0;
903 max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
904 if (m_params.m_pJob_pool)
905 max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);
906
907 debug_printf("max_threads: %u\n", max_threads);
908 bool status = generate_hierarchical_codebook_threaded(m_endpoint_clusterizer,
909 m_params.m_max_endpoint_clusters, m_use_hierarchical_endpoint_codebooks ? parent_codebook_size : 0,
910 m_endpoint_clusters,
911 m_endpoint_parent_clusters,
912 max_threads, m_params.m_pJob_pool, true);
913 BASISU_FRONTEND_VERIFY(status);
914
915 if (m_use_hierarchical_endpoint_codebooks)
916 {
917 if (!m_endpoint_parent_clusters.size())
918 {
919 m_endpoint_parent_clusters.resize(0);
920 m_endpoint_parent_clusters.resize(1);
921 for (uint32_t i = 0; i < m_total_blocks; i++)
922 {
923 m_endpoint_parent_clusters[0].push_back(i*2);
924 m_endpoint_parent_clusters[0].push_back(i*2+1);
925 }
926 }
927
928 BASISU_ASSUME(BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE <= UINT8_MAX);
929
930 m_block_parent_endpoint_cluster.resize(0);
931 m_block_parent_endpoint_cluster.resize(m_total_blocks);
932 vector_set_all(m_block_parent_endpoint_cluster, 0xFF);
933 for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_endpoint_parent_clusters.size(); parent_cluster_index++)
934 {
935 const uint_vec &cluster = m_endpoint_parent_clusters[parent_cluster_index];
936 for (uint32_t j = 0; j < cluster.size(); j++)
937 {
938 const uint32_t block_index = cluster[j] >> 1;
939 m_block_parent_endpoint_cluster[block_index] = static_cast<uint8_t>(parent_cluster_index);
940 }
941 }
942
943 for (uint32_t i = 0; i < m_total_blocks; i++)
944 {
945 BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[i] != 0xFF);
946 }
947
948 // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
949 for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
950 {
951 const uint_vec &cluster = m_endpoint_clusters[cluster_index];
952
953 uint32_t parent_cluster_index = 0;
954 for (uint32_t j = 0; j < cluster.size(); j++)
955 {
956 const uint32_t block_index = cluster[j] >> 1;
957
958 BASISU_FRONTEND_VERIFY(block_index < m_block_parent_endpoint_cluster.size());
959
960 if (!j)
961 {
962 parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
963 }
964 else
965 {
966 BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[block_index] == parent_cluster_index);
967 }
968 }
969 }
970 }
971
972 if (m_params.m_debug_stats)
973 debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", (uint32_t)m_endpoint_clusters.size(), (uint32_t)m_endpoint_parent_clusters.size());
974 }
975
976 // Iterate through each array of endpoint cluster block indices and set the m_block_endpoint_clusters_indices[][] array to indicaste which cluster index each block uses.
977 void basisu_frontend::generate_block_endpoint_clusters()
978 {
979 m_block_endpoint_clusters_indices.resize(m_total_blocks);
980
981 for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
982 {
983 const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
984
985 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
986 {
987 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
988 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
989
990 m_block_endpoint_clusters_indices[block_index][subblock_index] = cluster_index;
991
992 } // cluster_indices_iter
993 }
994
995 if (m_params.m_validate)
996 {
997 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
998 {
999 uint32_t cluster_0 = m_block_endpoint_clusters_indices[block_index][0];
1000 uint32_t cluster_1 = m_block_endpoint_clusters_indices[block_index][1];
1001 BASISU_FRONTEND_VERIFY(cluster_0 == cluster_1);
1002 }
1003 }
1004 }
1005
1006 void basisu_frontend::compute_endpoint_clusters_within_each_parent_cluster()
1007 {
1008 generate_block_endpoint_clusters();
1009
1010 m_endpoint_clusters_within_each_parent_cluster.resize(0);
1011 m_endpoint_clusters_within_each_parent_cluster.resize(m_endpoint_parent_clusters.size());
1012
1013 // Note: It's possible that some blocks got moved into the same cluster, but live in different parent clusters.
1014 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
1015 {
1016 const uint32_t cluster_index = m_block_endpoint_clusters_indices[block_index][0];
1017 const uint32_t parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
1018
1019 m_endpoint_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
1020 }
1021
1022 for (uint32_t i = 0; i < m_endpoint_clusters_within_each_parent_cluster.size(); i++)
1023 {
1024 uint_vec &cluster_indices = m_endpoint_clusters_within_each_parent_cluster[i];
1025
1026 BASISU_FRONTEND_VERIFY(cluster_indices.size());
1027
1028 vector_sort(cluster_indices);
1029
1030 auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
1031 cluster_indices.erase(last, cluster_indices.end());
1032 }
1033 }
1034
1035 void basisu_frontend::compute_endpoint_subblock_error_vec()
1036 {
1037 m_subblock_endpoint_quant_err_vec.resize(0);
1038
1039 const uint32_t N = 512;
1040 for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
1041 {
1042 const uint32_t first_index = cluster_index_iter;
1043 const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);
1044
1045#ifndef __EMSCRIPTEN__
1046 m_params.m_pJob_pool->add_job( [this, first_index, last_index] {
1047#endif
1048
1049 for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
1050 {
1051 const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
1052
1053 assert(cluster_indices.size());
1054
1055 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1056 {
1057 basisu::vector<color_rgba> cluster_pixels(8);
1058
1059 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1060 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1061
1062 const bool flipped = true;
1063
1064 const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();
1065
1066 for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
1067 {
1068 cluster_pixels[pixel_index] = pSource_block_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
1069 }
1070
1071 const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index];
1072
1073 assert(etc_params.m_valid);
1074
1075 color_rgba block_colors[4];
1076 etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true);
1077
1078 uint64_t total_err = 0;
1079
1080 for (uint32_t i = 0; i < 8; i++)
1081 {
1082 const color_rgba &c = cluster_pixels[i];
1083
1084 uint64_t best_err = UINT64_MAX;
1085 //uint32_t best_index = 0;
1086
1087 for (uint32_t s = 0; s < 4; s++)
1088 {
1089 uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
1090 if (err < best_err)
1091 {
1092 best_err = err;
1093 //best_index = s;
1094 }
1095 }
1096
1097 total_err += best_err;
1098 }
1099
1100 subblock_endpoint_quant_err quant_err;
1101 quant_err.m_total_err = total_err;
1102 quant_err.m_cluster_index = cluster_index;
1103 quant_err.m_cluster_subblock_index = cluster_indices_iter;
1104 quant_err.m_block_index = block_index;
1105 quant_err.m_subblock_index = subblock_index;
1106
1107 {
1108 std::lock_guard<std::mutex> lock(m_lock);
1109
1110 m_subblock_endpoint_quant_err_vec.push_back(quant_err);
1111 }
1112 }
1113 } // cluster_index
1114
1115#ifndef __EMSCRIPTEN__
1116 } );
1117#endif
1118
1119 } // cluster_index_iter
1120
1121#ifndef __EMSCRIPTEN__
1122 m_params.m_pJob_pool->wait_for_all();
1123#endif
1124
1125 vector_sort(m_subblock_endpoint_quant_err_vec);
1126 }
1127
1128 void basisu_frontend::introduce_new_endpoint_clusters()
1129 {
1130 debug_printf("introduce_new_endpoint_clusters\n");
1131
1132 generate_block_endpoint_clusters();
1133
1134 int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - (uint32_t)m_endpoint_clusters.size();
1135 if (num_new_endpoint_clusters <= 0)
1136 return;
1137
1138 compute_endpoint_subblock_error_vec();
1139
1140 const uint32_t num_orig_endpoint_clusters = (uint32_t)m_endpoint_clusters.size();
1141
1142 std::unordered_set<uint32_t> training_vector_was_relocated;
1143
1144 uint_vec cluster_sizes(num_orig_endpoint_clusters);
1145 for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
1146 cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
1147
1148 std::unordered_set<uint32_t> ignore_cluster;
1149
1150 uint32_t total_new_clusters = 0;
1151
1152 while (num_new_endpoint_clusters)
1153 {
1154 if (m_subblock_endpoint_quant_err_vec.size() == 0)
1155 break;
1156
1157 subblock_endpoint_quant_err subblock_to_move(m_subblock_endpoint_quant_err_vec.back());
1158
1159 m_subblock_endpoint_quant_err_vec.pop_back();
1160
1161 if (unordered_set_contains(ignore_cluster, subblock_to_move.m_cluster_index))
1162 continue;
1163
1164 uint32_t training_vector_index = subblock_to_move.m_block_index * 2 + subblock_to_move.m_subblock_index;
1165
1166 if (cluster_sizes[subblock_to_move.m_cluster_index] <= 2)
1167 continue;
1168
1169 if (unordered_set_contains(training_vector_was_relocated, training_vector_index))
1170 continue;
1171
1172 if (unordered_set_contains(training_vector_was_relocated, training_vector_index ^ 1))
1173 continue;
1174
1175#if 0
1176 const uint32_t block_index = subblock_to_move.m_block_index;
1177 const etc_block& blk = m_etc1_blocks_etc1s[block_index];
1178 uint32_t ls, hs;
1179 blk.get_selector_range(ls, hs);
1180 if (ls != hs)
1181 continue;
1182#endif
1183
1184 //const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size();
1185
1186 enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index);
1187 enlarge_vector(m_endpoint_cluster_etc_params, 1);
1188
1189 assert(m_endpoint_clusters.size() == m_endpoint_cluster_etc_params.size());
1190
1191 training_vector_was_relocated.insert(training_vector_index);
1192
1193 m_endpoint_clusters.back().push_back(training_vector_index ^ 1);
1194 training_vector_was_relocated.insert(training_vector_index ^ 1);
1195
1196 BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2);
1197 cluster_sizes[subblock_to_move.m_cluster_index] -= 2;
1198
1199 ignore_cluster.insert(subblock_to_move.m_cluster_index);
1200
1201 total_new_clusters++;
1202
1203 num_new_endpoint_clusters--;
1204 }
1205
1206 debug_printf("Introduced %i new endpoint clusters\n", total_new_clusters);
1207
1208 for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
1209 {
1210 uint_vec &cluster_indices = m_endpoint_clusters[i];
1211
1212 uint_vec new_cluster_indices;
1213 for (uint32_t j = 0; j < cluster_indices.size(); j++)
1214 {
1215 uint32_t training_vector_index = cluster_indices[j];
1216
1217 if (!unordered_set_contains(training_vector_was_relocated, training_vector_index))
1218 new_cluster_indices.push_back(training_vector_index);
1219 }
1220
1221 if (cluster_indices.size() != new_cluster_indices.size())
1222 {
1223 BASISU_FRONTEND_VERIFY(new_cluster_indices.size() > 0);
1224 cluster_indices.swap(new_cluster_indices);
1225 }
1226 }
1227
1228 generate_block_endpoint_clusters();
1229 }
1230
1231 struct color_rgba_hasher
1232 {
1233 inline std::size_t operator()(const color_rgba& k) const
1234 {
1235 uint32_t v = *(const uint32_t*)&k;
1236
1237 //return bitmix32(v);
1238
1239 //v ^= (v << 10);
1240 //v ^= (v >> 12);
1241
1242 return v;
1243 }
1244 };
1245
1246 // Given each endpoint cluster, gather all the block pixels which are in that cluster and compute optimized ETC1S endpoints for them.
1247 // TODO: Don't optimize endpoint clusters which haven't changed.
1248 // If step>=1, we check to ensure the new endpoint values actually decrease quantization error.
1249 void basisu_frontend::generate_endpoint_codebook(uint32_t step)
1250 {
1251 debug_printf("generate_endpoint_codebook\n");
1252
1253 interval_timer tm;
1254 tm.start();
1255
1256 m_endpoint_cluster_etc_params.resize(m_endpoint_clusters.size());
1257
1258 bool use_cpu = true;
1259 // TODO: Get this working when step>0
1260 if (m_params.m_pOpenCL_context && !step)
1261 {
1262 const uint32_t total_clusters = m_endpoint_clusters.size();
1263
1264 basisu::vector<cl_pixel_cluster> pixel_clusters(total_clusters);
1265
1266 std::vector<color_rgba> input_pixels;
1267 input_pixels.reserve(m_total_blocks * 16);
1268
1269 std::vector<uint32_t> pixel_weights;
1270 pixel_weights.reserve(m_total_blocks * 16);
1271
1272 uint_vec cluster_sizes(total_clusters);
1273
1274 //typedef basisu::hash_map<color_rgba, uint32_t, color_rgba_hasher> color_hasher_type;
1275 //color_hasher_type color_hasher;
1276 //color_hasher.reserve(2048);
1277
1278 interval_timer hash_tm;
1279 hash_tm.start();
1280
1281 basisu::vector<uint32_t> colors, colors2;
1282 colors.reserve(65536);
1283 colors2.reserve(65536);
1284
1285 for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
1286 {
1287 const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
1288 assert((cluster_indices.size() & 1) == 0);
1289
1290#if 0
1291 uint64_t first_pixel_index = input_pixels.size();
1292 const uint32_t total_pixels = 16 * (cluster_indices.size() / 2);
1293
1294 input_pixels.resize(input_pixels.size() + total_pixels);
1295 pixel_weights.resize(pixel_weights.size() + total_pixels);
1296
1297 uint64_t dst_ofs = first_pixel_index;
1298
1299 uint64_t total_r = 0, total_g = 0, total_b = 0;
1300 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1301 {
1302 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1303 if (subblock_index)
1304 continue;
1305
1306 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1307 const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
1308
1309 for (uint32_t i = 0; i < 16; i++)
1310 {
1311 input_pixels[dst_ofs] = pBlock_pixels[i];
1312 pixel_weights[dst_ofs] = 1;
1313 dst_ofs++;
1314
1315 total_r += pBlock_pixels[i].r;
1316 total_g += pBlock_pixels[i].g;
1317 total_b += pBlock_pixels[i].b;
1318 }
1319 }
1320
1321 //printf("%i %f %f %f\n", cluster_index, total_r / (float)total_pixels, total_g / (float)total_pixels, total_b / (float)total_pixels);
1322
1323 pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
1324 pixel_clusters[cluster_index].m_total_pixels = total_pixels;
1325 cluster_sizes[cluster_index] = total_pixels;
1326#elif 1
1327 colors.resize(cluster_indices.size() * 8);
1328 colors2.resize(cluster_indices.size() * 8);
1329 uint32_t dst_ofs = 0;
1330
1331 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1332 {
1333 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1334 if (subblock_index)
1335 continue;
1336
1337 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1338 const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
1339
1340 memcpy(colors.data() + dst_ofs, pBlock_pixels, sizeof(color_rgba) * 16);
1341 dst_ofs += 16;
1342
1343 } // cluster_indices_iter
1344
1345 uint32_t* pSorted = radix_sort(colors.size(), colors.data(), colors2.data(), 0, 3);
1346
1347 const uint64_t first_pixel_index = input_pixels.size();
1348
1349 uint32_t prev_color = 0, cur_weight = 0;
1350
1351 for (uint32_t i = 0; i < colors.size(); i++)
1352 {
1353 uint32_t cur_color = pSorted[i];
1354 if (cur_color == prev_color)
1355 {
1356 if (++cur_weight == 0)
1357 cur_weight--;
1358 }
1359 else
1360 {
1361 if (cur_weight)
1362 {
1363 input_pixels.push_back(*(const color_rgba*)&prev_color);
1364 pixel_weights.push_back(cur_weight);
1365 }
1366
1367 prev_color = cur_color;
1368 cur_weight = 1;
1369 }
1370 }
1371
1372 if (cur_weight)
1373 {
1374 input_pixels.push_back(*(const color_rgba*)&prev_color);
1375 pixel_weights.push_back(cur_weight);
1376 }
1377
1378 uint32_t total_unique_pixels = (uint32_t)(input_pixels.size() - first_pixel_index);
1379
1380 pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
1381 pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;
1382
1383 cluster_sizes[cluster_index] = total_unique_pixels;
1384#else
1385 color_hasher.reset();
1386
1387 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1388 {
1389 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1390 if (subblock_index)
1391 continue;
1392
1393 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1394 const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
1395
1396 uint32_t *pPrev_weight = nullptr;
1397 color_rgba prev_color;
1398
1399 {
1400 color_rgba cur_color = pBlock_pixels[0];
1401 auto res = color_hasher.insert(cur_color, 0);
1402
1403 uint32_t& weight = (res.first)->second;
1404 if (weight != UINT32_MAX)
1405 weight++;
1406
1407 prev_color = cur_color;
1408 pPrev_weight = &(res.first)->second;
1409 }
1410
1411 for (uint32_t i = 1; i < 16; i++)
1412 {
1413 color_rgba cur_color = pBlock_pixels[i];
1414
1415 if (cur_color == prev_color)
1416 {
1417 if (*pPrev_weight != UINT32_MAX)
1418 *pPrev_weight = *pPrev_weight + 1;
1419 }
1420 else
1421 {
1422 auto res = color_hasher.insert(cur_color, 0);
1423
1424 uint32_t& weight = (res.first)->second;
1425 if (weight != UINT32_MAX)
1426 weight++;
1427
1428 prev_color = cur_color;
1429 pPrev_weight = &(res.first)->second;
1430 }
1431 }
1432
1433 } // cluster_indices_iter
1434
1435 const uint64_t first_pixel_index = input_pixels.size();
1436 uint32_t total_unique_pixels = color_hasher.size();
1437
1438 pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
1439 pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;
1440
1441 input_pixels.resize(first_pixel_index + total_unique_pixels);
1442 pixel_weights.resize(first_pixel_index + total_unique_pixels);
1443
1444 uint32_t j = 0;
1445
1446 for (auto it = color_hasher.begin(); it != color_hasher.end(); ++it, ++j)
1447 {
1448 input_pixels[first_pixel_index + j] = it->first;
1449 pixel_weights[first_pixel_index + j] = it->second;
1450 }
1451
1452 cluster_sizes[cluster_index] = total_unique_pixels;
1453#endif
1454
1455 } // cluster_index
1456
1457 debug_printf("Total hash time: %3.3f secs\n", hash_tm.get_elapsed_secs());
1458
1459 debug_printf("Total unique colors: %llu\n", input_pixels.size());
1460
1461 uint_vec sorted_cluster_indices_new_to_old(total_clusters);
1462 indirect_sort(total_clusters, sorted_cluster_indices_new_to_old.data(), cluster_sizes.data());
1463 //for (uint32_t i = 0; i < total_clusters; i++)
1464 // sorted_cluster_indices_new_to_old[i] = i;
1465
1466 uint_vec sorted_cluster_indices_old_to_new(total_clusters);
1467 for (uint32_t i = 0; i < total_clusters; i++)
1468 sorted_cluster_indices_old_to_new[sorted_cluster_indices_new_to_old[i]] = i;
1469
1470 basisu::vector<cl_pixel_cluster> sorted_pixel_clusters(total_clusters);
1471 for (uint32_t i = 0; i < total_clusters; i++)
1472 sorted_pixel_clusters[i] = pixel_clusters[sorted_cluster_indices_new_to_old[i]];
1473
1474 uint32_t total_perms = 64;
1475 if (m_params.m_compression_level <= 1)
1476 total_perms = 16;
1477 else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
1478 total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;
1479
1480 basisu::vector<etc_block> output_blocks(total_clusters);
1481
1482 if (opencl_encode_etc1s_pixel_clusters(
1483 m_params.m_pOpenCL_context,
1484 output_blocks.data(),
1485 total_clusters,
1486 sorted_pixel_clusters.data(),
1487 input_pixels.size(),
1488 input_pixels.data(),
1489 pixel_weights.data(),
1490 m_params.m_perceptual, total_perms))
1491 {
1492 for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
1493 {
1494 const uint32_t new_cluster_index = sorted_cluster_indices_old_to_new[old_cluster_index];
1495
1496 const etc_block& blk = output_blocks[new_cluster_index];
1497
1498 endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[old_cluster_index];
1499
1500 prev_etc_params.m_valid = true;
1501 etc_block::unpack_color5(prev_etc_params.m_color_unscaled[0], blk.get_base5_color(), false);
1502 prev_etc_params.m_inten_table[0] = blk.get_inten_table(0);
1503 prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this
1504 }
1505
1506 use_cpu = false;
1507 }
1508 else
1509 {
1510 error_printf("basisu_frontend::generate_endpoint_codebook: opencl_encode_etc1s_pixel_clusters() failed! Using CPU.\n");
1511 m_params.m_pOpenCL_context = nullptr;
1512 m_opencl_failed = true;
1513 }
1514
1515 } // if (opencl_is_available() && m_params.m_use_opencl)
1516
1517 if (use_cpu)
1518 {
1519 const uint32_t N = 128;
1520 for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
1521 {
1522 const uint32_t first_index = cluster_index_iter;
1523 const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);
1524
1525#ifndef __EMSCRIPTEN__
1526 m_params.m_pJob_pool->add_job([this, first_index, last_index, step] {
1527#endif
1528
1529 for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
1530 {
1531 const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
1532
1533 BASISU_FRONTEND_VERIFY(cluster_indices.size());
1534
1535 const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8;
1536
1537 basisu::vector<color_rgba> cluster_pixels(total_pixels);
1538
1539 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1540 {
1541 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1542 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1543
1544 const bool flipped = true;
1545
1546 const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
1547
1548 for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
1549 {
1550 const color_rgba& c = pBlock_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
1551 cluster_pixels[cluster_indices_iter * 8 + pixel_index] = c;
1552 }
1553 }
1554
1555 endpoint_cluster_etc_params new_subblock_params;
1556
1557 {
1558 etc1_optimizer optimizer;
1559 etc1_solution_coordinates solutions[2];
1560
1561 etc1_optimizer::params cluster_optimizer_params;
1562 cluster_optimizer_params.m_num_src_pixels = total_pixels;
1563 cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];
1564
1565 cluster_optimizer_params.m_use_color4 = false;
1566 cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
1567
1568 if (m_params.m_compression_level <= 1)
1569 cluster_optimizer_params.m_quality = cETCQualityMedium;
1570 else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
1571 cluster_optimizer_params.m_quality = cETCQualityUber;
1572
1573 etc1_optimizer::results cluster_optimizer_results;
1574
1575 basisu::vector<uint8_t> cluster_selectors(total_pixels);
1576 cluster_optimizer_results.m_n = total_pixels;
1577 cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];
1578
1579 optimizer.init(cluster_optimizer_params, cluster_optimizer_results);
1580
1581 if (!optimizer.compute())
1582 BASISU_FRONTEND_VERIFY(false);
1583
1584 new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
1585 new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
1586 new_subblock_params.m_color_error[0] = cluster_optimizer_results.m_error;
1587 }
1588
1589 endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[cluster_index];
1590
1591 bool use_new_subblock_params = false;
1592 if ((!step) || (!prev_etc_params.m_valid))
1593 use_new_subblock_params = true;
1594 else
1595 {
1596 assert(prev_etc_params.m_valid);
1597
1598 uint64_t total_prev_err = 0;
1599
1600 {
1601 color_rgba block_colors[4];
1602
1603 etc_block::get_block_colors5(block_colors, prev_etc_params.m_color_unscaled[0], prev_etc_params.m_inten_table[0], false);
1604
1605 uint64_t total_err = 0;
1606
1607 for (uint32_t i = 0; i < total_pixels; i++)
1608 {
1609 const color_rgba& c = cluster_pixels[i];
1610
1611 uint64_t best_err = UINT64_MAX;
1612 //uint32_t best_index = 0;
1613
1614 for (uint32_t s = 0; s < 4; s++)
1615 {
1616 uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
1617 if (err < best_err)
1618 {
1619 best_err = err;
1620 //best_index = s;
1621 }
1622 }
1623
1624 total_err += best_err;
1625 }
1626
1627 total_prev_err += total_err;
1628 }
1629
1630 // See if we should update this cluster's endpoints (if the error has actually fallen)
1631 if (total_prev_err > new_subblock_params.m_color_error[0])
1632 {
1633 use_new_subblock_params = true;
1634 }
1635 }
1636
1637 if (use_new_subblock_params)
1638 {
1639 new_subblock_params.m_valid = true;
1640
1641 prev_etc_params = new_subblock_params;
1642 }
1643
1644 } // cluster_index
1645
1646#ifndef __EMSCRIPTEN__
1647 });
1648#endif
1649
1650 } // cluster_index_iter
1651
1652#ifndef __EMSCRIPTEN__
1653 m_params.m_pJob_pool->wait_for_all();
1654#endif
1655 }
1656
1657 debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
1658 }
1659
1660 bool basisu_frontend::check_etc1s_constraints() const
1661 {
1662 basisu::vector<vec2U> block_clusters(m_total_blocks);
1663
1664 for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
1665 {
1666 const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
1667
1668 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1669 {
1670 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1671 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1672
1673 block_clusters[block_index][subblock_index] = cluster_index;
1674
1675 } // cluster_indices_iter
1676 }
1677
1678 for (uint32_t i = 0; i < m_total_blocks; i++)
1679 {
1680 if (block_clusters[i][0] != block_clusters[i][1])
1681 return false;
1682 }
1683
1684 return true;
1685 }
1686
1687 // For each block, determine which ETC1S endpoint cluster can encode that block with lowest error.
1688 // This reassigns blocks to different endpoint clusters.
1689 uint32_t basisu_frontend::refine_endpoint_clusterization()
1690 {
1691 debug_printf("refine_endpoint_clusterization\n");
1692
1693 if (m_use_hierarchical_endpoint_codebooks)
1694 compute_endpoint_clusters_within_each_parent_cluster();
1695
1696 // Note: It's possible that an endpoint cluster may live in more than one parent cluster after the first refinement step.
1697
1698 basisu::vector<vec2U> block_clusters(m_total_blocks);
1699
1700 for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
1701 {
1702 const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
1703
1704 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1705 {
1706 const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1707 const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1708
1709 block_clusters[block_index][subblock_index] = cluster_index;
1710
1711 } // cluster_indices_iter
1712 }
1713
1714 //----------------------------------------------------------
1715
1716 // Create a new endpoint clusterization
1717
1718 interval_timer tm;
1719 tm.start();
1720
1721 uint_vec best_cluster_indices(m_total_blocks);
1722
1723 bool use_cpu = true;
1724 // TODO: Support non-hierarchical endpoint codebooks here
1725 if (m_params.m_pOpenCL_context && m_use_hierarchical_endpoint_codebooks)
1726 {
1727 // For the OpenCL kernel, we order the parent endpoint clusters by smallest to largest for efficiency.
1728 // We also prepare an array of block info structs that point into this new parent endpoint cluster array.
1729 const uint32_t total_parent_clusters = m_endpoint_clusters_within_each_parent_cluster.size();
1730
1731 basisu::vector<cl_block_info_struct> cl_block_info_structs(m_total_blocks);
1732
1733 // the size of each parent cluster, in total clusters
1734 uint_vec parent_cluster_sizes(total_parent_clusters);
1735 for (uint32_t i = 0; i < total_parent_clusters; i++)
1736 parent_cluster_sizes[i] = m_endpoint_clusters_within_each_parent_cluster[i].size();
1737
1738 uint_vec first_parent_cluster_ofs(total_parent_clusters);
1739 uint32_t cur_ofs = 0;
1740 for (uint32_t i = 0; i < total_parent_clusters; i++)
1741 {
1742 first_parent_cluster_ofs[i] = cur_ofs;
1743
1744 cur_ofs += parent_cluster_sizes[i];
1745 }
1746
			// Note: total_actual_endpoint_clusters is not necessarily equal to m_endpoint_clusters.size(), because clusters may live in multiple parent clusters after the first refinement step.
1748 BASISU_FRONTEND_VERIFY(cur_ofs >= m_endpoint_clusters.size());
1749 const uint32_t total_actual_endpoint_clusters = cur_ofs;
1750 basisu::vector<cl_endpoint_cluster_struct> cl_endpoint_cluster_structs(total_actual_endpoint_clusters);
1751
1752 for (uint32_t i = 0; i < total_parent_clusters; i++)
1753 {
1754 const uint32_t dst_ofs = first_parent_cluster_ofs[i];
1755
1756 const uint32_t parent_cluster_size = parent_cluster_sizes[i];
1757
1758 assert(m_endpoint_clusters_within_each_parent_cluster[i].size() == parent_cluster_size);
1759
1760 for (uint32_t j = 0; j < parent_cluster_size; j++)
1761 {
1762 const uint32_t endpoint_cluster_index = m_endpoint_clusters_within_each_parent_cluster[i][j];
1763
1764 color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_unscaled[0]);
1765 uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[endpoint_cluster_index].m_inten_table[0];
1766
1767 cl_endpoint_cluster_structs[dst_ofs + j].m_unscaled_color = cluster_etc_base_color;
1768 cl_endpoint_cluster_structs[dst_ofs + j].m_etc_inten = (uint8_t)cluster_etc_inten;
1769 cl_endpoint_cluster_structs[dst_ofs + j].m_cluster_index = (uint16_t)endpoint_cluster_index;
1770 }
1771 }
1772
1773 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
1774 {
1775 const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster[block_index];
1776
1777 cl_block_info_structs[block_index].m_num_clusters = (uint16_t)(parent_cluster_sizes[block_parent_endpoint_cluster_index]);
1778 cl_block_info_structs[block_index].m_first_cluster_ofs = (uint16_t)(first_parent_cluster_ofs[block_parent_endpoint_cluster_index]);
1779
1780 const uint32_t block_cluster_index = block_clusters[block_index][0];
1781 cl_block_info_structs[block_index].m_cur_cluster_index = (uint16_t)block_cluster_index;
1782 cl_block_info_structs[block_index].m_cur_cluster_etc_inten = (uint8_t)m_endpoint_cluster_etc_params[block_cluster_index].m_inten_table[0];
1783 }
1784
1785 uint_vec block_cluster_indices(m_total_blocks);
1786 for (uint32_t i = 0; i < m_total_blocks; i++)
1787 block_cluster_indices[i] = block_clusters[i][0];
1788
1789 uint_vec sorted_block_indices(m_total_blocks);
1790 indirect_sort(m_total_blocks, sorted_block_indices.data(), block_cluster_indices.data());
1791
1792 bool status = opencl_refine_endpoint_clusterization(
1793 m_params.m_pOpenCL_context,
1794 cl_block_info_structs.data(),
1795 total_actual_endpoint_clusters,
1796 cl_endpoint_cluster_structs.data(),
1797 sorted_block_indices.data(),
1798 best_cluster_indices.data(),
1799 m_params.m_perceptual);
1800
1801 if (status)
1802 {
1803 use_cpu = false;
1804 }
1805 else
1806 {
1807 error_printf("basisu_frontend::refine_endpoint_clusterization: opencl_refine_endpoint_clusterization() failed! Using CPU.\n");
1808 m_params.m_pOpenCL_context = nullptr;
1809 m_opencl_failed = true;
1810 }
1811 }
1812
1813 if (use_cpu)
1814 {
1815 const uint32_t N = 1024;
1816 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
1817 {
1818 const uint32_t first_index = block_index_iter;
1819 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
1820
1821#ifndef __EMSCRIPTEN__
1822 m_params.m_pJob_pool->add_job([this, first_index, last_index, &best_cluster_indices, &block_clusters] {
1823#endif
1824
1825 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
1826 {
1827 const uint32_t cluster_index = block_clusters[block_index][0];
1828 BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]);
1829
1830 const color_rgba* pSubblock_pixels = get_source_pixel_block(block_index).get_ptr();
1831 const uint32_t num_subblock_pixels = 16;
1832
1833 uint64_t best_cluster_err = INT64_MAX;
1834 uint32_t best_cluster_index = 0;
1835
1836 const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster.size() ? m_block_parent_endpoint_cluster[block_index] : 0;
1837 const uint_vec* pCluster_indices = m_endpoint_clusters_within_each_parent_cluster.size() ? &m_endpoint_clusters_within_each_parent_cluster[block_parent_endpoint_cluster_index] : nullptr;
1838
1839 const uint32_t total_clusters = m_use_hierarchical_endpoint_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_endpoint_clusters.size();
1840
1841 for (uint32_t i = 0; i < total_clusters; i++)
1842 {
1843 const uint32_t cluster_iter = m_use_hierarchical_endpoint_codebooks ? (*pCluster_indices)[i] : i;
1844
1845 color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0]);
1846 uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0];
1847
1848 uint64_t total_err = 0;
1849
1850 const uint32_t low_selector = 0;//subblock_etc_params_vec[j].m_low_selectors[0];
1851 const uint32_t high_selector = 3;//subblock_etc_params_vec[j].m_high_selectors[0];
1852 color_rgba subblock_colors[4];
1853 // Can't assign it here - may result in too much error when selector quant occurs
1854 if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0])
1855 {
1856 total_err = INT64_MAX;
1857 goto skip_cluster;
1858 }
1859
1860 etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten);
1861
1862#if 0
1863 for (uint32_t p = 0; p < num_subblock_pixels; p++)
1864 {
1865 uint64_t best_err = UINT64_MAX;
1866
1867 for (uint32_t r = low_selector; r <= high_selector; r++)
1868 {
1869 uint64_t err = color_distance(m_params.m_perceptual, pSubblock_pixels[p], subblock_colors[r], false);
1870 best_err = minimum(best_err, err);
1871 if (!best_err)
1872 break;
1873 }
1874
1875 total_err += best_err;
1876 if (total_err > best_cluster_err)
1877 break;
1878 } // p
1879#else
1880 if (m_params.m_perceptual)
1881 {
1882 if (!g_cpu_supports_sse41)
1883 {
1884 for (uint32_t p = 0; p < num_subblock_pixels; p++)
1885 {
1886 uint64_t best_err = UINT64_MAX;
1887
1888 for (uint32_t r = low_selector; r <= high_selector; r++)
1889 {
1890 uint64_t err = color_distance(true, pSubblock_pixels[p], subblock_colors[r], false);
1891 best_err = minimum(best_err, err);
1892 if (!best_err)
1893 break;
1894 }
1895
1896 total_err += best_err;
1897 if (total_err > best_cluster_err)
1898 break;
1899 } // p
1900 }
1901 else
1902 {
1903#if BASISU_SUPPORT_SSE
1904 find_lowest_error_perceptual_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
1905#endif
1906 }
1907 }
1908 else
1909 {
1910 if (!g_cpu_supports_sse41)
1911 {
1912 for (uint32_t p = 0; p < num_subblock_pixels; p++)
1913 {
1914 uint64_t best_err = UINT64_MAX;
1915
1916 for (uint32_t r = low_selector; r <= high_selector; r++)
1917 {
1918 uint64_t err = color_distance(false, pSubblock_pixels[p], subblock_colors[r], false);
1919 best_err = minimum(best_err, err);
1920 if (!best_err)
1921 break;
1922 }
1923
1924 total_err += best_err;
1925 if (total_err > best_cluster_err)
1926 break;
1927 } // p
1928 }
1929 else
1930 {
1931#if BASISU_SUPPORT_SSE
1932 find_lowest_error_linear_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
1933#endif
1934 }
1935 }
1936#endif
1937
1938 skip_cluster:
1939 if ((total_err < best_cluster_err) ||
1940 ((cluster_iter == cluster_index) && (total_err == best_cluster_err)))
1941 {
1942 best_cluster_err = total_err;
1943 best_cluster_index = cluster_iter;
1944
1945 if (!best_cluster_err)
1946 break;
1947 }
1948 } // j
1949
1950 best_cluster_indices[block_index] = best_cluster_index;
1951
1952 } // block_index
1953
1954#ifndef __EMSCRIPTEN__
1955 });
1956#endif
1957
1958 } // block_index_iter
1959
1960#ifndef __EMSCRIPTEN__
1961 m_params.m_pJob_pool->wait_for_all();
1962#endif
1963
1964 } // use_cpu
1965
1966 debug_printf("refine_endpoint_clusterization time: %3.3f secs\n", tm.get_elapsed_secs());
1967
1968 basisu::vector<typename basisu::vector<uint32_t> > optimized_endpoint_clusters(m_endpoint_clusters.size());
1969 uint32_t total_subblocks_reassigned = 0;
1970
1971 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
1972 {
1973 const uint32_t training_vector_index = block_index * 2 + 0;
1974
1975 const uint32_t orig_cluster_index = block_clusters[block_index][0];
1976 const uint32_t best_cluster_index = best_cluster_indices[block_index];
1977
1978 optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index);
1979 optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index + 1);
1980
1981 if (best_cluster_index != orig_cluster_index)
1982 {
1983 total_subblocks_reassigned++;
1984 }
1985 }
1986
1987 debug_printf("total_subblocks_reassigned: %u\n", total_subblocks_reassigned);
1988
1989 m_endpoint_clusters = optimized_endpoint_clusters;
1990
1991 return total_subblocks_reassigned;
1992 }
1993
1994 void basisu_frontend::eliminate_redundant_or_empty_endpoint_clusters()
1995 {
1996 debug_printf("eliminate_redundant_or_empty_endpoint_clusters\n");
1997
1998 // Step 1: Sort endpoint clusters by the base colors/intens
1999
2000 uint_vec sorted_endpoint_cluster_indices(m_endpoint_clusters.size());
2001 for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
2002 sorted_endpoint_cluster_indices[i] = i;
2003
2004 indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]);
2005
2006 basisu::vector<basisu::vector<uint32_t> > new_endpoint_clusters(m_endpoint_clusters.size());
2007 basisu::vector<endpoint_cluster_etc_params> new_subblock_etc_params(m_endpoint_clusters.size());
2008
2009 for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
2010 {
2011 uint32_t j = sorted_endpoint_cluster_indices[i];
2012 new_endpoint_clusters[i] = m_endpoint_clusters[j];
2013 new_subblock_etc_params[i] = m_endpoint_cluster_etc_params[j];
2014 }
2015
2016 new_endpoint_clusters.swap(m_endpoint_clusters);
2017 new_subblock_etc_params.swap(m_endpoint_cluster_etc_params);
2018
2019 // Step 2: Eliminate redundant endpoint clusters, or empty endpoint clusters
2020
2021 new_endpoint_clusters.resize(0);
2022 new_subblock_etc_params.resize(0);
2023
2024 for (int i = 0; i < (int)m_endpoint_clusters.size(); )
2025 {
2026 if (!m_endpoint_clusters[i].size())
2027 {
2028 i++;
2029 continue;
2030 }
2031
2032 int j;
2033 for (j = i + 1; j < (int)m_endpoint_clusters.size(); j++)
2034 {
2035 if (!(m_endpoint_cluster_etc_params[i] == m_endpoint_cluster_etc_params[j]))
2036 break;
2037 }
2038
2039 new_endpoint_clusters.push_back(m_endpoint_clusters[i]);
2040 new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]);
2041
2042 for (int k = i + 1; k < j; k++)
2043 {
2044 append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]);
2045 }
2046
2047 i = j;
2048 }
2049
2050 if (m_endpoint_clusters.size() != new_endpoint_clusters.size())
2051 {
2052 if (m_params.m_debug_stats)
2053 debug_printf("Eliminated %u redundant or empty clusters\n", (uint32_t)(m_endpoint_clusters.size() - new_endpoint_clusters.size()));
2054
2055 m_endpoint_clusters.swap(new_endpoint_clusters);
2056
2057 m_endpoint_cluster_etc_params.swap(new_subblock_etc_params);
2058 }
2059 }
2060
2061 void basisu_frontend::create_initial_packed_texture()
2062 {
2063 debug_printf("create_initial_packed_texture\n");
2064
2065 interval_timer tm;
2066 tm.start();
2067
2068 bool use_cpu = true;
2069
2070 if ((m_params.m_pOpenCL_context) && (opencl_is_available()))
2071 {
2072 basisu::vector<color_rgba> block_etc5_color_intens(m_total_blocks);
2073
2074 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2075 {
2076 uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];
2077
2078 const color_rgba& color_unscaled = m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0];
2079 uint32_t inten = m_endpoint_cluster_etc_params[cluster0].m_inten_table[0];
2080
2081 block_etc5_color_intens[block_index].set(color_unscaled.r, color_unscaled.g, color_unscaled.b, inten);
2082 }
2083
2084 bool status = opencl_determine_selectors(m_params.m_pOpenCL_context, block_etc5_color_intens.data(),
2085 m_encoded_blocks.data(),
2086 m_params.m_perceptual);
2087 if (!status)
2088 {
2089 error_printf("basisu_frontend::create_initial_packed_texture: opencl_determine_selectors() failed! Using CPU.\n");
2090 m_params.m_pOpenCL_context = nullptr;
2091 m_opencl_failed = true;
2092 }
2093 else
2094 {
2095 use_cpu = false;
2096 }
2097 }
2098
2099 if (use_cpu)
2100 {
2101 const uint32_t N = 4096;
2102 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
2103 {
2104 const uint32_t first_index = block_index_iter;
2105 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
2106
2107#ifndef __EMSCRIPTEN__
2108 m_params.m_pJob_pool->add_job([this, first_index, last_index] {
2109#endif
2110
2111 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
2112 {
2113 uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];
2114 uint32_t cluster1 = m_block_endpoint_clusters_indices[block_index][1];
2115 BASISU_FRONTEND_VERIFY(cluster0 == cluster1);
2116
2117 const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();
2118
2119 etc_block& blk = m_encoded_blocks[block_index];
2120
2121 color_rgba unscaled[2] = { m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0], m_endpoint_cluster_etc_params[cluster1].m_color_unscaled[0] };
2122 uint32_t inten[2] = { m_endpoint_cluster_etc_params[cluster0].m_inten_table[0], m_endpoint_cluster_etc_params[cluster1].m_inten_table[0] };
2123
2124 blk.set_block_color5(unscaled[0], unscaled[1]);
2125 blk.set_flip_bit(true);
2126
2127 blk.set_inten_table(0, inten[0]);
2128 blk.set_inten_table(1, inten[1]);
2129
2130 blk.determine_selectors(pSource_pixels, m_params.m_perceptual);
2131
2132 } // block_index
2133
2134#ifndef __EMSCRIPTEN__
2135 });
2136#endif
2137
2138 } // block_index_iter
2139
2140#ifndef __EMSCRIPTEN__
2141 m_params.m_pJob_pool->wait_for_all();
2142#endif
2143
2144 } // use_cpu
2145
2146 m_orig_encoded_blocks = m_encoded_blocks;
2147
2148 debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
2149 }
2150
2151 void basisu_frontend::compute_selector_clusters_within_each_parent_cluster()
2152 {
2153 uint_vec block_selector_cluster_indices(m_total_blocks);
2154
2155 for (int cluster_index = 0; cluster_index < static_cast<int>(m_selector_cluster_block_indices.size()); cluster_index++)
2156 {
2157 const basisu::vector<uint32_t>& cluster_indices = m_selector_cluster_block_indices[cluster_index];
2158
2159 for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
2160 {
2161 const uint32_t block_index = cluster_indices[cluster_indices_iter];
2162
2163 block_selector_cluster_indices[block_index] = cluster_index;
2164
2165 } // cluster_indices_iter
2166
2167 } // cluster_index
2168
2169 m_selector_clusters_within_each_parent_cluster.resize(0);
2170 m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_block_indices.size());
2171
2172 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2173 {
2174 const uint32_t cluster_index = block_selector_cluster_indices[block_index];
2175 const uint32_t parent_cluster_index = m_block_parent_selector_cluster[block_index];
2176
2177 m_selector_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
2178 }
2179
2180 for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
2181 {
2182 uint_vec &cluster_indices = m_selector_clusters_within_each_parent_cluster[i];
2183
2184 BASISU_FRONTEND_VERIFY(cluster_indices.size());
2185
2186 vector_sort(cluster_indices);
2187
2188 auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
2189 cluster_indices.erase(last, cluster_indices.end());
2190 }
2191 }
2192
2193 void basisu_frontend::generate_selector_clusters()
2194 {
2195 debug_printf("generate_selector_clusters\n");
2196
2197 typedef tree_vector_quant<vec16F> vec16F_clusterizer;
2198
2199 vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks);
2200
2201 const uint32_t N = 4096;
2202 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
2203 {
2204 const uint32_t first_index = block_index_iter;
2205 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
2206
2207#ifndef __EMSCRIPTEN__
2208 m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
2209#endif
2210
2211 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
2212 {
2213 const etc_block &blk = m_encoded_blocks[block_index];
2214
2215 vec16F v;
2216 for (uint32_t y = 0; y < 4; y++)
2217 for (uint32_t x = 0; x < 4; x++)
2218 v[x + y * 4] = static_cast<float>(blk.get_selector(x, y));
2219
2220 const uint32_t subblock_index = (blk.get_inten_table(0) > blk.get_inten_table(1)) ? 0 : 1;
2221
2222 color_rgba block_colors[2];
2223 blk.get_block_low_high_colors(block_colors, subblock_index);
2224
2225 const uint32_t dist = color_distance(m_params.m_perceptual, block_colors[0], block_colors[1], false);
2226
2227 const uint32_t cColorDistToWeight = 300;
2228 const uint32_t cMaxWeight = 4096;
2229 uint32_t weight = clamp<uint32_t>(dist / cColorDistToWeight, 1, cMaxWeight);
2230
2231 training_vecs[block_index].first = v;
2232 training_vecs[block_index].second = weight;
2233
2234 } // block_index
2235
2236#ifndef __EMSCRIPTEN__
2237 } );
2238#endif
2239
2240 } // block_index_iter
2241
2242#ifndef __EMSCRIPTEN__
2243 m_params.m_pJob_pool->wait_for_all();
2244#endif
2245
2246 vec16F_clusterizer selector_clusterizer;
2247 for (uint32_t i = 0; i < m_total_blocks; i++)
2248 selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second);
2249
2250 const int selector_parent_codebook_size = (m_params.m_compression_level <= 1) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 : BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT;
2251 const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? selector_parent_codebook_size : 0;
2252 debug_printf("Using selector parent codebook size %u\n", parent_codebook_size);
2253
2254 uint32_t max_threads = 0;
2255 max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
2256 if (m_params.m_pJob_pool)
2257 max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);
2258
2259 bool status = generate_hierarchical_codebook_threaded(selector_clusterizer,
2260 m_params.m_max_selector_clusters, m_use_hierarchical_selector_codebooks ? parent_codebook_size : 0,
2261 m_selector_cluster_block_indices,
2262 m_selector_parent_cluster_block_indices,
2263 max_threads, m_params.m_pJob_pool, false);
2264 BASISU_FRONTEND_VERIFY(status);
2265
2266 if (m_use_hierarchical_selector_codebooks)
2267 {
2268 if (!m_selector_parent_cluster_block_indices.size())
2269 {
2270 m_selector_parent_cluster_block_indices.resize(0);
2271 m_selector_parent_cluster_block_indices.resize(1);
2272 for (uint32_t i = 0; i < m_total_blocks; i++)
2273 m_selector_parent_cluster_block_indices[0].push_back(i);
2274 }
2275
2276 BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 <= UINT8_MAX);
2277 BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT <= UINT8_MAX);
2278
2279 m_block_parent_selector_cluster.resize(0);
2280 m_block_parent_selector_cluster.resize(m_total_blocks);
2281 vector_set_all(m_block_parent_selector_cluster, 0xFF);
2282
2283 for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_block_indices.size(); parent_cluster_index++)
2284 {
2285 const uint_vec &cluster = m_selector_parent_cluster_block_indices[parent_cluster_index];
2286 for (uint32_t j = 0; j < cluster.size(); j++)
2287 m_block_parent_selector_cluster[cluster[j]] = static_cast<uint8_t>(parent_cluster_index);
2288 }
2289 for (uint32_t i = 0; i < m_total_blocks; i++)
2290 {
2291 BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[i] != 0xFF);
2292 }
2293
2294 // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
2295 for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++)
2296 {
2297 const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index];
2298
2299 uint32_t parent_cluster_index = 0;
2300 for (uint32_t j = 0; j < cluster.size(); j++)
2301 {
2302 const uint32_t block_index = cluster[j];
2303 if (!j)
2304 {
2305 parent_cluster_index = m_block_parent_selector_cluster[block_index];
2306 }
2307 else
2308 {
2309 BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[block_index] == parent_cluster_index);
2310 }
2311 }
2312 }
2313 }
2314
2315 debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size(), (uint32_t)m_selector_parent_cluster_block_indices.size());
2316 }
2317
2318 void basisu_frontend::create_optimized_selector_codebook(uint32_t iter)
2319 {
2320 debug_printf("create_optimized_selector_codebook\n");
2321
2322 interval_timer tm;
2323 tm.start();
2324
2325 const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();
2326
2327 debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size());
2328
2329 m_optimized_cluster_selectors.resize(total_selector_clusters);
2330
2331 // For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector.
2332 const uint32_t N = 256;
2333 for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N)
2334 {
2335 const uint32_t first_index = cluster_index_iter;
2336 const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N);
2337
2338#ifndef __EMSCRIPTEN__
2339 m_params.m_pJob_pool->add_job([this, first_index, last_index] {
2340#endif
2341
2342 for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
2343 {
2344 const basisu::vector<uint32_t>& cluster_block_indices = m_selector_cluster_block_indices[cluster_index];
2345
2346 if (!cluster_block_indices.size())
2347 continue;
2348
2349 uint64_t overall_best_err = 0;
2350
2351 uint64_t total_err[4][4][4];
2352 clear_obj(total_err);
2353
2354 for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++)
2355 {
2356 const uint32_t block_index = cluster_block_indices[cluster_block_index];
2357
2358 const etc_block& blk = m_encoded_blocks[block_index];
2359
2360 color_rgba blk_colors[4];
2361 blk.get_block_colors(blk_colors, 0);
2362
2363 for (uint32_t y = 0; y < 4; y++)
2364 {
2365 for (uint32_t x = 0; x < 4; x++)
2366 {
2367 const color_rgba& orig_color = get_source_pixel_block(block_index)(x, y);
2368
2369 if (m_params.m_perceptual)
2370 {
2371 for (uint32_t s = 0; s < 4; s++)
2372 total_err[y][x][s] += color_distance(true, blk_colors[s], orig_color, false);
2373 }
2374 else
2375 {
2376 for (uint32_t s = 0; s < 4; s++)
2377 total_err[y][x][s] += color_distance(false, blk_colors[s], orig_color, false);
2378 }
2379 } // x
2380 } // y
2381
2382 } // cluster_block_index
2383
2384 for (uint32_t y = 0; y < 4; y++)
2385 {
2386 for (uint32_t x = 0; x < 4; x++)
2387 {
2388 uint64_t best_err = total_err[y][x][0];
2389 uint8_t best_sel = 0;
2390
2391 for (uint32_t s = 1; s < 4; s++)
2392 {
2393 if (total_err[y][x][s] < best_err)
2394 {
2395 best_err = total_err[y][x][s];
2396 best_sel = (uint8_t)s;
2397 }
2398 }
2399
2400 m_optimized_cluster_selectors[cluster_index].set_selector(x, y, best_sel);
2401
2402 overall_best_err += best_err;
2403 } // x
2404 } // y
2405
2406 } // cluster_index
2407
2408#ifndef __EMSCRIPTEN__
2409 });
2410#endif
2411
2412 } // cluster_index_iter
2413
2414#ifndef __EMSCRIPTEN__
2415 m_params.m_pJob_pool->wait_for_all();
2416#endif
2417
2418 debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
2419
2420 if (m_params.m_debug_images)
2421 {
2422 uint32_t max_selector_cluster_size = 0;
2423
2424 for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
2425 max_selector_cluster_size = maximum<uint32_t>(max_selector_cluster_size, (uint32_t)m_selector_cluster_block_indices[i].size());
2426
2427 if ((max_selector_cluster_size * 5) < 32768)
2428 {
2429 const uint32_t x_spacer_len = 16;
2430 image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_block_indices.size() * 5);
2431
2432 for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
2433 {
2434 const basisu::vector<uint32_t> &cluster_block_indices = m_selector_cluster_block_indices[selector_cluster_index];
2435
2436 for (uint32_t y = 0; y < 4; y++)
2437 for (uint32_t x = 0; x < 4; x++)
2438 selector_cluster_vis.set_clipped(x_spacer_len + x - 12, selector_cluster_index * 5 + y, color_rgba((m_optimized_cluster_selectors[selector_cluster_index].get_selector(x, y) * 255) / 3));
2439
2440 for (uint32_t i = 0; i < cluster_block_indices.size(); i++)
2441 {
2442 uint32_t block_index = cluster_block_indices[i];
2443
2444 const etc_block &blk = m_orig_encoded_blocks[block_index];
2445
2446 for (uint32_t y = 0; y < 4; y++)
2447 for (uint32_t x = 0; x < 4; x++)
2448 selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3));
2449 }
2450 }
2451
2452 char buf[256];
2453 snprintf(buf, sizeof(buf), "selector_cluster_vis_%u.png", iter);
2454 save_png(buf, selector_cluster_vis);
2455 }
2456 }
2457 }
2458
2459 // For each block: Determine which quantized selectors best encode that block, given its quantized endpoints.
2460 // Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end.
2461 void basisu_frontend::find_optimal_selector_clusters_for_each_block()
2462 {
2463 debug_printf("find_optimal_selector_clusters_for_each_block\n");
2464
2465 interval_timer tm;
2466 tm.start();
2467
2468 if (m_params.m_validate)
2469 {
2470 // Sanity checks
2471 BASISU_FRONTEND_VERIFY(m_selector_cluster_block_indices.size() == m_optimized_cluster_selectors.size());
2472 for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
2473 {
2474 for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
2475 {
2476 BASISU_FRONTEND_VERIFY(m_selector_clusters_within_each_parent_cluster[i][j] < m_optimized_cluster_selectors.size());
2477 }
2478 }
2479 }
2480
2481 m_block_selector_cluster_index.resize(m_total_blocks);
2482
2483 if (m_params.m_compression_level == 0)
2484 {
2485 // Just leave the blocks in their original selector clusters.
2486 for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
2487 {
2488 for (uint32_t j = 0; j < m_selector_cluster_block_indices[selector_cluster_index].size(); j++)
2489 {
2490 const uint32_t block_index = m_selector_cluster_block_indices[selector_cluster_index][j];
2491
2492 m_block_selector_cluster_index[block_index] = selector_cluster_index;
2493
2494 etc_block& blk = m_encoded_blocks[block_index];
2495 blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_cluster_index].get_raw_selector_bits());
2496 }
2497 }
2498
2499 debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
2500
2501 return;
2502 }
2503
2504 bool use_cpu = true;
2505
2506 if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks)
2507 {
2508 const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size();
2509
2510 basisu::vector<fosc_selector_struct> selector_structs;
2511 selector_structs.reserve(m_optimized_cluster_selectors.size());
2512
2513 uint_vec parent_selector_cluster_offsets(num_parent_clusters);
2514
2515 uint_vec selector_cluster_indices;
2516 selector_cluster_indices.reserve(m_optimized_cluster_selectors.size());
2517
2518 uint32_t cur_ofs = 0;
2519 for (uint32_t parent_index = 0; parent_index < num_parent_clusters; parent_index++)
2520 {
2521 parent_selector_cluster_offsets[parent_index] = cur_ofs;
2522
2523 for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[parent_index].size(); j++)
2524 {
2525 const uint32_t selector_cluster_index = m_selector_clusters_within_each_parent_cluster[parent_index][j];
2526
2527 uint32_t sel_bits = 0;
2528 for (uint32_t p = 0; p < 16; p++)
2529 sel_bits |= (m_optimized_cluster_selectors[selector_cluster_index].get_selector(p & 3, p >> 2) << (p * 2));
2530
2531 selector_structs.enlarge(1)->m_packed_selectors = sel_bits;
2532
2533 selector_cluster_indices.push_back(selector_cluster_index);
2534 }
2535
2536 cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size();
2537 }
2538
2539 const uint32_t total_input_selectors = cur_ofs;
2540
2541 basisu::vector<fosc_block_struct> block_structs(m_total_blocks);
2542 for (uint32_t i = 0; i < m_total_blocks; i++)
2543 {
2544 const uint32_t parent_selector_cluster = m_block_parent_selector_cluster[i];
2545
2546 const etc_block& blk = m_encoded_blocks[i];
2547 blk.unpack_color5(block_structs[i].m_etc_color5_inten, blk.get_base5_color(), false);
2548
2549 block_structs[i].m_etc_color5_inten.a = (uint8_t)blk.get_inten_table(0);
2550 block_structs[i].m_first_selector = parent_selector_cluster_offsets[parent_selector_cluster];
2551 block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size();
2552 }
2553
2554 uint_vec output_selector_cluster_indices(m_total_blocks);
2555
2556 bool status = opencl_find_optimal_selector_clusters_for_each_block(
2557 m_params.m_pOpenCL_context,
2558 block_structs.data(),
2559 total_input_selectors,
2560 selector_structs.data(),
2561 selector_cluster_indices.data(),
2562 output_selector_cluster_indices.data(),
2563 m_params.m_perceptual);
2564
2565 if (!status)
2566 {
2567 error_printf("basisu_frontend::find_optimal_selector_clusters_for_each_block: opencl_find_optimal_selector_clusters_for_each_block() failed! Using CPU.\n");
2568 m_params.m_pOpenCL_context = nullptr;
2569 m_opencl_failed = true;
2570 }
2571 else
2572 {
2573 for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
2574 {
2575 m_selector_cluster_block_indices[i].resize(0);
2576 m_selector_cluster_block_indices[i].reserve(128);
2577 }
2578
2579 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2580 {
2581 etc_block& blk = m_encoded_blocks[block_index];
2582
2583 uint32_t best_cluster_index = output_selector_cluster_indices[block_index];
2584
2585 blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());
2586
2587 m_block_selector_cluster_index[block_index] = best_cluster_index;
2588
2589 vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
2590 m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
2591 }
2592
2593 use_cpu = false;
2594 }
2595 }
2596
2597 if (use_cpu)
2598 {
2599 basisu::vector<uint8_t> unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size());
2600 for (uint32_t cluster_index = 0; cluster_index < m_optimized_cluster_selectors.size(); cluster_index++)
2601 {
2602 for (uint32_t y = 0; y < 4; y++)
2603 {
2604 for (uint32_t x = 0; x < 4; x++)
2605 {
2606 unpacked_optimized_cluster_selectors[cluster_index * 16 + y * 4 + x] = (uint8_t)m_optimized_cluster_selectors[cluster_index].get_selector(x, y);
2607 }
2608 }
2609 }
2610
2611 const uint32_t N = 2048;
2612 for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
2613 {
2614 const uint32_t first_index = block_index_iter;
2615 const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
2616
2617 #ifndef __EMSCRIPTEN__
2618 m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] {
2619 #endif
2620
2621 int prev_best_cluster_index = 0;
2622
2623 for (uint32_t block_index = first_index; block_index < last_index; block_index++)
2624 {
2625 const pixel_block& block = get_source_pixel_block(block_index);
2626
2627 etc_block& blk = m_encoded_blocks[block_index];
2628
2629 if ((block_index > first_index) && (block == get_source_pixel_block(block_index - 1)))
2630 {
2631 blk.set_raw_selector_bits(m_optimized_cluster_selectors[prev_best_cluster_index].get_raw_selector_bits());
2632
2633 m_block_selector_cluster_index[block_index] = prev_best_cluster_index;
2634
2635 continue;
2636 }
2637
2638 const color_rgba* pBlock_pixels = block.get_ptr();
2639
2640 color_rgba trial_block_colors[4];
2641 blk.get_block_colors_etc1s(trial_block_colors);
2642
2643 // precompute errors for the i-th block pixel and selector sel: [sel][i]
2644 uint32_t trial_errors[4][16];
2645
2646 if (m_params.m_perceptual)
2647 {
2648 for (uint32_t sel = 0; sel < 4; ++sel)
2649 for (uint32_t i = 0; i < 16; ++i)
2650 trial_errors[sel][i] = color_distance(true, pBlock_pixels[i], trial_block_colors[sel], false);
2651 }
2652 else
2653 {
2654 for (uint32_t sel = 0; sel < 4; ++sel)
2655 for (uint32_t i = 0; i < 16; ++i)
2656 trial_errors[sel][i] = color_distance(false, pBlock_pixels[i], trial_block_colors[sel], false);
2657 }
2658
2659 // Compute the minimum possible errors (given any selectors) for pixels 0-15
2660 uint64_t min_possible_error_0_15 = 0;
2661 for (uint32_t i = 0; i < 16; i++)
2662 min_possible_error_0_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2663
2664 // Compute the minimum possible errors (given any selectors) for pixels 4-15
2665 uint64_t min_possible_error_4_15 = 0;
2666 for (uint32_t i = 4; i < 16; i++)
2667 min_possible_error_4_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2668
2669 // Compute the minimum possible errors (given any selectors) for pixels 8-15
2670 uint64_t min_possible_error_8_15 = 0;
2671 for (uint32_t i = 8; i < 16; i++)
2672 min_possible_error_8_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2673
2674 // Compute the minimum possible errors (given any selectors) for pixels 12-15
2675 uint64_t min_possible_error_12_15 = 0;
2676 for (uint32_t i = 12; i < 16; i++)
2677 min_possible_error_12_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2678
2679 uint64_t best_cluster_err = INT64_MAX;
2680 uint32_t best_cluster_index = 0;
2681
2682 const uint32_t parent_selector_cluster = m_block_parent_selector_cluster.size() ? m_block_parent_selector_cluster[block_index] : 0;
2683 const uint_vec *pCluster_indices = m_selector_clusters_within_each_parent_cluster.size() ? &m_selector_clusters_within_each_parent_cluster[parent_selector_cluster] : nullptr;
2684
2685 const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_block_indices.size();
2686
2687 #if 0
2688 for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
2689 {
2690 const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;
2691
2692 const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index];
2693
2694 uint64_t trial_err = 0;
2695 for (int y = 0; y < 4; y++)
2696 {
2697 for (int x = 0; x < 4; x++)
2698 {
2699 const uint32_t sel = cluster_blk.get_selector(x, y);
2700
2701 trial_err += color_distance(m_params.m_perceptual, trial_block_colors[sel], pBlock_pixels[x + y * 4], false);
2702 if (trial_err > best_cluster_err)
2703 goto early_out;
2704 }
2705 }
2706
2707 if (trial_err < best_cluster_err)
2708 {
2709 best_cluster_err = trial_err;
2710 best_cluster_index = cluster_index;
2711 if (!best_cluster_err)
2712 break;
2713 }
2714
2715 early_out:
2716 ;
2717 }
2718 #else
2719 for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
2720 {
2721 const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;
2722
2723 const uint8_t* pSels = &unpacked_optimized_cluster_selectors[cluster_index * 16];
2724
2725 uint64_t trial_err = (uint64_t)trial_errors[pSels[0]][0] + trial_errors[pSels[1]][1] + trial_errors[pSels[2]][2] + trial_errors[pSels[3]][3];
2726 if ((trial_err + min_possible_error_4_15) >= best_cluster_err)
2727 continue;
2728
2729 trial_err += (uint64_t)trial_errors[pSels[4]][4] + trial_errors[pSels[5]][5] + trial_errors[pSels[6]][6] + trial_errors[pSels[7]][7];
2730 if ((trial_err + min_possible_error_8_15) >= best_cluster_err)
2731 continue;
2732
2733 trial_err += (uint64_t)trial_errors[pSels[8]][8] + trial_errors[pSels[9]][9] + trial_errors[pSels[10]][10] + trial_errors[pSels[11]][11];
2734 if ((trial_err + min_possible_error_12_15) >= best_cluster_err)
2735 continue;
2736
2737 trial_err += (uint64_t)trial_errors[pSels[12]][12] + trial_errors[pSels[13]][13] + trial_errors[pSels[14]][14] + trial_errors[pSels[15]][15];
2738
2739 if (trial_err < best_cluster_err)
2740 {
2741 best_cluster_err = trial_err;
2742 best_cluster_index = cluster_index;
2743 if (best_cluster_err == min_possible_error_0_15)
2744 break;
2745 }
2746
2747 } // cluster_iter
2748 #endif
2749
2750 blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());
2751
2752 m_block_selector_cluster_index[block_index] = best_cluster_index;
2753
2754 prev_best_cluster_index = best_cluster_index;
2755
2756 } // block_index
2757
2758 #ifndef __EMSCRIPTEN__
2759 } );
2760 #endif
2761
2762 } // block_index_iter
2763
2764 #ifndef __EMSCRIPTEN__
2765 m_params.m_pJob_pool->wait_for_all();
2766 #endif
2767
2768 for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
2769 {
2770 m_selector_cluster_block_indices[i].resize(0);
2771 m_selector_cluster_block_indices[i].reserve(128);
2772 }
2773
2774 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2775 {
2776 const uint32_t best_cluster_index = m_block_selector_cluster_index[block_index];
2777
2778 vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
2779 m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
2780 }
2781
2782 } // if (use_cpu)
2783
2784 debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
2785 }
2786
2787 // TODO: Remove old ETC1 specific stuff, and thread this.
2788 uint32_t basisu_frontend::refine_block_endpoints_given_selectors()
2789 {
2790 debug_printf("refine_block_endpoints_given_selectors\n");
2791
2792 for (int block_index = 0; block_index < static_cast<int>(m_total_blocks); block_index++)
2793 {
2794 //uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y);
2795 vec2U &endpoint_clusters = m_block_endpoint_clusters_indices[block_index];
2796
2797 m_endpoint_cluster_etc_params[endpoint_clusters[0]].m_subblocks.push_back(block_index * 2);
2798
2799 m_endpoint_cluster_etc_params[endpoint_clusters[1]].m_subblocks.push_back(block_index * 2 + 1);
2800 }
2801
2802 uint32_t total_subblocks_refined = 0;
2803 uint32_t total_subblocks_examined = 0;
2804
2805 for (uint32_t endpoint_cluster_index = 0; endpoint_cluster_index < m_endpoint_cluster_etc_params.size(); endpoint_cluster_index++)
2806 {
2807 endpoint_cluster_etc_params &subblock_params = m_endpoint_cluster_etc_params[endpoint_cluster_index];
2808
2809 const uint_vec &subblocks = subblock_params.m_subblocks;
2810 //uint32_t total_pixels = subblock.m_subblocks.size() * 8;
2811
2812 basisu::vector<color_rgba> subblock_colors[2]; // [use_individual_mode]
2813 uint8_vec subblock_selectors[2];
2814
2815 uint64_t cur_subblock_err[2] = { 0, 0 };
2816
2817 for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
2818 {
2819 uint32_t training_vector_index = subblocks[subblock_iter];
2820
2821 uint32_t block_index = training_vector_index >> 1;
2822 uint32_t subblock_index = training_vector_index & 1;
2823 const bool is_flipped = true;
2824
2825 const etc_block &blk = m_encoded_blocks[block_index];
2826
2827 const bool use_individual_mode = !blk.get_diff_bit();
2828
2829 const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();
2830
2831 color_rgba unpacked_block_pixels[16];
2832 unpack_etc1(blk, unpacked_block_pixels);
2833
2834 for (uint32_t i = 0; i < 8; i++)
2835 {
2836 const uint32_t pixel_index = g_etc1_pixel_indices[is_flipped][subblock_index][i];
2837 const etc_coord2 &coords = g_etc1_pixel_coords[is_flipped][subblock_index][i];
2838
2839 subblock_colors[use_individual_mode].push_back(pSource_block_pixels[pixel_index]);
2840
2841 cur_subblock_err[use_individual_mode] += color_distance(m_params.m_perceptual, pSource_block_pixels[pixel_index], unpacked_block_pixels[pixel_index], false);
2842
2843 subblock_selectors[use_individual_mode].push_back(static_cast<uint8_t>(blk.get_selector(coords.m_x, coords.m_y)));
2844 }
2845 } // subblock_iter
2846
2847 etc1_optimizer::results cluster_optimizer_results[2];
2848 bool results_valid[2] = { false, false };
2849
2850 clear_obj(cluster_optimizer_results);
2851
2852 basisu::vector<uint8_t> cluster_selectors[2];
2853
2854 for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
2855 {
2856 const uint32_t total_pixels = (uint32_t)subblock_colors[use_individual_mode].size();
2857
2858 if (!total_pixels)
2859 continue;
2860
2861 total_subblocks_examined += total_pixels / 8;
2862
2863 etc1_optimizer optimizer;
2864 etc1_solution_coordinates solutions[2];
2865
2866 etc1_optimizer::params cluster_optimizer_params;
2867 cluster_optimizer_params.m_num_src_pixels = total_pixels;
2868 cluster_optimizer_params.m_pSrc_pixels = &subblock_colors[use_individual_mode][0];
2869
2870 cluster_optimizer_params.m_use_color4 = use_individual_mode != 0;
2871 cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
2872
2873 cluster_optimizer_params.m_pForce_selectors = &subblock_selectors[use_individual_mode][0];
2874 cluster_optimizer_params.m_quality = cETCQualityUber;
2875
2876 cluster_selectors[use_individual_mode].resize(total_pixels);
2877
2878 cluster_optimizer_results[use_individual_mode].m_n = total_pixels;
2879 cluster_optimizer_results[use_individual_mode].m_pSelectors = &cluster_selectors[use_individual_mode][0];
2880
2881 optimizer.init(cluster_optimizer_params, cluster_optimizer_results[use_individual_mode]);
2882
2883 if (!optimizer.compute())
2884 continue;
2885
2886 if (cluster_optimizer_results[use_individual_mode].m_error < cur_subblock_err[use_individual_mode])
2887 results_valid[use_individual_mode] = true;
2888
2889 } // use_individual_mode
2890
2891 for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
2892 {
2893 if (!results_valid[use_individual_mode])
2894 continue;
2895
2896 uint32_t num_passes = use_individual_mode ? 1 : 2;
2897
2898 bool all_passed5 = true;
2899
2900 for (uint32_t pass = 0; pass < num_passes; pass++)
2901 {
2902 for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
2903 {
2904 const uint32_t training_vector_index = subblocks[subblock_iter];
2905
2906 const uint32_t block_index = training_vector_index >> 1;
2907 const uint32_t subblock_index = training_vector_index & 1;
2908 //const bool is_flipped = true;
2909
2910 etc_block &blk = m_encoded_blocks[block_index];
2911
2912 if (!blk.get_diff_bit() != static_cast<bool>(use_individual_mode != 0))
2913 continue;
2914
2915 if (use_individual_mode)
2916 {
2917 blk.set_base4_color(subblock_index, etc_block::pack_color4(cluster_optimizer_results[1].m_block_color_unscaled, false));
2918 blk.set_inten_table(subblock_index, cluster_optimizer_results[1].m_block_inten_table);
2919
2920 subblock_params.m_color_error[1] = cluster_optimizer_results[1].m_error;
2921 subblock_params.m_inten_table[1] = cluster_optimizer_results[1].m_block_inten_table;
2922 subblock_params.m_color_unscaled[1] = cluster_optimizer_results[1].m_block_color_unscaled;
2923
2924 total_subblocks_refined++;
2925 }
2926 else
2927 {
2928 const uint16_t base_color5 = blk.get_base5_color();
2929 const uint16_t delta_color3 = blk.get_delta3_color();
2930
2931 uint32_t r[2], g[2], b[2];
2932 etc_block::unpack_color5(r[0], g[0], b[0], base_color5, false);
2933 bool success = etc_block::unpack_color5(r[1], g[1], b[1], base_color5, delta_color3, false);
2934 assert(success);
2935 BASISU_NOTE_UNUSED(success);
2936
2937 r[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.r;
2938 g[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.g;
2939 b[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.b;
2940
2941 color_rgba colors[2] = { color_rgba(r[0], g[0], b[0], 255), color_rgba(r[1], g[1], b[1], 255) };
2942
2943 if (!etc_block::try_pack_color5_delta3(colors))
2944 {
2945 all_passed5 = false;
2946 break;
2947 }
2948
2949 if ((pass == 1) && (all_passed5))
2950 {
2951 blk.set_block_color5(colors[0], colors[1]);
2952 blk.set_inten_table(subblock_index, cluster_optimizer_results[0].m_block_inten_table);
2953
2954 subblock_params.m_color_error[0] = cluster_optimizer_results[0].m_error;
2955 subblock_params.m_inten_table[0] = cluster_optimizer_results[0].m_block_inten_table;
2956 subblock_params.m_color_unscaled[0] = cluster_optimizer_results[0].m_block_color_unscaled;
2957
2958 total_subblocks_refined++;
2959 }
2960 }
2961
2962 } // subblock_iter
2963
2964 } // pass
2965
2966 } // use_individual_mode
2967
2968 } // endpoint_cluster_index
2969
2970 if (m_params.m_debug_stats)
2971 debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined);
2972
2973 return total_subblocks_refined;
2974 }
2975
2976 void basisu_frontend::dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors)
2977 {
2978 debug_printf("dump_endpoint_clusterization_visualization\n");
2979
2980 uint32_t max_endpoint_cluster_size = 0;
2981
2982 basisu::vector<uint32_t> cluster_sizes(m_endpoint_clusters.size());
2983 basisu::vector<uint32_t> sorted_cluster_indices(m_endpoint_clusters.size());
2984 for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
2985 {
2986 max_endpoint_cluster_size = maximum<uint32_t>(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size());
2987 cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
2988 }
2989
2990 if (!max_endpoint_cluster_size)
2991 return;
2992
2993 for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
2994 sorted_cluster_indices[i] = i;
2995
2996 //indexed_heap_sort(endpoint_clusters.size(), cluster_sizes.get_ptr(), sorted_cluster_indices.get_ptr());
2997
2998 image endpoint_cluster_vis(12 + minimum<uint32_t>(max_endpoint_cluster_size, 2048) * 5, (uint32_t)m_endpoint_clusters.size() * 3);
2999
3000 for (uint32_t unsorted_cluster_iter = 0; unsorted_cluster_iter < m_endpoint_clusters.size(); unsorted_cluster_iter++)
3001 {
3002 const uint32_t cluster_iter = sorted_cluster_indices[unsorted_cluster_iter];
3003
3004 etc_block blk;
3005 blk.clear();
3006 blk.set_flip_bit(false);
3007 blk.set_diff_bit(true);
3008 blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]);
3009 blk.set_base5_color(etc_block::pack_color5(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0], false));
3010
3011 color_rgba blk_colors[4];
3012 blk.get_block_colors(blk_colors, 0);
3013 for (uint32_t i = 0; i < 4; i++)
3014 endpoint_cluster_vis.fill_box(i * 2, 3 * unsorted_cluster_iter, 2, 2, blk_colors[i]);
3015
3016 for (uint32_t subblock_iter = 0; subblock_iter < m_endpoint_clusters[cluster_iter].size(); subblock_iter++)
3017 {
3018 uint32_t training_vector_index = m_endpoint_clusters[cluster_iter][subblock_iter];
3019
3020 const uint32_t block_index = training_vector_index >> 1;
3021 const uint32_t subblock_index = training_vector_index & 1;
3022
3023 const etc_block& blk2 = m_etc1_blocks_etc1s[block_index];
3024
3025 const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
3026
3027 color_rgba subblock_pixels[8];
3028
3029 if (vis_endpoint_colors)
3030 {
3031 color_rgba colors[2];
3032 blk2.get_block_low_high_colors(colors, subblock_index);
3033 for (uint32_t i = 0; i < 8; i++)
3034 subblock_pixels[i] = colors[subblock_index];
3035 }
3036 else
3037 {
3038 for (uint32_t i = 0; i < 8; i++)
3039 subblock_pixels[i] = pBlock_pixels[g_etc1_pixel_indices[blk2.get_flip_bit()][subblock_index][i]];
3040 }
3041
3042 endpoint_cluster_vis.set_block_clipped(subblock_pixels, 12 + 5 * subblock_iter, 3 * unsorted_cluster_iter, 4, 2);
3043 }
3044 }
3045
3046 save_png(pFilename, endpoint_cluster_vis);
3047 debug_printf("Wrote debug visualization file %s\n", pFilename);
3048 }
3049
3050 void basisu_frontend::finalize()
3051 {
3052 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
3053 {
3054 for (uint32_t subblock_index = 0; subblock_index < 2; subblock_index++)
3055 {
3056 const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, subblock_index);
3057
3058 m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_used[0] = true;
3059 }
3060 }
3061 }
3062
3063 // The backend has remapped the block endpoints while optimizing the output symbols for better rate distortion performance, so let's go and reoptimize the endpoint codebook.
3064 // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up.
3065 // This is basically a bottom up clusterization stage, where some leaves can be combined.
3066 void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices)
3067 {
3068 debug_printf("reoptimize_remapped_endpoints\n");
3069
3070 basisu::vector<uint_vec> new_endpoint_cluster_block_indices(m_endpoint_clusters.size());
3071 for (uint32_t i = 0; i < new_block_endpoints.size(); i++)
3072 new_endpoint_cluster_block_indices[new_block_endpoints[i]].push_back(i);
3073
3074 basisu::vector<uint8_t> cluster_valid(new_endpoint_cluster_block_indices.size());
3075 basisu::vector<uint8_t> cluster_improved(new_endpoint_cluster_block_indices.size());
3076
3077 const uint32_t N = 256;
3078 for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N)
3079 {
3080 const uint32_t first_index = cluster_index_iter;
3081 const uint32_t last_index = minimum<uint32_t>((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N);
3082
3083#ifndef __EMSCRIPTEN__
3084 m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] {
3085#endif
3086
3087 for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
3088 {
3089 const basisu::vector<uint32_t>& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index];
3090
3091 if (!cluster_block_indices.size())
3092 continue;
3093
3094 const uint32_t total_pixels = (uint32_t)cluster_block_indices.size() * 16;
3095
3096 basisu::vector<color_rgba> cluster_pixels(total_pixels);
3097 uint8_vec force_selectors(total_pixels);
3098
3099 etc_block blk;
3100 blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false));
3101 blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false));
3102 blk.set_flip_bit(true);
3103
3104 uint64_t cur_err = 0;
3105
3106 for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++)
3107 {
3108 const uint32_t block_index = cluster_block_indices[cluster_block_indices_iter];
3109
3110 const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
3111
3112 memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba));
3113
3114 const uint32_t selector_cluster_index = pBlock_selector_indices ? (*pBlock_selector_indices)[block_index] : get_block_selector_cluster_index(block_index);
3115
3116 const etc_block &blk_selectors = get_selector_cluster_selector_bits(selector_cluster_index);
3117
3118 blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits());
3119
3120 cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual);
3121
3122 for (uint32_t y = 0; y < 4; y++)
3123 for (uint32_t x = 0; x < 4; x++)
3124 force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast<uint8_t>(blk_selectors.get_selector(x, y));
3125 }
3126
3127 endpoint_cluster_etc_params new_endpoint_cluster_etc_params;
3128
3129 {
3130 etc1_optimizer optimizer;
3131 etc1_solution_coordinates solutions[2];
3132
3133 etc1_optimizer::params cluster_optimizer_params;
3134 cluster_optimizer_params.m_num_src_pixels = total_pixels;
3135 cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];
3136
3137 cluster_optimizer_params.m_use_color4 = false;
3138 cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
3139 cluster_optimizer_params.m_pForce_selectors = &force_selectors[0];
3140
3141 if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
3142 cluster_optimizer_params.m_quality = cETCQualityUber;
3143 else
3144 cluster_optimizer_params.m_quality = cETCQualitySlow;
3145
3146 etc1_optimizer::results cluster_optimizer_results;
3147
3148 basisu::vector<uint8_t> cluster_selectors(total_pixels);
3149 cluster_optimizer_results.m_n = total_pixels;
3150 cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];
3151
3152 optimizer.init(cluster_optimizer_params, cluster_optimizer_results);
3153
3154 if (!optimizer.compute())
3155 BASISU_FRONTEND_VERIFY(false);
3156
3157 new_endpoint_cluster_etc_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
3158 new_endpoint_cluster_etc_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
3159 new_endpoint_cluster_etc_params.m_color_error[0] = cluster_optimizer_results.m_error;
3160 new_endpoint_cluster_etc_params.m_color_used[0] = true;
3161 new_endpoint_cluster_etc_params.m_valid = true;
3162 }
3163
3164 if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err)
3165 {
3166 m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params;
3167
3168 cluster_improved[cluster_index] = true;
3169 }
3170
3171 cluster_valid[cluster_index] = true;
3172
3173 } // cluster_index
3174
3175#ifndef __EMSCRIPTEN__
3176 } );
3177#endif
3178
3179 } // cluster_index_iter
3180
3181#ifndef __EMSCRIPTEN__
3182 m_params.m_pJob_pool->wait_for_all();
3183#endif
3184
3185 uint32_t total_unused_clusters = 0;
3186 uint32_t total_improved_clusters = 0;
3187
3188 old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size());
3189 vector_set_all(old_to_new_endpoint_cluster_indices, -1);
3190
3191 int total_new_endpoint_clusters = 0;
3192
3193 for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
3194 {
3195 if (!cluster_valid[old_cluster_index])
3196 total_unused_clusters++;
3197 else
3198 old_to_new_endpoint_cluster_indices[old_cluster_index] = total_new_endpoint_clusters++;
3199
3200 if (cluster_improved[old_cluster_index])
3201 total_improved_clusters++;
3202 }
3203
3204 debug_printf("Total unused clusters: %u\n", total_unused_clusters);
3205 debug_printf("Total improved_clusters: %u\n", total_improved_clusters);
3206 debug_printf("Total endpoint clusters: %u\n", total_new_endpoint_clusters);
3207
3208 if (optimize_final_codebook)
3209 {
3210 cluster_subblock_etc_params_vec new_endpoint_cluster_etc_params(total_new_endpoint_clusters);
3211
3212 for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
3213 {
3214 if (old_to_new_endpoint_cluster_indices[old_cluster_index] >= 0)
3215 new_endpoint_cluster_etc_params[old_to_new_endpoint_cluster_indices[old_cluster_index]] = m_endpoint_cluster_etc_params[old_cluster_index];
3216 }
3217
3218 debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 1\n");
3219
3220 basisu::vector<uint_vec> new_endpoint_clusters(total_new_endpoint_clusters);
3221
3222 for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++)
3223 {
3224 const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index];
3225
3226 const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index];
3227 BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0);
3228
3229 BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_clusters.size());
3230
3231 new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 0);
3232 new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 1);
3233
3234 BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_cluster_etc_params.size());
3235
3236 new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0);
3237 new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1);
3238
3239 m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index;
3240 m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index;
3241 }
3242
3243 debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n");
3244
3245 m_endpoint_clusters = new_endpoint_clusters;
3246 m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params;
3247
3248 eliminate_redundant_or_empty_endpoint_clusters();
3249
3250 debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 3\n");
3251
3252 for (uint32_t new_cluster_index = 0; new_cluster_index < m_endpoint_clusters.size(); new_cluster_index++)
3253 {
3254 for (uint32_t cluster_block_iter = 0; cluster_block_iter < m_endpoint_clusters[new_cluster_index].size(); cluster_block_iter++)
3255 {
3256 const uint32_t subblock_index = m_endpoint_clusters[new_cluster_index][cluster_block_iter];
3257 const uint32_t block_index = subblock_index >> 1;
3258
3259 m_block_endpoint_clusters_indices[block_index][0] = new_cluster_index;
3260 m_block_endpoint_clusters_indices[block_index][1] = new_cluster_index;
3261
3262 const uint32_t old_cluster_index = new_block_endpoints[block_index];
3263
3264 old_to_new_endpoint_cluster_indices[old_cluster_index] = new_cluster_index;
3265 }
3266 }
3267
3268 debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 4\n");
3269
3270 for (uint32_t block_index = 0; block_index < m_encoded_blocks.size(); block_index++)
3271 {
3272 const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);
3273
3274 m_encoded_blocks[block_index].set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
3275 m_encoded_blocks[block_index].set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
3276 }
3277
3278 debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size());
3279 }
3280
3281 //debug_printf("validate_output: %u\n", validate_output());
3282 }
3283
3284 // Endpoint clusterization hierarchy integrity checker.
3285 // Note this doesn't check for empty clusters.
3286 bool basisu_frontend::validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const
3287 {
3288 if (!m_endpoint_parent_clusters.size())
3289 return true;
3290
3291 int_vec subblock_parent_indices(m_total_blocks * 2);
3292 subblock_parent_indices.set_all(-1);
3293
3294 int_vec subblock_cluster_indices(m_total_blocks * 2);
3295 subblock_cluster_indices.set_all(-1);
3296
3297 for (uint32_t parent_index = 0; parent_index < m_endpoint_parent_clusters.size(); parent_index++)
3298 {
3299 for (uint32_t i = 0; i < m_endpoint_parent_clusters[parent_index].size(); i++)
3300 {
3301 uint32_t subblock_index = m_endpoint_parent_clusters[parent_index][i];
3302 if (subblock_index >= m_total_blocks * 2)
3303 return false;
3304
3305 // If the endpoint cluster lives in more than one parent node, that's wrong.
3306 if (subblock_parent_indices[subblock_index] != -1)
3307 return false;
3308
3309 subblock_parent_indices[subblock_index] = parent_index;
3310 }
3311 }
3312
3313 // Make sure all endpoint clusters are present in the parent cluster.
3314 for (uint32_t i = 0; i < subblock_parent_indices.size(); i++)
3315 {
3316 if (subblock_parent_indices[i] == -1)
3317 return false;
3318 }
3319
3320 for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
3321 {
3322 int parent_index = 0;
3323
3324 for (uint32_t i = 0; i < m_endpoint_clusters[cluster_index].size(); i++)
3325 {
3326 uint32_t subblock_index = m_endpoint_clusters[cluster_index][i];
3327 if (subblock_index >= m_total_blocks * 2)
3328 return false;
3329
3330 if (subblock_cluster_indices[subblock_index] != -1)
3331 return false;
3332
3333 subblock_cluster_indices[subblock_index] = cluster_index;
3334
3335 // There are transformations on the endpoint clusters that can break the strict tree requirement
3336 if (ensure_clusters_have_same_parents)
3337 {
3338 // Make sure all the subblocks are in the same parent cluster
3339 if (!i)
3340 parent_index = subblock_parent_indices[subblock_index];
3341 else if (subblock_parent_indices[subblock_index] != parent_index)
3342 return false;
3343 }
3344 }
3345 }
3346
3347 // Make sure all endpoint clusters are present in the parent cluster.
3348 for (uint32_t i = 0; i < subblock_cluster_indices.size(); i++)
3349 {
3350 if (subblock_cluster_indices[i] == -1)
3351 return false;
3352 }
3353
3354 return true;
3355 }
3356
3357 // This is very slow and only intended for debugging/development. It's enabled using the "-validate_etc1s" command line option.
3358 bool basisu_frontend::validate_output() const
3359 {
3360 debug_printf("validate_output\n");
3361
3362 if (!check_etc1s_constraints())
3363 return false;
3364
3365 for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
3366 {
3367//#define CHECK(x) do { if (!(x)) { DebugBreak(); return false; } } while(0)
3368#define CHECK(x) BASISU_FRONTEND_VERIFY(x);
3369
3370 CHECK(get_output_block(block_index).get_flip_bit() == true);
3371
3372 const bool diff_flag = get_diff_flag(block_index);
3373 CHECK(diff_flag == true);
3374
3375 etc_block blk;
3376 memset(&blk, 0, sizeof(blk));
3377 blk.set_flip_bit(true);
3378 blk.set_diff_bit(true);
3379
3380 const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
3381 const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
3382
3383 // basisu only supports ETC1S, so these must be equal.
3384 CHECK(endpoint_cluster0_index == endpoint_cluster1_index);
3385
3386 CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false)));
3387
3388 CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false));
3389
3390 blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false));
3391 blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false));
3392
3393 const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
3394 CHECK(selector_cluster_index < get_total_selector_clusters());
3395
3396 CHECK(vector_find(get_selector_cluster_block_indices(selector_cluster_index), block_index) != -1);
3397
3398 blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
3399
3400 const etc_block &rdo_output_block = get_output_block(block_index);
3401
3402 CHECK(rdo_output_block.get_flip_bit() == blk.get_flip_bit());
3403 CHECK(rdo_output_block.get_diff_bit() == blk.get_diff_bit());
3404 CHECK(rdo_output_block.get_inten_table(0) == blk.get_inten_table(0));
3405 CHECK(rdo_output_block.get_inten_table(1) == blk.get_inten_table(1));
3406 CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color());
3407 CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color());
3408 CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits());
3409
3410#undef CHECK
3411 }
3412
3413 return true;
3414 }
3415
3416 void basisu_frontend::dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks)
3417 {
3418 gpu_image g;
3419 g.init(texture_format::cETC1, num_blocks_x * 4, num_blocks_y * 4);
3420
3421 for (uint32_t y = 0; y < num_blocks_y; y++)
3422 {
3423 for (uint32_t x = 0; x < num_blocks_x; x++)
3424 {
3425 const uint32_t block_index = first_block + x + y * num_blocks_x;
3426
3427 etc_block &blk = *(etc_block *)g.get_block_ptr(x, y);
3428
3429 if (output_blocks)
3430 blk = get_output_block(block_index);
3431 else
3432 {
3433 const bool diff_flag = get_diff_flag(block_index);
3434
3435 blk.set_diff_bit(diff_flag);
3436 blk.set_flip_bit(true);
3437
3438 const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
3439 const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
3440
3441 if (diff_flag)
3442 blk.set_block_color5(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false));
3443 else
3444 blk.set_block_color4(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, true), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, true));
3445
3446 blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, !diff_flag));
3447 blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, !diff_flag));
3448
3449 const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
3450 blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
3451 }
3452 }
3453 }
3454
3455 image img;
3456 g.unpack(img);
3457
3458 save_png(pFilename, img);
3459 }
3460
3461} // namespace basisu
3462
3463