1// basisu_backend.cpp
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// TODO: This code originally supported full ETC1 and ETC1S, so there's some legacy stuff in here.
17//
18#include "basisu_backend.h"
19
20#if BASISU_SUPPORT_SSE
21#define CPPSPMD_NAME(a) a##_sse41
22#include "basisu_kernels_declares.h"
23#endif
24
25#define BASISU_FASTER_SELECTOR_REORDERING 0
26#define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__);
27
28namespace basisu
29{
30 // TODO
31 static inline void verify(bool condition, int line)
32 {
33 if (!condition)
34 {
35 fprintf(stderr, "ERROR: basisu_backend: verify() failed at line %i!\n", line);
36 abort();
37 }
38 }
39
40 basisu_backend::basisu_backend()
41 {
42 clear();
43 }
44
45 void basisu_backend::clear()
46 {
47 m_pFront_end = NULL;
48 m_params.clear();
49 m_output.clear();
50 }
51
52 void basisu_backend::init(basisu_frontend* pFront_end, basisu_backend_params& params, const basisu_backend_slice_desc_vec& slice_descs)
53 {
54 m_pFront_end = pFront_end;
55 m_params = params;
56 m_slices = slice_descs;
57
58 debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, EndpointRDOQualityThresh: %f, SelectorRDOQualityThresh: %f\n",
59 m_slices.size(),
60 params.m_etc1s,
61 params.m_endpoint_rdo_quality_thresh,
62 params.m_selector_rdo_quality_thresh);
63
64 debug_printf("Frontend endpoints: %u selectors: %u\n", m_pFront_end->get_total_endpoint_clusters(), m_pFront_end->get_total_selector_clusters());
65
66 for (uint32_t i = 0; i < m_slices.size(); i++)
67 {
68 debug_printf("Slice: %u, OrigWidth: %u, OrigHeight: %u, Width: %u, Height: %u, NumBlocksX: %u, NumBlocksY: %u, FirstBlockIndex: %u\n",
69 i,
70 m_slices[i].m_orig_width, m_slices[i].m_orig_height,
71 m_slices[i].m_width, m_slices[i].m_height,
72 m_slices[i].m_num_blocks_x, m_slices[i].m_num_blocks_y,
73 m_slices[i].m_first_block_index);
74 }
75 }
76
77 void basisu_backend::create_endpoint_palette()
78 {
79 const basisu_frontend& r = *m_pFront_end;
80
81 m_output.m_num_endpoints = r.get_total_endpoint_clusters();
82
83 m_endpoint_palette.resize(r.get_total_endpoint_clusters());
84 for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++)
85 {
86 etc1_endpoint_palette_entry& e = m_endpoint_palette[i];
87
88 e.m_color5_valid = r.get_endpoint_cluster_color_is_used(i, false);
89 e.m_color5 = r.get_endpoint_cluster_unscaled_color(i, false);
90 e.m_inten5 = r.get_endpoint_cluster_inten_table(i, false);
91
92 BASISU_BACKEND_VERIFY(e.m_color5_valid);
93 }
94 }
95
96 void basisu_backend::create_selector_palette()
97 {
98 const basisu_frontend& r = *m_pFront_end;
99
100 m_output.m_num_selectors = r.get_total_selector_clusters();
101
102 m_selector_palette.resize(r.get_total_selector_clusters());
103
104 for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
105 {
106 etc1_selector_palette_entry& s = m_selector_palette[i];
107
108 const etc_block& selector_bits = r.get_selector_cluster_selector_bits(i);
109
110 for (uint32_t y = 0; y < 4; y++)
111 {
112 for (uint32_t x = 0; x < 4; x++)
113 {
114 s[y * 4 + x] = static_cast<uint8_t>(selector_bits.get_selector(x, y));
115 }
116 }
117 }
118 }
119
120 static const struct
121 {
122 int8_t m_dx, m_dy;
123 } g_endpoint_preds[] =
124 {
125 { -1, 0 },
126 { 0, -1 },
127 { -1, -1 }
128 };
129
130 void basisu_backend::reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec& all_endpoint_indices)
131 {
132 basisu_frontend& r = *m_pFront_end;
133 //const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
134
135 if (m_params.m_used_global_codebooks)
136 {
137 m_endpoint_remap_table_old_to_new.clear();
138 m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters());
139 for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++)
140 m_endpoint_remap_table_old_to_new[i] = i;
141 }
142 else
143 {
144 //if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0))
145 if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 1))
146 {
147 // We've changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed)
148 uint_vec new_block_endpoints(get_total_blocks());
149
150 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
151 {
152 const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
153 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
154 const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
155
156 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
157 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
158 new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index;
159 }
160
161 int_vec old_to_new_endpoint_indices;
162 r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true);
163
164 create_endpoint_palette();
165
166 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
167 {
168 //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
169
170 //const uint32_t width = m_slices[slice_index].m_width;
171 //const uint32_t height = m_slices[slice_index].m_height;
172 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
173 const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
174
175 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
176 {
177 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
178 {
179 //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
180
181 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
182
183 m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index];
184 } // block_x
185 } // block_y
186 } // slice_index
187
188 for (uint32_t i = 0; i < all_endpoint_indices.size(); i++)
189 all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]];
190
191 } //if (total_block_endpoints_remapped)
192
193 // Sort endpoint codebook
194 palette_index_reorderer reorderer;
195 reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0);
196 m_endpoint_remap_table_old_to_new = reorderer.get_remap_table();
197 }
198
199 // For endpoints, old_to_new[] may not be bijective!
200 // Some "old" entries may be unused and don't get remapped into the "new" array.
201
202 m_old_endpoint_was_used.clear();
203 m_old_endpoint_was_used.resize(r.get_total_endpoint_clusters());
204 uint32_t first_old_entry_index = UINT32_MAX;
205
206 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
207 {
208 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y;
209 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
210 {
211 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
212 {
213 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
214 const uint32_t old_endpoint_index = m.m_endpoint_index;
215
216 m_old_endpoint_was_used[old_endpoint_index] = true;
217 first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index);
218 } // block_x
219 } // block_y
220 } // slice_index
221
222 debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index);
223
224 m_new_endpoint_was_used.clear();
225 m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters());
226
227 m_endpoint_remap_table_new_to_old.clear();
228 m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters());
229
230 // Set unused entries in the new array to point to the first used entry in the old array.
231 m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index);
232
233 for (uint32_t old_index = 0; old_index < m_endpoint_remap_table_old_to_new.size(); old_index++)
234 {
235 if (m_old_endpoint_was_used[old_index])
236 {
237 const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index];
238
239 m_new_endpoint_was_used[new_index] = true;
240
241 m_endpoint_remap_table_new_to_old[new_index] = old_index;
242 }
243 }
244 }
245
246 void basisu_backend::sort_selector_codebook()
247 {
248 basisu_frontend& r = *m_pFront_end;
249
250 m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters());
251
252 if ((m_params.m_compression_level == 0) || (m_params.m_used_global_codebooks))
253 {
254 for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
255 m_selector_remap_table_new_to_old[i] = i;
256 }
257 else
258 {
259 m_selector_remap_table_new_to_old[0] = 0;
260 uint32_t prev_selector_index = 0;
261
262 int_vec remaining_selectors;
263 remaining_selectors.reserve(r.get_total_selector_clusters() - 1);
264 for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++)
265 remaining_selectors.push_back(i);
266
267 uint_vec selector_palette_bytes(m_selector_palette.size());
268 for (uint32_t i = 0; i < m_selector_palette.size(); i++)
269 selector_palette_bytes[i] = m_selector_palette[i].get_byte(0) | (m_selector_palette[i].get_byte(1) << 8) | (m_selector_palette[i].get_byte(2) << 16) | (m_selector_palette[i].get_byte(3) << 24);
270
271 // This is the traveling salesman problem.
272 for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++)
273 {
274 uint32_t best_hamming_dist = 100;
275 uint32_t best_index = 0;
276
277#if BASISU_FASTER_SELECTOR_REORDERING
278 const uint32_t step = (remaining_selectors.size() > 16) ? 16 : 1;
279 for (uint32_t j = 0; j < remaining_selectors.size(); j += step)
280#else
281 for (uint32_t j = 0; j < remaining_selectors.size(); j++)
282#endif
283 {
284 int selector_index = remaining_selectors[j];
285
286 uint32_t k = selector_palette_bytes[prev_selector_index] ^ selector_palette_bytes[selector_index];
287 uint32_t hamming_dist = g_hamming_dist[k & 0xFF] + g_hamming_dist[(k >> 8) & 0xFF] + g_hamming_dist[(k >> 16) & 0xFF] + g_hamming_dist[k >> 24];
288
289 if (hamming_dist < best_hamming_dist)
290 {
291 best_hamming_dist = hamming_dist;
292 best_index = j;
293 if (best_hamming_dist <= 1)
294 break;
295 }
296 }
297
298 prev_selector_index = remaining_selectors[best_index];
299 m_selector_remap_table_new_to_old[i] = prev_selector_index;
300
301 remaining_selectors[best_index] = remaining_selectors.back();
302 remaining_selectors.resize(remaining_selectors.size() - 1);
303 }
304 }
305
306 m_selector_remap_table_old_to_new.resize(r.get_total_selector_clusters());
307 for (uint32_t i = 0; i < m_selector_remap_table_new_to_old.size(); i++)
308 m_selector_remap_table_old_to_new[m_selector_remap_table_new_to_old[i]] = i;
309 }
310 int basisu_backend::find_video_frame(int slice_index, int delta)
311 {
312 for (uint32_t s = 0; s < m_slices.size(); s++)
313 {
314 if ((int)m_slices[s].m_source_file_index != ((int)m_slices[slice_index].m_source_file_index + delta))
315 continue;
316 if (m_slices[s].m_mip_index != m_slices[slice_index].m_mip_index)
317 continue;
318
319 // Being super paranoid here.
320 if (m_slices[s].m_num_blocks_x != (m_slices[slice_index].m_num_blocks_x))
321 continue;
322 if (m_slices[s].m_num_blocks_y != (m_slices[slice_index].m_num_blocks_y))
323 continue;
324 if (m_slices[s].m_alpha != (m_slices[slice_index].m_alpha))
325 continue;
326 return s;
327 }
328
329 return -1;
330 }
331
332 void basisu_backend::check_for_valid_cr_blocks()
333 {
334 basisu_frontend& r = *m_pFront_end;
335 const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
336
337 if (!is_video)
338 return;
339
340 debug_printf("basisu_backend::check_for_valid_cr_blocks\n");
341
342 uint32_t total_crs = 0;
343 uint32_t total_invalid_crs = 0;
344
345 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
346 {
347 const bool is_iframe = m_slices[slice_index].m_iframe;
348 //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
349
350 //const uint32_t width = m_slices[slice_index].m_width;
351 //const uint32_t height = m_slices[slice_index].m_height;
352 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
353 const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
354 const int prev_frame_slice_index = find_video_frame(slice_index, -1);
355
356 // If we don't have a previous frame, and we're not an i-frame, something is wrong.
357 if ((prev_frame_slice_index < 0) && (!is_iframe))
358 {
359 BASISU_BACKEND_VERIFY(0);
360 }
361
362 if ((is_iframe) || (prev_frame_slice_index < 0))
363 {
364 // Ensure no blocks use CR's
365 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
366 {
367 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
368 {
369 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
370 BASISU_BACKEND_VERIFY(m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX);
371 }
372 }
373 }
374 else
375 {
376 // For blocks that use CR's, make sure the endpoints/selectors haven't really changed.
377 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
378 {
379 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
380 {
381 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
382
383 if (m.m_endpoint_predictor == basist::CR_ENDPOINT_PRED_INDEX)
384 {
385 total_crs++;
386
387 encoder_block& prev_m = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y);
388
389 if ((m.m_endpoint_index != prev_m.m_endpoint_index) || (m.m_selector_index != prev_m.m_selector_index))
390 {
391 total_invalid_crs++;
392 }
393 }
394 } // block_x
395 } // block_y
396
397 } // !slice_index
398
399 } // slice_index
400
401 debug_printf("Total CR's: %u, Total invalid CR's: %u\n", total_crs, total_invalid_crs);
402
403 BASISU_BACKEND_VERIFY(total_invalid_crs == 0);
404 }
405
406 void basisu_backend::create_encoder_blocks()
407 {
408 debug_printf("basisu_backend::create_encoder_blocks\n");
409
410 interval_timer tm;
411 tm.start();
412
413 basisu_frontend& r = *m_pFront_end;
414 const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
415
416 m_slice_encoder_blocks.resize(m_slices.size());
417
418 uint32_t total_endpoint_pred_missed = 0, total_endpoint_pred_hits = 0, total_block_endpoints_remapped = 0;
419
420 uint_vec all_endpoint_indices;
421 all_endpoint_indices.reserve(get_total_blocks());
422
423 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
424 {
425 const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1;
426 const bool is_iframe = m_slices[slice_index].m_iframe;
427 const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
428
429 //const uint32_t width = m_slices[slice_index].m_width;
430 //const uint32_t height = m_slices[slice_index].m_height;
431 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
432 const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
433
434 m_slice_encoder_blocks[slice_index].resize(num_blocks_x, num_blocks_y);
435
436 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
437 {
438 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
439 {
440 const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
441
442 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
443
444 m.m_endpoint_index = r.get_subblock_endpoint_cluster_index(block_index, 0);
445 BASISU_BACKEND_VERIFY(r.get_subblock_endpoint_cluster_index(block_index, 0) == r.get_subblock_endpoint_cluster_index(block_index, 1));
446
447 m.m_selector_index = r.get_block_selector_cluster_index(block_index);
448
449 m.m_endpoint_predictor = basist::NO_ENDPOINT_PRED_INDEX;
450
451 const uint32_t block_endpoint = m.m_endpoint_index;
452
453 uint32_t best_endpoint_pred = UINT32_MAX;
454
455 for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++)
456 {
457 if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX))
458 {
459 if ((prev_frame_slice_index != -1) && (!is_iframe))
460 {
461 const uint32_t cur_endpoint = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index;
462 const uint32_t cur_selector = m_slice_encoder_blocks[slice_index](block_x, block_y).m_selector_index;
463 const uint32_t prev_endpoint = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_endpoint_index;
464 const uint32_t prev_selector = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_selector_index;
465 if ((cur_endpoint == prev_endpoint) && (cur_selector == prev_selector))
466 {
467 best_endpoint_pred = basist::CR_ENDPOINT_PRED_INDEX;
468 m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_is_cr_target = true;
469 }
470 }
471 }
472 else
473 {
474 int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx;
475 if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x))
476 continue;
477
478 int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy;
479 if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y))
480 continue;
481
482 uint32_t pred_endpoint = m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index;
483
484 if (pred_endpoint == block_endpoint)
485 {
486 if (endpoint_pred < best_endpoint_pred)
487 {
488 best_endpoint_pred = endpoint_pred;
489 }
490 }
491 }
492
493 } // endpoint_pred
494
495 if (best_endpoint_pred != UINT32_MAX)
496 {
497 m.m_endpoint_predictor = best_endpoint_pred;
498
499 total_endpoint_pred_hits++;
500 }
501 else if (m_params.m_endpoint_rdo_quality_thresh > 0.0f)
502 {
503 const pixel_block& src_pixels = r.get_source_pixel_block(block_index);
504
505 etc_block etc_blk(r.get_output_block(block_index));
506
507 uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);
508
509 if (cur_err)
510 {
511 const uint64_t thresh_err = (uint64_t)(cur_err * maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh));
512
513 etc_block trial_etc_block(etc_blk);
514
515 uint64_t best_err = UINT64_MAX;
516 uint32_t best_endpoint_index = 0;
517
518 best_endpoint_pred = UINT32_MAX;
519
520 for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++)
521 {
522 if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX))
523 continue;
524
525 int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx;
526 if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x))
527 continue;
528
529 int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy;
530 if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y))
531 continue;
532
533 uint32_t pred_endpoint_index = m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index;
534
535 uint32_t pred_inten = r.get_endpoint_cluster_inten_table(pred_endpoint_index, false);
536 color_rgba pred_color = r.get_endpoint_cluster_unscaled_color(pred_endpoint_index, false);
537
538 trial_etc_block.set_block_color5(pred_color, pred_color);
539 trial_etc_block.set_inten_table(0, pred_inten);
540 trial_etc_block.set_inten_table(1, pred_inten);
541
542 color_rgba trial_colors[16];
543 unpack_etc1(trial_etc_block, trial_colors);
544
545 uint64_t trial_err = 0;
546 if (r.get_params().m_perceptual)
547 {
548 for (uint32_t p = 0; p < 16; p++)
549 {
550 trial_err += color_distance(true, src_pixels.get_ptr()[p], trial_colors[p], false);
551 if (trial_err > thresh_err)
552 break;
553 }
554 }
555 else
556 {
557 for (uint32_t p = 0; p < 16; p++)
558 {
559 trial_err += color_distance(false, src_pixels.get_ptr()[p], trial_colors[p], false);
560 if (trial_err > thresh_err)
561 break;
562 }
563 }
564
565 if (trial_err <= thresh_err)
566 {
567 if ((trial_err < best_err) || ((trial_err == best_err) && (endpoint_pred < best_endpoint_pred)))
568 {
569 best_endpoint_pred = endpoint_pred;
570 best_err = trial_err;
571 best_endpoint_index = pred_endpoint_index;
572 }
573 }
574 } // endpoint_pred
575
576 if (best_endpoint_pred != UINT32_MAX)
577 {
578 m.m_endpoint_index = best_endpoint_index;
579 m.m_endpoint_predictor = best_endpoint_pred;
580
581 total_endpoint_pred_hits++;
582 total_block_endpoints_remapped++;
583 }
584 else
585 {
586 total_endpoint_pred_missed++;
587 }
588 }
589 }
590 else
591 {
592 total_endpoint_pred_missed++;
593 }
594
595 if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
596 {
597 all_endpoint_indices.push_back(m.m_endpoint_index);
598 }
599
600 } // block_x
601
602 } // block_y
603
604 } // slice
605
606 debug_printf("total_endpoint_pred_missed: %u (%3.2f%%) total_endpoint_pred_hit: %u (%3.2f%%), total_block_endpoints_remapped: %u (%3.2f%%)\n",
607 total_endpoint_pred_missed, total_endpoint_pred_missed * 100.0f / get_total_blocks(),
608 total_endpoint_pred_hits, total_endpoint_pred_hits * 100.0f / get_total_blocks(),
609 total_block_endpoints_remapped, total_block_endpoints_remapped * 100.0f / get_total_blocks());
610
611 reoptimize_and_sort_endpoints_codebook(total_block_endpoints_remapped, all_endpoint_indices);
612
613 sort_selector_codebook();
614 check_for_valid_cr_blocks();
615
616 debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
617 }
618
619 void basisu_backend::compute_slice_crcs()
620 {
621 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
622 {
623 //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
624 const uint32_t width = m_slices[slice_index].m_width;
625 const uint32_t height = m_slices[slice_index].m_height;
626 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
627 const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
628
629 gpu_image gi;
630 gi.init(texture_format::cETC1, width, height);
631
632 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
633 {
634 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
635 {
636 //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
637
638 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
639
640 {
641 etc_block& output_block = *(etc_block*)gi.get_block_ptr(block_x, block_y);
642
643 output_block.set_diff_bit(true);
644 // Setting the flip bit to false to be compatible with the Khronos KDFS.
645 //output_block.set_flip_bit(true);
646 output_block.set_flip_bit(false);
647
648 const uint32_t endpoint_index = m.m_endpoint_index;
649
650 output_block.set_block_color5_etc1s(m_endpoint_palette[endpoint_index].m_color5);
651 output_block.set_inten_tables_etc1s(m_endpoint_palette[endpoint_index].m_inten5);
652
653 const uint32_t selector_idx = m.m_selector_index;
654
655 const etc1_selector_palette_entry& selectors = m_selector_palette[selector_idx];
656 for (uint32_t sy = 0; sy < 4; sy++)
657 for (uint32_t sx = 0; sx < 4; sx++)
658 output_block.set_selector(sx, sy, selectors(sx, sy));
659 }
660
661 } // block_x
662 } // block_y
663
664 m_output.m_slice_image_crcs[slice_index] = basist::crc16(gi.get_ptr(), gi.get_size_in_bytes(), 0);
665
666 if (m_params.m_debug_images)
667 {
668 image gi_unpacked;
669 gi.unpack(gi_unpacked);
670
671 char buf[256];
672#ifdef _WIN32
673 sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);
674#else
675 snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);
676#endif
677 save_png(buf, gi_unpacked);
678 }
679
680 } // slice_index
681 }
682
683 //uint32_t g_color_delta_hist[255 * 3 + 1];
684 //uint32_t g_color_delta_bad_hist[255 * 3 + 1];
685
686 // TODO: Split this into multiple methods.
687 bool basisu_backend::encode_image()
688 {
689 basisu_frontend& r = *m_pFront_end;
690 const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
691
692 uint32_t total_used_selector_history_buf = 0;
693 uint32_t total_selector_indices_remapped = 0;
694
695 basist::approx_move_to_front selector_history_buf(basist::MAX_SELECTOR_HISTORY_BUF_SIZE);
696 histogram selector_history_buf_histogram(basist::MAX_SELECTOR_HISTORY_BUF_SIZE);
697 histogram selector_histogram(r.get_total_selector_clusters() + basist::MAX_SELECTOR_HISTORY_BUF_SIZE + 1);
698 histogram selector_history_buf_rle_histogram(1 << basist::SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);
699
700 basisu::vector<uint_vec> selector_syms(m_slices.size());
701
702 const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters();
703 const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + basist::MAX_SELECTOR_HISTORY_BUF_SIZE;
704
705 m_output.m_slice_image_crcs.resize(m_slices.size());
706
707 histogram delta_endpoint_histogram(r.get_total_endpoint_clusters());
708
709 histogram endpoint_pred_histogram(basist::ENDPOINT_PRED_TOTAL_SYMBOLS);
710 basisu::vector<uint_vec> endpoint_pred_syms(m_slices.size());
711
712 uint32_t total_endpoint_indices_remapped = 0;
713
714 uint_vec block_endpoint_indices, block_selector_indices;
715
716 interval_timer tm;
717 tm.start();
718
719 const int COLOR_DELTA_THRESH = 8;
720 const int SEL_DIFF_THRESHOLD = 11;
721
722 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
723 {
724 //const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1;
725 //const int next_frame_slice_index = is_video ? find_video_frame(slice_index, 1) : -1;
726 const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
727 //const uint32_t width = m_slices[slice_index].m_width;
728 //const uint32_t height = m_slices[slice_index].m_height;
729 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
730 const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
731
732 selector_history_buf.reset();
733
734 int selector_history_buf_rle_count = 0;
735
736 int prev_endpoint_pred_sym_bits = -1, endpoint_pred_repeat_count = 0;
737
738 uint32_t prev_endpoint_index = 0;
739
740 vector2D<uint8_t> block_endpoints_are_referenced(num_blocks_x, num_blocks_y);
741
742 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
743 {
744 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
745 {
746 //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
747
748 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
749
750 if (m.m_endpoint_predictor == 0)
751 block_endpoints_are_referenced(block_x - 1, block_y) = true;
752 else if (m.m_endpoint_predictor == 1)
753 block_endpoints_are_referenced(block_x, block_y - 1) = true;
754 else if (m.m_endpoint_predictor == 2)
755 {
756 if (!is_video)
757 block_endpoints_are_referenced(block_x - 1, block_y - 1) = true;
758 }
759 if (is_video)
760 {
761 if (m.m_is_cr_target)
762 block_endpoints_are_referenced(block_x, block_y) = true;
763 }
764
765 } // block_x
766 } // block_y
767
768 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
769 {
770 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
771 {
772 const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
773
774 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
775
776 if (((block_x & 1) == 0) && ((block_y & 1) == 0))
777 {
778 uint32_t endpoint_pred_cur_sym_bits = 0;
779
780 for (uint32_t y = 0; y < 2; y++)
781 {
782 for (uint32_t x = 0; x < 2; x++)
783 {
784 const uint32_t bx = block_x + x;
785 const uint32_t by = block_y + y;
786
787 uint32_t pred = basist::NO_ENDPOINT_PRED_INDEX;
788 if ((bx < num_blocks_x) && (by < num_blocks_y))
789 pred = m_slice_encoder_blocks[slice_index](bx, by).m_endpoint_predictor;
790
791 endpoint_pred_cur_sym_bits |= (pred << (x * 2 + y * 4));
792 }
793 }
794
795 if ((int)endpoint_pred_cur_sym_bits == prev_endpoint_pred_sym_bits)
796 {
797 endpoint_pred_repeat_count++;
798 }
799 else
800 {
801 if (endpoint_pred_repeat_count > 0)
802 {
803 if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT)
804 {
805 endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
806 endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
807
808 endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count);
809 }
810 else
811 {
812 for (int j = 0; j < endpoint_pred_repeat_count; j++)
813 {
814 endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits);
815 endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits);
816 }
817 }
818
819 endpoint_pred_repeat_count = 0;
820 }
821
822 endpoint_pred_histogram.inc(endpoint_pred_cur_sym_bits);
823 endpoint_pred_syms[slice_index].push_back(endpoint_pred_cur_sym_bits);
824
825 prev_endpoint_pred_sym_bits = endpoint_pred_cur_sym_bits;
826 }
827 }
828
829 int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index];
830
831 if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
832 {
833 int endpoint_delta = new_endpoint_index - prev_endpoint_index;
834
835 if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y)))
836 {
837 const pixel_block& src_pixels = r.get_source_pixel_block(block_index);
838
839 etc_block etc_blk(r.get_output_block(block_index));
840
841 const uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);
842 const uint32_t cur_inten5 = etc_blk.get_inten_table(0);
843
844 const etc1_endpoint_palette_entry& cur_endpoints = m_endpoint_palette[m.m_endpoint_index];
845
846 if (cur_err)
847 {
848 const float endpoint_remap_thresh = maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh);
849 const uint64_t thresh_err = (uint64_t)(cur_err * endpoint_remap_thresh);
850
851 //const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 32;
852 const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 16;
853
854 if (!g_cpu_supports_sse41)
855 {
856 const uint64_t initial_best_trial_err = UINT64_MAX;
857 uint64_t best_trial_err = initial_best_trial_err;
858 int best_trial_idx = 0;
859
860 etc_block trial_etc_blk(etc_blk);
861
862 const int search_dist = minimum<int>(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST);
863 for (int d = -search_dist; d < search_dist; d++)
864 {
865 int trial_idx = prev_endpoint_index + d;
866 if (trial_idx < 0)
867 trial_idx += (int)r.get_total_endpoint_clusters();
868 else if (trial_idx >= (int)r.get_total_endpoint_clusters())
869 trial_idx -= (int)r.get_total_endpoint_clusters();
870
871 if (trial_idx == new_endpoint_index)
872 continue;
873
874 // Skip it if this new endpoint palette entry is actually never used.
875 if (!m_new_endpoint_was_used[trial_idx])
876 continue;
877
878 const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]];
879
880 if (m_params.m_compression_level <= 1)
881 {
882 if (p.m_inten5 > cur_inten5)
883 continue;
884
885 int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r);
886 int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g);
887 int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b);
888 int color_delta = delta_r + delta_g + delta_b;
889
890 if (color_delta > COLOR_DELTA_THRESH)
891 continue;
892 }
893
894 trial_etc_blk.set_block_color5_etc1s(p.m_color5);
895 trial_etc_blk.set_inten_tables_etc1s(p.m_inten5);
896
897 uint64_t trial_err = trial_etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);
898
899 if ((trial_err < best_trial_err) && (trial_err <= thresh_err))
900 {
901 best_trial_err = trial_err;
902 best_trial_idx = trial_idx;
903 }
904 }
905
906 if (best_trial_err != initial_best_trial_err)
907 {
908 m.m_endpoint_index = m_endpoint_remap_table_new_to_old[best_trial_idx];
909
910 new_endpoint_index = best_trial_idx;
911
912 endpoint_delta = new_endpoint_index - prev_endpoint_index;
913
914 total_endpoint_indices_remapped++;
915 }
916 }
917 else
918 {
919#if BASISU_SUPPORT_SSE
920 uint8_t block_selectors[16];
921 for (uint32_t i = 0; i < 16; i++)
922 block_selectors[i] = (uint8_t)etc_blk.get_selector(i & 3, i >> 2);
923
924 const int64_t initial_best_trial_err = INT64_MAX;
925 int64_t best_trial_err = initial_best_trial_err;
926 int best_trial_idx = 0;
927
928 const int search_dist = minimum<int>(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST);
929 for (int d = -search_dist; d < search_dist; d++)
930 {
931 int trial_idx = prev_endpoint_index + d;
932 if (trial_idx < 0)
933 trial_idx += (int)r.get_total_endpoint_clusters();
934 else if (trial_idx >= (int)r.get_total_endpoint_clusters())
935 trial_idx -= (int)r.get_total_endpoint_clusters();
936
937 if (trial_idx == new_endpoint_index)
938 continue;
939
940 // Skip it if this new endpoint palette entry is actually never used.
941 if (!m_new_endpoint_was_used[trial_idx])
942 continue;
943
944 const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]];
945
946 if (m_params.m_compression_level <= 1)
947 {
948 if (p.m_inten5 > cur_inten5)
949 continue;
950
951 int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r);
952 int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g);
953 int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b);
954 int color_delta = delta_r + delta_g + delta_b;
955
956 if (color_delta > COLOR_DELTA_THRESH)
957 continue;
958 }
959
960 color_rgba block_colors[4];
961 etc_block::get_block_colors_etc1s(block_colors, p.m_color5, p.m_inten5);
962
963 int64_t trial_err;
964 if (r.get_params().m_perceptual)
965 {
966 perceptual_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err);
967 }
968 else
969 {
970 linear_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err);
971 }
972
973 //if (trial_err > thresh_err)
974 // g_color_delta_bad_hist[color_delta]++;
975
976 if ((trial_err < best_trial_err) && (trial_err <= (int64_t)thresh_err))
977 {
978 best_trial_err = trial_err;
979 best_trial_idx = trial_idx;
980 }
981 }
982
983 if (best_trial_err != initial_best_trial_err)
984 {
985 m.m_endpoint_index = m_endpoint_remap_table_new_to_old[best_trial_idx];
986
987 new_endpoint_index = best_trial_idx;
988
989 endpoint_delta = new_endpoint_index - prev_endpoint_index;
990
991 total_endpoint_indices_remapped++;
992 }
993#endif // BASISU_SUPPORT_SSE
994 } // if (!g_cpu_supports_sse41)
995
996 } // if (cur_err)
997
998 } // if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y)))
999
1000 if (endpoint_delta < 0)
1001 endpoint_delta += (int)r.get_total_endpoint_clusters();
1002
1003 delta_endpoint_histogram.inc(endpoint_delta);
1004
1005 } // if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
1006
1007 block_endpoint_indices.push_back(m_endpoint_remap_table_new_to_old[new_endpoint_index]);
1008
1009 prev_endpoint_index = new_endpoint_index;
1010
1011 if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX))
1012 {
1013 int new_selector_index = m_selector_remap_table_old_to_new[m.m_selector_index];
1014
1015 const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f;
1016
1017 int selector_history_buf_index = -1;
1018
1019 // At low comp levels this hurts compression a tiny amount, but is significantly faster so it's a good tradeoff.
1020 if ((m.m_is_cr_target) || (m_params.m_compression_level <= 1))
1021 {
1022 for (uint32_t j = 0; j < selector_history_buf.size(); j++)
1023 {
1024 const int trial_idx = selector_history_buf[j];
1025 if (trial_idx == new_selector_index)
1026 {
1027 total_used_selector_history_buf++;
1028 selector_history_buf_index = j;
1029 selector_history_buf_histogram.inc(j);
1030 break;
1031 }
1032 }
1033 }
1034
1035 // If the block is a CR target we can't override its selectors.
1036 if ((!m.m_is_cr_target) && (selector_history_buf_index == -1))
1037 {
1038 const pixel_block& src_pixels = r.get_source_pixel_block(block_index);
1039
1040 etc_block etc_blk = r.get_output_block(block_index);
1041
1042 // This is new code - the initial release just used the endpoints from the frontend, which isn't correct/accurate.
1043 const etc1_endpoint_palette_entry& q = m_endpoint_palette[m_endpoint_remap_table_new_to_old[new_endpoint_index]];
1044 etc_blk.set_block_color5_etc1s(q.m_color5);
1045 etc_blk.set_inten_tables_etc1s(q.m_inten5);
1046
1047 color_rgba block_colors[4];
1048 etc_blk.get_block_colors(block_colors, 0);
1049
1050 const uint8_t* pCur_selectors = &m_selector_palette[m.m_selector_index][0];
1051
1052 uint64_t cur_err = 0;
1053 if (r.get_params().m_perceptual)
1054 {
1055 for (uint32_t p = 0; p < 16; p++)
1056 cur_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false);
1057 }
1058 else
1059 {
1060 for (uint32_t p = 0; p < 16; p++)
1061 cur_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false);
1062 }
1063
1064 const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh);
1065
1066 // Even if cur_err==limit_err, we still want to scan the history buffer because there may be equivalent entries that are cheaper to code.
1067
1068 uint64_t best_trial_err = UINT64_MAX;
1069 int best_trial_idx = 0;
1070 uint32_t best_trial_history_buf_idx = 0;
1071
1072 for (uint32_t j = 0; j < selector_history_buf.size(); j++)
1073 {
1074 const int trial_idx = selector_history_buf[j];
1075
1076 const uint8_t* pSelectors = &m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]][0];
1077
1078 if (m_params.m_compression_level <= 1)
1079 {
1080 // Predict if evaluating the full color error would cause an early out, by summing the abs err of the selector indices.
1081 int sel_diff = 0;
1082 for (uint32_t p = 0; p < 16; p += 4)
1083 {
1084 sel_diff += iabs(pCur_selectors[p + 0] - pSelectors[p + 0]);
1085 sel_diff += iabs(pCur_selectors[p + 1] - pSelectors[p + 1]);
1086 sel_diff += iabs(pCur_selectors[p + 2] - pSelectors[p + 2]);
1087 sel_diff += iabs(pCur_selectors[p + 3] - pSelectors[p + 3]);
1088 if (sel_diff >= SEL_DIFF_THRESHOLD)
1089 break;
1090 }
1091 if (sel_diff >= SEL_DIFF_THRESHOLD)
1092 continue;
1093 }
1094
1095 const uint64_t thresh_err = minimum(limit_err, best_trial_err);
1096 uint64_t trial_err = 0;
1097
1098 // This tends to early out quickly, so SSE has a hard time competing.
1099 if (r.get_params().m_perceptual)
1100 {
1101 for (uint32_t p = 0; p < 16; p++)
1102 {
1103 uint32_t sel = pSelectors[p];
1104 trial_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[sel], false);
1105 if (trial_err > thresh_err)
1106 break;
1107 }
1108 }
1109 else
1110 {
1111 for (uint32_t p = 0; p < 16; p++)
1112 {
1113 uint32_t sel = pSelectors[p];
1114 trial_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[sel], false);
1115 if (trial_err > thresh_err)
1116 break;
1117 }
1118 }
1119
1120 if ((trial_err < best_trial_err) && (trial_err <= thresh_err))
1121 {
1122 assert(trial_err <= limit_err);
1123
1124 best_trial_err = trial_err;
1125 best_trial_idx = trial_idx;
1126 best_trial_history_buf_idx = j;
1127 }
1128 }
1129
1130 if (best_trial_err != UINT64_MAX)
1131 {
1132 if (new_selector_index != best_trial_idx)
1133 total_selector_indices_remapped++;
1134
1135 new_selector_index = best_trial_idx;
1136
1137 total_used_selector_history_buf++;
1138
1139 selector_history_buf_index = best_trial_history_buf_idx;
1140
1141 selector_history_buf_histogram.inc(best_trial_history_buf_idx);
1142 }
1143
1144 } // if (m_params.m_selector_rdo_quality_thresh > 0.0f)
1145
1146 m.m_selector_index = m_selector_remap_table_new_to_old[new_selector_index];
1147
1148
1149 if ((selector_history_buf_rle_count) && (selector_history_buf_index != 0))
1150 {
1151 if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)
1152 {
1153 selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
1154 selector_syms[slice_index].push_back(selector_history_buf_rle_count);
1155
1156 int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
1157 if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
1158 selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);
1159 else
1160 selector_history_buf_rle_histogram.inc(run_sym);
1161
1162 selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
1163 }
1164 else
1165 {
1166 for (int k = 0; k < selector_history_buf_rle_count; k++)
1167 {
1168 uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;
1169
1170 selector_syms[slice_index].push_back(sym_index);
1171
1172 selector_histogram.inc(sym_index);
1173 }
1174 }
1175
1176 selector_history_buf_rle_count = 0;
1177 }
1178
1179 if (selector_history_buf_index >= 0)
1180 {
1181 if (selector_history_buf_index == 0)
1182 selector_history_buf_rle_count++;
1183 else
1184 {
1185 uint32_t history_buf_sym = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + selector_history_buf_index;
1186
1187 selector_syms[slice_index].push_back(history_buf_sym);
1188
1189 selector_histogram.inc(history_buf_sym);
1190 }
1191 }
1192 else
1193 {
1194 selector_syms[slice_index].push_back(new_selector_index);
1195
1196 selector_histogram.inc(new_selector_index);
1197 }
1198
1199 m.m_selector_history_buf_index = selector_history_buf_index;
1200
1201 if (selector_history_buf_index < 0)
1202 selector_history_buf.add(new_selector_index);
1203 else if (selector_history_buf.size())
1204 selector_history_buf.use(selector_history_buf_index);
1205 }
1206 block_selector_indices.push_back(m.m_selector_index);
1207
1208 } // block_x
1209
1210 } // block_y
1211
1212 if (endpoint_pred_repeat_count > 0)
1213 {
1214 if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT)
1215 {
1216 endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
1217 endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
1218
1219 endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count);
1220 }
1221 else
1222 {
1223 for (int j = 0; j < endpoint_pred_repeat_count; j++)
1224 {
1225 endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits);
1226 endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits);
1227 }
1228 }
1229
1230 endpoint_pred_repeat_count = 0;
1231 }
1232
1233 if (selector_history_buf_rle_count)
1234 {
1235 if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)
1236 {
1237 selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
1238 selector_syms[slice_index].push_back(selector_history_buf_rle_count);
1239
1240 int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
1241 if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
1242 selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);
1243 else
1244 selector_history_buf_rle_histogram.inc(run_sym);
1245
1246 selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
1247 }
1248 else
1249 {
1250 for (int i = 0; i < selector_history_buf_rle_count; i++)
1251 {
1252 uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;
1253
1254 selector_syms[slice_index].push_back(sym_index);
1255
1256 selector_histogram.inc(sym_index);
1257 }
1258 }
1259
1260 selector_history_buf_rle_count = 0;
1261 }
1262
1263 } // slice_index
1264
1265 //for (int i = 0; i <= 255 * 3; i++)
1266 //{
1267 // printf("%u, %u, %f\n", g_color_delta_bad_hist[i], g_color_delta_hist[i], g_color_delta_hist[i] ? g_color_delta_bad_hist[i] / (float)g_color_delta_hist[i] : 0);
1268 //}
1269
1270 double total_prep_time = tm.get_elapsed_secs();
1271 debug_printf("basisu_backend::encode_image: Total prep time: %3.2f\n", total_prep_time);
1272
1273 debug_printf("Endpoint pred RDO total endpoint indices remapped: %u %3.2f%%\n",
1274 total_endpoint_indices_remapped, total_endpoint_indices_remapped * 100.0f / get_total_blocks());
1275
1276 debug_printf("Selector history RDO total selector indices remapped: %u %3.2f%%, Used history buf: %u %3.2f%%\n",
1277 total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / get_total_blocks(),
1278 total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / get_total_blocks());
1279
1280 //if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0))
1281 if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 1) && (!m_params.m_used_global_codebooks))
1282 {
1283 int_vec unused;
1284 r.reoptimize_remapped_endpoints(block_endpoint_indices, unused, false, &block_selector_indices);
1285
1286 create_endpoint_palette();
1287 }
1288
1289 check_for_valid_cr_blocks();
1290 compute_slice_crcs();
1291
1292 double endpoint_pred_entropy = endpoint_pred_histogram.get_entropy() / endpoint_pred_histogram.get_total();
1293 double delta_endpoint_entropy = delta_endpoint_histogram.get_entropy() / delta_endpoint_histogram.get_total();
1294 double selector_entropy = selector_histogram.get_entropy() / selector_histogram.get_total();
1295
1296 debug_printf("Histogram entropy: EndpointPred: %3.3f DeltaEndpoint: %3.3f DeltaSelector: %3.3f\n", endpoint_pred_entropy, delta_endpoint_entropy, selector_entropy);
1297
1298 if (!endpoint_pred_histogram.get_total())
1299 endpoint_pred_histogram.inc(0);
1300 huffman_encoding_table endpoint_pred_model;
1301 if (!endpoint_pred_model.init(endpoint_pred_histogram, 16))
1302 {
1303 error_printf("endpoint_pred_model.init() failed!");
1304 return false;
1305 }
1306
1307 if (!delta_endpoint_histogram.get_total())
1308 delta_endpoint_histogram.inc(0);
1309 huffman_encoding_table delta_endpoint_model;
1310 if (!delta_endpoint_model.init(delta_endpoint_histogram, 16))
1311 {
1312 error_printf("delta_endpoint_model.init() failed!");
1313 return false;
1314 }
1315 if (!selector_histogram.get_total())
1316 selector_histogram.inc(0);
1317
1318 huffman_encoding_table selector_model;
1319 if (!selector_model.init(selector_histogram, 16))
1320 {
1321 error_printf("selector_model.init() failed!");
1322 return false;
1323 }
1324
1325 if (!selector_history_buf_rle_histogram.get_total())
1326 selector_history_buf_rle_histogram.inc(0);
1327
1328 huffman_encoding_table selector_history_buf_rle_model;
1329 if (!selector_history_buf_rle_model.init(selector_history_buf_rle_histogram, 16))
1330 {
1331 error_printf("selector_history_buf_rle_model.init() failed!");
1332 return false;
1333 }
1334
1335 bitwise_coder coder;
1336 coder.init(1024 * 1024 * 4);
1337
1338 uint32_t endpoint_pred_model_bits = coder.emit_huffman_table(endpoint_pred_model);
1339 uint32_t delta_endpoint_bits = coder.emit_huffman_table(delta_endpoint_model);
1340 uint32_t selector_model_bits = coder.emit_huffman_table(selector_model);
1341 uint32_t selector_history_buf_run_sym_bits = coder.emit_huffman_table(selector_history_buf_rle_model);
1342
1343 coder.put_bits(basist::MAX_SELECTOR_HISTORY_BUF_SIZE, 13);
1344
1345 debug_printf("Model sizes: EndpointPred: %u bits %u bytes (%3.3f bpp) DeltaEndpoint: %u bits %u bytes (%3.3f bpp) Selector: %u bits %u bytes (%3.3f bpp) SelectorHistBufRLE: %u bits %u bytes (%3.3f bpp)\n",
1346 endpoint_pred_model_bits, (endpoint_pred_model_bits + 7) / 8, endpoint_pred_model_bits / float(get_total_input_texels()),
1347 delta_endpoint_bits, (delta_endpoint_bits + 7) / 8, delta_endpoint_bits / float(get_total_input_texels()),
1348 selector_model_bits, (selector_model_bits + 7) / 8, selector_model_bits / float(get_total_input_texels()),
1349 selector_history_buf_run_sym_bits, (selector_history_buf_run_sym_bits + 7) / 8, selector_history_buf_run_sym_bits / float(get_total_input_texels()));
1350
1351 coder.flush();
1352
1353 m_output.m_slice_image_tables = coder.get_bytes();
1354
1355 uint32_t total_endpoint_pred_bits = 0, total_delta_endpoint_bits = 0, total_selector_bits = 0;
1356
1357 uint32_t total_image_bytes = 0;
1358
1359 m_output.m_slice_image_data.resize(m_slices.size());
1360
1361 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
1362 {
1363 //const uint32_t width = m_slices[slice_index].m_width;
1364 //const uint32_t height = m_slices[slice_index].m_height;
1365 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
1366 const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
1367
1368 coder.init(1024 * 1024 * 4);
1369
1370 uint32_t cur_selector_sym_ofs = 0;
1371 uint32_t selector_rle_count = 0;
1372
1373 int endpoint_pred_repeat_count = 0;
1374 uint32_t cur_endpoint_pred_sym_ofs = 0;
1375// uint32_t prev_endpoint_pred_sym = 0;
1376 uint32_t prev_endpoint_index = 0;
1377
1378 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
1379 {
1380 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
1381 {
1382 const encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
1383
1384 if (((block_x & 1) == 0) && ((block_y & 1) == 0))
1385 {
1386 if (endpoint_pred_repeat_count > 0)
1387 {
1388 endpoint_pred_repeat_count--;
1389 }
1390 else
1391 {
1392 uint32_t sym = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++];
1393
1394 if (sym == basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL)
1395 {
1396 total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model);
1397
1398 endpoint_pred_repeat_count = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++];
1399 assert(endpoint_pred_repeat_count >= (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT);
1400
1401 total_endpoint_pred_bits += coder.put_vlc(endpoint_pred_repeat_count - basist::ENDPOINT_PRED_MIN_REPEAT_COUNT, basist::ENDPOINT_PRED_COUNT_VLC_BITS);
1402
1403 endpoint_pred_repeat_count--;
1404 }
1405 else
1406 {
1407 total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model);
1408
1409 //prev_endpoint_pred_sym = sym;
1410 }
1411 }
1412 }
1413
1414 const int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index];
1415
1416 if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
1417 {
1418 int endpoint_delta = new_endpoint_index - prev_endpoint_index;
1419 if (endpoint_delta < 0)
1420 endpoint_delta += (int)r.get_total_endpoint_clusters();
1421
1422 total_delta_endpoint_bits += coder.put_code(endpoint_delta, delta_endpoint_model);
1423 }
1424
1425 prev_endpoint_index = new_endpoint_index;
1426
1427 if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX))
1428 {
1429 if (!selector_rle_count)
1430 {
1431 uint32_t selector_sym_index = selector_syms[slice_index][cur_selector_sym_ofs++];
1432
1433 if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)
1434 selector_rle_count = selector_syms[slice_index][cur_selector_sym_ofs++];
1435
1436 total_selector_bits += coder.put_code(selector_sym_index, selector_model);
1437
1438 if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)
1439 {
1440 int run_sym = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
1441 if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
1442 {
1443 total_selector_bits += coder.put_code(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1, selector_history_buf_rle_model);
1444
1445 uint32_t n = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
1446 total_selector_bits += coder.put_vlc(n, 7);
1447 }
1448 else
1449 total_selector_bits += coder.put_code(run_sym, selector_history_buf_rle_model);
1450 }
1451 }
1452
1453 if (selector_rle_count)
1454 selector_rle_count--;
1455 }
1456
1457 } // block_x
1458
1459 } // block_y
1460
1461 BASISU_BACKEND_VERIFY(cur_endpoint_pred_sym_ofs == endpoint_pred_syms[slice_index].size());
1462 BASISU_BACKEND_VERIFY(cur_selector_sym_ofs == selector_syms[slice_index].size());
1463
1464 coder.flush();
1465
1466 m_output.m_slice_image_data[slice_index] = coder.get_bytes();
1467
1468 total_image_bytes += (uint32_t)coder.get_bytes().size();
1469
1470 debug_printf("Slice %u compressed size: %u bytes, %3.3f bits per slice texel\n", slice_index, m_output.m_slice_image_data[slice_index].size(), m_output.m_slice_image_data[slice_index].size() * 8.0f / (m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height));
1471
1472 } // slice_index
1473
1474 const double total_texels = static_cast<double>(get_total_input_texels());
1475 const double total_blocks = static_cast<double>(get_total_blocks());
1476
1477 debug_printf("Total endpoint pred bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_endpoint_pred_bits, total_endpoint_pred_bits / 8, total_endpoint_pred_bits / total_texels, total_endpoint_pred_bits / total_blocks);
1478 debug_printf("Total delta endpoint bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_delta_endpoint_bits, total_delta_endpoint_bits / 8, total_delta_endpoint_bits / total_texels, total_delta_endpoint_bits / total_blocks);
1479 debug_printf("Total selector bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_selector_bits, total_selector_bits / 8, total_selector_bits / total_texels, total_selector_bits / total_blocks);
1480
1481 debug_printf("Total table bytes: %u, %3.3f bits/texel\n", m_output.m_slice_image_tables.size(), m_output.m_slice_image_tables.size() * 8.0f / total_texels);
1482 debug_printf("Total image bytes: %u, %3.3f bits/texel\n", total_image_bytes, total_image_bytes * 8.0f / total_texels);
1483
1484 return true;
1485 }
1486
1487 bool basisu_backend::encode_endpoint_palette()
1488 {
1489 const basisu_frontend& r = *m_pFront_end;
1490
1491 // The endpoint indices may have been changed by the backend's RDO step, so go and figure out which ones are actually used again.
1492 bool_vec old_endpoint_was_used(r.get_total_endpoint_clusters());
1493 uint32_t first_old_entry_index = UINT32_MAX;
1494
1495 for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
1496 {
1497 const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y;
1498 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
1499 {
1500 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
1501 {
1502 encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
1503 const uint32_t old_endpoint_index = m.m_endpoint_index;
1504
1505 old_endpoint_was_used[old_endpoint_index] = true;
1506 first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index);
1507 } // block_x
1508 } // block_y
1509 } // slice_index
1510
1511 debug_printf("basisu_backend::encode_endpoint_palette: first_old_entry_index: %u\n", first_old_entry_index);
1512
1513 // Maps NEW to OLD endpoints
1514 uint_vec endpoint_remap_table_new_to_old(r.get_total_endpoint_clusters());
1515 endpoint_remap_table_new_to_old.set_all(first_old_entry_index);
1516
1517 bool_vec new_endpoint_was_used(r.get_total_endpoint_clusters());
1518
1519 for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++)
1520 {
1521 if (old_endpoint_was_used[old_endpoint_index])
1522 {
1523 const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index];
1524
1525 new_endpoint_was_used[new_endpoint_index] = true;
1526
1527 endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index;
1528 }
1529 }
1530
1531 // TODO: Some new endpoint palette entries may actually be unused and aren't worth coding. Fix that.
1532
1533 uint32_t total_unused_new_entries = 0;
1534 for (uint32_t i = 0; i < new_endpoint_was_used.size(); i++)
1535 if (!new_endpoint_was_used[i])
1536 total_unused_new_entries++;
1537 debug_printf("basisu_backend::encode_endpoint_palette: total_unused_new_entries: %u out of %u\n", total_unused_new_entries, new_endpoint_was_used.size());
1538
1539 bool is_grayscale = true;
1540 for (uint32_t old_endpoint_index = 0; old_endpoint_index < (uint32_t)m_endpoint_palette.size(); old_endpoint_index++)
1541 {
1542 int r5 = m_endpoint_palette[old_endpoint_index].m_color5[0];
1543 int g5 = m_endpoint_palette[old_endpoint_index].m_color5[1];
1544 int b5 = m_endpoint_palette[old_endpoint_index].m_color5[2];
1545 if ((r5 != g5) || (r5 != b5))
1546 {
1547 is_grayscale = false;
1548 break;
1549 }
1550 }
1551
1552 histogram color5_delta_hist0(32); // prev 0-9, delta is -9 to 31
1553 histogram color5_delta_hist1(32); // prev 10-21, delta is -21 to 21
1554 histogram color5_delta_hist2(32); // prev 22-31, delta is -31 to 9
1555 histogram inten_delta_hist(8);
1556
1557 color_rgba prev_color5(16, 16, 16, 0);
1558 uint32_t prev_inten = 0;
1559
1560 for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)
1561 {
1562 const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index];
1563
1564 int delta_inten = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten;
1565 inten_delta_hist.inc(delta_inten & 7);
1566 prev_inten = m_endpoint_palette[old_endpoint_index].m_inten5;
1567
1568 for (uint32_t i = 0; i < (is_grayscale ? 1U : 3U); i++)
1569 {
1570 const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31;
1571
1572 if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI)
1573 color5_delta_hist0.inc(delta);
1574 else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI)
1575 color5_delta_hist1.inc(delta);
1576 else
1577 color5_delta_hist2.inc(delta);
1578
1579 prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i];
1580 }
1581 }
1582
1583 if (!color5_delta_hist0.get_total()) color5_delta_hist0.inc(0);
1584 if (!color5_delta_hist1.get_total()) color5_delta_hist1.inc(0);
1585 if (!color5_delta_hist2.get_total()) color5_delta_hist2.inc(0);
1586
1587 huffman_encoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;
1588 if (!color5_delta_model0.init(color5_delta_hist0, 16))
1589 {
1590 error_printf("color5_delta_model.init() failed!");
1591 return false;
1592 }
1593
1594 if (!color5_delta_model1.init(color5_delta_hist1, 16))
1595 {
1596 error_printf("color5_delta_model.init() failed!");
1597 return false;
1598 }
1599
1600 if (!color5_delta_model2.init(color5_delta_hist2, 16))
1601 {
1602 error_printf("color5_delta_model.init() failed!");
1603 return false;
1604 }
1605
1606 if (!inten_delta_model.init(inten_delta_hist, 16))
1607 {
1608 error_printf("inten3_model.init() failed!");
1609 return false;
1610 }
1611
1612 bitwise_coder coder;
1613
1614 coder.init(8192);
1615
1616 coder.emit_huffman_table(color5_delta_model0);
1617 coder.emit_huffman_table(color5_delta_model1);
1618 coder.emit_huffman_table(color5_delta_model2);
1619 coder.emit_huffman_table(inten_delta_model);
1620
1621 coder.put_bits(is_grayscale, 1);
1622
1623 prev_color5.set(16, 16, 16, 0);
1624 prev_inten = 0;
1625
1626 for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)
1627 {
1628 const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index];
1629
1630 int delta_inten = (m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten) & 7;
1631 coder.put_code(delta_inten, inten_delta_model);
1632 prev_inten = m_endpoint_palette[old_endpoint_index].m_inten5;
1633
1634 for (uint32_t i = 0; i < (is_grayscale ? 1U : 3U); i++)
1635 {
1636 const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31;
1637
1638 if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI)
1639 coder.put_code(delta, color5_delta_model0);
1640 else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI)
1641 coder.put_code(delta, color5_delta_model1);
1642 else
1643 coder.put_code(delta, color5_delta_model2);
1644
1645 prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i];
1646 }
1647
1648 } // q
1649
1650 coder.flush();
1651
1652 m_output.m_endpoint_palette = coder.get_bytes();
1653
1654 debug_printf("Endpoint codebook size: %u bits %u bytes, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n",
1655 8 * (int)m_output.m_endpoint_palette.size(), (int)m_output.m_endpoint_palette.size(), m_output.m_endpoint_palette.size() * 8.0f / r.get_total_endpoint_clusters(), m_output.m_endpoint_palette.size() * 8.0f / get_total_input_texels());
1656
1657 return true;
1658 }
1659
1660 bool basisu_backend::encode_selector_palette()
1661 {
1662 const basisu_frontend& r = *m_pFront_end;
1663
1664 histogram delta_selector_pal_histogram(256);
1665
1666 for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
1667 {
1668 if (!q)
1669 continue;
1670
1671 const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];
1672 const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);
1673
1674 for (uint32_t j = 0; j < 4; j++)
1675 delta_selector_pal_histogram.inc(cur.get_byte(j) ^ predictor.get_byte(j));
1676 }
1677
1678 if (!delta_selector_pal_histogram.get_total())
1679 delta_selector_pal_histogram.inc(0);
1680
1681 huffman_encoding_table delta_selector_pal_model;
1682 if (!delta_selector_pal_model.init(delta_selector_pal_histogram, 16))
1683 {
1684 error_printf("delta_selector_pal_model.init() failed!");
1685 return false;
1686 }
1687
1688 bitwise_coder coder;
1689 coder.init(1024 * 1024);
1690
1691 coder.put_bits(0, 1); // use global codebook
1692 coder.put_bits(0, 1); // uses hybrid codebooks
1693
1694 coder.put_bits(0, 1); // raw bytes
1695
1696 coder.emit_huffman_table(delta_selector_pal_model);
1697
1698 for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
1699 {
1700 if (!q)
1701 {
1702 for (uint32_t j = 0; j < 4; j++)
1703 coder.put_bits(m_selector_palette[m_selector_remap_table_new_to_old[q]].get_byte(j), 8);
1704 continue;
1705 }
1706
1707 const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];
1708 const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);
1709
1710 for (uint32_t j = 0; j < 4; j++)
1711 coder.put_code(cur.get_byte(j) ^ predictor.get_byte(j), delta_selector_pal_model);
1712 }
1713
1714 coder.flush();
1715
1716 m_output.m_selector_palette = coder.get_bytes();
1717
1718 if (m_output.m_selector_palette.size() >= r.get_total_selector_clusters() * 4)
1719 {
1720 coder.init(1024 * 1024);
1721
1722 coder.put_bits(0, 1); // use global codebook
1723 coder.put_bits(0, 1); // uses hybrid codebooks
1724
1725 coder.put_bits(1, 1); // raw bytes
1726
1727 for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
1728 {
1729 const uint32_t i = m_selector_remap_table_new_to_old[q];
1730
1731 for (uint32_t j = 0; j < 4; j++)
1732 coder.put_bits(m_selector_palette[i].get_byte(j), 8);
1733 }
1734
1735 coder.flush();
1736
1737 m_output.m_selector_palette = coder.get_bytes();
1738 }
1739
1740 debug_printf("Selector codebook bits: %u bytes: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n",
1741 (int)m_output.m_selector_palette.size() * 8, (int)m_output.m_selector_palette.size(),
1742 m_output.m_selector_palette.size() * 8.0f / r.get_total_selector_clusters(), m_output.m_selector_palette.size() * 8.0f / get_total_input_texels());
1743
1744 return true;
1745 }
1746
1747 uint32_t basisu_backend::encode()
1748 {
1749 //const bool is_video = m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
1750 m_output.m_slice_desc = m_slices;
1751 m_output.m_etc1s = m_params.m_etc1s;
1752 m_output.m_uses_global_codebooks = m_params.m_used_global_codebooks;
1753 m_output.m_srgb = m_pFront_end->get_params().m_perceptual;
1754
1755 create_endpoint_palette();
1756 create_selector_palette();
1757
1758 create_encoder_blocks();
1759
1760 if (!encode_image())
1761 return 0;
1762
1763 if (!encode_endpoint_palette())
1764 return 0;
1765
1766 if (!encode_selector_palette())
1767 return 0;
1768
1769 uint32_t total_compressed_bytes = (uint32_t)(m_output.m_slice_image_tables.size() + m_output.m_endpoint_palette.size() + m_output.m_selector_palette.size());
1770 for (uint32_t i = 0; i < m_output.m_slice_image_data.size(); i++)
1771 total_compressed_bytes += (uint32_t)m_output.m_slice_image_data[i].size();
1772
1773 debug_printf("Wrote %u bytes, %3.3f bits/texel\n", total_compressed_bytes, total_compressed_bytes * 8.0f / get_total_input_texels());
1774
1775 return total_compressed_bytes;
1776 }
1777
1778} // namespace basisu
1779