1 | // basisu_frontend.h |
2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // http://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | #pragma once |
16 | #include "basisu_enc.h" |
17 | #include "basisu_etc.h" |
18 | #include "basisu_gpu_texture.h" |
19 | #include "../transcoder/basisu_file_headers.h" |
20 | #include "../transcoder/basisu_transcoder.h" |
21 | |
22 | namespace basisu |
23 | { |
// OpenCL state is only forward-declared here; this header never needs the complete type.
struct opencl_context;

// Plain (non-owning as far as this header shows) pointer alias used to pass the context around.
using opencl_context_ptr = opencl_context *;
26 | |
// Pair of unsigned 32-bit components addressable by index 0 or 1.
struct vec2U
{
	uint32_t m_comps[2];

	// Performs no initialization: m_comps holds indeterminate values until set() or operator[] writes them.
	vec2U()
	{
	}

	// Builds the pair as { a, b }.
	vec2U(uint32_t a, uint32_t b) :
		m_comps{ a, b }
	{
	}

	// Overwrites both components.
	void set(uint32_t a, uint32_t b)
	{
		m_comps[0] = a;
		m_comps[1] = b;
	}

	// Read access by value. The index is only checked by assert(), i.e. not at all when NDEBUG is defined.
	uint32_t operator[] (uint32_t i) const
	{
		assert(i < 2);
		return m_comps[i];
	}

	// Write access by reference, with the same assert-only index check.
	uint32_t &operator[] (uint32_t i)
	{
		assert(i < 2);
		return m_comps[i];
	}
};
39 | |
// Compression level that basisu_frontend::params uses when the caller does not pick one.
constexpr uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 2;

// NOTE(review): presumably the highest compression level the encoder accepts - confirm against the code that validates it.
constexpr uint32_t BASISU_MAX_COMPRESSION_LEVEL = 6;
42 | |
43 | class basisu_frontend |
44 | { |
45 | BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_frontend); |
46 | |
47 | public: |
48 | |
49 | basisu_frontend() : |
50 | m_total_blocks(0), |
51 | m_total_pixels(0), |
52 | m_endpoint_refinement(false), |
53 | m_use_hierarchical_endpoint_codebooks(false), |
54 | m_use_hierarchical_selector_codebooks(false), |
55 | m_num_endpoint_codebook_iterations(0), |
56 | m_num_selector_codebook_iterations(0), |
57 | m_opencl_failed(false) |
58 | { |
59 | } |
60 | |
61 | enum |
62 | { |
63 | cMaxEndpointClusters = 16128, |
64 | |
65 | cMaxSelectorClusters = 16128, |
66 | }; |
67 | |
68 | struct params |
69 | { |
70 | params() : |
71 | m_num_source_blocks(0), |
72 | m_pSource_blocks(NULL), |
73 | m_max_endpoint_clusters(256), |
74 | m_max_selector_clusters(256), |
75 | m_compression_level(BASISU_DEFAULT_COMPRESSION_LEVEL), |
76 | m_perceptual(true), |
77 | m_debug_stats(false), |
78 | m_debug_images(false), |
79 | m_dump_endpoint_clusterization(true), |
80 | m_validate(false), |
81 | m_multithreaded(false), |
82 | m_disable_hierarchical_endpoint_codebooks(false), |
83 | m_tex_type(basist::cBASISTexType2D), |
84 | m_pOpenCL_context(nullptr), |
85 | m_pJob_pool(nullptr) |
86 | { |
87 | } |
88 | |
89 | uint32_t m_num_source_blocks; |
90 | pixel_block *m_pSource_blocks; |
91 | |
92 | uint32_t m_max_endpoint_clusters; |
93 | uint32_t m_max_selector_clusters; |
94 | |
95 | uint32_t m_compression_level; |
96 | |
97 | bool m_perceptual; |
98 | bool m_debug_stats; |
99 | bool m_debug_images; |
100 | bool m_dump_endpoint_clusterization; |
101 | bool m_validate; |
102 | bool m_multithreaded; |
103 | bool m_disable_hierarchical_endpoint_codebooks; |
104 | |
105 | basist::basis_texture_type m_tex_type; |
106 | const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; |
107 | |
108 | opencl_context_ptr m_pOpenCL_context; |
109 | |
110 | job_pool *m_pJob_pool; |
111 | }; |
112 | |
113 | bool init(const params &p); |
114 | |
115 | bool compress(); |
116 | |
117 | const params &get_params() const { return m_params; } |
118 | |
119 | const pixel_block &get_source_pixel_block(uint32_t i) const { return m_source_blocks[i]; } |
120 | |
121 | // RDO output blocks |
122 | uint32_t get_total_output_blocks() const { return static_cast<uint32_t>(m_encoded_blocks.size()); } |
123 | |
124 | const etc_block &get_output_block(uint32_t block_index) const { return m_encoded_blocks[block_index]; } |
125 | const etc_block_vec &get_output_blocks() const { return m_encoded_blocks; } |
126 | |
127 | // "Best" ETC1S blocks |
128 | const etc_block &get_etc1s_block(uint32_t block_index) const { return m_etc1_blocks_etc1s[block_index]; } |
129 | |
130 | // Per-block flags |
131 | bool get_diff_flag(uint32_t block_index) const { return m_encoded_blocks[block_index].get_diff_bit(); } |
132 | |
133 | // Endpoint clusters |
134 | uint32_t get_total_endpoint_clusters() const { return static_cast<uint32_t>(m_endpoint_clusters.size()); } |
135 | uint32_t get_subblock_endpoint_cluster_index(uint32_t block_index, uint32_t subblock_index) const { return m_block_endpoint_clusters_indices[block_index][subblock_index]; } |
136 | |
137 | const color_rgba &get_endpoint_cluster_unscaled_color(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_unscaled[individual_mode]; } |
138 | uint32_t get_endpoint_cluster_inten_table(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_inten_table[individual_mode]; } |
139 | |
140 | bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } |
141 | |
142 | // Selector clusters |
143 | uint32_t get_total_selector_clusters() const { return static_cast<uint32_t>(m_selector_cluster_block_indices.size()); } |
144 | uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } |
145 | const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } |
146 | |
147 | // Returns block indices using each selector cluster |
148 | const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } |
149 | |
150 | void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); |
151 | |
152 | void reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); |
153 | |
154 | bool get_opencl_failed() const { return m_opencl_failed; } |
155 | |
156 | private: |
157 | params m_params; |
158 | uint32_t m_total_blocks; |
159 | uint32_t m_total_pixels; |
160 | |
161 | bool m_endpoint_refinement; |
162 | bool m_use_hierarchical_endpoint_codebooks; |
163 | bool m_use_hierarchical_selector_codebooks; |
164 | |
165 | uint32_t m_num_endpoint_codebook_iterations; |
166 | uint32_t m_num_selector_codebook_iterations; |
167 | |
168 | // Source pixels for each blocks |
169 | pixel_block_vec m_source_blocks; |
170 | |
171 | // The quantized ETC1S texture. |
172 | etc_block_vec m_encoded_blocks; |
173 | |
174 | // Quantized blocks after endpoint quant, but before selector quant |
175 | etc_block_vec m_orig_encoded_blocks; |
176 | |
177 | // Full quality ETC1S texture |
178 | etc_block_vec m_etc1_blocks_etc1s; |
179 | |
180 | typedef vec<6, float> vec6F; |
181 | |
182 | // Endpoint clusterizer |
183 | typedef tree_vector_quant<vec6F> vec6F_quantizer; |
184 | vec6F_quantizer m_endpoint_clusterizer; |
185 | |
186 | // For each endpoint cluster: An array of which subblock indices (block_index*2+subblock) are located in that cluster. |
187 | basisu::vector<uint_vec> m_endpoint_clusters; |
188 | |
189 | // Array of subblock indices for each parent endpoint cluster |
190 | // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. |
191 | // As the endpoint clusters are manipulated this constraint gets broken. |
192 | basisu::vector<uint_vec> m_endpoint_parent_clusters; |
193 | |
194 | // Each block's parent endpoint cluster index |
195 | uint8_vec m_block_parent_endpoint_cluster; |
196 | |
197 | // Array of endpoint cluster indices for each parent endpoint cluster |
198 | basisu::vector<uint_vec> m_endpoint_clusters_within_each_parent_cluster; |
199 | |
200 | struct endpoint_cluster_etc_params |
201 | { |
202 | endpoint_cluster_etc_params() |
203 | { |
204 | clear(); |
205 | } |
206 | |
207 | void clear() |
208 | { |
209 | clear_obj(m_color_unscaled); |
210 | clear_obj(m_inten_table); |
211 | clear_obj(m_color_error); |
212 | m_subblocks.clear(); |
213 | |
214 | clear_obj(m_color_used); |
215 | m_valid = false; |
216 | } |
217 | |
218 | // TODO: basisu doesn't use individual mode. |
219 | color_rgba m_color_unscaled[2]; // [use_individual_mode] |
220 | uint32_t m_inten_table[2]; |
221 | |
222 | uint64_t m_color_error[2]; |
223 | |
224 | uint_vec m_subblocks; |
225 | |
226 | bool m_color_used[2]; |
227 | |
228 | bool m_valid; |
229 | |
230 | bool operator== (const endpoint_cluster_etc_params &other) const |
231 | { |
232 | for (uint32_t i = 0; i < 2; i++) |
233 | { |
234 | if (m_color_unscaled[i] != other.m_color_unscaled[i]) |
235 | return false; |
236 | } |
237 | |
238 | if (m_inten_table[0] != other.m_inten_table[0]) |
239 | return false; |
240 | if (m_inten_table[1] != other.m_inten_table[1]) |
241 | return false; |
242 | |
243 | return true; |
244 | } |
245 | |
246 | bool operator< (const endpoint_cluster_etc_params &other) const |
247 | { |
248 | for (uint32_t i = 0; i < 2; i++) |
249 | { |
250 | if (m_color_unscaled[i] < other.m_color_unscaled[i]) |
251 | return true; |
252 | else if (m_color_unscaled[i] != other.m_color_unscaled[i]) |
253 | return false; |
254 | } |
255 | |
256 | if (m_inten_table[0] < other.m_inten_table[0]) |
257 | return true; |
258 | else if (m_inten_table[0] == other.m_inten_table[0]) |
259 | { |
260 | if (m_inten_table[1] < other.m_inten_table[1]) |
261 | return true; |
262 | } |
263 | |
264 | return false; |
265 | } |
266 | }; |
267 | |
268 | typedef basisu::vector<endpoint_cluster_etc_params> cluster_subblock_etc_params_vec; |
269 | |
270 | // Each endpoint cluster's ETC1S parameters |
271 | cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; |
272 | |
273 | // The endpoint cluster index used by each ETC1 subblock. |
274 | basisu::vector<vec2U> m_block_endpoint_clusters_indices; |
275 | |
276 | // The block(s) within each selector cluster |
277 | // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! |
278 | basisu::vector<uint_vec> m_selector_cluster_block_indices; |
279 | |
280 | // The selector bits for each selector cluster. |
281 | basisu::vector<etc_block> m_optimized_cluster_selectors; |
282 | |
283 | // The block(s) within each parent selector cluster. |
284 | basisu::vector<uint_vec> m_selector_parent_cluster_block_indices; |
285 | |
286 | // Each block's parent selector cluster |
287 | uint8_vec m_block_parent_selector_cluster; |
288 | |
289 | // Array of selector cluster indices for each parent selector cluster |
290 | basisu::vector<uint_vec> m_selector_clusters_within_each_parent_cluster; |
291 | |
292 | // Each block's selector cluster index |
293 | basisu::vector<uint32_t> m_block_selector_cluster_index; |
294 | |
295 | struct subblock_endpoint_quant_err |
296 | { |
297 | uint64_t m_total_err; |
298 | uint32_t m_cluster_index; |
299 | uint32_t m_cluster_subblock_index; |
300 | uint32_t m_block_index; |
301 | uint32_t m_subblock_index; |
302 | |
303 | bool operator< (const subblock_endpoint_quant_err &rhs) const |
304 | { |
305 | if (m_total_err < rhs.m_total_err) |
306 | return true; |
307 | else if (m_total_err == rhs.m_total_err) |
308 | { |
309 | if (m_block_index < rhs.m_block_index) |
310 | return true; |
311 | else if (m_block_index == rhs.m_block_index) |
312 | return m_subblock_index < rhs.m_subblock_index; |
313 | } |
314 | return false; |
315 | } |
316 | }; |
317 | |
318 | // The sorted subblock endpoint quant error for each endpoint cluster |
319 | basisu::vector<subblock_endpoint_quant_err> m_subblock_endpoint_quant_err_vec; |
320 | |
321 | std::mutex m_lock; |
322 | |
323 | bool m_opencl_failed; |
324 | |
325 | //----------------------------------------------------------------------------- |
326 | |
327 | void init_etc1_images(); |
328 | bool init_global_codebooks(); |
329 | void init_endpoint_training_vectors(); |
330 | void dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors); |
331 | void generate_endpoint_clusters(); |
332 | void compute_endpoint_subblock_error_vec(); |
333 | void introduce_new_endpoint_clusters(); |
334 | void generate_endpoint_codebook(uint32_t step); |
335 | uint32_t refine_endpoint_clusterization(); |
336 | void eliminate_redundant_or_empty_endpoint_clusters(); |
337 | void generate_block_endpoint_clusters(); |
338 | void compute_endpoint_clusters_within_each_parent_cluster(); |
339 | void compute_selector_clusters_within_each_parent_cluster(); |
340 | void create_initial_packed_texture(); |
341 | void generate_selector_clusters(); |
342 | void create_optimized_selector_codebook(uint32_t iter); |
343 | void find_optimal_selector_clusters_for_each_block(); |
344 | uint32_t refine_block_endpoints_given_selectors(); |
345 | void finalize(); |
346 | bool validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const; |
347 | bool validate_output() const; |
348 | void introduce_special_selector_clusters(); |
349 | void optimize_selector_codebook(); |
350 | bool check_etc1s_constraints() const; |
351 | }; |
352 | |
353 | } // namespace basisu |
354 | |