| 1 | // basisu_frontend.h |
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | #pragma once |
| 16 | #include "basisu_enc.h" |
| 17 | #include "basisu_etc.h" |
| 18 | #include "basisu_gpu_texture.h" |
| 19 | #include "../transcoder/basisu_file_headers.h" |
| 20 | #include "../transcoder/basisu_transcoder.h" |
| 21 | |
| 22 | namespace basisu |
| 23 | { |
// Opaque OpenCL state: only declared here, so this header can refer to it by pointer
// without needing its definition.
struct opencl_context;

// Pointer alias used wherever an opencl_context is passed around by address.
using opencl_context_ptr = opencl_context*;
| 26 | |
// A pair of unsigned 32-bit components addressable by index (0 or 1).
struct vec2U
{
	uint32_t m_comps[2];

	// Default construction zeroes both components so that a default-constructed
	// value never holds indeterminate data.
	vec2U() : m_comps{ 0, 0 } { }

	// Constructs the pair (a, b).
	vec2U(uint32_t a, uint32_t b) : m_comps{ a, b } { }

	// Overwrites both components.
	void set(uint32_t a, uint32_t b) { m_comps[0] = a; m_comps[1] = b; }

	// Component access; i must be 0 or 1 (checked by assert only).
	uint32_t operator[] (uint32_t i) const { assert(i < 2); return m_comps[i]; }
	uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; }
};
| 39 | |
// Compression level that basisu_frontend::params starts out with.
constexpr uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 2;

// Upper limit for the compression level, going by its name; where (or whether) it is
// enforced is not visible in this header.
constexpr uint32_t BASISU_MAX_COMPRESSION_LEVEL = 6;
| 42 | |
| 43 | class basisu_frontend |
| 44 | { |
| 45 | BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_frontend); |
| 46 | |
| 47 | public: |
| 48 | |
| 49 | basisu_frontend() : |
| 50 | m_total_blocks(0), |
| 51 | m_total_pixels(0), |
| 52 | m_endpoint_refinement(false), |
| 53 | m_use_hierarchical_endpoint_codebooks(false), |
| 54 | m_use_hierarchical_selector_codebooks(false), |
| 55 | m_num_endpoint_codebook_iterations(0), |
| 56 | m_num_selector_codebook_iterations(0), |
| 57 | m_opencl_failed(false) |
| 58 | { |
| 59 | } |
| 60 | |
| 61 | enum |
| 62 | { |
| 63 | cMaxEndpointClusters = 16128, |
| 64 | |
| 65 | cMaxSelectorClusters = 16128, |
| 66 | }; |
| 67 | |
| 68 | struct params |
| 69 | { |
| 70 | params() : |
| 71 | m_num_source_blocks(0), |
| 72 | m_pSource_blocks(NULL), |
| 73 | m_max_endpoint_clusters(256), |
| 74 | m_max_selector_clusters(256), |
| 75 | m_compression_level(BASISU_DEFAULT_COMPRESSION_LEVEL), |
| 76 | m_perceptual(true), |
| 77 | m_debug_stats(false), |
| 78 | m_debug_images(false), |
| 79 | m_dump_endpoint_clusterization(true), |
| 80 | m_validate(false), |
| 81 | m_multithreaded(false), |
| 82 | m_disable_hierarchical_endpoint_codebooks(false), |
| 83 | m_tex_type(basist::cBASISTexType2D), |
| 84 | m_pOpenCL_context(nullptr), |
| 85 | m_pJob_pool(nullptr) |
| 86 | { |
| 87 | } |
| 88 | |
| 89 | uint32_t m_num_source_blocks; |
| 90 | pixel_block *m_pSource_blocks; |
| 91 | |
| 92 | uint32_t m_max_endpoint_clusters; |
| 93 | uint32_t m_max_selector_clusters; |
| 94 | |
| 95 | uint32_t m_compression_level; |
| 96 | |
| 97 | bool m_perceptual; |
| 98 | bool m_debug_stats; |
| 99 | bool m_debug_images; |
| 100 | bool m_dump_endpoint_clusterization; |
| 101 | bool m_validate; |
| 102 | bool m_multithreaded; |
| 103 | bool m_disable_hierarchical_endpoint_codebooks; |
| 104 | |
| 105 | basist::basis_texture_type m_tex_type; |
| 106 | const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; |
| 107 | |
| 108 | opencl_context_ptr m_pOpenCL_context; |
| 109 | |
| 110 | job_pool *m_pJob_pool; |
| 111 | }; |
| 112 | |
| 113 | bool init(const params &p); |
| 114 | |
| 115 | bool compress(); |
| 116 | |
| 117 | const params &get_params() const { return m_params; } |
| 118 | |
| 119 | const pixel_block &get_source_pixel_block(uint32_t i) const { return m_source_blocks[i]; } |
| 120 | |
| 121 | // RDO output blocks |
| 122 | uint32_t get_total_output_blocks() const { return static_cast<uint32_t>(m_encoded_blocks.size()); } |
| 123 | |
| 124 | const etc_block &get_output_block(uint32_t block_index) const { return m_encoded_blocks[block_index]; } |
| 125 | const etc_block_vec &get_output_blocks() const { return m_encoded_blocks; } |
| 126 | |
| 127 | // "Best" ETC1S blocks |
| 128 | const etc_block &get_etc1s_block(uint32_t block_index) const { return m_etc1_blocks_etc1s[block_index]; } |
| 129 | |
| 130 | // Per-block flags |
| 131 | bool get_diff_flag(uint32_t block_index) const { return m_encoded_blocks[block_index].get_diff_bit(); } |
| 132 | |
| 133 | // Endpoint clusters |
| 134 | uint32_t get_total_endpoint_clusters() const { return static_cast<uint32_t>(m_endpoint_clusters.size()); } |
| 135 | uint32_t get_subblock_endpoint_cluster_index(uint32_t block_index, uint32_t subblock_index) const { return m_block_endpoint_clusters_indices[block_index][subblock_index]; } |
| 136 | |
| 137 | const color_rgba &get_endpoint_cluster_unscaled_color(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_unscaled[individual_mode]; } |
| 138 | uint32_t get_endpoint_cluster_inten_table(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_inten_table[individual_mode]; } |
| 139 | |
| 140 | bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } |
| 141 | |
| 142 | // Selector clusters |
| 143 | uint32_t get_total_selector_clusters() const { return static_cast<uint32_t>(m_selector_cluster_block_indices.size()); } |
| 144 | uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } |
| 145 | const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } |
| 146 | |
| 147 | // Returns block indices using each selector cluster |
| 148 | const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } |
| 149 | |
| 150 | void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); |
| 151 | |
| 152 | void reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); |
| 153 | |
| 154 | bool get_opencl_failed() const { return m_opencl_failed; } |
| 155 | |
| 156 | private: |
| 157 | params m_params; |
| 158 | uint32_t m_total_blocks; |
| 159 | uint32_t m_total_pixels; |
| 160 | |
| 161 | bool m_endpoint_refinement; |
| 162 | bool m_use_hierarchical_endpoint_codebooks; |
| 163 | bool m_use_hierarchical_selector_codebooks; |
| 164 | |
| 165 | uint32_t m_num_endpoint_codebook_iterations; |
| 166 | uint32_t m_num_selector_codebook_iterations; |
| 167 | |
| 168 | // Source pixels for each blocks |
| 169 | pixel_block_vec m_source_blocks; |
| 170 | |
| 171 | // The quantized ETC1S texture. |
| 172 | etc_block_vec m_encoded_blocks; |
| 173 | |
| 174 | // Quantized blocks after endpoint quant, but before selector quant |
| 175 | etc_block_vec m_orig_encoded_blocks; |
| 176 | |
| 177 | // Full quality ETC1S texture |
| 178 | etc_block_vec m_etc1_blocks_etc1s; |
| 179 | |
| 180 | typedef vec<6, float> vec6F; |
| 181 | |
| 182 | // Endpoint clusterizer |
| 183 | typedef tree_vector_quant<vec6F> vec6F_quantizer; |
| 184 | vec6F_quantizer m_endpoint_clusterizer; |
| 185 | |
| 186 | // For each endpoint cluster: An array of which subblock indices (block_index*2+subblock) are located in that cluster. |
| 187 | basisu::vector<uint_vec> m_endpoint_clusters; |
| 188 | |
| 189 | // Array of subblock indices for each parent endpoint cluster |
| 190 | // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. |
| 191 | // As the endpoint clusters are manipulated this constraint gets broken. |
| 192 | basisu::vector<uint_vec> m_endpoint_parent_clusters; |
| 193 | |
| 194 | // Each block's parent endpoint cluster index |
| 195 | uint8_vec m_block_parent_endpoint_cluster; |
| 196 | |
| 197 | // Array of endpoint cluster indices for each parent endpoint cluster |
| 198 | basisu::vector<uint_vec> m_endpoint_clusters_within_each_parent_cluster; |
| 199 | |
| 200 | struct endpoint_cluster_etc_params |
| 201 | { |
| 202 | endpoint_cluster_etc_params() |
| 203 | { |
| 204 | clear(); |
| 205 | } |
| 206 | |
| 207 | void clear() |
| 208 | { |
| 209 | clear_obj(m_color_unscaled); |
| 210 | clear_obj(m_inten_table); |
| 211 | clear_obj(m_color_error); |
| 212 | m_subblocks.clear(); |
| 213 | |
| 214 | clear_obj(m_color_used); |
| 215 | m_valid = false; |
| 216 | } |
| 217 | |
| 218 | // TODO: basisu doesn't use individual mode. |
| 219 | color_rgba m_color_unscaled[2]; // [use_individual_mode] |
| 220 | uint32_t m_inten_table[2]; |
| 221 | |
| 222 | uint64_t m_color_error[2]; |
| 223 | |
| 224 | uint_vec m_subblocks; |
| 225 | |
| 226 | bool m_color_used[2]; |
| 227 | |
| 228 | bool m_valid; |
| 229 | |
| 230 | bool operator== (const endpoint_cluster_etc_params &other) const |
| 231 | { |
| 232 | for (uint32_t i = 0; i < 2; i++) |
| 233 | { |
| 234 | if (m_color_unscaled[i] != other.m_color_unscaled[i]) |
| 235 | return false; |
| 236 | } |
| 237 | |
| 238 | if (m_inten_table[0] != other.m_inten_table[0]) |
| 239 | return false; |
| 240 | if (m_inten_table[1] != other.m_inten_table[1]) |
| 241 | return false; |
| 242 | |
| 243 | return true; |
| 244 | } |
| 245 | |
| 246 | bool operator< (const endpoint_cluster_etc_params &other) const |
| 247 | { |
| 248 | for (uint32_t i = 0; i < 2; i++) |
| 249 | { |
| 250 | if (m_color_unscaled[i] < other.m_color_unscaled[i]) |
| 251 | return true; |
| 252 | else if (m_color_unscaled[i] != other.m_color_unscaled[i]) |
| 253 | return false; |
| 254 | } |
| 255 | |
| 256 | if (m_inten_table[0] < other.m_inten_table[0]) |
| 257 | return true; |
| 258 | else if (m_inten_table[0] == other.m_inten_table[0]) |
| 259 | { |
| 260 | if (m_inten_table[1] < other.m_inten_table[1]) |
| 261 | return true; |
| 262 | } |
| 263 | |
| 264 | return false; |
| 265 | } |
| 266 | }; |
| 267 | |
| 268 | typedef basisu::vector<endpoint_cluster_etc_params> cluster_subblock_etc_params_vec; |
| 269 | |
| 270 | // Each endpoint cluster's ETC1S parameters |
| 271 | cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; |
| 272 | |
| 273 | // The endpoint cluster index used by each ETC1 subblock. |
| 274 | basisu::vector<vec2U> m_block_endpoint_clusters_indices; |
| 275 | |
| 276 | // The block(s) within each selector cluster |
| 277 | // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! |
| 278 | basisu::vector<uint_vec> m_selector_cluster_block_indices; |
| 279 | |
| 280 | // The selector bits for each selector cluster. |
| 281 | basisu::vector<etc_block> m_optimized_cluster_selectors; |
| 282 | |
| 283 | // The block(s) within each parent selector cluster. |
| 284 | basisu::vector<uint_vec> m_selector_parent_cluster_block_indices; |
| 285 | |
| 286 | // Each block's parent selector cluster |
| 287 | uint8_vec m_block_parent_selector_cluster; |
| 288 | |
| 289 | // Array of selector cluster indices for each parent selector cluster |
| 290 | basisu::vector<uint_vec> m_selector_clusters_within_each_parent_cluster; |
| 291 | |
| 292 | // Each block's selector cluster index |
| 293 | basisu::vector<uint32_t> m_block_selector_cluster_index; |
| 294 | |
| 295 | struct subblock_endpoint_quant_err |
| 296 | { |
| 297 | uint64_t m_total_err; |
| 298 | uint32_t m_cluster_index; |
| 299 | uint32_t m_cluster_subblock_index; |
| 300 | uint32_t m_block_index; |
| 301 | uint32_t m_subblock_index; |
| 302 | |
| 303 | bool operator< (const subblock_endpoint_quant_err &rhs) const |
| 304 | { |
| 305 | if (m_total_err < rhs.m_total_err) |
| 306 | return true; |
| 307 | else if (m_total_err == rhs.m_total_err) |
| 308 | { |
| 309 | if (m_block_index < rhs.m_block_index) |
| 310 | return true; |
| 311 | else if (m_block_index == rhs.m_block_index) |
| 312 | return m_subblock_index < rhs.m_subblock_index; |
| 313 | } |
| 314 | return false; |
| 315 | } |
| 316 | }; |
| 317 | |
| 318 | // The sorted subblock endpoint quant error for each endpoint cluster |
| 319 | basisu::vector<subblock_endpoint_quant_err> m_subblock_endpoint_quant_err_vec; |
| 320 | |
| 321 | std::mutex m_lock; |
| 322 | |
| 323 | bool m_opencl_failed; |
| 324 | |
| 325 | //----------------------------------------------------------------------------- |
| 326 | |
| 327 | void init_etc1_images(); |
| 328 | bool init_global_codebooks(); |
| 329 | void init_endpoint_training_vectors(); |
| 330 | void dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors); |
| 331 | void generate_endpoint_clusters(); |
| 332 | void compute_endpoint_subblock_error_vec(); |
| 333 | void introduce_new_endpoint_clusters(); |
| 334 | void generate_endpoint_codebook(uint32_t step); |
| 335 | uint32_t refine_endpoint_clusterization(); |
| 336 | void eliminate_redundant_or_empty_endpoint_clusters(); |
| 337 | void generate_block_endpoint_clusters(); |
| 338 | void compute_endpoint_clusters_within_each_parent_cluster(); |
| 339 | void compute_selector_clusters_within_each_parent_cluster(); |
| 340 | void create_initial_packed_texture(); |
| 341 | void generate_selector_clusters(); |
| 342 | void create_optimized_selector_codebook(uint32_t iter); |
| 343 | void find_optimal_selector_clusters_for_each_block(); |
| 344 | uint32_t refine_block_endpoints_given_selectors(); |
| 345 | void finalize(); |
| 346 | bool validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const; |
| 347 | bool validate_output() const; |
| 348 | void introduce_special_selector_clusters(); |
| 349 | void optimize_selector_codebook(); |
| 350 | bool check_etc1s_constraints() const; |
| 351 | }; |
| 352 | |
| 353 | } // namespace basisu |
| 354 | |