| 1 | // basisu_frontend.h | 
|---|
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. | 
|---|
| 3 | // | 
|---|
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 5 | // you may not use this file except in compliance with the License. | 
|---|
| 6 | // You may obtain a copy of the License at | 
|---|
| 7 | // | 
|---|
| 8 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 9 | // | 
|---|
| 10 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 13 | // See the License for the specific language governing permissions and | 
|---|
| 14 | // limitations under the License. | 
|---|
| 15 | #pragma once | 
|---|
| 16 | #include "basisu_enc.h" | 
|---|
| 17 | #include "basisu_etc.h" | 
|---|
| 18 | #include "basisu_gpu_texture.h" | 
|---|
| 19 | #include "../transcoder/basisu_file_headers.h" | 
|---|
| 20 | #include "../transcoder/basisu_transcoder.h" | 
|---|
| 21 |  | 
|---|
| 22 | namespace basisu | 
|---|
| 23 | { | 
|---|
// OpenCL context type. Only forward-declared in this header, so it is used
// here strictly through pointers.
struct opencl_context;

// Pointer alias for passing an opencl_context around without its definition.
using opencl_context_ptr = opencl_context*;
|---|
| 26 |  | 
|---|
// Pair of unsigned 32-bit components with assert-checked indexing.
struct vec2U
{
    // Zero-initialized by default so that a default-constructed vec2U never
    // exposes indeterminate values through operator[].
    uint32_t m_comps[2] = { 0, 0 };

    // Default construction yields (0, 0) via the member initializer above.
    vec2U() { }

    // Constructs the pair (a, b).
    vec2U(uint32_t a, uint32_t b) { set(a, b); }

    // Overwrites both components: component 0 becomes a, component 1 becomes b.
    void set(uint32_t a, uint32_t b) { m_comps[0] = a; m_comps[1] = b; }

    // Component access. i must be 0 or 1; this is only checked by assert(),
    // so there is no range check when asserts are compiled out.
    uint32_t operator[] (uint32_t i) const { assert(i < 2); return m_comps[i]; }
    uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; }
};
|---|
| 39 |  | 
|---|
// Compression level that basisu_frontend::params starts out with.
constexpr uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL{ 2 };

// Presumably the highest supported compression level; where (or whether) it is
// enforced is not visible in this header.
constexpr uint32_t BASISU_MAX_COMPRESSION_LEVEL{ 6 };
|---|
| 42 |  | 
|---|
| 43 | class basisu_frontend | 
|---|
| 44 | { | 
|---|
| 45 | BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_frontend); | 
|---|
| 46 |  | 
|---|
| 47 | public: | 
|---|
| 48 |  | 
|---|
| 49 | basisu_frontend() : | 
|---|
| 50 | m_total_blocks(0), | 
|---|
| 51 | m_total_pixels(0), | 
|---|
| 52 | m_endpoint_refinement(false), | 
|---|
| 53 | m_use_hierarchical_endpoint_codebooks(false), | 
|---|
| 54 | m_use_hierarchical_selector_codebooks(false), | 
|---|
| 55 | m_num_endpoint_codebook_iterations(0), | 
|---|
| 56 | m_num_selector_codebook_iterations(0), | 
|---|
| 57 | m_opencl_failed(false) | 
|---|
| 58 | { | 
|---|
| 59 | } | 
|---|
| 60 |  | 
|---|
| 61 | enum | 
|---|
| 62 | { | 
|---|
| 63 | cMaxEndpointClusters = 16128, | 
|---|
| 64 |  | 
|---|
| 65 | cMaxSelectorClusters = 16128, | 
|---|
| 66 | }; | 
|---|
| 67 |  | 
|---|
| 68 | struct params | 
|---|
| 69 | { | 
|---|
| 70 | params() : | 
|---|
| 71 | m_num_source_blocks(0), | 
|---|
| 72 | m_pSource_blocks(NULL), | 
|---|
| 73 | m_max_endpoint_clusters(256), | 
|---|
| 74 | m_max_selector_clusters(256), | 
|---|
| 75 | m_compression_level(BASISU_DEFAULT_COMPRESSION_LEVEL), | 
|---|
| 76 | m_perceptual(true), | 
|---|
| 77 | m_debug_stats(false), | 
|---|
| 78 | m_debug_images(false), | 
|---|
| 79 | m_dump_endpoint_clusterization(true), | 
|---|
| 80 | m_validate(false), | 
|---|
| 81 | m_multithreaded(false), | 
|---|
| 82 | m_disable_hierarchical_endpoint_codebooks(false), | 
|---|
| 83 | m_tex_type(basist::cBASISTexType2D), | 
|---|
| 84 | m_pOpenCL_context(nullptr), | 
|---|
| 85 | m_pJob_pool(nullptr) | 
|---|
| 86 | { | 
|---|
| 87 | } | 
|---|
| 88 |  | 
|---|
| 89 | uint32_t m_num_source_blocks; | 
|---|
| 90 | pixel_block *m_pSource_blocks; | 
|---|
| 91 |  | 
|---|
| 92 | uint32_t m_max_endpoint_clusters; | 
|---|
| 93 | uint32_t m_max_selector_clusters; | 
|---|
| 94 |  | 
|---|
| 95 | uint32_t m_compression_level; | 
|---|
| 96 |  | 
|---|
| 97 | bool m_perceptual; | 
|---|
| 98 | bool m_debug_stats; | 
|---|
| 99 | bool m_debug_images; | 
|---|
| 100 | bool m_dump_endpoint_clusterization; | 
|---|
| 101 | bool m_validate; | 
|---|
| 102 | bool m_multithreaded; | 
|---|
| 103 | bool m_disable_hierarchical_endpoint_codebooks; | 
|---|
| 104 |  | 
|---|
| 105 | basist::basis_texture_type m_tex_type; | 
|---|
| 106 | const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; | 
|---|
| 107 |  | 
|---|
| 108 | opencl_context_ptr m_pOpenCL_context; | 
|---|
| 109 |  | 
|---|
| 110 | job_pool *m_pJob_pool; | 
|---|
| 111 | }; | 
|---|
| 112 |  | 
|---|
| 113 | bool init(const params &p); | 
|---|
| 114 |  | 
|---|
| 115 | bool compress(); | 
|---|
| 116 |  | 
|---|
| 117 | const params &get_params() const { return m_params; } | 
|---|
| 118 |  | 
|---|
| 119 | const pixel_block &get_source_pixel_block(uint32_t i) const { return m_source_blocks[i]; } | 
|---|
| 120 |  | 
|---|
| 121 | // RDO output blocks | 
|---|
| 122 | uint32_t get_total_output_blocks() const { return static_cast<uint32_t>(m_encoded_blocks.size()); } | 
|---|
| 123 |  | 
|---|
| 124 | const etc_block &get_output_block(uint32_t block_index) const { return m_encoded_blocks[block_index]; } | 
|---|
| 125 | const etc_block_vec &get_output_blocks() const { return m_encoded_blocks; } | 
|---|
| 126 |  | 
|---|
| 127 | // "Best" ETC1S blocks | 
|---|
| 128 | const etc_block &get_etc1s_block(uint32_t block_index) const { return m_etc1_blocks_etc1s[block_index]; } | 
|---|
| 129 |  | 
|---|
| 130 | // Per-block flags | 
|---|
| 131 | bool get_diff_flag(uint32_t block_index) const { return m_encoded_blocks[block_index].get_diff_bit(); } | 
|---|
| 132 |  | 
|---|
| 133 | // Endpoint clusters | 
|---|
| 134 | uint32_t get_total_endpoint_clusters() const { return static_cast<uint32_t>(m_endpoint_clusters.size()); } | 
|---|
| 135 | uint32_t get_subblock_endpoint_cluster_index(uint32_t block_index, uint32_t subblock_index) const { return m_block_endpoint_clusters_indices[block_index][subblock_index]; } | 
|---|
| 136 |  | 
|---|
| 137 | const color_rgba &get_endpoint_cluster_unscaled_color(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_unscaled[individual_mode]; } | 
|---|
| 138 | uint32_t get_endpoint_cluster_inten_table(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_inten_table[individual_mode]; } | 
|---|
| 139 |  | 
|---|
| 140 | bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } | 
|---|
| 141 |  | 
|---|
| 142 | // Selector clusters | 
|---|
| 143 | uint32_t get_total_selector_clusters() const { return static_cast<uint32_t>(m_selector_cluster_block_indices.size()); } | 
|---|
| 144 | uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } | 
|---|
| 145 | const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } | 
|---|
| 146 |  | 
|---|
| 147 | // Returns block indices using each selector cluster | 
|---|
| 148 | const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } | 
|---|
| 149 |  | 
|---|
| 150 | void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); | 
|---|
| 151 |  | 
|---|
| 152 | void reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); | 
|---|
| 153 |  | 
|---|
| 154 | bool get_opencl_failed() const { return m_opencl_failed; } | 
|---|
| 155 |  | 
|---|
| 156 | private: | 
|---|
| 157 | params m_params; | 
|---|
| 158 | uint32_t m_total_blocks; | 
|---|
| 159 | uint32_t m_total_pixels; | 
|---|
| 160 |  | 
|---|
| 161 | bool m_endpoint_refinement; | 
|---|
| 162 | bool m_use_hierarchical_endpoint_codebooks; | 
|---|
| 163 | bool m_use_hierarchical_selector_codebooks; | 
|---|
| 164 |  | 
|---|
| 165 | uint32_t m_num_endpoint_codebook_iterations; | 
|---|
| 166 | uint32_t m_num_selector_codebook_iterations; | 
|---|
| 167 |  | 
|---|
| 168 | // Source pixels for each blocks | 
|---|
| 169 | pixel_block_vec m_source_blocks; | 
|---|
| 170 |  | 
|---|
| 171 | // The quantized ETC1S texture. | 
|---|
| 172 | etc_block_vec m_encoded_blocks; | 
|---|
| 173 |  | 
|---|
| 174 | // Quantized blocks after endpoint quant, but before selector quant | 
|---|
| 175 | etc_block_vec m_orig_encoded_blocks; | 
|---|
| 176 |  | 
|---|
| 177 | // Full quality ETC1S texture | 
|---|
| 178 | etc_block_vec m_etc1_blocks_etc1s; | 
|---|
| 179 |  | 
|---|
| 180 | typedef vec<6, float> vec6F; | 
|---|
| 181 |  | 
|---|
| 182 | // Endpoint clusterizer | 
|---|
| 183 | typedef tree_vector_quant<vec6F> vec6F_quantizer; | 
|---|
| 184 | vec6F_quantizer m_endpoint_clusterizer; | 
|---|
| 185 |  | 
|---|
| 186 | // For each endpoint cluster: An array of which subblock indices (block_index*2+subblock) are located in that cluster. | 
|---|
| 187 | basisu::vector<uint_vec> m_endpoint_clusters; | 
|---|
| 188 |  | 
|---|
| 189 | // Array of subblock indices for each parent endpoint cluster | 
|---|
| 190 | // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. | 
|---|
| 191 | // As the endpoint clusters are manipulated this constraint gets broken. | 
|---|
| 192 | basisu::vector<uint_vec> m_endpoint_parent_clusters; | 
|---|
| 193 |  | 
|---|
| 194 | // Each block's parent endpoint cluster index | 
|---|
| 195 | uint8_vec m_block_parent_endpoint_cluster; | 
|---|
| 196 |  | 
|---|
| 197 | // Array of endpoint cluster indices for each parent endpoint cluster | 
|---|
| 198 | basisu::vector<uint_vec> m_endpoint_clusters_within_each_parent_cluster; | 
|---|
| 199 |  | 
|---|
| 200 | struct endpoint_cluster_etc_params | 
|---|
| 201 | { | 
|---|
| 202 | endpoint_cluster_etc_params() | 
|---|
| 203 | { | 
|---|
| 204 | clear(); | 
|---|
| 205 | } | 
|---|
| 206 |  | 
|---|
| 207 | void clear() | 
|---|
| 208 | { | 
|---|
| 209 | clear_obj(m_color_unscaled); | 
|---|
| 210 | clear_obj(m_inten_table); | 
|---|
| 211 | clear_obj(m_color_error); | 
|---|
| 212 | m_subblocks.clear(); | 
|---|
| 213 |  | 
|---|
| 214 | clear_obj(m_color_used); | 
|---|
| 215 | m_valid = false; | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | // TODO: basisu doesn't use individual mode. | 
|---|
| 219 | color_rgba m_color_unscaled[2]; // [use_individual_mode] | 
|---|
| 220 | uint32_t m_inten_table[2]; | 
|---|
| 221 |  | 
|---|
| 222 | uint64_t m_color_error[2]; | 
|---|
| 223 |  | 
|---|
| 224 | uint_vec m_subblocks; | 
|---|
| 225 |  | 
|---|
| 226 | bool m_color_used[2]; | 
|---|
| 227 |  | 
|---|
| 228 | bool m_valid; | 
|---|
| 229 |  | 
|---|
| 230 | bool operator== (const endpoint_cluster_etc_params &other) const | 
|---|
| 231 | { | 
|---|
| 232 | for (uint32_t i = 0; i < 2; i++) | 
|---|
| 233 | { | 
|---|
| 234 | if (m_color_unscaled[i] != other.m_color_unscaled[i]) | 
|---|
| 235 | return false; | 
|---|
| 236 | } | 
|---|
| 237 |  | 
|---|
| 238 | if (m_inten_table[0] != other.m_inten_table[0]) | 
|---|
| 239 | return false; | 
|---|
| 240 | if (m_inten_table[1] != other.m_inten_table[1]) | 
|---|
| 241 | return false; | 
|---|
| 242 |  | 
|---|
| 243 | return true; | 
|---|
| 244 | } | 
|---|
| 245 |  | 
|---|
| 246 | bool operator< (const endpoint_cluster_etc_params &other) const | 
|---|
| 247 | { | 
|---|
| 248 | for (uint32_t i = 0; i < 2; i++) | 
|---|
| 249 | { | 
|---|
| 250 | if (m_color_unscaled[i] < other.m_color_unscaled[i]) | 
|---|
| 251 | return true; | 
|---|
| 252 | else if (m_color_unscaled[i] != other.m_color_unscaled[i]) | 
|---|
| 253 | return false; | 
|---|
| 254 | } | 
|---|
| 255 |  | 
|---|
| 256 | if (m_inten_table[0] < other.m_inten_table[0]) | 
|---|
| 257 | return true; | 
|---|
| 258 | else if (m_inten_table[0] == other.m_inten_table[0]) | 
|---|
| 259 | { | 
|---|
| 260 | if (m_inten_table[1] < other.m_inten_table[1]) | 
|---|
| 261 | return true; | 
|---|
| 262 | } | 
|---|
| 263 |  | 
|---|
| 264 | return false; | 
|---|
| 265 | } | 
|---|
| 266 | }; | 
|---|
| 267 |  | 
|---|
| 268 | typedef basisu::vector<endpoint_cluster_etc_params> cluster_subblock_etc_params_vec; | 
|---|
| 269 |  | 
|---|
| 270 | // Each endpoint cluster's ETC1S parameters | 
|---|
| 271 | cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; | 
|---|
| 272 |  | 
|---|
| 273 | // The endpoint cluster index used by each ETC1 subblock. | 
|---|
| 274 | basisu::vector<vec2U> m_block_endpoint_clusters_indices; | 
|---|
| 275 |  | 
|---|
| 276 | // The block(s) within each selector cluster | 
|---|
| 277 | // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! | 
|---|
| 278 | basisu::vector<uint_vec> m_selector_cluster_block_indices; | 
|---|
| 279 |  | 
|---|
| 280 | // The selector bits for each selector cluster. | 
|---|
| 281 | basisu::vector<etc_block> m_optimized_cluster_selectors; | 
|---|
| 282 |  | 
|---|
| 283 | // The block(s) within each parent selector cluster. | 
|---|
| 284 | basisu::vector<uint_vec> m_selector_parent_cluster_block_indices; | 
|---|
| 285 |  | 
|---|
| 286 | // Each block's parent selector cluster | 
|---|
| 287 | uint8_vec m_block_parent_selector_cluster; | 
|---|
| 288 |  | 
|---|
| 289 | // Array of selector cluster indices for each parent selector cluster | 
|---|
| 290 | basisu::vector<uint_vec> m_selector_clusters_within_each_parent_cluster; | 
|---|
| 291 |  | 
|---|
| 292 | // Each block's selector cluster index | 
|---|
| 293 | basisu::vector<uint32_t> m_block_selector_cluster_index; | 
|---|
| 294 |  | 
|---|
| 295 | struct subblock_endpoint_quant_err | 
|---|
| 296 | { | 
|---|
| 297 | uint64_t m_total_err; | 
|---|
| 298 | uint32_t m_cluster_index; | 
|---|
| 299 | uint32_t m_cluster_subblock_index; | 
|---|
| 300 | uint32_t m_block_index; | 
|---|
| 301 | uint32_t m_subblock_index; | 
|---|
| 302 |  | 
|---|
| 303 | bool operator< (const subblock_endpoint_quant_err &rhs) const | 
|---|
| 304 | { | 
|---|
| 305 | if (m_total_err < rhs.m_total_err) | 
|---|
| 306 | return true; | 
|---|
| 307 | else if (m_total_err == rhs.m_total_err) | 
|---|
| 308 | { | 
|---|
| 309 | if (m_block_index < rhs.m_block_index) | 
|---|
| 310 | return true; | 
|---|
| 311 | else if (m_block_index == rhs.m_block_index) | 
|---|
| 312 | return m_subblock_index < rhs.m_subblock_index; | 
|---|
| 313 | } | 
|---|
| 314 | return false; | 
|---|
| 315 | } | 
|---|
| 316 | }; | 
|---|
| 317 |  | 
|---|
| 318 | // The sorted subblock endpoint quant error for each endpoint cluster | 
|---|
| 319 | basisu::vector<subblock_endpoint_quant_err> m_subblock_endpoint_quant_err_vec; | 
|---|
| 320 |  | 
|---|
| 321 | std::mutex m_lock; | 
|---|
| 322 |  | 
|---|
| 323 | bool m_opencl_failed; | 
|---|
| 324 |  | 
|---|
| 325 | //----------------------------------------------------------------------------- | 
|---|
| 326 |  | 
|---|
| 327 | void init_etc1_images(); | 
|---|
| 328 | bool init_global_codebooks(); | 
|---|
| 329 | void init_endpoint_training_vectors(); | 
|---|
| 330 | void dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors); | 
|---|
| 331 | void generate_endpoint_clusters(); | 
|---|
| 332 | void compute_endpoint_subblock_error_vec(); | 
|---|
| 333 | void introduce_new_endpoint_clusters(); | 
|---|
| 334 | void generate_endpoint_codebook(uint32_t step); | 
|---|
| 335 | uint32_t refine_endpoint_clusterization(); | 
|---|
| 336 | void eliminate_redundant_or_empty_endpoint_clusters(); | 
|---|
| 337 | void generate_block_endpoint_clusters(); | 
|---|
| 338 | void compute_endpoint_clusters_within_each_parent_cluster(); | 
|---|
| 339 | void compute_selector_clusters_within_each_parent_cluster(); | 
|---|
| 340 | void create_initial_packed_texture(); | 
|---|
| 341 | void generate_selector_clusters(); | 
|---|
| 342 | void create_optimized_selector_codebook(uint32_t iter); | 
|---|
| 343 | void find_optimal_selector_clusters_for_each_block(); | 
|---|
| 344 | uint32_t refine_block_endpoints_given_selectors(); | 
|---|
| 345 | void finalize(); | 
|---|
| 346 | bool validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const; | 
|---|
| 347 | bool validate_output() const; | 
|---|
| 348 | void introduce_special_selector_clusters(); | 
|---|
| 349 | void optimize_selector_codebook(); | 
|---|
| 350 | bool check_etc1s_constraints() const; | 
|---|
| 351 | }; | 
|---|
| 352 |  | 
|---|
| 353 | } // namespace basisu | 
|---|
| 354 |  | 
|---|