| 1 | // basisu_enc.h |
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | #pragma once |
| 16 | #include "../transcoder/basisu.h" |
| 17 | #include "../transcoder/basisu_transcoder_internal.h" |
| 18 | |
| 19 | #include <mutex> |
| 20 | #include <atomic> |
| 21 | #include <condition_variable> |
| 22 | #include <functional> |
| 23 | #include <thread> |
| 24 | #include <unordered_map> |
| 25 | #include <ostream> |
| 26 | |
| 27 | #if !defined(_WIN32) || defined(__MINGW32__) |
| 28 | #include <libgen.h> |
| 29 | #endif |
| 30 | |
| 31 | // This module is really just a huge grab bag of classes and helper functions needed by the encoder. |
| 32 | |
| 33 | // If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time. |
| 34 | #define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0) |
| 35 | |
| 36 | #if BASISU_SUPPORT_SSE |
| 37 | // Declared in basisu_kernels_imp.h, but we can't include that header here because it would cause circular type errors. |
| 38 | extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16); |
| 39 | #endif |
| 40 | |
| 41 | namespace basisu |
| 42 | { |
| 43 | extern uint8_t g_hamming_dist[256]; |
| 44 | extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; |
| 45 | |
| 46 | // true if basisu_encoder_init() has been called and returned. |
| 47 | extern bool g_library_initialized; |
| 48 | |
| 49 | // Encoder library initialization. |
| 50 | // This function MUST be called before encoding anything! |
| 51 | void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); |
| 52 | void basisu_encoder_deinit(); |
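| | |
| | // Example (illustrative sketch, not a prescribed pattern): initialize once before any encoding work and |
| | // tear down at shutdown. encode_my_textures() is a hypothetical placeholder for application code. |
| | // |
| | //   basisu::basisu_encoder_init();   // or basisu_encoder_init(true) to also enable OpenCL |
| | //   encode_my_textures(); |
| | //   basisu::basisu_encoder_deinit(); |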
| 53 | |
| 54 | // Implemented in basisu_kernels_sse.cpp. Unless compiled with BASISU_SUPPORT_SSE=1, this is a no-op and g_cpu_supports_sse41 will always be false. |
| 55 | extern void detect_sse41(); |
| 56 | |
| 57 | #if BASISU_SUPPORT_SSE |
| 58 | extern bool g_cpu_supports_sse41; |
| 59 | #else |
| 60 | const bool g_cpu_supports_sse41 = false; |
| 61 | #endif |
| 62 | |
| 63 | void error_vprintf(const char* pFmt, va_list args); |
| 64 | void error_printf(const char *pFmt, ...); |
| 65 | |
| 66 | // Helpers |
| 67 | |
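| | // Branchless clamp of a signed 32-bit value to [0, 255]: negative inputs map to 0, inputs > 255 map to 255. |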
| 68 | inline uint8_t clamp255(int32_t i) |
| 69 | { |
| 70 | return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); |
| 71 | } |
| 72 | |
| 73 | inline int32_t clampi(int32_t value, int32_t low, int32_t high) |
| 74 | { |
| 75 | if (value < low) |
| 76 | value = low; |
| 77 | else if (value > high) |
| 78 | value = high; |
| 79 | return value; |
| 80 | } |
| 81 | |
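| | // Computes (v * a) / 255 with rounding, without a division (exact for 8-bit v and a). |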
| 82 | inline uint8_t mul_8(uint32_t v, uint32_t a) |
| 83 | { |
| 84 | v = v * a + 128; |
| 85 | return (uint8_t)((v + (v >> 8)) >> 8); |
| 86 | } |
| 87 | |
| 88 | inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) |
| 89 | { |
| 90 | assert(codesize <= 64); |
| 91 | uint64_t bits = 0; |
| 92 | uint32_t total_bits = 0; |
| 93 | |
| 94 | while (total_bits < codesize) |
| 95 | { |
| 96 | uint32_t byte_bit_offset = bit_offset & 7; |
| 97 | uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); |
| 98 | |
| 99 | uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; |
| 100 | byte_bits &= ((1 << bits_to_read) - 1); |
| 101 | |
| 102 | bits |= ((uint64_t)(byte_bits) << total_bits); |
| 103 | |
| 104 | total_bits += bits_to_read; |
| 105 | bit_offset += bits_to_read; |
| 106 | } |
| 107 | |
| 108 | return bits; |
| 109 | } |
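| | |
| | // Example (illustrative sketch): reading variable-length codes from a packed buffer. Bits are consumed |
| | // LSB-first within each byte. |
| | // |
| | //   const uint8_t buf[2] = { 0xB4, 0x01 }; |
| | //   uint32_t ofs = 0; |
| | //   uint64_t a = read_bits(buf, ofs, 3);  // a == 4  (low 3 bits of 0xB4) |
| | //   uint64_t b = read_bits(buf, ofs, 5);  // b == 22 (next 5 bits); ofs is now 8 |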
| 110 | |
| 111 | inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) |
| 112 | { |
| 113 | assert(codesize <= 32); |
| 114 | uint32_t bits = 0; |
| 115 | uint32_t total_bits = 0; |
| 116 | |
| 117 | while (total_bits < codesize) |
| 118 | { |
| 119 | uint32_t byte_bit_offset = bit_offset & 7; |
| 120 | uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); |
| 121 | |
| 122 | uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; |
| 123 | byte_bits &= ((1 << bits_to_read) - 1); |
| 124 | |
| 125 | bits |= (byte_bits << total_bits); |
| 126 | |
| 127 | total_bits += bits_to_read; |
| 128 | bit_offset += bits_to_read; |
| 129 | } |
| 130 | |
| 131 | return bits; |
| 132 | } |
| 133 | |
| 134 | // Hashing |
| 135 | |
| 136 | inline uint32_t bitmix32c(uint32_t v) |
| 137 | { |
| 138 | v = (v + 0x7ed55d16) + (v << 12); |
| 139 | v = (v ^ 0xc761c23c) ^ (v >> 19); |
| 140 | v = (v + 0x165667b1) + (v << 5); |
| 141 | v = (v + 0xd3a2646c) ^ (v << 9); |
| 142 | v = (v + 0xfd7046c5) + (v << 3); |
| 143 | v = (v ^ 0xb55a4f09) ^ (v >> 16); |
| 144 | return v; |
| 145 | } |
| 146 | |
| 147 | inline uint32_t bitmix32(uint32_t v) |
| 148 | { |
| 149 | v -= (v << 6); |
| 150 | v ^= (v >> 17); |
| 151 | v -= (v << 9); |
| 152 | v ^= (v << 4); |
| 153 | v -= (v << 3); |
| 154 | v ^= (v << 10); |
| 155 | v ^= (v >> 15); |
| 156 | return v; |
| 157 | } |
| 158 | |
| 159 | inline uint32_t wang_hash(uint32_t seed) |
| 160 | { |
| 161 | seed = (seed ^ 61) ^ (seed >> 16); |
| 162 | seed *= 9; |
| 163 | seed = seed ^ (seed >> 4); |
| 164 | seed *= 0x27d4eb2d; |
| 165 | seed = seed ^ (seed >> 15); |
| 166 | return seed; |
| 167 | } |
| 168 | |
| 169 | uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); |
| 170 | |
| 171 | template <typename Key> |
| 172 | struct bit_hasher |
| 173 | { |
| 174 | std::size_t operator()(const Key& k) const |
| 175 | { |
| 176 | return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k)); |
| 177 | } |
| 178 | }; |
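| | |
| | // Example (illustrative sketch): using bit_hasher to key a std::unordered_map on a POD struct. The |
| | // "block_key" type is hypothetical; it should be free of padding and pointers, and must define operator==. |
| | // |
| | //   struct block_key { uint32_t x, y; bool operator==(const block_key &o) const { return (x == o.x) && (y == o.y); } }; |
| | //   std::unordered_map<block_key, uint32_t, bit_hasher<block_key> > my_map; |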
| 179 | |
| 180 | class running_stat |
| 181 | { |
| 182 | public: |
| 183 | running_stat() { clear(); } |
| 184 | |
| 185 | void clear() |
| 186 | { |
| 187 | m_n = 0; |
| 188 | m_total = 0; |
| 189 | m_old_m = 0; |
| 190 | m_new_m = 0; |
| 191 | m_old_s = 0; |
| 192 | m_new_s = 0; |
| 193 | m_min = 0; |
| 194 | m_max = 0; |
| 195 | } |
| 196 | |
| 197 | void push(double x) |
| 198 | { |
| 199 | m_n++; |
| 200 | m_total += x; |
| 201 | if (m_n == 1) |
| 202 | { |
| 203 | m_old_m = m_new_m = x; |
| 204 | m_old_s = 0.0; |
| 205 | m_min = x; |
| 206 | m_max = x; |
| 207 | } |
| 208 | else |
| 209 | { |
| 210 | // See Knuth TAOCP vol 2, 3rd edition, page 232 |
| 211 | m_new_m = m_old_m + (x - m_old_m) / m_n; |
| 212 | m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m); |
| 213 | m_old_m = m_new_m; |
| 214 | m_old_s = m_new_s; |
| 215 | m_min = basisu::minimum(x, m_min); |
| 216 | m_max = basisu::maximum(x, m_max); |
| 217 | } |
| 218 | } |
| 219 | |
| 220 | uint32_t get_num() const |
| 221 | { |
| 222 | return m_n; |
| 223 | } |
| 224 | |
| 225 | double get_total() const |
| 226 | { |
| 227 | return m_total; |
| 228 | } |
| 229 | |
| 230 | double get_mean() const |
| 231 | { |
| 232 | return (m_n > 0) ? m_new_m : 0.0; |
| 233 | } |
| 234 | |
| 235 | // Returns sample variance |
| 236 | double get_variance() const |
| 237 | { |
| 238 | return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0); |
| 239 | } |
| 240 | |
| 241 | double get_std_dev() const |
| 242 | { |
| 243 | return sqrt(get_variance()); |
| 244 | } |
| 245 | |
| 246 | double get_min() const |
| 247 | { |
| 248 | return m_min; |
| 249 | } |
| 250 | |
| 251 | double get_max() const |
| 252 | { |
| 253 | return m_max; |
| 254 | } |
| 255 | |
| 256 | private: |
| 257 | uint32_t m_n; |
| 258 | double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max; |
| 259 | }; |
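| | |
| | // Example (illustrative sketch): accumulating simple statistics. |
| | // |
| | //   running_stat stats; |
| | //   stats.push(1.0); stats.push(2.0); stats.push(4.0); |
| | //   // stats.get_mean() == ~2.333, stats.get_std_dev() == ~1.528 (sample std dev), |
| | //   // stats.get_min() == 1.0, stats.get_max() == 4.0 |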
| 260 | |
| 261 | // Linear algebra |
| 262 | |
| 263 | template <uint32_t N, typename T> |
| 264 | class vec |
| 265 | { |
| 266 | protected: |
| 267 | T m_v[N]; |
| 268 | |
| 269 | public: |
| 270 | enum { num_elements = N }; |
| 271 | |
| 272 | inline vec() { } |
| 273 | inline vec(eZero) { set_zero(); } |
| 274 | |
| 275 | explicit inline vec(T val) { set(val); } |
| 276 | inline vec(T v0, T v1) { set(v0, v1); } |
| 277 | inline vec(T v0, T v1, T v2) { set(v0, v1, v2); } |
| 278 | inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); } |
| 279 | inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; } |
| 280 | template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); } |
| 281 | |
| 282 | inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; } |
| 283 | inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; } |
| 284 | |
| 285 | inline T getX() const { return m_v[0]; } |
| 286 | inline T getY() const { static_assert(N >= 2, "N too small" ); return m_v[1]; } |
| 287 | inline T getZ() const { static_assert(N >= 3, "N too small" ); return m_v[2]; } |
| 288 | inline T getW() const { static_assert(N >= 4, "N too small" ); return m_v[3]; } |
| 289 | |
| 290 | inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; } |
| 291 | inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } |
| 292 | |
| 293 | inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } |
| 294 | |
| 295 | template <uint32_t OtherN, typename OtherT> |
| 296 | inline vec &set(const vec<OtherN, OtherT> &other) |
| 297 | { |
| 298 | uint32_t i; |
| 299 | if ((const void *)(&other) == (const void *)(this)) |
| 300 | return *this; |
| 301 | const uint32_t m = minimum(OtherN, N); |
| 302 | for (i = 0; i < m; i++) |
| 303 | m_v[i] = static_cast<T>(other[i]); |
| 304 | for (; i < N; i++) |
| 305 | m_v[i] = 0; |
| 306 | return *this; |
| 307 | } |
| 308 | |
| 309 | inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; } |
| 310 | inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; } |
| 311 | inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; } |
| 312 | |
| 313 | inline vec &set(T v0, T v1) |
| 314 | { |
| 315 | m_v[0] = v0; |
| 316 | if (N >= 2) |
| 317 | { |
| 318 | m_v[1] = v1; |
| 319 | clear_elements(2, N); |
| 320 | } |
| 321 | return *this; |
| 322 | } |
| 323 | |
| 324 | inline vec &set(T v0, T v1, T v2) |
| 325 | { |
| 326 | m_v[0] = v0; |
| 327 | if (N >= 2) |
| 328 | { |
| 329 | m_v[1] = v1; |
| 330 | if (N >= 3) |
| 331 | { |
| 332 | m_v[2] = v2; |
| 333 | clear_elements(3, N); |
| 334 | } |
| 335 | } |
| 336 | return *this; |
| 337 | } |
| 338 | |
| 339 | inline vec &set(T v0, T v1, T v2, T v3) |
| 340 | { |
| 341 | m_v[0] = v0; |
| 342 | if (N >= 2) |
| 343 | { |
| 344 | m_v[1] = v1; |
| 345 | if (N >= 3) |
| 346 | { |
| 347 | m_v[2] = v2; |
| 348 | |
| 349 | if (N >= 4) |
| 350 | { |
| 351 | m_v[3] = v3; |
| 352 | clear_elements(4, N); |
| 353 | } |
| 354 | } |
| 355 | } |
| 356 | return *this; |
| 357 | } |
| 358 | |
| 359 | inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; } |
| 360 | template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; } |
| 361 | |
| 362 | inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); } |
| 363 | inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); } |
| 364 | |
| 365 | inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } |
| 366 | inline vec operator+ () const { return *this; } |
| 367 | inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } |
| 368 | inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; } |
| 369 | inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; } |
| 370 | inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } |
| 371 | inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } |
| 372 | inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } |
| 373 | |
| 374 | friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } |
| 375 | friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } |
| 376 | friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } |
| 377 | friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } |
| 378 | friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } |
| 379 | friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } |
| 380 | |
| 381 | static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } |
| 382 | |
| 383 | inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } |
| 384 | |
| 385 | inline T norm() const { return dot_product(*this, *this); } |
| 386 | inline T length() const { return sqrt(norm()); } |
| 387 | |
| 388 | inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; } |
| 389 | inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; } |
| 390 | |
| 391 | inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); } |
| 392 | inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); } |
| 393 | |
| 394 | inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } |
| 395 | |
| 396 | inline vec &clamp(T l, T h) |
| 397 | { |
| 398 | for (uint32_t i = 0; i < N; i++) |
| 399 | m_v[i] = basisu::clamp(m_v[i], l, h); |
| 400 | return *this; |
| 401 | } |
| 402 | |
| 403 | static vec component_min(const vec& a, const vec& b) |
| 404 | { |
| 405 | vec res; |
| 406 | for (uint32_t i = 0; i < N; i++) |
| 407 | res[i] = minimum(a[i], b[i]); |
| 408 | return res; |
| 409 | } |
| 410 | |
| 411 | static vec component_max(const vec& a, const vec& b) |
| 412 | { |
| 413 | vec res; |
| 414 | for (uint32_t i = 0; i < N; i++) |
| 415 | res[i] = maximum(a[i], b[i]); |
| 416 | return res; |
| 417 | } |
| 418 | }; |
| 419 | |
| 420 | typedef vec<4, double> vec4D; |
| 421 | typedef vec<3, double> vec3D; |
| 422 | typedef vec<2, double> vec2D; |
| 423 | typedef vec<1, double> vec1D; |
| 424 | |
| 425 | typedef vec<4, float> vec4F; |
| 426 | typedef vec<3, float> vec3F; |
| 427 | typedef vec<2, float> vec2F; |
| 428 | typedef vec<1, float> vec1F; |
| 429 | |
| 430 | typedef vec<16, float> vec16F; |
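| | |
| | // Example (illustrative sketch): basic usage of the vec typedefs above. |
| | // |
| | //   vec3F a(1.0f, 2.0f, 3.0f), b(0.0f, 1.0f, 0.0f); |
| | //   float d = vec3F::dot_product(a, b);  // 2.0f |
| | //   float len = a.length();              // sqrtf(14.0f) |
| | //   vec3F c(a - b); |
| | //   c.normalize_in_place();              // c now has unit length |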
| 431 | |
| 432 | template <uint32_t Rows, uint32_t Cols, typename T> |
| 433 | class matrix |
| 434 | { |
| 435 | public: |
| 436 | typedef vec<Rows, T> col_vec; |
| 437 | typedef vec<Cols, T> row_vec; |
| 438 | |
| 439 | typedef T scalar_type; |
| 440 | |
| 441 | enum { rows = Rows, cols = Cols }; |
| 442 | |
| 443 | protected: |
| 444 | row_vec m_r[Rows]; |
| 445 | |
| 446 | public: |
| 447 | inline matrix() {} |
| 448 | inline matrix(eZero) { set_zero(); } |
| 449 | inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; } |
| 450 | inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; } |
| 451 | |
| 452 | inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } |
| 453 | inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } |
| 454 | |
| 455 | inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; } |
| 456 | inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; } |
| 457 | |
| 458 | inline matrix &set_zero() |
| 459 | { |
| 460 | for (uint32_t i = 0; i < Rows; i++) |
| 461 | m_r[i].set_zero(); |
| 462 | return *this; |
| 463 | } |
| 464 | |
| 465 | inline matrix &set_identity() |
| 466 | { |
| 467 | for (uint32_t i = 0; i < Rows; i++) |
| 468 | { |
| 469 | m_r[i].set_zero(); |
| 470 | if (i < Cols) |
| 471 | m_r[i][i] = 1.0f; |
| 472 | } |
| 473 | return *this; |
| 474 | } |
| 475 | }; |
| 476 | |
| 477 | template<uint32_t N, typename VectorType> |
| 478 | inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix) |
| 479 | { |
| 480 | VectorType axis; |
| 481 | if (N == 1) |
| 482 | axis.set(1.0f); |
| 483 | else |
| 484 | { |
| 485 | for (uint32_t i = 0; i < N; i++) |
| 486 | axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1))); |
| 487 | } |
| 488 | |
| 489 | VectorType prev_axis(axis); |
| 490 | |
| 491 | // Power iterations |
| 492 | for (uint32_t power_iter = 0; power_iter < 8; power_iter++) |
| 493 | { |
| 494 | VectorType trial_axis; |
| 495 | double max_sum = 0; |
| 496 | |
| 497 | for (uint32_t i = 0; i < N; i++) |
| 498 | { |
| 499 | double sum = 0; |
| 500 | for (uint32_t j = 0; j < N; j++) |
| 501 | sum += cmatrix[i][j] * axis[j]; |
| 502 | |
| 503 | trial_axis[i] = static_cast<float>(sum); |
| 504 | |
| 505 | max_sum = maximum(fabs(sum), max_sum); |
| 506 | } |
| 507 | |
| 508 | if (max_sum != 0.0f) |
| 509 | trial_axis *= static_cast<float>(1.0f / max_sum); |
| 510 | |
| 511 | VectorType delta_axis(prev_axis - trial_axis); |
| 512 | |
| 513 | prev_axis = axis; |
| 514 | axis = trial_axis; |
| 515 | |
| 516 | if (delta_axis.norm() < .0024f) |
| 517 | break; |
| 518 | } |
| 519 | |
| 520 | return axis.normalize_in_place(); |
| 521 | } |
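| | |
| | // Example (illustrative sketch): extracting the dominant eigenvector (principal axis) of a small |
| | // covariance matrix with the power iteration above. |
| | // |
| | //   matrix<2, 2, float> cov(cZero); |
| | //   cov[0][0] = 4.0f; cov[1][1] = 1.0f; |
| | //   vec2F axis = compute_pca_from_covar<2, vec2F>(cov);  // approximately (1.0f, 0.0f) |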
| 522 | |
| 523 | template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys) |
| 524 | { |
| 525 | for (uint32_t i = 0; i < num_indices; i++) |
| 526 | pIndices[i] = i; |
| 527 | |
| 528 | std::sort( |
| 529 | pIndices, |
| 530 | pIndices + num_indices, |
| 531 | [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; } |
| 532 | ); |
| 533 | } |
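| | |
| | // Example (illustrative sketch): sorting indices by key without moving the keys themselves. |
| | // |
| | //   const float keys[4] = { 3.0f, 1.0f, 2.0f, 0.0f }; |
| | //   uint32_t order[4]; |
| | //   indirect_sort(4, order, keys);  // order is now { 3, 1, 2, 0 } |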
| 534 | |
| 535 | // 1-4 byte direct Radix sort. |
| 536 | template <typename T> |
| 537 | T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size) |
| 538 | { |
| 539 | assert(key_ofs < sizeof(T)); |
| 540 | assert((key_size >= 1) && (key_size <= 4)); |
| 541 | |
| 542 | uint32_t hist[256 * 4]; |
| 543 | |
| 544 | memset(hist, 0, sizeof(hist[0]) * 256 * key_size); |
| 545 | |
| 546 | #define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs)) |
| 547 | |
| 548 | if (key_size == 4) |
| 549 | { |
| 550 | T* p = pBuf0; |
| 551 | T* q = pBuf0 + num_vals; |
| 552 | for (; p != q; p++) |
| 553 | { |
| 554 | const uint32_t key = BASISU_GET_KEY(p); |
| 555 | |
| 556 | hist[key & 0xFF]++; |
| 557 | hist[256 + ((key >> 8) & 0xFF)]++; |
| 558 | hist[512 + ((key >> 16) & 0xFF)]++; |
| 559 | hist[768 + ((key >> 24) & 0xFF)]++; |
| 560 | } |
| 561 | } |
| 562 | else if (key_size == 3) |
| 563 | { |
| 564 | T* p = pBuf0; |
| 565 | T* q = pBuf0 + num_vals; |
| 566 | for (; p != q; p++) |
| 567 | { |
| 568 | const uint32_t key = BASISU_GET_KEY(p); |
| 569 | |
| 570 | hist[key & 0xFF]++; |
| 571 | hist[256 + ((key >> 8) & 0xFF)]++; |
| 572 | hist[512 + ((key >> 16) & 0xFF)]++; |
| 573 | } |
| 574 | } |
| 575 | else if (key_size == 2) |
| 576 | { |
| 577 | T* p = pBuf0; |
| 578 | T* q = pBuf0 + (num_vals >> 1) * 2; |
| 579 | |
| 580 | for (; p != q; p += 2) |
| 581 | { |
| 582 | const uint32_t key0 = BASISU_GET_KEY(p); |
| 583 | const uint32_t key1 = BASISU_GET_KEY(p + 1); |
| 584 | |
| 585 | hist[key0 & 0xFF]++; |
| 586 | hist[256 + ((key0 >> 8) & 0xFF)]++; |
| 587 | |
| 588 | hist[key1 & 0xFF]++; |
| 589 | hist[256 + ((key1 >> 8) & 0xFF)]++; |
| 590 | } |
| 591 | |
| 592 | if (num_vals & 1) |
| 593 | { |
| 594 | const uint32_t key = BASISU_GET_KEY(p); |
| 595 | |
| 596 | hist[key & 0xFF]++; |
| 597 | hist[256 + ((key >> 8) & 0xFF)]++; |
| 598 | } |
| 599 | } |
| 600 | else |
| 601 | { |
| 602 | assert(key_size == 1); |
| 603 | if (key_size != 1) |
| 604 | return NULL; |
| 605 | |
| 606 | T* p = pBuf0; |
| 607 | T* q = pBuf0 + (num_vals >> 1) * 2; |
| 608 | |
| 609 | for (; p != q; p += 2) |
| 610 | { |
| 611 | const uint32_t key0 = BASISU_GET_KEY(p); |
| 612 | const uint32_t key1 = BASISU_GET_KEY(p + 1); |
| 613 | |
| 614 | hist[key0 & 0xFF]++; |
| 615 | hist[key1 & 0xFF]++; |
| 616 | } |
| 617 | |
| 618 | if (num_vals & 1) |
| 619 | { |
| 620 | const uint32_t key = BASISU_GET_KEY(p); |
| 621 | hist[key & 0xFF]++; |
| 622 | } |
| 623 | } |
| 624 | |
| 625 | T* pCur = pBuf0; |
| 626 | T* pNew = pBuf1; |
| 627 | |
| 628 | for (uint32_t pass = 0; pass < key_size; pass++) |
| 629 | { |
| 630 | const uint32_t* pHist = &hist[pass << 8]; |
| 631 | |
| 632 | uint32_t offsets[256]; |
| 633 | |
| 634 | uint32_t cur_ofs = 0; |
| 635 | for (uint32_t i = 0; i < 256; i += 2) |
| 636 | { |
| 637 | offsets[i] = cur_ofs; |
| 638 | cur_ofs += pHist[i]; |
| 639 | |
| 640 | offsets[i + 1] = cur_ofs; |
| 641 | cur_ofs += pHist[i + 1]; |
| 642 | } |
| 643 | |
| 644 | const uint32_t pass_shift = pass << 3; |
| 645 | |
| 646 | T* p = pCur; |
| 647 | T* q = pCur + (num_vals >> 1) * 2; |
| 648 | |
| 649 | for (; p != q; p += 2) |
| 650 | { |
| 651 | uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; |
| 652 | uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF; |
| 653 | |
| 654 | if (c0 == c1) |
| 655 | { |
| 656 | uint32_t dst_offset0 = offsets[c0]; |
| 657 | |
| 658 | offsets[c0] = dst_offset0 + 2; |
| 659 | |
| 660 | pNew[dst_offset0] = p[0]; |
| 661 | pNew[dst_offset0 + 1] = p[1]; |
| 662 | } |
| 663 | else |
| 664 | { |
| 665 | uint32_t dst_offset0 = offsets[c0]++; |
| 666 | uint32_t dst_offset1 = offsets[c1]++; |
| 667 | |
| 668 | pNew[dst_offset0] = p[0]; |
| 669 | pNew[dst_offset1] = p[1]; |
| 670 | } |
| 671 | } |
| 672 | |
| 673 | if (num_vals & 1) |
| 674 | { |
| 675 | uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; |
| 676 | |
| 677 | uint32_t dst_offset = offsets[c]; |
| 678 | offsets[c] = dst_offset + 1; |
| 679 | |
| 680 | pNew[dst_offset] = *p; |
| 681 | } |
| 682 | |
| 683 | T* t = pCur; |
| 684 | pCur = pNew; |
| 685 | pNew = t; |
| 686 | } |
| 687 | |
| 688 | return pCur; |
| 689 | } |
| 690 | |
| 691 | #undef BASISU_GET_KEY |
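| | |
| | // Example (illustrative sketch): radix sorting plain structs on a 32-bit key. "rec" is a hypothetical |
| | // record type; the returned pointer is whichever of the two buffers ended up holding the sorted data. |
| | // |
| | //   struct rec { uint32_t m_key; uint32_t m_payload; }; |
| | //   rec recs[256], temp[256]; |
| | //   // ... fill recs ... |
| | //   rec* pSorted = radix_sort(256, recs, temp, 0, sizeof(uint32_t));  // key at offset 0, 4 byte key |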
| 692 | |
| 693 | // Very simple job pool with no dependencies. |
| 694 | class job_pool |
| 695 | { |
| 696 | BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool); |
| 697 | |
| 698 | public: |
| 699 | // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. |
| 700 | job_pool(uint32_t num_threads); |
| 701 | ~job_pool(); |
| 702 | |
| 703 | void add_job(const std::function<void()>& job); |
| 704 | void add_job(std::function<void()>&& job); |
| 705 | |
| 706 | void wait_for_all(); |
| 707 | |
| 708 | size_t get_total_threads() const { return 1 + m_threads.size(); } |
| 709 | |
| 710 | private: |
| 711 | std::vector<std::thread> m_threads; |
| 712 | std::vector<std::function<void()> > m_queue; |
| 713 | |
| 714 | std::mutex m_mutex; |
| 715 | std::condition_variable m_has_work; |
| 716 | std::condition_variable m_no_more_jobs; |
| 717 | |
| 718 | uint32_t m_num_active_jobs; |
| 719 | |
| 720 | std::atomic<bool> m_kill_flag; |
| 721 | |
| 722 | void job_thread(uint32_t index); |
| 723 | }; |
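| | |
| | // Example (illustrative sketch): dispatching independent work items. As noted above, the thread count |
| | // passed to the constructor includes the calling thread. |
| | // |
| | //   job_pool jpool(4);  // the calling thread plus 3 new worker threads |
| | //   for (uint32_t i = 0; i < 16; i++) |
| | //      jpool.add_job([i]() { /* process slice i (hypothetical work) */ }); |
| | //   jpool.wait_for_all();  // blocks until all queued jobs have completed |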
| 724 | |
| 725 | // Simple 32-bit color class |
| 726 | |
| 727 | class color_rgba_i16 |
| 728 | { |
| 729 | public: |
| 730 | union |
| 731 | { |
| 732 | int16_t m_comps[4]; |
| 733 | |
| 734 | struct |
| 735 | { |
| 736 | int16_t r; |
| 737 | int16_t g; |
| 738 | int16_t b; |
| 739 | int16_t a; |
| 740 | }; |
| 741 | }; |
| 742 | |
| 743 | inline color_rgba_i16() |
| 744 | { |
| 745 | static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4" ); |
| 746 | } |
| 747 | |
| 748 | inline color_rgba_i16(int sr, int sg, int sb, int sa) |
| 749 | { |
| 750 | set(sr, sg, sb, sa); |
| 751 | } |
| 752 | |
| 753 | inline color_rgba_i16 &set(int sr, int sg, int sb, int sa) |
| 754 | { |
| 755 | m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX); |
| 756 | m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX); |
| 757 | m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX); |
| 758 | m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX); |
| 759 | return *this; |
| 760 | } |
| 761 | }; |
| 762 | |
| 763 | class color_rgba |
| 764 | { |
| 765 | public: |
| 766 | union |
| 767 | { |
| 768 | uint8_t m_comps[4]; |
| 769 | |
| 770 | struct |
| 771 | { |
| 772 | uint8_t r; |
| 773 | uint8_t g; |
| 774 | uint8_t b; |
| 775 | uint8_t a; |
| 776 | }; |
| 777 | }; |
| 778 | |
| 779 | inline color_rgba() |
| 780 | { |
| 781 | static_assert(sizeof(*this) == 4, "sizeof(*this) != 4" ); |
| 782 | static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)" ); |
| 783 | } |
| 784 | |
| 785 | // Not too hot about this idea. |
| 786 | inline color_rgba(const basist::color32& other) : |
| 787 | r(other.r), |
| 788 | g(other.g), |
| 789 | b(other.b), |
| 790 | a(other.a) |
| 791 | { |
| 792 | } |
| 793 | |
| 794 | color_rgba& operator= (const basist::color32& rhs) |
| 795 | { |
| 796 | r = rhs.r; |
| 797 | g = rhs.g; |
| 798 | b = rhs.b; |
| 799 | a = rhs.a; |
| 800 | return *this; |
| 801 | } |
| 802 | |
| 803 | inline color_rgba(int y) |
| 804 | { |
| 805 | set(y); |
| 806 | } |
| 807 | |
| 808 | inline color_rgba(int y, int na) |
| 809 | { |
| 810 | set(y, na); |
| 811 | } |
| 812 | |
| 813 | inline color_rgba(int sr, int sg, int sb, int sa) |
| 814 | { |
| 815 | set(sr, sg, sb, sa); |
| 816 | } |
| 817 | |
| 818 | inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) |
| 819 | { |
| 820 | set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); |
| 821 | } |
| 822 | |
| 823 | inline color_rgba& set_noclamp_y(int y) |
| 824 | { |
| 825 | m_comps[0] = (uint8_t)y; |
| 826 | m_comps[1] = (uint8_t)y; |
| 827 | m_comps[2] = (uint8_t)y; |
| 828 | m_comps[3] = (uint8_t)255; |
| 829 | return *this; |
| 830 | } |
| 831 | |
| 832 | inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) |
| 833 | { |
| 834 | m_comps[0] = (uint8_t)sr; |
| 835 | m_comps[1] = (uint8_t)sg; |
| 836 | m_comps[2] = (uint8_t)sb; |
| 837 | m_comps[3] = (uint8_t)sa; |
| 838 | return *this; |
| 839 | } |
| 840 | |
| 841 | inline color_rgba &set(int y) |
| 842 | { |
| 843 | m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255)); |
| 844 | m_comps[1] = m_comps[0]; |
| 845 | m_comps[2] = m_comps[0]; |
| 846 | m_comps[3] = 255; |
| 847 | return *this; |
| 848 | } |
| 849 | |
| 850 | inline color_rgba &set(int y, int na) |
| 851 | { |
| 852 | m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255)); |
| 853 | m_comps[1] = m_comps[0]; |
| 854 | m_comps[2] = m_comps[0]; |
| 855 | m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255)); |
| 856 | return *this; |
| 857 | } |
| 858 | |
| 859 | inline color_rgba &set(int sr, int sg, int sb, int sa) |
| 860 | { |
| 861 | m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255)); |
| 862 | m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255)); |
| 863 | m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255)); |
| 864 | m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255)); |
| 865 | return *this; |
| 866 | } |
| 867 | |
| 868 | inline color_rgba &set_rgb(int sr, int sg, int sb) |
| 869 | { |
| 870 | m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255)); |
| 871 | m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255)); |
| 872 | m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255)); |
| 873 | return *this; |
| 874 | } |
| 875 | |
| 876 | inline color_rgba &set_rgb(const color_rgba &other) |
| 877 | { |
| 878 | r = other.r; |
| 879 | g = other.g; |
| 880 | b = other.b; |
| 881 | return *this; |
| 882 | } |
| 883 | |
| 884 | inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } |
| 885 | inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } |
| 886 | |
| 887 | inline void clear() |
| 888 | { |
| 889 | m_comps[0] = 0; |
| 890 | m_comps[1] = 0; |
| 891 | m_comps[2] = 0; |
| 892 | m_comps[3] = 0; |
| 893 | } |
| 894 | |
| 895 | inline bool operator== (const color_rgba &rhs) const |
| 896 | { |
| 897 | if (m_comps[0] != rhs.m_comps[0]) return false; |
| 898 | if (m_comps[1] != rhs.m_comps[1]) return false; |
| 899 | if (m_comps[2] != rhs.m_comps[2]) return false; |
| 900 | if (m_comps[3] != rhs.m_comps[3]) return false; |
| 901 | return true; |
| 902 | } |
| 903 | |
| 904 | inline bool operator!= (const color_rgba &rhs) const |
| 905 | { |
| 906 | return !(*this == rhs); |
| 907 | } |
| 908 | |
| 909 | inline bool operator<(const color_rgba &rhs) const |
| 910 | { |
| 911 | for (int i = 0; i < 4; i++) |
| 912 | { |
| 913 | if (m_comps[i] < rhs.m_comps[i]) |
| 914 | return true; |
| 915 | else if (m_comps[i] != rhs.m_comps[i]) |
| 916 | return false; |
| 917 | } |
| 918 | return false; |
| 919 | } |
| 920 | |
| 921 | inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } |
| 922 | inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } |
| 923 | inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); } |
| 924 | |
| 925 | inline basist::color32 get_color32() const |
| 926 | { |
| 927 | return basist::color32(r, g, b, a); |
| 928 | } |
| 929 | |
| 930 | static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } |
| 931 | static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } |
| 932 | }; |
| 933 | |
| 934 | typedef basisu::vector<color_rgba> color_rgba_vec; |
| 935 | |
| 936 | const color_rgba g_black_color(0, 0, 0, 255); |
| 937 | const color_rgba g_black_trans_color(0, 0, 0, 0); |
| 938 | const color_rgba g_white_color(255, 255, 255, 255); |
| 939 | |
| 940 | inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) |
| 941 | { |
| 942 | int dr = r0 - r1, dg = g0 - g1, db = b0 - b1; |
| 943 | return dr * dr + dg * dg + db * db; |
| 944 | } |
| 945 | |
| 946 | inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1) |
| 947 | { |
| 948 | int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1; |
| 949 | return dr * dr + dg * dg + db * db + da * da; |
| 950 | } |
| 951 | |
| 952 | inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha) |
| 953 | { |
| 954 | if (alpha) |
| 955 | return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); |
| 956 | else |
| 957 | return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); |
| 958 | } |
| 959 | |
| 960 | // TODO: Allow user to control channel weightings. |
| 961 | inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) |
| 962 | { |
| 963 | if (perceptual) |
| 964 | { |
| 965 | #if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE |
| 966 | const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; |
| 967 | const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; |
| 968 | |
| 969 | const float cr1 = e1.r - l1; |
| 970 | const float cr2 = e2.r - l2; |
| 971 | |
| 972 | const float cb1 = e1.b - l1; |
| 973 | const float cb2 = e2.b - l2; |
| 974 | |
| 975 | const float dl = l1 - l2; |
| 976 | const float dcr = cr1 - cr2; |
| 977 | const float dcb = cb1 - cb2; |
| 978 | |
| 979 | uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb); |
| 980 | |
| 981 | if (alpha) |
| 982 | { |
| 983 | int da = static_cast<int>(e1.a) - static_cast<int>(e2.a); |
| 984 | d += static_cast<uint32_t>(128.0f*da*da); |
| 985 | } |
| 986 | |
| 987 | return d; |
| 988 | #elif 1 |
| 989 | int dr = e1.r - e2.r; |
| 990 | int dg = e1.g - e2.g; |
| 991 | int db = e1.b - e2.b; |
| 992 | |
| 993 | #if 0 |
| 994 | int delta_l = dr * 27 + dg * 92 + db * 9; |
| 995 | int delta_cr = dr * 128 - delta_l; |
| 996 | int delta_cb = db * 128 - delta_l; |
| 997 | |
| 998 | uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) + |
| 999 | ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) + |
| 1000 | ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U); |
| 1001 | #else |
| 1002 | int64_t delta_l = dr * 27 + dg * 92 + db * 9; |
| 1003 | int64_t delta_cr = dr * 128 - delta_l; |
| 1004 | int64_t delta_cb = db * 128 - delta_l; |
| 1005 | |
| 1006 | uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) + |
| 1007 | ((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) + |
| 1008 | ((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U); |
| 1009 | #endif |
| 1010 | |
| 1011 | if (alpha) |
| 1012 | { |
| 1013 | int da = (e1.a - e2.a) << 7; |
| 1014 | // This can't overflow: da is at most +/-255*128 = +/-32640, so da*da needs only ~29.99 bits. |
| 1015 | id += ((uint32_t)(da * da) >> 7U); |
| 1016 | } |
| 1017 | |
| 1018 | return id; |
| 1019 | #else |
| 1020 | int dr = e1.r - e2.r; |
| 1021 | int dg = e1.g - e2.g; |
| 1022 | int db = e1.b - e2.b; |
| 1023 | |
| 1024 | int64_t delta_l = dr * 27 + dg * 92 + db * 9; |
| 1025 | int64_t delta_cr = dr * 128 - delta_l; |
| 1026 | int64_t delta_cb = db * 128 - delta_l; |
| 1027 | |
| 1028 | int64_t id = ((delta_l * delta_l) * 128) + |
| 1029 | ((delta_cr * delta_cr) * 26) + |
| 1030 | ((delta_cb * delta_cb) * 3); |
| 1031 | |
| 1032 | if (alpha) |
| 1033 | { |
| 1034 | int64_t da = (e1.a - e2.a); |
| 1035 | id += (da * da) * 128; |
| 1036 | } |
| 1037 | |
| 1038 | int d = (id + 8192) >> 14; |
| 1039 | |
| 1040 | return d; |
| 1041 | #endif |
| 1042 | } |
| 1043 | else |
| 1044 | return color_distance(e1, e2, alpha); |
| 1045 | } |
| 1046 | |
| 1047 | static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b) |
| 1048 | { |
| 1049 | const int dl = a.r - b.r; |
| 1050 | const int da = a.a - b.a; |
| 1051 | return dl * dl + da * da; |
| 1052 | } |
| 1053 | |
| 1054 | // String helpers |
| 1055 | |
| 1056 | inline int string_find_right(const std::string& filename, char c) |
| 1057 | { |
| 1058 | size_t result = filename.find_last_of(c); |
| 1059 | return (result == std::string::npos) ? -1 : (int)result; |
| 1060 | } |
| 1061 | |
| 1062 | inline std::string string_get_extension(const std::string &filename) |
| 1063 | { |
| 1064 | int sep = -1; |
| 1065 | #ifdef _WIN32 |
| 1066 | sep = string_find_right(filename, '\\'); |
| 1067 | #endif |
| 1068 | if (sep < 0) |
| 1069 | sep = string_find_right(filename, '/'); |
| 1070 | |
| 1071 | int dot = string_find_right(filename, '.'); |
| 1072 | if (dot <= sep) |
| 1073 | return "" ; |
| 1074 | |
| 1075 | std::string result(filename); |
| 1076 | result.erase(0, dot + 1); |
| 1077 | |
| 1078 | return result; |
| 1079 | } |
| 1080 | |
| 1081 | inline bool string_remove_extension(std::string &filename) |
| 1082 | { |
| 1083 | int sep = -1; |
| 1084 | #ifdef _WIN32 |
| 1085 | sep = string_find_right(filename, '\\'); |
| 1086 | #endif |
| 1087 | if (sep < 0) |
| 1088 | sep = string_find_right(filename, '/'); |
| 1089 | |
| 1090 | int dot = string_find_right(filename, '.'); |
| 1091 | if ((dot < sep) || (dot < 0)) |
| 1092 | return false; |
| 1093 | |
| 1094 | filename.resize(dot); |
| 1095 | |
| 1096 | return true; |
| 1097 | } |
| 1098 | |
| 1099 | inline std::string string_format(const char* pFmt, ...) |
| 1100 | { |
| 1101 | char buf[2048]; |
| 1102 | |
| 1103 | va_list args; |
| 1104 | va_start(args, pFmt); |
| 1105 | #ifdef _WIN32 |
| 1106 | vsprintf_s(buf, sizeof(buf), pFmt, args); |
| 1107 | #else |
| 1108 | vsnprintf(buf, sizeof(buf), pFmt, args); |
| 1109 | #endif |
| 1110 | va_end(args); |
| 1111 | |
| 1112 | return std::string(buf); |
| 1113 | } |
| 1114 | |
| 1115 | inline std::string string_tolower(const std::string& s) |
| 1116 | { |
| 1117 | std::string result(s); |
| 1118 | for (size_t i = 0; i < result.size(); i++) |
| 1119 | result[i] = (char)tolower((int)result[i]); |
| 1120 | return result; |
| 1121 | } |
| 1122 | |
| 1123 | inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc) |
| 1124 | { |
| 1125 | assert(pDst && pSrc && dst_len); |
| 1126 | if (!dst_len) |
| 1127 | return pDst; |
| 1128 | |
| 1129 | const size_t src_len = strlen(pSrc); |
| 1130 | const size_t src_len_plus_terminator = src_len + 1; |
| 1131 | |
| 1132 | if (src_len_plus_terminator <= dst_len) |
| 1133 | memcpy(pDst, pSrc, src_len_plus_terminator); |
| 1134 | else |
| 1135 | { |
| 1136 | if (dst_len > 1) |
| 1137 | memcpy(pDst, pSrc, dst_len - 1); |
| 1138 | pDst[dst_len - 1] = '\0'; |
| 1139 | } |
| 1140 | |
| 1141 | return pDst; |
| 1142 | } |
| 1143 | |
| 1144 | inline bool string_ends_with(const std::string& s, char c) |
| 1145 | { |
| 1146 | return (s.size() != 0) && (s.back() == c); |
| 1147 | } |
| 1148 | |
| 1149 | inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt) |
| 1150 | { |
| 1151 | #ifdef _MSC_VER |
| 1152 | char drive_buf[_MAX_DRIVE] = { 0 }; |
| 1153 | char dir_buf[_MAX_DIR] = { 0 }; |
| 1154 | char fname_buf[_MAX_FNAME] = { 0 }; |
| 1155 | char ext_buf[_MAX_EXT] = { 0 }; |
| 1156 | |
| 1157 | errno_t error = _splitpath_s(p, |
| 1158 | pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, |
| 1159 | pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, |
| 1160 | pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, |
| 1161 | pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); |
| 1162 | if (error != 0) |
| 1163 | return false; |
| 1164 | |
| 1165 | if (pDrive) *pDrive = drive_buf; |
| 1166 | if (pDir) *pDir = dir_buf; |
| 1167 | if (pFilename) *pFilename = fname_buf; |
| 1168 | if (pExt) *pExt = ext_buf; |
| 1169 | return true; |
| 1170 | #else |
| 1171 | char dirtmp[1024], nametmp[1024]; |
| 1172 | strcpy_safe(dirtmp, sizeof(dirtmp), p); |
| 1173 | strcpy_safe(nametmp, sizeof(nametmp), p); |
| 1174 | |
| 1175 | if (pDrive) |
| 1176 | pDrive->resize(0); |
| 1177 | |
| 1178 | const char *pDirName = dirname(dirtmp); |
| 1179 | const char* pBaseName = basename(nametmp); |
| 1180 | if ((!pDirName) || (!pBaseName)) |
| 1181 | return false; |
| 1182 | |
| 1183 | if (pDir) |
| 1184 | { |
| 1185 | *pDir = pDirName; |
| 1186 | if ((pDir->size()) && (pDir->back() != '/')) |
| 1187 | *pDir += "/" ; |
| 1188 | } |
| 1189 | |
| 1190 | if (pFilename) |
| 1191 | { |
| 1192 | *pFilename = pBaseName; |
| 1193 | string_remove_extension(*pFilename); |
| 1194 | } |
| 1195 | |
| 1196 | if (pExt) |
| 1197 | { |
| 1198 | *pExt = pBaseName; |
| 1199 | *pExt = string_get_extension(*pExt); |
| 1200 | if (pExt->size()) |
| 1201 | *pExt = "." + *pExt; |
| 1202 | } |
| 1203 | |
| 1204 | return true; |
| 1205 | #endif |
| 1206 | } |
| 1207 | |
| 1208 | inline bool is_path_separator(char c) |
| 1209 | { |
| 1210 | #ifdef _WIN32 |
| 1211 | return (c == '/') || (c == '\\'); |
| 1212 | #else |
| 1213 | return (c == '/'); |
| 1214 | #endif |
| 1215 | } |
| 1216 | |
| 1217 | inline bool is_drive_separator(char c) |
| 1218 | { |
| 1219 | #ifdef _WIN32 |
| 1220 | return (c == ':'); |
| 1221 | #else |
| 1222 | (void)c; |
| 1223 | return false; |
| 1224 | #endif |
| 1225 | } |
| 1226 | |
| 1227 | inline void string_combine_path(std::string &dst, const char *p, const char *q) |
| 1228 | { |
| 1229 | std::string temp(p); |
| 1230 | if (temp.size() && !is_path_separator(q[0])) |
| 1231 | { |
| 1232 | if (!is_path_separator(temp.back())) |
| 1233 | temp.append(1, BASISU_PATH_SEPERATOR_CHAR); |
| 1234 | } |
| 1235 | temp += q; |
| 1236 | dst.swap(temp); |
| 1237 | } |
| 1238 | |
| 1239 | inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r) |
| 1240 | { |
| 1241 | string_combine_path(dst, p, q); |
| 1242 | string_combine_path(dst, dst.c_str(), r); |
| 1243 | } |
| 1244 | |
| 1245 | inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) |
| 1246 | { |
| 1247 | string_combine_path(dst, p, q, r); |
| 1248 | if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.')) |
| 1249 | dst.append(1, '.'); |
| 1250 | dst.append(pExt); |
| 1251 | } |
| 1252 | |
| 1253 | inline bool string_get_pathname(const char *p, std::string &path) |
| 1254 | { |
| 1255 | std::string temp_drive, temp_path; |
| 1256 | if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL)) |
| 1257 | return false; |
| 1258 | string_combine_path(path, temp_drive.c_str(), temp_path.c_str()); |
| 1259 | return true; |
| 1260 | } |
| 1261 | |
| 1262 | inline bool string_get_filename(const char *p, std::string &filename) |
| 1263 | { |
| 1264 | std::string temp_ext; |
| 1265 | if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext)) |
| 1266 | return false; |
| 1267 | filename += temp_ext; |
| 1268 | return true; |
| 1269 | } |
| 1270 | |
| 1271 | class rand |
| 1272 | { |
| 1273 | std::mt19937 m_mt; |
| 1274 | |
| 1275 | public: |
| 1276 | rand() { } |
| 1277 | |
| 1278 | rand(uint32_t s) { seed(s); } |
| 1279 | void seed(uint32_t s) { m_mt.seed(s); } |
| 1280 | |
| 1281 | // between [l,h] |
| 1282 | int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); } |
| 1283 | |
| 1284 | uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); } |
| 1285 | |
| 1286 | bool bit() { return irand(0, 1) == 1; } |
| 1287 | |
| 1288 | uint8_t byte() { return static_cast<uint8_t>(urand32()); } |
| 1289 | |
| 1290 | // between [l,h) |
| 1291 | float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); } |
| 1292 | |
| 1293 | float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); } |
| 1294 | }; |
| 1295 | |
| 1296 | class priority_queue |
| 1297 | { |
| 1298 | public: |
| 1299 | priority_queue() : |
| 1300 | m_size(0) |
| 1301 | { |
| 1302 | } |
| 1303 | |
| 1304 | void clear() |
| 1305 | { |
| 1306 | m_heap.clear(); |
| 1307 | m_size = 0; |
| 1308 | } |
| 1309 | |
| 1310 | void init(uint32_t max_entries, uint32_t first_index, float first_priority) |
| 1311 | { |
| 1312 | m_heap.resize(max_entries + 1); |
| 1313 | m_heap[1].m_index = first_index; |
| 1314 | m_heap[1].m_priority = first_priority; |
| 1315 | m_size = 1; |
| 1316 | } |
| 1317 | |
| 1318 | inline uint32_t size() const { return m_size; } |
| 1319 | |
| 1320 | inline uint32_t get_top_index() const { return m_heap[1].m_index; } |
| 1321 | inline float get_top_priority() const { return m_heap[1].m_priority; } |
| 1322 | |
| 1323 | inline void delete_top() |
| 1324 | { |
| 1325 | assert(m_size > 0); |
| 1326 | m_heap[1] = m_heap[m_size]; |
| 1327 | m_size--; |
| 1328 | if (m_size) |
| 1329 | down_heap(1); |
| 1330 | } |
| 1331 | |
| 1332 | inline void add_heap(uint32_t index, float priority) |
| 1333 | { |
| 1334 | m_size++; |
| 1335 | |
| 1336 | uint32_t k = m_size; |
| 1337 | |
| 1338 | if (m_size >= m_heap.size()) |
| 1339 | m_heap.resize(m_size + 1); |
| 1340 | |
| 1341 | for (;;) |
| 1342 | { |
| 1343 | uint32_t parent_index = k >> 1; |
| 1344 | if ((!parent_index) || (m_heap[parent_index].m_priority > priority)) |
| 1345 | break; |
| 1346 | m_heap[k] = m_heap[parent_index]; |
| 1347 | k = parent_index; |
| 1348 | } |
| 1349 | |
| 1350 | m_heap[k].m_index = index; |
| 1351 | m_heap[k].m_priority = priority; |
| 1352 | } |
| 1353 | |
| 1354 | private: |
| 1355 | struct entry |
| 1356 | { |
| 1357 | uint32_t m_index; |
| 1358 | float m_priority; |
| 1359 | }; |
| 1360 | |
| 1361 | basisu::vector<entry> m_heap; |
| 1362 | uint32_t m_size; |
| 1363 | |
| 1364 | // Push the entry at heap_index down the heap |
| 1365 | inline void down_heap(uint32_t heap_index) |
| 1366 | { |
| 1367 | uint32_t orig_index = m_heap[heap_index].m_index; |
| 1368 | const float orig_priority = m_heap[heap_index].m_priority; |
| 1369 | |
| 1370 | uint32_t child_index; |
| 1371 | while ((child_index = (heap_index << 1)) <= m_size) |
| 1372 | { |
| 1373 | if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index; |
| 1374 | if (orig_priority > m_heap[child_index].m_priority) |
| 1375 | break; |
| 1376 | m_heap[heap_index] = m_heap[child_index]; |
| 1377 | heap_index = child_index; |
| 1378 | } |
| 1379 | |
| 1380 | m_heap[heap_index].m_index = orig_index; |
| 1381 | m_heap[heap_index].m_priority = orig_priority; |
| 1382 | } |
| 1383 | }; |
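| | |
| | // Example (illustrative sketch): the queue above is a max-heap keyed on float priority; indices are |
| | // caller-defined handles and only the priorities are compared. |
| | // |
| | //   priority_queue pq; |
| | //   pq.init(16, 0, 1.5f);   // room for 16 entries; first entry is index 0 with priority 1.5 |
| | //   pq.add_heap(1, 3.0f); |
| | //   pq.add_heap(2, 0.5f); |
| | //   // pq.get_top_index() == 1 (it has the largest priority); pq.delete_top() removes it |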
| 1384 | |
| 1385 | // Tree structured vector quantization (TSVQ) |
| 1386 | |
| 1387 | template <typename TrainingVectorType> |
| 1388 | class tree_vector_quant |
| 1389 | { |
| 1390 | public: |
| 1391 | typedef TrainingVectorType training_vec_type; |
| 1392 | typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight; |
| 1393 | typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs; |
| 1394 | |
| 1395 | tree_vector_quant() : |
| 1396 | m_next_codebook_index(0) |
| 1397 | { |
| 1398 | } |
| 1399 | |
| 1400 | void clear() |
| 1401 | { |
| 1402 | clear_vector(m_training_vecs); |
| 1403 | clear_vector(m_nodes); |
| 1404 | m_next_codebook_index = 0; |
| 1405 | } |
| 1406 | |
| 1407 | void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); } |
| 1408 | |
| 1409 | size_t get_total_training_vecs() const { return m_training_vecs.size(); } |
| 1410 | const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; } |
| 1411 | array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } |
| 1412 | |
| 1413 | void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const |
| 1414 | { |
| 1415 | for (uint32_t i = 0; i < m_nodes.size(); i++) |
| 1416 | { |
| 1417 | const tsvq_node &n = m_nodes[i]; |
| 1418 | if (!n.is_leaf()) |
| 1419 | continue; |
| 1420 | |
| 1421 | codebook.resize(codebook.size() + 1); |
| 1422 | codebook.back() = n.m_training_vecs; |
| 1423 | } |
| 1424 | } |
| 1425 | |
| 1426 | void retrieve(basisu::vector<TrainingVectorType> &codebook) const |
| 1427 | { |
| 1428 | for (uint32_t i = 0; i < m_nodes.size(); i++) |
| 1429 | { |
| 1430 | const tsvq_node &n = m_nodes[i]; |
| 1431 | if (!n.is_leaf()) |
| 1432 | continue; |
| 1433 | |
| 1434 | codebook.resize(codebook.size() + 1); |
| 1435 | codebook.back() = n.m_origin; |
| 1436 | } |
| 1437 | } |
| 1438 | |
| 1439 | void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const |
| 1440 | { |
| 1441 | uint_vec node_stack; |
| 1442 | node_stack.reserve(512); |
| 1443 | |
| 1444 | codebook.resize(0); |
| 1445 | codebook.reserve(max_clusters); |
| 1446 | |
| 1447 | uint32_t node_index = 0; |
| 1448 | |
| 1449 | while (true) |
| 1450 | { |
| 1451 | const tsvq_node& cur = m_nodes[node_index]; |
| 1452 | |
| 1453 | if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters)) |
| 1454 | { |
| 1455 | codebook.resize(codebook.size() + 1); |
| 1456 | codebook.back() = cur.m_training_vecs; |
| 1457 | |
| 1458 | if (node_stack.empty()) |
| 1459 | break; |
| 1460 | |
| 1461 | node_index = node_stack.back(); |
| 1462 | node_stack.pop_back(); |
| 1463 | continue; |
| 1464 | } |
| 1465 | |
| 1466 | node_stack.push_back(cur.m_right_index); |
| 1467 | node_index = cur.m_left_index; |
| 1468 | } |
| 1469 | } |
| 1470 | |
| 1471 | bool generate(uint32_t max_size) |
| 1472 | { |
| 1473 | if (!m_training_vecs.size()) |
| 1474 | return false; |
| 1475 | |
| 1476 | m_next_codebook_index = 0; |
| 1477 | |
| 1478 | clear_vector(m_nodes); |
| 1479 | m_nodes.reserve(max_size * 2 + 1); |
| 1480 | |
| 1481 | m_nodes.push_back(prepare_root()); |
| 1482 | |
| 1483 | priority_queue var_heap; |
| 1484 | var_heap.init(max_size, 0, m_nodes[0].m_var); |
| 1485 | |
| 1486 | basisu::vector<uint32_t> l_children, r_children; |
| 1487 | |
| 1488 | // Now split the worst nodes |
| 1489 | l_children.reserve(m_training_vecs.size() + 1); |
| 1490 | r_children.reserve(m_training_vecs.size() + 1); |
| 1491 | |
| 1492 | uint32_t total_leaf_nodes = 1; |
| 1493 | |
| 1494 | //interval_timer tm; |
| 1495 | //tm.start(); |
| 1496 | |
| 1497 | while ((var_heap.size()) && (total_leaf_nodes < max_size)) |
| 1498 | { |
| 1499 | const uint32_t node_index = var_heap.get_top_index(); |
| 1500 | const tsvq_node &node = m_nodes[node_index]; |
| 1501 | |
| 1502 | assert(node.m_var == var_heap.get_top_priority()); |
| 1503 | assert(node.is_leaf()); |
| 1504 | |
| 1505 | var_heap.delete_top(); |
| 1506 | |
| 1507 | if (node.m_training_vecs.size() > 1) |
| 1508 | { |
| 1509 | if (split_node(node_index, var_heap, l_children, r_children)) |
| 1510 | { |
| 1511 | // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total. |
| 1512 | total_leaf_nodes += 1; |
| 1513 | } |
| 1514 | } |
| 1515 | } |
| 1516 | |
| 1517 | //debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs()); |
| 1518 | |
| 1519 | return true; |
| 1520 | } |
| 1521 | |
| 1522 | private: |
| 1523 | class tsvq_node |
| 1524 | { |
| 1525 | public: |
| 1526 | inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { } |
| 1527 | |
| 1528 | // Note: vecs is swapped into this node rather than copied, so the caller's vector is effectively erased. |
| 1529 | inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } |
| 1530 | |
| 1531 | inline bool is_leaf() const { return m_left_index < 0; } |
| 1532 | |
| 1533 | float m_var; |
| 1534 | uint64_t m_weight; |
| 1535 | TrainingVectorType m_origin; |
| 1536 | int32_t m_left_index, m_right_index; |
| 1537 | basisu::vector<uint32_t> m_training_vecs; |
| 1538 | int m_codebook_index; |
| 1539 | }; |
| 1540 | |
| 1541 | typedef basisu::vector<tsvq_node> tsvq_node_vec; |
| 1542 | tsvq_node_vec m_nodes; |
| 1543 | |
| 1544 | array_of_weighted_training_vecs m_training_vecs; |
| 1545 | |
| 1546 | uint32_t m_next_codebook_index; |
| 1547 | |
| 1548 | tsvq_node prepare_root() const |
| 1549 | { |
| 1550 | double ttsum = 0.0f; |
| 1551 | |
| 1552 | // Prepare root node containing all training vectors |
| 1553 | tsvq_node root; |
| 1554 | root.m_training_vecs.reserve(m_training_vecs.size()); |
| 1555 | |
| 1556 | for (uint32_t i = 0; i < m_training_vecs.size(); i++) |
| 1557 | { |
| 1558 | const TrainingVectorType &v = m_training_vecs[i].first; |
| 1559 | const uint64_t weight = m_training_vecs[i].second; |
| 1560 | |
| 1561 | root.m_training_vecs.push_back(i); |
| 1562 | |
| 1563 | root.m_origin += (v * static_cast<float>(weight)); |
| 1564 | root.m_weight += weight; |
| 1565 | |
| 1566 | ttsum += v.dot(v) * weight; |
| 1567 | } |
| 1568 | |
| 1569 | root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight)); |
| 1570 | |
| 1571 | root.m_origin *= (1.0f / root.m_weight); |
| 1572 | |
| 1573 | return root; |
| 1574 | } |
| 1575 | |
| 1576 | bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children) |
| 1577 | { |
| 1578 | TrainingVectorType l_child_org, r_child_org; |
| 1579 | uint64_t l_weight = 0, r_weight = 0; |
| 1580 | float l_var = 0.0f, r_var = 0.0f; |
| 1581 | |
| 1582 | // Compute initial left/right child origins |
| 1583 | if (!prep_split(m_nodes[node_index], l_child_org, r_child_org)) |
| 1584 | return false; |
| 1585 | |
| 1586 | // Use k-means iterations to refine these children vectors |
| 1587 | if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children)) |
| 1588 | return false; |
| 1589 | |
| 1590 | // Create children |
| 1591 | const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1; |
| 1592 | |
| 1593 | m_nodes[node_index].m_left_index = l_child_index; |
| 1594 | m_nodes[node_index].m_right_index = r_child_index; |
| 1595 | |
| 1596 | m_nodes[node_index].m_codebook_index = m_next_codebook_index; |
| 1597 | m_next_codebook_index++; |
| 1598 | |
| 1599 | m_nodes.resize(m_nodes.size() + 2); |
| 1600 | |
| 1601 | tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index]; |
| 1602 | |
| 1603 | l_child.set(l_child_org, l_weight, l_var, l_children); |
| 1604 | r_child.set(r_child_org, r_weight, r_var, r_children); |
| 1605 | |
| 1606 | if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1)) |
| 1607 | { |
| 1608 | TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first); |
| 1609 | |
| 1610 | for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++) |
| 1611 | { |
| 1612 | if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first)) |
| 1613 | { |
| 1614 | l_child.m_var = 1e-4f; |
| 1615 | break; |
| 1616 | } |
| 1617 | } |
| 1618 | } |
| 1619 | |
| 1620 | if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1)) |
| 1621 | { |
| 1622 | TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first); |
| 1623 | |
| 1624 | for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++) |
| 1625 | { |
| 1626 | if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first)) |
| 1627 | { |
| 1628 | r_child.m_var = 1e-4f; |
| 1629 | break; |
| 1630 | } |
| 1631 | } |
| 1632 | } |
| 1633 | |
| 1634 | if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1)) |
| 1635 | var_heap.add_heap(l_child_index, l_child.m_var); |
| 1636 | |
| 1637 | if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1)) |
| 1638 | var_heap.add_heap(r_child_index, r_child.m_var); |
| 1639 | |
| 1640 | return true; |
| 1641 | } |
| 1642 | |
| 1643 | TrainingVectorType compute_split_axis(const tsvq_node &node) const |
| 1644 | { |
| 1645 | const uint32_t N = TrainingVectorType::num_elements; |
| 1646 | |
| 1647 | matrix<N, N, float> cmatrix; |
| 1648 | |
| 1649 | if ((N != 16) || (!g_cpu_supports_sse41)) |
| 1650 | { |
| 1651 | cmatrix.set_zero(); |
| 1652 | |
| 1653 | // Compute covariance matrix from weighted input vectors |
| 1654 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) |
| 1655 | { |
| 1656 | const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin); |
| 1657 | const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v); |
| 1658 | |
| 1659 | for (uint32_t x = 0; x < N; x++) |
| 1660 | for (uint32_t y = x; y < N; y++) |
| 1661 | cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y]; |
| 1662 | } |
| 1663 | } |
| 1664 | else |
| 1665 | { |
| 1666 | #if BASISU_SUPPORT_SSE |
| 1667 | // Specialize the case with 16x16 matrices, which are quite expensive without SIMD. |
| 1668 | // This SSE function takes pointers to void types, so do some sanity checks. |
| 1669 | assert(sizeof(TrainingVectorType) == sizeof(float) * 16); |
| 1670 | assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>)); |
| 1671 | update_covar_matrix_16x16_sse41(node.m_training_vecs.size(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix); |
| 1672 | #endif |
| 1673 | } |
| 1674 | |
| 1675 | const float renorm_scale = 1.0f / node.m_weight; |
| 1676 | |
| 1677 | for (uint32_t x = 0; x < N; x++) |
| 1678 | for (uint32_t y = x; y < N; y++) |
| 1679 | cmatrix[x][y] *= renorm_scale; |
| 1680 | |
| 1681 | // Diagonal flip |
| 1682 | for (uint32_t x = 0; x < (N - 1); x++) |
| 1683 | for (uint32_t y = x + 1; y < N; y++) |
| 1684 | cmatrix[y][x] = cmatrix[x][y]; |
| 1685 | |
| 1686 | return compute_pca_from_covar<N, TrainingVectorType>(cmatrix); |
| 1687 | } |
| 1688 | |
| 1689 | bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const |
| 1690 | { |
| 1691 | //const uint32_t N = TrainingVectorType::num_elements; |
| 1692 | |
| 1693 | if (2 == node.m_training_vecs.size()) |
| 1694 | { |
| 1695 | l_child_result = m_training_vecs[node.m_training_vecs[0]].first; |
| 1696 | r_child_result = m_training_vecs[node.m_training_vecs[1]].first; |
| 1697 | return true; |
| 1698 | } |
| 1699 | |
| 1700 | TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f); |
| 1701 | double l_weight = 0.0f, r_weight = 0.0f; |
| 1702 | |
| 1703 | // Compute initial left/right children |
| 1704 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) |
| 1705 | { |
| 1706 | const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second; |
| 1707 | |
| 1708 | const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; |
| 1709 | |
| 1710 | double t = (v - node.m_origin).dot(axis); |
| 1711 | if (t >= 0.0f) |
| 1712 | { |
| 1713 | r_child += v * weight; |
| 1714 | r_weight += weight; |
| 1715 | } |
| 1716 | else |
| 1717 | { |
| 1718 | l_child += v * weight; |
| 1719 | l_weight += weight; |
| 1720 | } |
| 1721 | } |
| 1722 | |
| 1723 | if ((l_weight > 0.0f) && (r_weight > 0.0f)) |
| 1724 | { |
| 1725 | l_child_result = l_child * static_cast<float>(1.0f / l_weight); |
| 1726 | r_child_result = r_child * static_cast<float>(1.0f / r_weight); |
| 1727 | } |
| 1728 | else |
| 1729 | { |
				// Degenerate split: every vector landed on one side of the PCA hyperplane. Fall back to
				// sorting the vectors along the axis with the largest extent and splitting at the median.
				TrainingVectorType l(1e+20f);
| 1731 | TrainingVectorType h(-1e+20f); |
| 1732 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) |
| 1733 | { |
| 1734 | const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; |
| 1735 | |
| 1736 | l = TrainingVectorType::component_min(l, v); |
| 1737 | h = TrainingVectorType::component_max(h, v); |
| 1738 | } |
| 1739 | |
| 1740 | TrainingVectorType r(h - l); |
| 1741 | |
| 1742 | float largest_axis_v = 0.0f; |
| 1743 | int largest_axis_index = -1; |
| 1744 | for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++) |
| 1745 | { |
| 1746 | if (r[i] > largest_axis_v) |
| 1747 | { |
| 1748 | largest_axis_v = r[i]; |
| 1749 | largest_axis_index = i; |
| 1750 | } |
| 1751 | } |
| 1752 | |
| 1753 | if (largest_axis_index < 0) |
| 1754 | return false; |
| 1755 | |
| 1756 | basisu::vector<float> keys(node.m_training_vecs.size()); |
| 1757 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) |
| 1758 | keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index]; |
| 1759 | |
| 1760 | uint_vec indices(node.m_training_vecs.size()); |
| 1761 | indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]); |
| 1762 | |
| 1763 | l_child.set_zero(); |
| 1764 | l_weight = 0; |
| 1765 | |
| 1766 | r_child.set_zero(); |
| 1767 | r_weight = 0; |
| 1768 | |
| 1769 | const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2; |
| 1770 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) |
| 1771 | { |
					const uint32_t training_vec_index = node.m_training_vecs[indices[i]];

					const float weight = (float)m_training_vecs[training_vec_index].second;

					const TrainingVectorType& v = m_training_vecs[training_vec_index].first;
| 1775 | |
| 1776 | if (i < half_index) |
| 1777 | { |
| 1778 | l_child += v * weight; |
| 1779 | l_weight += weight; |
| 1780 | } |
| 1781 | else |
| 1782 | { |
| 1783 | r_child += v * weight; |
| 1784 | r_weight += weight; |
| 1785 | } |
| 1786 | } |
| 1787 | |
| 1788 | if ((l_weight > 0.0f) && (r_weight > 0.0f)) |
| 1789 | { |
| 1790 | l_child_result = l_child * static_cast<float>(1.0f / l_weight); |
| 1791 | r_child_result = r_child * static_cast<float>(1.0f / r_weight); |
| 1792 | } |
| 1793 | else |
| 1794 | { |
| 1795 | l_child_result = l; |
| 1796 | r_child_result = h; |
| 1797 | } |
| 1798 | } |
| 1799 | |
| 1800 | return true; |
| 1801 | } |
| 1802 | |
| 1803 | bool refine_split(const tsvq_node &node, |
| 1804 | TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children, |
| 1805 | TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const |
| 1806 | { |
| 1807 | l_children.reserve(node.m_training_vecs.size()); |
| 1808 | r_children.reserve(node.m_training_vecs.size()); |
| 1809 | |
| 1810 | float prev_total_variance = 1e+10f; |
| 1811 | |
| 1812 | // Refine left/right children locations using k-means iterations |
| 1813 | const uint32_t cMaxIters = 6; |
| 1814 | for (uint32_t iter = 0; iter < cMaxIters; iter++) |
| 1815 | { |
| 1816 | l_children.resize(0); |
| 1817 | r_children.resize(0); |
| 1818 | |
| 1819 | TrainingVectorType new_l_child(cZero), new_r_child(cZero); |
| 1820 | |
| 1821 | double l_ttsum = 0.0f, r_ttsum = 0.0f; |
| 1822 | |
| 1823 | l_weight = 0; |
| 1824 | r_weight = 0; |
| 1825 | |
| 1826 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) |
| 1827 | { |
| 1828 | const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; |
| 1829 | const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; |
| 1830 | |
| 1831 | double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v); |
| 1832 | |
| 1833 | if (left_dist2 >= right_dist2) |
| 1834 | { |
| 1835 | new_r_child += (v * static_cast<float>(weight)); |
| 1836 | r_weight += weight; |
| 1837 | |
| 1838 | r_ttsum += weight * v.dot(v); |
| 1839 | r_children.push_back(node.m_training_vecs[i]); |
| 1840 | } |
| 1841 | else |
| 1842 | { |
| 1843 | new_l_child += (v * static_cast<float>(weight)); |
| 1844 | l_weight += weight; |
| 1845 | |
| 1846 | l_ttsum += weight * v.dot(v); |
| 1847 | l_children.push_back(node.m_training_vecs[i]); |
| 1848 | } |
| 1849 | } |
| 1850 | |
| 1851 | // Node is unsplittable using the above algorithm - try something else to split it up. |
| 1852 | if ((!l_weight) || (!r_weight)) |
| 1853 | { |
| 1854 | l_children.resize(0); |
| 1855 | new_l_child.set(0.0f); |
| 1856 | l_ttsum = 0.0f; |
| 1857 | l_weight = 0; |
| 1858 | |
| 1859 | r_children.resize(0); |
| 1860 | new_r_child.set(0.0f); |
| 1861 | r_ttsum = 0.0f; |
| 1862 | r_weight = 0; |
| 1863 | |
| 1864 | TrainingVectorType firstVec; |
| 1865 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) |
| 1866 | { |
| 1867 | const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; |
| 1868 | const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; |
| 1869 | |
| 1870 | if ((!i) || (v == firstVec)) |
| 1871 | { |
| 1872 | firstVec = v; |
| 1873 | |
| 1874 | new_r_child += (v * static_cast<float>(weight)); |
| 1875 | r_weight += weight; |
| 1876 | |
| 1877 | r_ttsum += weight * v.dot(v); |
| 1878 | r_children.push_back(node.m_training_vecs[i]); |
| 1879 | } |
| 1880 | else |
| 1881 | { |
| 1882 | new_l_child += (v * static_cast<float>(weight)); |
| 1883 | l_weight += weight; |
| 1884 | |
| 1885 | l_ttsum += weight * v.dot(v); |
| 1886 | l_children.push_back(node.m_training_vecs[i]); |
| 1887 | } |
| 1888 | } |
| 1889 | |
| 1890 | if ((!l_weight) || (!r_weight)) |
| 1891 | return false; |
| 1892 | } |
| 1893 | |
				// Weighted within-cluster scatter: sum_i w_i*|v_i|^2 - |sum_i w_i*v_i|^2 / W
				// (new_l_child/new_r_child still hold the weighted sums here; they're renormalized by 1/W just below).
				l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
| 1895 | r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight)); |
| 1896 | |
| 1897 | new_l_child *= (1.0f / l_weight); |
| 1898 | new_r_child *= (1.0f / r_weight); |
| 1899 | |
| 1900 | l_child = new_l_child; |
| 1901 | r_child = new_r_child; |
| 1902 | |
| 1903 | float total_var = l_var + r_var; |
| 1904 | const float cGiveupVariance = .00001f; |
| 1905 | if (total_var < cGiveupVariance) |
| 1906 | break; |
| 1907 | |
| 1908 | // Check to see if the variance has settled |
| 1909 | const float cVarianceDeltaThresh = .00125f; |
| 1910 | if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh) |
| 1911 | break; |
| 1912 | |
| 1913 | prev_total_variance = total_var; |
| 1914 | } |
| 1915 | |
| 1916 | return true; |
| 1917 | } |
| 1918 | }; |
| 1919 | |
| 1920 | struct weighted_block_group |
| 1921 | { |
| 1922 | uint64_t m_total_weight; |
| 1923 | uint_vec m_indices; |
| 1924 | }; |
| 1925 | |
| 1926 | template<typename Quantizer> |
| 1927 | bool generate_hierarchical_codebook_threaded_internal(Quantizer& q, |
| 1928 | uint32_t max_codebook_size, uint32_t max_parent_codebook_size, |
| 1929 | basisu::vector<uint_vec>& codebook, |
| 1930 | basisu::vector<uint_vec>& parent_codebook, |
| 1931 | uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool) |
| 1932 | { |
| 1933 | codebook.resize(0); |
| 1934 | parent_codebook.resize(0); |
| 1935 | |
| 1936 | if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16)) |
| 1937 | { |
| 1938 | if (!q.generate(max_codebook_size)) |
| 1939 | return false; |
| 1940 | |
| 1941 | q.retrieve(codebook); |
| 1942 | |
| 1943 | if (max_parent_codebook_size) |
| 1944 | q.retrieve(max_parent_codebook_size, parent_codebook); |
| 1945 | |
| 1946 | return true; |
| 1947 | } |
| 1948 | |
| 1949 | const uint32_t cMaxThreads = 16; |
| 1950 | if (max_threads > cMaxThreads) |
| 1951 | max_threads = cMaxThreads; |
| 1952 | |
| 1953 | if (!q.generate(max_threads)) |
| 1954 | return false; |
| 1955 | |
| 1956 | basisu::vector<uint_vec> initial_codebook; |
| 1957 | |
| 1958 | q.retrieve(initial_codebook); |
| 1959 | |
| 1960 | if (initial_codebook.size() < max_threads) |
| 1961 | { |
| 1962 | codebook = initial_codebook; |
| 1963 | |
| 1964 | if (max_parent_codebook_size) |
| 1965 | q.retrieve(max_parent_codebook_size, parent_codebook); |
| 1966 | |
| 1967 | return true; |
| 1968 | } |
| 1969 | |
| 1970 | Quantizer quantizers[cMaxThreads]; |
| 1971 | |
| 1972 | bool success_flags[cMaxThreads]; |
| 1973 | clear_obj(success_flags); |
| 1974 | |
| 1975 | basisu::vector<uint_vec> local_clusters[cMaxThreads]; |
| 1976 | basisu::vector<uint_vec> local_parent_clusters[cMaxThreads]; |
| 1977 | |
| 1978 | for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) |
| 1979 | { |
| 1980 | #ifndef __EMSCRIPTEN__ |
| 1981 | pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] { |
| 1982 | #endif |
| 1983 | |
| 1984 | Quantizer& lq = quantizers[thread_iter]; |
| 1985 | uint_vec& cluster_indices = initial_codebook[thread_iter]; |
| 1986 | |
| 1987 | uint_vec local_to_global(cluster_indices.size()); |
| 1988 | |
| 1989 | for (uint32_t i = 0; i < cluster_indices.size(); i++) |
| 1990 | { |
| 1991 | const uint32_t global_training_vec_index = cluster_indices[i]; |
| 1992 | local_to_global[i] = global_training_vec_index; |
| 1993 | |
| 1994 | lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second); |
| 1995 | } |
| 1996 | |
| 1997 | const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs(); |
| 1998 | |
| 1999 | success_flags[thread_iter] = lq.generate(max_clusters); |
| 2000 | |
| 2001 | if (success_flags[thread_iter]) |
| 2002 | { |
| 2003 | lq.retrieve(local_clusters[thread_iter]); |
| 2004 | |
| 2005 | for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++) |
| 2006 | { |
| 2007 | for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++) |
| 2008 | local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]]; |
| 2009 | } |
| 2010 | |
| 2011 | if (max_parent_codebook_size) |
| 2012 | { |
| 2013 | lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]); |
| 2014 | |
| 2015 | for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++) |
| 2016 | { |
| 2017 | for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++) |
| 2018 | local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]]; |
| 2019 | } |
| 2020 | } |
| 2021 | } |
| 2022 | |
| 2023 | #ifndef __EMSCRIPTEN__ |
| 2024 | } ); |
| 2025 | #endif |
| 2026 | |
| 2027 | } // thread_iter |
| 2028 | |
| 2029 | #ifndef __EMSCRIPTEN__ |
| 2030 | pJob_pool->wait_for_all(); |
| 2031 | #endif |
| 2032 | |
| 2033 | uint32_t total_clusters = 0, total_parent_clusters = 0; |
| 2034 | |
| 2035 | for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++) |
| 2036 | { |
| 2037 | if (!success_flags[thread_iter]) |
| 2038 | return false; |
| 2039 | total_clusters += (uint32_t)local_clusters[thread_iter].size(); |
| 2040 | total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size(); |
| 2041 | } |
| 2042 | |
| 2043 | codebook.reserve(total_clusters); |
| 2044 | parent_codebook.reserve(total_parent_clusters); |
| 2045 | |
| 2046 | for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) |
| 2047 | { |
| 2048 | for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++) |
| 2049 | { |
| 2050 | codebook.resize(codebook.size() + 1); |
| 2051 | codebook.back().swap(local_clusters[thread_iter][j]); |
| 2052 | } |
| 2053 | |
| 2054 | for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++) |
| 2055 | { |
| 2056 | parent_codebook.resize(parent_codebook.size() + 1); |
| 2057 | parent_codebook.back().swap(local_parent_clusters[thread_iter][j]); |
| 2058 | } |
| 2059 | } |
| 2060 | |
| 2061 | return true; |
| 2062 | } |
| 2063 | |
| 2064 | template<typename Quantizer> |
| 2065 | bool generate_hierarchical_codebook_threaded(Quantizer& q, |
| 2066 | uint32_t max_codebook_size, uint32_t max_parent_codebook_size, |
| 2067 | basisu::vector<uint_vec>& codebook, |
| 2068 | basisu::vector<uint_vec>& parent_codebook, |
| 2069 | uint32_t max_threads, job_pool *pJob_pool, |
| 2070 | bool even_odd_input_pairs_equal) |
| 2071 | { |
| 2072 | typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher; |
| 2073 | |
| 2074 | typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, |
| 2075 | training_vec_bit_hasher> group_hash; |
| 2076 | |
| 2077 | //interval_timer tm; |
| 2078 | //tm.start(); |
| 2079 | |
| 2080 | group_hash unique_vecs; |
| 2081 | |
| 2082 | unique_vecs.reserve(20000); |
| 2083 | |
| 2084 | weighted_block_group g; |
| 2085 | |
| 2086 | if (even_odd_input_pairs_equal) |
| 2087 | { |
| 2088 | g.m_indices.resize(2); |
| 2089 | |
| 2090 | assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0); |
| 2091 | |
| 2092 | for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2) |
| 2093 | { |
| 2094 | assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first); |
| 2095 | |
| 2096 | g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second; |
| 2097 | g.m_indices[0] = i; |
| 2098 | g.m_indices[1] = i + 1; |
| 2099 | |
| 2100 | auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g)); |
| 2101 | |
| 2102 | if (!ins_res.second) |
| 2103 | { |
| 2104 | (ins_res.first)->second.m_total_weight += g.m_total_weight; |
| 2105 | (ins_res.first)->second.m_indices.push_back(i); |
| 2106 | (ins_res.first)->second.m_indices.push_back(i + 1); |
| 2107 | } |
| 2108 | } |
| 2109 | } |
| 2110 | else |
| 2111 | { |
| 2112 | g.m_indices.resize(1); |
| 2113 | |
| 2114 | for (uint32_t i = 0; i < q.get_training_vecs().size(); i++) |
| 2115 | { |
| 2116 | g.m_total_weight = q.get_training_vecs()[i].second; |
| 2117 | g.m_indices[0] = i; |
| 2118 | |
| 2119 | auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g)); |
| 2120 | |
| 2121 | if (!ins_res.second) |
| 2122 | { |
| 2123 | (ins_res.first)->second.m_total_weight += g.m_total_weight; |
| 2124 | (ins_res.first)->second.m_indices.push_back(i); |
| 2125 | } |
| 2126 | } |
| 2127 | } |
| 2128 | |
| 2129 | //debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs()); |
| 2130 | debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n" , q.get_total_training_vecs(), (uint32_t)unique_vecs.size()); |
| 2131 | |
| 2132 | Quantizer group_quant; |
| 2133 | typedef typename group_hash::const_iterator group_hash_const_iter; |
| 2134 | basisu::vector<group_hash_const_iter> unique_vec_iters; |
| 2135 | unique_vec_iters.reserve(unique_vecs.size()); |
| 2136 | |
| 2137 | for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter) |
| 2138 | { |
| 2139 | group_quant.add_training_vec(iter->first, iter->second.m_total_weight); |
| 2140 | unique_vec_iters.push_back(iter); |
| 2141 | } |
| 2142 | |
| 2143 | bool limit_clusterizers = true; |
| 2144 | if (unique_vecs.size() <= max_codebook_size) |
| 2145 | limit_clusterizers = false; |
| 2146 | |
| 2147 | debug_printf("Limit clusterizers: %u\n" , limit_clusterizers); |
| 2148 | |
| 2149 | basisu::vector<uint_vec> group_codebook, group_parent_codebook; |
| 2150 | bool status = generate_hierarchical_codebook_threaded_internal(group_quant, |
| 2151 | max_codebook_size, max_parent_codebook_size, |
| 2152 | group_codebook, |
| 2153 | group_parent_codebook, |
| 2154 | (unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool); |
| 2155 | |
| 2156 | if (!status) |
| 2157 | return false; |
| 2158 | |
| 2159 | codebook.resize(0); |
| 2160 | for (uint32_t i = 0; i < group_codebook.size(); i++) |
| 2161 | { |
| 2162 | codebook.resize(codebook.size() + 1); |
| 2163 | |
| 2164 | for (uint32_t j = 0; j < group_codebook[i].size(); j++) |
| 2165 | { |
| 2166 | const uint32_t group_index = group_codebook[i][j]; |
| 2167 | |
| 2168 | typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; |
| 2169 | const uint_vec& training_vec_indices = group_iter->second.m_indices; |
| 2170 | |
| 2171 | append_vector(codebook.back(), training_vec_indices); |
| 2172 | } |
| 2173 | } |
| 2174 | |
| 2175 | parent_codebook.resize(0); |
| 2176 | for (uint32_t i = 0; i < group_parent_codebook.size(); i++) |
| 2177 | { |
| 2178 | parent_codebook.resize(parent_codebook.size() + 1); |
| 2179 | |
| 2180 | for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++) |
| 2181 | { |
| 2182 | const uint32_t group_index = group_parent_codebook[i][j]; |
| 2183 | |
| 2184 | typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; |
| 2185 | const uint_vec& training_vec_indices = group_iter->second.m_indices; |
| 2186 | |
| 2187 | append_vector(parent_codebook.back(), training_vec_indices); |
| 2188 | } |
| 2189 | } |
| 2190 | |
| 2191 | return true; |
| 2192 | } |
| 2193 | |
| 2194 | // Canonical Huffman coding |
| 2195 | |
| 2196 | class histogram |
| 2197 | { |
| 2198 | basisu::vector<uint32_t> m_hist; |
| 2199 | |
| 2200 | public: |
| 2201 | histogram(uint32_t size = 0) { init(size); } |
| 2202 | |
| 2203 | void clear() |
| 2204 | { |
| 2205 | clear_vector(m_hist); |
| 2206 | } |
| 2207 | |
| 2208 | void init(uint32_t size) |
| 2209 | { |
| 2210 | m_hist.resize(0); |
| 2211 | m_hist.resize(size); |
| 2212 | } |
| 2213 | |
| 2214 | inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); } |
| 2215 | |
| 2216 | inline const uint32_t &operator[] (uint32_t index) const |
| 2217 | { |
| 2218 | return m_hist[index]; |
| 2219 | } |
| 2220 | |
| 2221 | inline uint32_t &operator[] (uint32_t index) |
| 2222 | { |
| 2223 | return m_hist[index]; |
| 2224 | } |
| 2225 | |
| 2226 | inline void inc(uint32_t index) |
| 2227 | { |
| 2228 | m_hist[index]++; |
| 2229 | } |
| 2230 | |
| 2231 | uint64_t get_total() const |
| 2232 | { |
| 2233 | uint64_t total = 0; |
| 2234 | for (uint32_t i = 0; i < m_hist.size(); ++i) |
| 2235 | total += m_hist[i]; |
| 2236 | return total; |
| 2237 | } |
| 2238 | |
		// Returns the histogram's total Shannon information content in bits, i.e. the theoretical
		// minimum size of the counted symbols under an ideal entropy coder.
		double get_entropy() const
| 2240 | { |
| 2241 | double total = static_cast<double>(get_total()); |
| 2242 | if (total == 0.0f) |
| 2243 | return 0.0f; |
| 2244 | |
| 2245 | const double inv_total = 1.0f / total; |
| 2246 | const double neg_inv_log2 = -1.0f / log(2.0f); |
| 2247 | |
| 2248 | double e = 0.0f; |
| 2249 | for (uint32_t i = 0; i < m_hist.size(); i++) |
| 2250 | if (m_hist[i]) |
| 2251 | e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]); |
| 2252 | |
| 2253 | return e; |
| 2254 | } |
| 2255 | }; |
| 2256 | |
| 2257 | struct sym_freq |
| 2258 | { |
| 2259 | uint32_t m_key; |
| 2260 | uint16_t m_sym_index; |
| 2261 | }; |
| 2262 | |
| 2263 | sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); |
| 2264 | void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms); |
| 2265 | void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size); |
| 2266 | |
| 2267 | class huffman_encoding_table |
| 2268 | { |
| 2269 | public: |
| 2270 | huffman_encoding_table() |
| 2271 | { |
| 2272 | } |
| 2273 | |
| 2274 | void clear() |
| 2275 | { |
| 2276 | clear_vector(m_codes); |
| 2277 | clear_vector(m_code_sizes); |
| 2278 | } |
| 2279 | |
| 2280 | bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize) |
| 2281 | { |
| 2282 | return init(h.size(), &h[0], max_code_size); |
| 2283 | } |
| 2284 | |
| 2285 | bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); |
| 2286 | bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); |
| 2287 | |
| 2288 | inline const uint16_vec &get_codes() const { return m_codes; } |
| 2289 | inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } |
| 2290 | |
| 2291 | uint32_t get_total_used_codes() const |
| 2292 | { |
| 2293 | for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--) |
| 2294 | if (m_code_sizes[i]) |
| 2295 | return i + 1; |
| 2296 | return 0; |
| 2297 | } |
| 2298 | |
| 2299 | private: |
| 2300 | uint16_vec m_codes; |
| 2301 | uint8_vec m_code_sizes; |
| 2302 | }; |
| 2303 | |
| 2304 | class bitwise_coder |
| 2305 | { |
| 2306 | public: |
| 2307 | bitwise_coder() : |
| 2308 | m_bit_buffer(0), |
| 2309 | m_bit_buffer_size(0), |
| 2310 | m_total_bits(0) |
| 2311 | { |
| 2312 | } |
| 2313 | |
| 2314 | inline void clear() |
| 2315 | { |
| 2316 | clear_vector(m_bytes); |
| 2317 | m_bit_buffer = 0; |
| 2318 | m_bit_buffer_size = 0; |
| 2319 | m_total_bits = 0; |
| 2320 | } |
| 2321 | |
| 2322 | inline const uint8_vec &get_bytes() const { return m_bytes; } |
| 2323 | |
| 2324 | inline uint64_t get_total_bits() const { return m_total_bits; } |
| 2325 | inline void clear_total_bits() { m_total_bits = 0; } |
| 2326 | |
| 2327 | inline void init(uint32_t reserve_size = 1024) |
| 2328 | { |
| 2329 | m_bytes.reserve(reserve_size); |
| 2330 | m_bytes.resize(0); |
| 2331 | |
| 2332 | m_bit_buffer = 0; |
| 2333 | m_bit_buffer_size = 0; |
| 2334 | m_total_bits = 0; |
| 2335 | } |
| 2336 | |
| 2337 | inline uint32_t flush() |
| 2338 | { |
| 2339 | if (m_bit_buffer_size) |
| 2340 | { |
| 2341 | m_total_bits += 8 - (m_bit_buffer_size & 7); |
| 2342 | append_byte(static_cast<uint8_t>(m_bit_buffer)); |
| 2343 | |
| 2344 | m_bit_buffer = 0; |
| 2345 | m_bit_buffer_size = 0; |
| 2346 | |
| 2347 | return 8; |
| 2348 | } |
| 2349 | |
| 2350 | return 0; |
| 2351 | } |
| 2352 | |
| 2353 | inline uint32_t put_bits(uint32_t bits, uint32_t num_bits) |
| 2354 | { |
| 2355 | assert(num_bits <= 32); |
| 2356 | assert(bits < (1ULL << num_bits)); |
| 2357 | |
| 2358 | if (!num_bits) |
| 2359 | return 0; |
| 2360 | |
| 2361 | m_total_bits += num_bits; |
| 2362 | |
| 2363 | uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer; |
| 2364 | m_bit_buffer_size += num_bits; |
| 2365 | |
| 2366 | while (m_bit_buffer_size >= 8) |
| 2367 | { |
| 2368 | append_byte(static_cast<uint8_t>(v)); |
| 2369 | v >>= 8; |
| 2370 | m_bit_buffer_size -= 8; |
| 2371 | } |
| 2372 | |
| 2373 | m_bit_buffer = static_cast<uint8_t>(v); |
| 2374 | return num_bits; |
| 2375 | } |
| 2376 | |
| 2377 | inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab) |
| 2378 | { |
| 2379 | uint32_t code = tab.get_codes()[sym]; |
| 2380 | uint32_t code_size = tab.get_code_sizes()[sym]; |
| 2381 | assert(code_size >= 1); |
| 2382 | put_bits(code, code_size); |
| 2383 | return code_size; |
| 2384 | } |
| 2385 | |
| 2386 | inline uint32_t put_truncated_binary(uint32_t v, uint32_t n) |
| 2387 | { |
| 2388 | assert((n >= 2) && (v < n)); |
| 2389 | |
| 2390 | uint32_t k = floor_log2i(n); |
| 2391 | uint32_t u = (1 << (k + 1)) - n; |
| 2392 | |
| 2393 | if (v < u) |
| 2394 | return put_bits(v, k); |
| 2395 | |
| 2396 | uint32_t x = v + u; |
| 2397 | assert((x >> 1) >= u); |
| 2398 | |
| 2399 | put_bits(x >> 1, k); |
| 2400 | put_bits(x & 1, 1); |
| 2401 | return k + 1; |
| 2402 | } |
| 2403 | |
| 2404 | inline uint32_t put_rice(uint32_t v, uint32_t m) |
| 2405 | { |
| 2406 | assert(m); |
| 2407 | |
| 2408 | const uint64_t start_bits = m_total_bits; |
| 2409 | |
| 2410 | uint32_t q = v >> m, r = v & ((1 << m) - 1); |
| 2411 | |
| 2412 | // rice coding sanity check |
| 2413 | assert(q <= 64); |
| 2414 | |
| 2415 | for (; q > 16; q -= 16) |
| 2416 | put_bits(0xFFFF, 16); |
| 2417 | |
| 2418 | put_bits((1 << q) - 1, q); |
| 2419 | put_bits(r << 1, m + 1); |
| 2420 | |
| 2421 | return (uint32_t)(m_total_bits - start_bits); |
| 2422 | } |
| 2423 | |
| 2424 | inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits) |
| 2425 | { |
| 2426 | assert(chunk_bits); |
| 2427 | |
| 2428 | const uint32_t chunk_size = 1 << chunk_bits; |
| 2429 | const uint32_t chunk_mask = chunk_size - 1; |
| 2430 | |
| 2431 | uint32_t total_bits = 0; |
| 2432 | |
| 2433 | for ( ; ; ) |
| 2434 | { |
| 2435 | uint32_t next_v = v >> chunk_bits; |
| 2436 | |
| 2437 | total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1); |
| 2438 | if (!next_v) |
| 2439 | break; |
| 2440 | |
| 2441 | v = next_v; |
| 2442 | } |
| 2443 | |
| 2444 | return total_bits; |
| 2445 | } |
| 2446 | |
| 2447 | uint32_t emit_huffman_table(const huffman_encoding_table &tab); |
| 2448 | |
| 2449 | private: |
| 2450 | uint8_vec m_bytes; |
| 2451 | uint32_t m_bit_buffer, m_bit_buffer_size; |
| 2452 | uint64_t m_total_bits; |
| 2453 | |
| 2454 | void append_byte(uint8_t c) |
| 2455 | { |
| 2456 | m_bytes.resize(m_bytes.size() + 1); |
| 2457 | m_bytes.back() = c; |
| 2458 | } |
| 2459 | |
| 2460 | static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len); |
| 2461 | static void end_zero_run(uint16_vec &syms, uint32_t &run_size); |
| 2462 | }; |
| 2463 | |
| 2464 | class huff2D |
| 2465 | { |
| 2466 | public: |
| 2467 | huff2D() { } |
| 2468 | huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); } |
| 2469 | |
| 2470 | inline const histogram &get_histogram() const { return m_histogram; } |
| 2471 | inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; } |
| 2472 | |
| 2473 | inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) |
| 2474 | { |
| 2475 | assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); |
| 2476 | |
| 2477 | m_bits_per_sym = bits_per_sym; |
| 2478 | m_total_syms_per_group = total_syms_per_group; |
| 2479 | m_cur_sym_bits = 0; |
| 2480 | m_cur_num_syms = 0; |
| 2481 | m_decode_syms_remaining = 0; |
| 2482 | m_next_decoder_group_index = 0; |
| 2483 | |
| 2484 | m_histogram.init(1 << (bits_per_sym * total_syms_per_group)); |
| 2485 | } |
| 2486 | |
| 2487 | inline void clear() |
| 2488 | { |
| 2489 | m_group_bits.clear(); |
| 2490 | |
| 2491 | m_cur_sym_bits = 0; |
| 2492 | m_cur_num_syms = 0; |
| 2493 | m_decode_syms_remaining = 0; |
| 2494 | m_next_decoder_group_index = 0; |
| 2495 | } |
| 2496 | |
| 2497 | inline void emit(uint32_t sym) |
| 2498 | { |
| 2499 | m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym)); |
| 2500 | m_cur_num_syms++; |
| 2501 | |
| 2502 | if (m_cur_num_syms == m_total_syms_per_group) |
| 2503 | flush(); |
| 2504 | } |
| 2505 | |
| 2506 | inline void flush() |
| 2507 | { |
| 2508 | if (m_cur_num_syms) |
| 2509 | { |
| 2510 | m_group_bits.push_back(m_cur_sym_bits); |
| 2511 | m_histogram.inc(m_cur_sym_bits); |
| 2512 | |
| 2513 | m_cur_sym_bits = 0; |
| 2514 | m_cur_num_syms = 0; |
| 2515 | } |
| 2516 | } |
| 2517 | |
| 2518 | inline bool start_encoding(uint32_t code_size_limit = 16) |
| 2519 | { |
| 2520 | flush(); |
| 2521 | |
| 2522 | if (!m_encoding_table.init(m_histogram, code_size_limit)) |
| 2523 | return false; |
| 2524 | |
| 2525 | m_decode_syms_remaining = 0; |
| 2526 | m_next_decoder_group_index = 0; |
| 2527 | |
| 2528 | return true; |
| 2529 | } |
| 2530 | |
| 2531 | inline uint32_t emit_next_sym(bitwise_coder &c) |
| 2532 | { |
| 2533 | uint32_t bits = 0; |
| 2534 | |
| 2535 | if (!m_decode_syms_remaining) |
| 2536 | { |
| 2537 | bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table); |
| 2538 | m_decode_syms_remaining = m_total_syms_per_group; |
| 2539 | } |
| 2540 | |
| 2541 | m_decode_syms_remaining--; |
| 2542 | return bits; |
| 2543 | } |
| 2544 | |
| 2545 | inline void emit_flush() |
| 2546 | { |
| 2547 | m_decode_syms_remaining = 0; |
| 2548 | } |
| 2549 | |
| 2550 | private: |
| 2551 | uint_vec m_group_bits; |
| 2552 | huffman_encoding_table m_encoding_table; |
| 2553 | histogram m_histogram; |
| 2554 | uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining; |
| 2555 | }; |
| 2556 | |
| 2557 | bool huffman_test(int rand_seed); |
| 2558 | |
| 2559 | // VQ index reordering |
| 2560 | |
| 2561 | class palette_index_reorderer |
| 2562 | { |
| 2563 | public: |
| 2564 | palette_index_reorderer() |
| 2565 | { |
| 2566 | } |
| 2567 | |
| 2568 | void clear() |
| 2569 | { |
| 2570 | clear_vector(m_hist); |
| 2571 | clear_vector(m_total_count_to_picked); |
| 2572 | clear_vector(m_entries_picked); |
| 2573 | clear_vector(m_entries_to_do); |
| 2574 | clear_vector(m_remap_table); |
| 2575 | } |
| 2576 | |
| 2577 | // returns [0,1] distance of entry i to entry j |
| 2578 | typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx); |
| 2579 | |
| 2580 | void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); |
| 2581 | |
| 2582 | // Table remaps old to new symbol indices |
| 2583 | inline const uint_vec &get_remap_table() const { return m_remap_table; } |
| 2584 | |
| 2585 | private: |
| 2586 | uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table; |
| 2587 | |
| 2588 | inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; } |
| 2589 | inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } } |
| 2590 | |
| 2591 | void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices); |
| 2592 | void find_initial(uint32_t num_syms); |
| 2593 | void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); |
| 2594 | float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); |
| 2595 | }; |
| 2596 | |
| 2597 | // Simple 32-bit 2D image class |
| 2598 | |
| 2599 | class image |
| 2600 | { |
| 2601 | public: |
| 2602 | image() : |
| 2603 | m_width(0), m_height(0), m_pitch(0) |
| 2604 | { |
| 2605 | } |
| 2606 | |
| 2607 | image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : |
| 2608 | m_width(0), m_height(0), m_pitch(0) |
| 2609 | { |
| 2610 | resize(w, h, p); |
| 2611 | } |
| 2612 | |
| 2613 | image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) : |
| 2614 | m_width(0), m_height(0), m_pitch(0) |
| 2615 | { |
| 2616 | init(pImage, width, height, comps); |
| 2617 | } |
| 2618 | |
| 2619 | image(const image &other) : |
| 2620 | m_width(0), m_height(0), m_pitch(0) |
| 2621 | { |
| 2622 | *this = other; |
| 2623 | } |
| 2624 | |
| 2625 | image &swap(image &other) |
| 2626 | { |
| 2627 | std::swap(m_width, other.m_width); |
| 2628 | std::swap(m_height, other.m_height); |
| 2629 | std::swap(m_pitch, other.m_pitch); |
| 2630 | m_pixels.swap(other.m_pixels); |
| 2631 | return *this; |
| 2632 | } |
| 2633 | |
| 2634 | image &operator= (const image &rhs) |
| 2635 | { |
| 2636 | if (this != &rhs) |
| 2637 | { |
| 2638 | m_width = rhs.m_width; |
| 2639 | m_height = rhs.m_height; |
| 2640 | m_pitch = rhs.m_pitch; |
| 2641 | m_pixels = rhs.m_pixels; |
| 2642 | } |
| 2643 | return *this; |
| 2644 | } |
| 2645 | |
| 2646 | image &clear() |
| 2647 | { |
| 2648 | m_width = 0; |
| 2649 | m_height = 0; |
| 2650 | m_pitch = 0; |
| 2651 | clear_vector(m_pixels); |
| 2652 | return *this; |
| 2653 | } |
| 2654 | |
| 2655 | image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color) |
| 2656 | { |
| 2657 | return crop(w, h, p, background); |
| 2658 | } |
| 2659 | |
| 2660 | image &set_all(const color_rgba &c) |
| 2661 | { |
| 2662 | for (uint32_t i = 0; i < m_pixels.size(); i++) |
| 2663 | m_pixels[i] = c; |
| 2664 | return *this; |
| 2665 | } |
| 2666 | |
| 2667 | void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) |
| 2668 | { |
| 2669 | assert(comps >= 1 && comps <= 4); |
| 2670 | |
| 2671 | resize(width, height); |
| 2672 | |
| 2673 | for (uint32_t y = 0; y < height; y++) |
| 2674 | { |
| 2675 | for (uint32_t x = 0; x < width; x++) |
| 2676 | { |
| 2677 | const uint8_t *pSrc = &pImage[(x + y * width) * comps]; |
| 2678 | color_rgba &dst = (*this)(x, y); |
| 2679 | |
| 2680 | if (comps == 1) |
| 2681 | { |
| 2682 | dst.r = pSrc[0]; |
| 2683 | dst.g = pSrc[0]; |
| 2684 | dst.b = pSrc[0]; |
| 2685 | dst.a = 255; |
| 2686 | } |
| 2687 | else if (comps == 2) |
| 2688 | { |
| 2689 | dst.r = pSrc[0]; |
| 2690 | dst.g = pSrc[0]; |
| 2691 | dst.b = pSrc[0]; |
| 2692 | dst.a = pSrc[1]; |
| 2693 | } |
| 2694 | else |
| 2695 | { |
| 2696 | dst.r = pSrc[0]; |
| 2697 | dst.g = pSrc[1]; |
| 2698 | dst.b = pSrc[2]; |
| 2699 | if (comps == 4) |
| 2700 | dst.a = pSrc[3]; |
| 2701 | else |
| 2702 | dst.a = 255; |
| 2703 | } |
| 2704 | } |
| 2705 | } |
| 2706 | } |
| 2707 | |
| 2708 | image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) |
| 2709 | { |
| 2710 | for (uint32_t iy = 0; iy < h; iy++) |
| 2711 | for (uint32_t ix = 0; ix < w; ix++) |
| 2712 | set_clipped(x + ix, y + iy, c); |
| 2713 | return *this; |
| 2714 | } |
| 2715 | |
| 2716 | image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c) |
| 2717 | { |
| 2718 | for (uint32_t iy = 0; iy < h; iy++) |
| 2719 | for (uint32_t ix = 0; ix < w; ix++) |
| 2720 | set_clipped_alpha(x + ix, y + iy, c); |
| 2721 | return *this; |
| 2722 | } |
| 2723 | |
| 2724 | image &crop_dup_borders(uint32_t w, uint32_t h) |
| 2725 | { |
| 2726 | const uint32_t orig_w = m_width, orig_h = m_height; |
| 2727 | |
| 2728 | crop(w, h); |
| 2729 | |
| 2730 | if (orig_w && orig_h) |
| 2731 | { |
| 2732 | if (m_width > orig_w) |
| 2733 | { |
| 2734 | for (uint32_t x = orig_w; x < m_width; x++) |
| 2735 | for (uint32_t y = 0; y < m_height; y++) |
| 2736 | set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); |
| 2737 | } |
| 2738 | |
| 2739 | if (m_height > orig_h) |
| 2740 | { |
| 2741 | for (uint32_t y = orig_h; y < m_height; y++) |
| 2742 | for (uint32_t x = 0; x < m_width; x++) |
| 2743 | set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); |
| 2744 | } |
| 2745 | } |
| 2746 | return *this; |
| 2747 | } |
| 2748 | |
| 2749 | // pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer). |
| 2750 | image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) |
| 2751 | { |
| 2752 | if (p == UINT32_MAX) |
| 2753 | p = w; |
| 2754 | |
| 2755 | clear(); |
| 2756 | |
| 2757 | if ((!p) || (!w) || (!h)) |
| 2758 | return *this; |
| 2759 | |
| 2760 | m_pixels.grant_ownership(pPixels, p * h, p * h); |
| 2761 | |
| 2762 | m_width = w; |
| 2763 | m_height = h; |
| 2764 | m_pitch = p; |
| 2765 | |
| 2766 | return *this; |
| 2767 | } |
| 2768 | |
| 2769 | image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true) |
| 2770 | { |
| 2771 | if (p == UINT32_MAX) |
| 2772 | p = w; |
| 2773 | |
| 2774 | if ((w == m_width) && (m_height == h) && (m_pitch == p)) |
| 2775 | return *this; |
| 2776 | |
| 2777 | if ((!w) || (!h) || (!p)) |
| 2778 | { |
| 2779 | clear(); |
| 2780 | return *this; |
| 2781 | } |
| 2782 | |
| 2783 | color_rgba_vec cur_state; |
| 2784 | cur_state.swap(m_pixels); |
| 2785 | |
| 2786 | m_pixels.resize(p * h); |
| 2787 | |
| 2788 | if (init_image) |
| 2789 | { |
| 2790 | if (m_width || m_height) |
| 2791 | { |
| 2792 | for (uint32_t y = 0; y < h; y++) |
| 2793 | { |
| 2794 | for (uint32_t x = 0; x < w; x++) |
| 2795 | { |
| 2796 | if ((x < m_width) && (y < m_height)) |
| 2797 | m_pixels[x + y * p] = cur_state[x + y * m_pitch]; |
| 2798 | else |
| 2799 | m_pixels[x + y * p] = background; |
| 2800 | } |
| 2801 | } |
| 2802 | } |
| 2803 | else |
| 2804 | { |
| 2805 | m_pixels.set_all(background); |
| 2806 | } |
| 2807 | } |
| 2808 | |
| 2809 | m_width = w; |
| 2810 | m_height = h; |
| 2811 | m_pitch = p; |
| 2812 | |
| 2813 | return *this; |
| 2814 | } |
| 2815 | |
| 2816 | inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
| 2817 | inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
| 2818 | |
| 2819 | inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
| 2820 | inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
| 2821 | |
| 2822 | inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const |
| 2823 | { |
| 2824 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
| 2825 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
| 2826 | return m_pixels[x + y * m_pitch]; |
| 2827 | } |
| 2828 | |
| 2829 | inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) |
| 2830 | { |
| 2831 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
| 2832 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
| 2833 | return m_pixels[x + y * m_pitch]; |
| 2834 | } |
| 2835 | |
| 2836 | inline image &set_clipped(int x, int y, const color_rgba &c) |
| 2837 | { |
| 2838 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) |
| 2839 | (*this)(x, y) = c; |
| 2840 | return *this; |
| 2841 | } |
| 2842 | |
| 2843 | inline image& set_clipped_alpha(int x, int y, const color_rgba& c) |
| 2844 | { |
| 2845 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) |
| 2846 | (*this)(x, y).m_comps[3] = c.m_comps[3]; |
| 2847 | return *this; |
| 2848 | } |
| 2849 | |
| 2850 | // Very straightforward blit with full clipping. Not fast, but it works. |
| 2851 | image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) |
| 2852 | { |
| 2853 | for (int y = 0; y < src_h; y++) |
| 2854 | { |
| 2855 | const int sy = src_y + y; |
| 2856 | if (sy < 0) |
| 2857 | continue; |
| 2858 | else if (sy >= (int)src.get_height()) |
| 2859 | break; |
| 2860 | |
| 2861 | for (int x = 0; x < src_w; x++) |
| 2862 | { |
| 2863 | const int sx = src_x + x; |
| 2864 | if (sx < 0) |
| 2865 | continue; |
					else if (sx >= (int)src.get_width())
| 2867 | break; |
| 2868 | |
| 2869 | set_clipped(dst_x + x, dst_y + y, src(sx, sy)); |
| 2870 | } |
| 2871 | } |
| 2872 | |
| 2873 | return *this; |
| 2874 | } |
| 2875 | |
		const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
| 2877 | { |
| 2878 | if (((src_x + w) > m_width) || ((src_y + h) > m_height)) |
| 2879 | { |
| 2880 | // Slower clamping case |
| 2881 | for (uint32_t y = 0; y < h; y++) |
| 2882 | for (uint32_t x = 0; x < w; x++) |
| 2883 | *pDst++ = get_clamped(src_x + x, src_y + y); |
| 2884 | } |
| 2885 | else |
| 2886 | { |
| 2887 | const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch]; |
| 2888 | |
| 2889 | for (uint32_t y = 0; y < h; y++) |
| 2890 | { |
| 2891 | memcpy(pDst, pSrc, w * sizeof(color_rgba)); |
| 2892 | pSrc += m_pitch; |
| 2893 | pDst += w; |
| 2894 | } |
| 2895 | } |
| 2896 | |
| 2897 | return *this; |
| 2898 | } |
| 2899 | |
| 2900 | image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) |
| 2901 | { |
| 2902 | for (uint32_t y = 0; y < h; y++) |
| 2903 | for (uint32_t x = 0; x < w; x++) |
| 2904 | set_clipped(dst_x + x, dst_y + y, *pSrc++); |
| 2905 | return *this; |
| 2906 | } |
| 2907 | |
| 2908 | inline uint32_t get_width() const { return m_width; } |
| 2909 | inline uint32_t get_height() const { return m_height; } |
| 2910 | inline uint32_t get_pitch() const { return m_pitch; } |
| 2911 | inline uint32_t get_total_pixels() const { return m_width * m_height; } |
| 2912 | |
| 2913 | inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } |
| 2914 | inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } |
| 2915 | inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } |
| 2916 | |
| 2917 | inline const color_rgba_vec &get_pixels() const { return m_pixels; } |
| 2918 | inline color_rgba_vec &get_pixels() { return m_pixels; } |
| 2919 | |
| 2920 | inline const color_rgba *get_ptr() const { return &m_pixels[0]; } |
| 2921 | inline color_rgba *get_ptr() { return &m_pixels[0]; } |
| 2922 | |
| 2923 | bool has_alpha() const |
| 2924 | { |
| 2925 | for (uint32_t y = 0; y < m_height; ++y) |
| 2926 | for (uint32_t x = 0; x < m_width; ++x) |
| 2927 | if ((*this)(x, y).a < 255) |
| 2928 | return true; |
| 2929 | |
| 2930 | return false; |
| 2931 | } |
| 2932 | |
| 2933 | image &set_alpha(uint8_t a) |
| 2934 | { |
| 2935 | for (uint32_t y = 0; y < m_height; ++y) |
| 2936 | for (uint32_t x = 0; x < m_width; ++x) |
| 2937 | (*this)(x, y).a = a; |
| 2938 | return *this; |
| 2939 | } |
| 2940 | |
| 2941 | image &flip_y() |
| 2942 | { |
| 2943 | for (uint32_t y = 0; y < m_height / 2; ++y) |
| 2944 | for (uint32_t x = 0; x < m_width; ++x) |
| 2945 | std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); |
| 2946 | return *this; |
| 2947 | } |
| 2948 | |
| 2949 | // TODO: There are many ways to do this, not sure this is the best way. |
| 2950 | image &renormalize_normal_map() |
| 2951 | { |
| 2952 | for (uint32_t y = 0; y < m_height; y++) |
| 2953 | { |
| 2954 | for (uint32_t x = 0; x < m_width; x++) |
| 2955 | { |
| 2956 | color_rgba &c = (*this)(x, y); |
| 2957 | if ((c.r == 128) && (c.g == 128) && (c.b == 128)) |
| 2958 | continue; |
| 2959 | |
| 2960 | vec3F v(c.r, c.g, c.b); |
| 2961 | v = (v * (2.0f / 255.0f)) - vec3F(1.0f); |
| 2962 | v.clamp(-1.0f, 1.0f); |
| 2963 | |
| 2964 | float length = v.length(); |
| 2965 | const float cValidThresh = .077f; |
| 2966 | if (length < cValidThresh) |
| 2967 | { |
| 2968 | c.set(128, 128, 128, c.a); |
| 2969 | } |
| 2970 | else if (fabs(length - 1.0f) > cValidThresh) |
| 2971 | { |
| 2972 | if (length) |
| 2973 | v /= length; |
| 2974 | |
| 2975 | for (uint32_t i = 0; i < 3; i++) |
| 2976 | c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f)); |
| 2977 | |
| 2978 | if ((c.g == 128) && (c.r == 128)) |
| 2979 | { |
| 2980 | if (c.b < 128) |
| 2981 | c.b = 0; |
| 2982 | else |
| 2983 | c.b = 255; |
| 2984 | } |
| 2985 | } |
| 2986 | } |
| 2987 | } |
| 2988 | return *this; |
| 2989 | } |
| 2990 | |
| 2991 | void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); |
| 2992 | |
| 2993 | private: |
| 2994 | uint32_t m_width, m_height, m_pitch; // all in pixels |
| 2995 | color_rgba_vec m_pixels; |
| 2996 | }; |
| 2997 | |
| 2998 | // Float images |
| 2999 | |
| 3000 | typedef basisu::vector<vec4F> vec4F_vec; |
| 3001 | |
| 3002 | class imagef |
| 3003 | { |
| 3004 | public: |
| 3005 | imagef() : |
| 3006 | m_width(0), m_height(0), m_pitch(0) |
| 3007 | { |
| 3008 | } |
| 3009 | |
| 3010 | imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : |
| 3011 | m_width(0), m_height(0), m_pitch(0) |
| 3012 | { |
| 3013 | resize(w, h, p); |
| 3014 | } |
| 3015 | |
| 3016 | imagef(const imagef &other) : |
| 3017 | m_width(0), m_height(0), m_pitch(0) |
| 3018 | { |
| 3019 | *this = other; |
| 3020 | } |
| 3021 | |
| 3022 | imagef &swap(imagef &other) |
| 3023 | { |
| 3024 | std::swap(m_width, other.m_width); |
| 3025 | std::swap(m_height, other.m_height); |
| 3026 | std::swap(m_pitch, other.m_pitch); |
| 3027 | m_pixels.swap(other.m_pixels); |
| 3028 | return *this; |
| 3029 | } |
| 3030 | |
| 3031 | imagef &operator= (const imagef &rhs) |
| 3032 | { |
| 3033 | if (this != &rhs) |
| 3034 | { |
| 3035 | m_width = rhs.m_width; |
| 3036 | m_height = rhs.m_height; |
| 3037 | m_pitch = rhs.m_pitch; |
| 3038 | m_pixels = rhs.m_pixels; |
| 3039 | } |
| 3040 | return *this; |
| 3041 | } |
| 3042 | |
| 3043 | imagef &clear() |
| 3044 | { |
| 3045 | m_width = 0; |
| 3046 | m_height = 0; |
| 3047 | m_pitch = 0; |
| 3048 | clear_vector(m_pixels); |
| 3049 | return *this; |
| 3050 | } |
| 3051 | |
| 3052 | imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0)) |
| 3053 | { |
| 3054 | const uint32_t width = src.get_width(); |
| 3055 | const uint32_t height = src.get_height(); |
| 3056 | |
| 3057 | resize(width, height); |
| 3058 | |
| 3059 | for (int y = 0; y < (int)height; y++) |
| 3060 | { |
| 3061 | for (uint32_t x = 0; x < width; x++) |
| 3062 | { |
| 3063 | const color_rgba &src_pixel = src(x, y); |
| 3064 | (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); |
| 3065 | } |
| 3066 | } |
| 3067 | |
| 3068 | return *this; |
| 3069 | } |
| 3070 | |
| 3071 | imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) |
| 3072 | { |
| 3073 | return resize(other.get_width(), other.get_height(), p, background); |
| 3074 | } |
| 3075 | |
| 3076 | imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) |
| 3077 | { |
| 3078 | return crop(w, h, p, background); |
| 3079 | } |
| 3080 | |
| 3081 | imagef &set_all(const vec4F &c) |
| 3082 | { |
| 3083 | for (uint32_t i = 0; i < m_pixels.size(); i++) |
| 3084 | m_pixels[i] = c; |
| 3085 | return *this; |
| 3086 | } |
| 3087 | |
| 3088 | imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c) |
| 3089 | { |
| 3090 | for (uint32_t iy = 0; iy < h; iy++) |
| 3091 | for (uint32_t ix = 0; ix < w; ix++) |
| 3092 | set_clipped(x + ix, y + iy, c); |
| 3093 | return *this; |
| 3094 | } |
| 3095 | |
| 3096 | imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) |
| 3097 | { |
| 3098 | if (p == UINT32_MAX) |
| 3099 | p = w; |
| 3100 | |
| 3101 | if ((w == m_width) && (m_height == h) && (m_pitch == p)) |
| 3102 | return *this; |
| 3103 | |
| 3104 | if ((!w) || (!h) || (!p)) |
| 3105 | { |
| 3106 | clear(); |
| 3107 | return *this; |
| 3108 | } |
| 3109 | |
| 3110 | vec4F_vec cur_state; |
| 3111 | cur_state.swap(m_pixels); |
| 3112 | |
| 3113 | m_pixels.resize(p * h); |
| 3114 | |
| 3115 | for (uint32_t y = 0; y < h; y++) |
| 3116 | { |
| 3117 | for (uint32_t x = 0; x < w; x++) |
| 3118 | { |
| 3119 | if ((x < m_width) && (y < m_height)) |
| 3120 | m_pixels[x + y * p] = cur_state[x + y * m_pitch]; |
| 3121 | else |
| 3122 | m_pixels[x + y * p] = background; |
| 3123 | } |
| 3124 | } |
| 3125 | |
| 3126 | m_width = w; |
| 3127 | m_height = h; |
| 3128 | m_pitch = p; |
| 3129 | |
| 3130 | return *this; |
| 3131 | } |
| 3132 | |
| 3133 | inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
| 3134 | inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
| 3135 | |
| 3136 | inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
| 3137 | inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
| 3138 | |
| 3139 | inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const |
| 3140 | { |
| 3141 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
| 3142 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
| 3143 | return m_pixels[x + y * m_pitch]; |
| 3144 | } |
| 3145 | |
| 3146 | inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) |
| 3147 | { |
| 3148 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
| 3149 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
| 3150 | return m_pixels[x + y * m_pitch]; |
| 3151 | } |
| 3152 | |
| 3153 | inline imagef &set_clipped(int x, int y, const vec4F &c) |
| 3154 | { |
| 3155 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) |
| 3156 | (*this)(x, y) = c; |
| 3157 | return *this; |
| 3158 | } |
| 3159 | |
| 3160 | // Very straightforward blit with full clipping. Not fast, but it works. |
| 3161 | imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) |
| 3162 | { |
| 3163 | for (int y = 0; y < src_h; y++) |
| 3164 | { |
| 3165 | const int sy = src_y + y; |
| 3166 | if (sy < 0) |
| 3167 | continue; |
| 3168 | else if (sy >= (int)src.get_height()) |
| 3169 | break; |
| 3170 | |
| 3171 | for (int x = 0; x < src_w; x++) |
| 3172 | { |
| 3173 | const int sx = src_x + x; |
| 3174 | if (sx < 0) |
| 3175 | continue; |
					else if (sx >= (int)src.get_width())
| 3177 | break; |
| 3178 | |
| 3179 | set_clipped(dst_x + x, dst_y + y, src(sx, sy)); |
| 3180 | } |
| 3181 | } |
| 3182 | |
| 3183 | return *this; |
| 3184 | } |
| 3185 | |
		const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
| 3187 | { |
| 3188 | for (uint32_t y = 0; y < h; y++) |
| 3189 | for (uint32_t x = 0; x < w; x++) |
| 3190 | *pDst++ = get_clamped(src_x + x, src_y + y); |
| 3191 | return *this; |
| 3192 | } |
| 3193 | |
| 3194 | imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) |
| 3195 | { |
| 3196 | for (uint32_t y = 0; y < h; y++) |
| 3197 | for (uint32_t x = 0; x < w; x++) |
| 3198 | set_clipped(dst_x + x, dst_y + y, *pSrc++); |
| 3199 | return *this; |
| 3200 | } |
| 3201 | |
| 3202 | inline uint32_t get_width() const { return m_width; } |
| 3203 | inline uint32_t get_height() const { return m_height; } |
| 3204 | inline uint32_t get_pitch() const { return m_pitch; } |
| 3205 | inline uint32_t get_total_pixels() const { return m_width * m_height; } |
| 3206 | |
| 3207 | inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } |
| 3208 | inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } |
| 3209 | inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } |
| 3210 | |
| 3211 | inline const vec4F_vec &get_pixels() const { return m_pixels; } |
| 3212 | inline vec4F_vec &get_pixels() { return m_pixels; } |
| 3213 | |
| 3214 | inline const vec4F *get_ptr() const { return &m_pixels[0]; } |
| 3215 | inline vec4F *get_ptr() { return &m_pixels[0]; } |
| 3216 | |
| 3217 | private: |
| 3218 | uint32_t m_width, m_height, m_pitch; // all in pixels |
| 3219 | vec4F_vec m_pixels; |
| 3220 | }; |
| 3221 | |
| 3222 | // Image metrics |
| 3223 | |
| 3224 | class image_metrics |
| 3225 | { |
| 3226 | public: |
| 3227 | // TODO: Add ssim |
| 3228 | float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; |
| 3229 | |
| 3230 | image_metrics() |
| 3231 | { |
| 3232 | clear(); |
| 3233 | } |
| 3234 | |
| 3235 | void clear() |
| 3236 | { |
| 3237 | m_max = 0; |
| 3238 | m_mean = 0; |
| 3239 | m_mean_squared = 0; |
| 3240 | m_rms = 0; |
| 3241 | m_psnr = 0; |
| 3242 | m_ssim = 0; |
| 3243 | } |
| 3244 | |
		void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
| 3246 | |
| 3247 | void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false); |
| 3248 | }; |
| 3249 | |
| 3250 | // Image saving/loading/resampling |
| 3251 | |
| 3252 | bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr); |
| 3253 | bool load_png(const char* pFilename, image& img); |
| 3254 | inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } |
| 3255 | |
| 3256 | bool load_tga(const char* pFilename, image& img); |
| 3257 | inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } |
| 3258 | |
| 3259 | bool load_jpg(const char *pFilename, image& img); |
| 3260 | inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } |
| 3261 | |
| 3262 | // Currently loads .PNG, .TGA, or .JPG |
| 3263 | bool load_image(const char* pFilename, image& img); |
| 3264 | inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } |
| 3265 | |
| 3266 | uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); |
| 3267 | uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); |
| 3268 | |
| 3269 | enum |
| 3270 | { |
| 3271 | cImageSaveGrayscale = 1, |
| 3272 | cImageSaveIgnoreAlpha = 2 |
| 3273 | }; |
| 3274 | |
| 3275 | bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); |
| 3276 | inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } |
| 3277 | |
| 3278 | bool read_file_to_vec(const char* pFilename, uint8_vec& data); |
| 3279 | |
| 3280 | bool write_data_to_file(const char* pFilename, const void* pData, size_t len); |
| 3281 | |
	inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
| 3283 | |
| 3284 | float linear_to_srgb(float l); |
| 3285 | float srgb_to_linear(float s); |
| 3286 | |
| 3287 | bool image_resample(const image &src, image &dst, bool srgb = false, |
| 3288 | const char *pFilter = "lanczos4" , float filter_scale = 1.0f, |
| 3289 | bool wrapping = false, |
| 3290 | uint32_t first_comp = 0, uint32_t num_comps = 4); |
| 3291 | |
| 3292 | // Timing |
| 3293 | |
| 3294 | typedef uint64_t timer_ticks; |
| 3295 | |
| 3296 | class interval_timer |
| 3297 | { |
| 3298 | public: |
| 3299 | interval_timer(); |
| 3300 | |
| 3301 | void start(); |
| 3302 | void stop(); |
| 3303 | |
| 3304 | double get_elapsed_secs() const; |
inline double get_elapsed_ms() const { return 1000.0f * get_elapsed_secs(); }
| 3306 | |
| 3307 | static void init(); |
| 3308 | static inline timer_ticks get_ticks_per_sec() { return g_freq; } |
| 3309 | static timer_ticks get_ticks(); |
| 3310 | static double ticks_to_secs(timer_ticks ticks); |
| 3311 | static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } |
| 3312 | |
| 3313 | private: |
| 3314 | static timer_ticks g_init_ticks, g_freq; |
| 3315 | static double g_timer_freq; |
| 3316 | |
| 3317 | timer_ticks m_start_time, m_stop_time; |
| 3318 | |
| 3319 | bool m_started, m_stopped; |
| 3320 | }; |
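// Example (a sketch; assumes construction performs any needed one-time initialization, otherwise
// call interval_timer::init() first):
//
//   interval_timer tm;
//   tm.start();
//   // ... work being timed ...
//   printf("Elapsed: %3.3f ms\n", tm.get_elapsed_ms());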
| 3321 | |
| 3322 | // 2D array |
| 3323 | |
| 3324 | template<typename T> |
| 3325 | class vector2D |
| 3326 | { |
| 3327 | typedef basisu::vector<T> TVec; |
| 3328 | |
| 3329 | uint32_t m_width, m_height; |
| 3330 | TVec m_values; |
| 3331 | |
| 3332 | public: |
| 3333 | vector2D() : |
| 3334 | m_width(0), |
| 3335 | m_height(0) |
| 3336 | { |
| 3337 | } |
| 3338 | |
| 3339 | vector2D(uint32_t w, uint32_t h) : |
| 3340 | m_width(0), |
| 3341 | m_height(0) |
| 3342 | { |
| 3343 | resize(w, h); |
| 3344 | } |
| 3345 | |
| 3346 | vector2D(const vector2D &other) |
| 3347 | { |
| 3348 | *this = other; |
| 3349 | } |
| 3350 | |
| 3351 | vector2D &operator= (const vector2D &other) |
| 3352 | { |
| 3353 | if (this != &other) |
| 3354 | { |
| 3355 | m_width = other.m_width; |
| 3356 | m_height = other.m_height; |
| 3357 | m_values = other.m_values; |
| 3358 | } |
| 3359 | return *this; |
| 3360 | } |
| 3361 | |
| 3362 | inline bool operator== (const vector2D &rhs) const |
| 3363 | { |
| 3364 | return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values); |
| 3365 | } |
| 3366 | |
| 3367 | inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); } |
| 3368 | |
| 3369 | inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } |
| 3370 | inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } |
| 3371 | |
| 3372 | inline const T &operator[] (uint32_t i) const { return m_values[i]; } |
| 3373 | inline T &operator[] (uint32_t i) { return m_values[i]; } |
| 3374 | |
// Clamps the coordinates to the valid range [0, m_width - 1] x [0, m_height - 1].
inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
| 3377 | |
| 3378 | void clear() |
| 3379 | { |
| 3380 | m_width = 0; |
| 3381 | m_height = 0; |
| 3382 | m_values.clear(); |
| 3383 | } |
| 3384 | |
| 3385 | void set_all(const T&val) |
| 3386 | { |
| 3387 | vector_set_all(m_values, val); |
| 3388 | } |
| 3389 | |
| 3390 | inline const T* get_ptr() const { return &m_values[0]; } |
| 3391 | inline T* get_ptr() { return &m_values[0]; } |
| 3392 | |
| 3393 | vector2D &resize(uint32_t new_width, uint32_t new_height) |
| 3394 | { |
| 3395 | if ((m_width == new_width) && (m_height == new_height)) |
| 3396 | return *this; |
| 3397 | |
| 3398 | TVec oldVals(new_width * new_height); |
| 3399 | oldVals.swap(m_values); |
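// After the swap, m_values refers to the new (default-constructed) storage and oldVals
// holds the previous contents, which are copied back below using the old stride (m_width).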
| 3400 | |
| 3401 | const uint32_t w = minimum(m_width, new_width); |
| 3402 | const uint32_t h = minimum(m_height, new_height); |
| 3403 | |
| 3404 | if ((w) && (h)) |
| 3405 | { |
| 3406 | for (uint32_t y = 0; y < h; y++) |
| 3407 | for (uint32_t x = 0; x < w; x++) |
| 3408 | m_values[x + y * new_width] = oldVals[x + y * m_width]; |
| 3409 | } |
| 3410 | |
| 3411 | m_width = new_width; |
| 3412 | m_height = new_height; |
| 3413 | |
| 3414 | return *this; |
| 3415 | } |
| 3416 | }; |
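// Example (a sketch): a 16x16 grid of floats.
//
//   vector2D<float> grid(16, 16);
//   grid.set_all(0.0f);
//   grid(3, 2) = 1.0f;                     // (x, y) element access
//   float edge = grid.at_clamped(-1, 5);   // coordinates are clamped to the grid's bounds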
| 3417 | |
| 3418 | inline FILE *fopen_safe(const char *pFilename, const char *pMode) |
| 3419 | { |
| 3420 | #ifdef _WIN32 |
| 3421 | FILE *pFile = nullptr; |
| 3422 | fopen_s(&pFile, pFilename, pMode); |
| 3423 | return pFile; |
| 3424 | #else |
| 3425 | return fopen(pFilename, pMode); |
| 3426 | #endif |
| 3427 | } |
| 3428 | |
| 3429 | void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); |
| 3430 | |
| 3431 | const uint32_t cPixelBlockWidth = 4; |
| 3432 | const uint32_t cPixelBlockHeight = 4; |
| 3433 | const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight; |
| 3434 | |
| 3435 | struct pixel_block |
| 3436 | { |
| 3437 | color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] |
| 3438 | |
| 3439 | inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } |
| 3440 | inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } |
| 3441 | |
| 3442 | inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; } |
| 3443 | inline color_rgba* get_ptr() { return &m_pixels[0][0]; } |
| 3444 | |
| 3445 | inline void clear() { clear_obj(*this); } |
| 3446 | |
| 3447 | inline bool operator== (const pixel_block& rhs) const |
| 3448 | { |
| 3449 | return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; |
| 3450 | } |
| 3451 | }; |
| 3452 | typedef basisu::vector<pixel_block> pixel_block_vec; |
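// Example (a sketch): zero-fill a 4x4 block, then set and read back one texel.
//
//   pixel_block blk;
//   blk.clear();
//   blk(1, 2) = color_rgba(255, 0, 0, 255);   // (x, y) indexing
//   const color_rgba& c = blk(1, 2);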
| 3453 | |
| 3454 | } // namespace basisu |
| 3455 | |
| 3456 | |
| 3457 | |