| 1 | // basisu_enc.h | 
|---|
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. | 
|---|
| 3 | // | 
|---|
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 5 | // you may not use this file except in compliance with the License. | 
|---|
| 6 | // You may obtain a copy of the License at | 
|---|
| 7 | // | 
|---|
| 8 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 9 | // | 
|---|
| 10 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 13 | // See the License for the specific language governing permissions and | 
|---|
| 14 | // limitations under the License. | 
|---|
| 15 | #pragma once | 
|---|
| 16 | #include "../transcoder/basisu.h" | 
|---|
| 17 | #include "../transcoder/basisu_transcoder_internal.h" | 
|---|
| 18 |  | 
|---|
| 19 | #include <mutex> | 
|---|
| 20 | #include <atomic> | 
|---|
| 21 | #include <condition_variable> | 
|---|
| 22 | #include <functional> | 
|---|
| 23 | #include <thread> | 
|---|
| 24 | #include <unordered_map> | 
|---|
| 25 | #include <ostream> | 
|---|
| 26 |  | 
|---|
| 27 | #if !defined(_WIN32) || defined(__MINGW32__) | 
|---|
| 28 | #include <libgen.h> | 
|---|
| 29 | #endif | 
|---|
| 30 |  | 
|---|
| 31 | // This module is really just a huge grab bag of classes and helper functions needed by the encoder. | 
|---|
| 32 |  | 
|---|
| 33 | // If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time. | 
|---|
| 34 | #define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0) | 
|---|
| 35 |  | 
|---|
| 36 | #if BASISU_SUPPORT_SSE | 
|---|
| 37 | // Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors. | 
|---|
| 38 | extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16); | 
|---|
| 39 | #endif | 
|---|
| 40 |  | 
|---|
| 41 | namespace basisu | 
|---|
| 42 | { | 
|---|
| 43 | extern uint8_t g_hamming_dist[256]; | 
|---|
| 44 | extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; | 
|---|
| 45 |  | 
|---|
| 46 | // true if basisu_encoder_init() has been called and returned. | 
|---|
| 47 | extern bool g_library_initialized; | 
|---|
| 48 |  | 
|---|
| 49 | // Encoder library initialization. | 
|---|
| 50 | // This function MUST be called before encoding anything! | 
|---|
| 51 | void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); | 
|---|
| 52 | void basisu_encoder_deinit(); | 
|---|
| 53 |  | 
|---|
| 54 | // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 | 
|---|
| 55 | extern void detect_sse41(); | 
|---|
| 56 |  | 
|---|
| 57 | #if BASISU_SUPPORT_SSE | 
|---|
| 58 | extern bool g_cpu_supports_sse41; | 
|---|
| 59 | #else | 
|---|
| 60 | const bool g_cpu_supports_sse41 = false; | 
|---|
| 61 | #endif | 
|---|
| 62 |  | 
|---|
| 63 | void error_vprintf(const char* pFmt, va_list args); | 
|---|
| 64 | void error_printf(const char *pFmt, ...); | 
|---|
| 65 |  | 
|---|
| 66 | // Helpers | 
|---|
| 67 |  | 
|---|
| 68 | inline uint8_t clamp255(int32_t i) | 
|---|
| 69 | { | 
|---|
| 70 | return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | inline int32_t clampi(int32_t value, int32_t low, int32_t high) | 
|---|
| 74 | { | 
|---|
| 75 | if (value < low) | 
|---|
| 76 | value = low; | 
|---|
| 77 | else if (value > high) | 
|---|
| 78 | value = high; | 
|---|
| 79 | return value; | 
|---|
| 80 | } | 
|---|
| 81 |  | 
|---|
| 82 | inline uint8_t mul_8(uint32_t v, uint32_t a) | 
|---|
| 83 | { | 
|---|
| 84 | v = v * a + 128; | 
|---|
| 85 | return (uint8_t)((v + (v >> 8)) >> 8); | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) | 
|---|
| 89 | { | 
|---|
| 90 | assert(codesize <= 64); | 
|---|
| 91 | uint64_t bits = 0; | 
|---|
| 92 | uint32_t total_bits = 0; | 
|---|
| 93 |  | 
|---|
| 94 | while (total_bits < codesize) | 
|---|
| 95 | { | 
|---|
| 96 | uint32_t byte_bit_offset = bit_offset & 7; | 
|---|
| 97 | uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); | 
|---|
| 98 |  | 
|---|
| 99 | uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; | 
|---|
| 100 | byte_bits &= ((1 << bits_to_read) - 1); | 
|---|
| 101 |  | 
|---|
| 102 | bits |= ((uint64_t)(byte_bits) << total_bits); | 
|---|
| 103 |  | 
|---|
| 104 | total_bits += bits_to_read; | 
|---|
| 105 | bit_offset += bits_to_read; | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 | return bits; | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) | 
|---|
| 112 | { | 
|---|
| 113 | assert(codesize <= 32); | 
|---|
| 114 | uint32_t bits = 0; | 
|---|
| 115 | uint32_t total_bits = 0; | 
|---|
| 116 |  | 
|---|
| 117 | while (total_bits < codesize) | 
|---|
| 118 | { | 
|---|
| 119 | uint32_t byte_bit_offset = bit_offset & 7; | 
|---|
| 120 | uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); | 
|---|
| 121 |  | 
|---|
| 122 | uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; | 
|---|
| 123 | byte_bits &= ((1 << bits_to_read) - 1); | 
|---|
| 124 |  | 
|---|
| 125 | bits |= (byte_bits << total_bits); | 
|---|
| 126 |  | 
|---|
| 127 | total_bits += bits_to_read; | 
|---|
| 128 | bit_offset += bits_to_read; | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | return bits; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | // Hashing | 
|---|
| 135 |  | 
|---|
| 136 | inline uint32_t bitmix32c(uint32_t v) | 
|---|
| 137 | { | 
|---|
| 138 | v = (v + 0x7ed55d16) + (v << 12); | 
|---|
| 139 | v = (v ^ 0xc761c23c) ^ (v >> 19); | 
|---|
| 140 | v = (v + 0x165667b1) + (v << 5); | 
|---|
| 141 | v = (v + 0xd3a2646c) ^ (v << 9); | 
|---|
| 142 | v = (v + 0xfd7046c5) + (v << 3); | 
|---|
| 143 | v = (v ^ 0xb55a4f09) ^ (v >> 16); | 
|---|
| 144 | return v; | 
|---|
| 145 | } | 
|---|
| 146 |  | 
|---|
| 147 | inline uint32_t bitmix32(uint32_t v) | 
|---|
| 148 | { | 
|---|
| 149 | v -= (v << 6); | 
|---|
| 150 | v ^= (v >> 17); | 
|---|
| 151 | v -= (v << 9); | 
|---|
| 152 | v ^= (v << 4); | 
|---|
| 153 | v -= (v << 3); | 
|---|
| 154 | v ^= (v << 10); | 
|---|
| 155 | v ^= (v >> 15); | 
|---|
| 156 | return v; | 
|---|
| 157 | } | 
|---|
| 158 |  | 
|---|
| 159 | inline uint32_t wang_hash(uint32_t seed) | 
|---|
| 160 | { | 
|---|
| 161 | seed = (seed ^ 61) ^ (seed >> 16); | 
|---|
| 162 | seed *= 9; | 
|---|
| 163 | seed = seed ^ (seed >> 4); | 
|---|
| 164 | seed *= 0x27d4eb2d; | 
|---|
| 165 | seed = seed ^ (seed >> 15); | 
|---|
| 166 | return seed; | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); | 
|---|
| 170 |  | 
|---|
| 171 | template <typename Key> | 
|---|
| 172 | struct bit_hasher | 
|---|
| 173 | { | 
|---|
| 174 | std::size_t operator()(const Key& k) const | 
|---|
| 175 | { | 
|---|
| 176 | return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k)); | 
|---|
| 177 | } | 
|---|
| 178 | }; | 
|---|
| 179 |  | 
|---|
| 180 | class running_stat | 
|---|
| 181 | { | 
|---|
| 182 | public: | 
|---|
| 183 | running_stat() { clear(); } | 
|---|
| 184 |  | 
|---|
| 185 | void clear() | 
|---|
| 186 | { | 
|---|
| 187 | m_n = 0; | 
|---|
| 188 | m_total = 0; | 
|---|
| 189 | m_old_m = 0; | 
|---|
| 190 | m_new_m = 0; | 
|---|
| 191 | m_old_s = 0; | 
|---|
| 192 | m_new_s = 0; | 
|---|
| 193 | m_min = 0; | 
|---|
| 194 | m_max = 0; | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | void push(double x) | 
|---|
| 198 | { | 
|---|
| 199 | m_n++; | 
|---|
| 200 | m_total += x; | 
|---|
| 201 | if (m_n == 1) | 
|---|
| 202 | { | 
|---|
| 203 | m_old_m = m_new_m = x; | 
|---|
| 204 | m_old_s = 0.0; | 
|---|
| 205 | m_min = x; | 
|---|
| 206 | m_max = x; | 
|---|
| 207 | } | 
|---|
| 208 | else | 
|---|
| 209 | { | 
|---|
| 210 | // See Knuth TAOCP vol 2, 3rd edition, page 232 | 
|---|
| 211 | m_new_m = m_old_m + (x - m_old_m) / m_n; | 
|---|
| 212 | m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m); | 
|---|
| 213 | m_old_m = m_new_m; | 
|---|
| 214 | m_old_s = m_new_s; | 
|---|
| 215 | m_min = basisu::minimum(x, m_min); | 
|---|
| 216 | m_max = basisu::maximum(x, m_max); | 
|---|
| 217 | } | 
|---|
| 218 | } | 
|---|
| 219 |  | 
|---|
| 220 | uint32_t get_num() const | 
|---|
| 221 | { | 
|---|
| 222 | return m_n; | 
|---|
| 223 | } | 
|---|
| 224 |  | 
|---|
| 225 | double get_total() const | 
|---|
| 226 | { | 
|---|
| 227 | return m_total; | 
|---|
| 228 | } | 
|---|
| 229 |  | 
|---|
| 230 | double get_mean() const | 
|---|
| 231 | { | 
|---|
| 232 | return (m_n > 0) ? m_new_m : 0.0; | 
|---|
| 233 | } | 
|---|
| 234 |  | 
|---|
| 235 | // Returns sample variance | 
|---|
| 236 | double get_variance() const | 
|---|
| 237 | { | 
|---|
| 238 | return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0); | 
|---|
| 239 | } | 
|---|
| 240 |  | 
|---|
| 241 | double get_std_dev() const | 
|---|
| 242 | { | 
|---|
| 243 | return sqrt(get_variance()); | 
|---|
| 244 | } | 
|---|
| 245 |  | 
|---|
| 246 | double get_min() const | 
|---|
| 247 | { | 
|---|
| 248 | return m_min; | 
|---|
| 249 | } | 
|---|
| 250 |  | 
|---|
| 251 | double get_max() const | 
|---|
| 252 | { | 
|---|
| 253 | return m_max; | 
|---|
| 254 | } | 
|---|
| 255 |  | 
|---|
| 256 | private: | 
|---|
| 257 | uint32_t m_n; | 
|---|
| 258 | double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max; | 
|---|
| 259 | }; | 
|---|
| 260 |  | 
|---|
| 261 | // Linear algebra | 
|---|
| 262 |  | 
|---|
| 263 | template <uint32_t N, typename T> | 
|---|
| 264 | class vec | 
|---|
| 265 | { | 
|---|
| 266 | protected: | 
|---|
| 267 | T m_v[N]; | 
|---|
| 268 |  | 
|---|
| 269 | public: | 
|---|
| 270 | enum { num_elements = N }; | 
|---|
| 271 |  | 
|---|
| 272 | inline vec() { } | 
|---|
| 273 | inline vec(eZero) { set_zero();  } | 
|---|
| 274 |  | 
|---|
| 275 | explicit inline vec(T val) { set(val); } | 
|---|
| 276 | inline vec(T v0, T v1) { set(v0, v1); } | 
|---|
| 277 | inline vec(T v0, T v1, T v2) { set(v0, v1, v2); } | 
|---|
| 278 | inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); } | 
|---|
| 279 | inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; } | 
|---|
| 280 | template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); } | 
|---|
| 281 |  | 
|---|
| 282 | inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; } | 
|---|
| 283 | inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; } | 
|---|
| 284 |  | 
|---|
| 285 | inline T getX() const { return m_v[0]; } | 
|---|
| 286 | inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; } | 
|---|
| 287 | inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; } | 
|---|
| 288 | inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; } | 
|---|
| 289 |  | 
|---|
| 290 | inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false;	return true; } | 
|---|
| 291 | inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } | 
|---|
| 292 |  | 
|---|
| 293 | inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } | 
|---|
| 294 |  | 
|---|
| 295 | template <uint32_t OtherN, typename OtherT> | 
|---|
| 296 | inline vec &set(const vec<OtherN, OtherT> &other) | 
|---|
| 297 | { | 
|---|
| 298 | uint32_t i; | 
|---|
| 299 | if ((const void *)(&other) == (const void *)(this)) | 
|---|
| 300 | return *this; | 
|---|
| 301 | const uint32_t m = minimum(OtherN, N); | 
|---|
| 302 | for (i = 0; i < m; i++) | 
|---|
| 303 | m_v[i] = static_cast<T>(other[i]); | 
|---|
| 304 | for (; i < N; i++) | 
|---|
| 305 | m_v[i] = 0; | 
|---|
| 306 | return *this; | 
|---|
| 307 | } | 
|---|
| 308 |  | 
|---|
| 309 | inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; } | 
|---|
| 310 | inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; } | 
|---|
| 311 | inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; } | 
|---|
| 312 |  | 
|---|
| 313 | inline vec &set(T v0, T v1) | 
|---|
| 314 | { | 
|---|
| 315 | m_v[0] = v0; | 
|---|
| 316 | if (N >= 2) | 
|---|
| 317 | { | 
|---|
| 318 | m_v[1] = v1; | 
|---|
| 319 | clear_elements(2, N); | 
|---|
| 320 | } | 
|---|
| 321 | return *this; | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | inline vec &set(T v0, T v1, T v2) | 
|---|
| 325 | { | 
|---|
| 326 | m_v[0] = v0; | 
|---|
| 327 | if (N >= 2) | 
|---|
| 328 | { | 
|---|
| 329 | m_v[1] = v1; | 
|---|
| 330 | if (N >= 3) | 
|---|
| 331 | { | 
|---|
| 332 | m_v[2] = v2; | 
|---|
| 333 | clear_elements(3, N); | 
|---|
| 334 | } | 
|---|
| 335 | } | 
|---|
| 336 | return *this; | 
|---|
| 337 | } | 
|---|
| 338 |  | 
|---|
| 339 | inline vec &set(T v0, T v1, T v2, T v3) | 
|---|
| 340 | { | 
|---|
| 341 | m_v[0] = v0; | 
|---|
| 342 | if (N >= 2) | 
|---|
| 343 | { | 
|---|
| 344 | m_v[1] = v1; | 
|---|
| 345 | if (N >= 3) | 
|---|
| 346 | { | 
|---|
| 347 | m_v[2] = v2; | 
|---|
| 348 |  | 
|---|
| 349 | if (N >= 4) | 
|---|
| 350 | { | 
|---|
| 351 | m_v[3] = v3; | 
|---|
| 352 | clear_elements(5, N); | 
|---|
| 353 | } | 
|---|
| 354 | } | 
|---|
| 355 | } | 
|---|
| 356 | return *this; | 
|---|
| 357 | } | 
|---|
| 358 |  | 
|---|
| 359 | inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; } | 
|---|
| 360 | template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; } | 
|---|
| 361 |  | 
|---|
| 362 | inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); } | 
|---|
| 363 | inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); } | 
|---|
| 364 |  | 
|---|
| 365 | inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } | 
|---|
| 366 | inline vec operator+ () const { return *this; } | 
|---|
| 367 | inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } | 
|---|
| 368 | inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; } | 
|---|
| 369 | inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; } | 
|---|
| 370 | inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } | 
|---|
| 371 | inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } | 
|---|
| 372 | inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } | 
|---|
| 373 |  | 
|---|
| 374 | friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } | 
|---|
| 375 | friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } | 
|---|
| 376 | friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } | 
|---|
| 377 | friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } | 
|---|
| 378 | friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } | 
|---|
| 379 | friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } | 
|---|
| 380 |  | 
|---|
| 381 | static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } | 
|---|
| 382 |  | 
|---|
| 383 | inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } | 
|---|
| 384 |  | 
|---|
| 385 | inline T norm() const { return dot_product(*this, *this); } | 
|---|
| 386 | inline T length() const { return sqrt(norm()); } | 
|---|
| 387 |  | 
|---|
| 388 | inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; } | 
|---|
| 389 | inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; } | 
|---|
| 390 |  | 
|---|
| 391 | inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); } | 
|---|
| 392 | inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); } | 
|---|
| 393 |  | 
|---|
| 394 | inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len);	return *this; } | 
|---|
| 395 |  | 
|---|
| 396 | inline vec &clamp(T l, T h) | 
|---|
| 397 | { | 
|---|
| 398 | for (uint32_t i = 0; i < N; i++) | 
|---|
| 399 | m_v[i] = basisu::clamp(m_v[i], l, h); | 
|---|
| 400 | return *this; | 
|---|
| 401 | } | 
|---|
| 402 |  | 
|---|
| 403 | static vec component_min(const vec& a, const vec& b) | 
|---|
| 404 | { | 
|---|
| 405 | vec res; | 
|---|
| 406 | for (uint32_t i = 0; i < N; i++) | 
|---|
| 407 | res[i] = minimum(a[i], b[i]); | 
|---|
| 408 | return res; | 
|---|
| 409 | } | 
|---|
| 410 |  | 
|---|
| 411 | static vec component_max(const vec& a, const vec& b) | 
|---|
| 412 | { | 
|---|
| 413 | vec res; | 
|---|
| 414 | for (uint32_t i = 0; i < N; i++) | 
|---|
| 415 | res[i] = maximum(a[i], b[i]); | 
|---|
| 416 | return res; | 
|---|
| 417 | } | 
|---|
| 418 | }; | 
|---|
| 419 |  | 
|---|
| 420 | typedef vec<4, double> vec4D; | 
|---|
| 421 | typedef vec<3, double> vec3D; | 
|---|
| 422 | typedef vec<2, double> vec2D; | 
|---|
| 423 | typedef vec<1, double> vec1D; | 
|---|
| 424 |  | 
|---|
| 425 | typedef vec<4, float> vec4F; | 
|---|
| 426 | typedef vec<3, float> vec3F; | 
|---|
| 427 | typedef vec<2, float> vec2F; | 
|---|
| 428 | typedef vec<1, float> vec1F; | 
|---|
| 429 |  | 
|---|
| 430 | typedef vec<16, float> vec16F; | 
|---|
| 431 |  | 
|---|
| 432 | template <uint32_t Rows, uint32_t Cols, typename T> | 
|---|
| 433 | class matrix | 
|---|
| 434 | { | 
|---|
| 435 | public: | 
|---|
| 436 | typedef vec<Rows, T> col_vec; | 
|---|
| 437 | typedef vec<Cols, T> row_vec; | 
|---|
| 438 |  | 
|---|
| 439 | typedef T scalar_type; | 
|---|
| 440 |  | 
|---|
| 441 | enum { rows = Rows, cols = Cols }; | 
|---|
| 442 |  | 
|---|
| 443 | protected: | 
|---|
| 444 | row_vec m_r[Rows]; | 
|---|
| 445 |  | 
|---|
| 446 | public: | 
|---|
| 447 | inline matrix() {} | 
|---|
| 448 | inline matrix(eZero) { set_zero();  } | 
|---|
| 449 | inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i];	} | 
|---|
| 450 | inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; } | 
|---|
| 451 |  | 
|---|
| 452 | inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } | 
|---|
| 453 | inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } | 
|---|
| 454 |  | 
|---|
| 455 | inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; } | 
|---|
| 456 | inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; } | 
|---|
| 457 |  | 
|---|
| 458 | inline matrix &set_zero() | 
|---|
| 459 | { | 
|---|
| 460 | for (uint32_t i = 0; i < Rows; i++) | 
|---|
| 461 | m_r[i].set_zero(); | 
|---|
| 462 | return *this; | 
|---|
| 463 | } | 
|---|
| 464 |  | 
|---|
| 465 | inline matrix &set_identity() | 
|---|
| 466 | { | 
|---|
| 467 | for (uint32_t i = 0; i < Rows; i++) | 
|---|
| 468 | { | 
|---|
| 469 | m_r[i].set_zero(); | 
|---|
| 470 | if (i < Cols) | 
|---|
| 471 | m_r[i][i] = 1.0f; | 
|---|
| 472 | } | 
|---|
| 473 | return *this; | 
|---|
| 474 | } | 
|---|
| 475 | }; | 
|---|
| 476 |  | 
|---|
| 477 | template<uint32_t N, typename VectorType> | 
|---|
| 478 | inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix) | 
|---|
| 479 | { | 
|---|
| 480 | VectorType axis; | 
|---|
| 481 | if (N == 1) | 
|---|
| 482 | axis.set(1.0f); | 
|---|
| 483 | else | 
|---|
| 484 | { | 
|---|
| 485 | for (uint32_t i = 0; i < N; i++) | 
|---|
| 486 | axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1))); | 
|---|
| 487 | } | 
|---|
| 488 |  | 
|---|
| 489 | VectorType prev_axis(axis); | 
|---|
| 490 |  | 
|---|
| 491 | // Power iterations | 
|---|
| 492 | for (uint32_t power_iter = 0; power_iter < 8; power_iter++) | 
|---|
| 493 | { | 
|---|
| 494 | VectorType trial_axis; | 
|---|
| 495 | double max_sum = 0; | 
|---|
| 496 |  | 
|---|
| 497 | for (uint32_t i = 0; i < N; i++) | 
|---|
| 498 | { | 
|---|
| 499 | double sum = 0; | 
|---|
| 500 | for (uint32_t j = 0; j < N; j++) | 
|---|
| 501 | sum += cmatrix[i][j] * axis[j]; | 
|---|
| 502 |  | 
|---|
| 503 | trial_axis[i] = static_cast<float>(sum); | 
|---|
| 504 |  | 
|---|
| 505 | max_sum = maximum(fabs(sum), max_sum); | 
|---|
| 506 | } | 
|---|
| 507 |  | 
|---|
| 508 | if (max_sum != 0.0f) | 
|---|
| 509 | trial_axis *= static_cast<float>(1.0f / max_sum); | 
|---|
| 510 |  | 
|---|
| 511 | VectorType delta_axis(prev_axis - trial_axis); | 
|---|
| 512 |  | 
|---|
| 513 | prev_axis = axis; | 
|---|
| 514 | axis = trial_axis; | 
|---|
| 515 |  | 
|---|
| 516 | if (delta_axis.norm() < .0024f) | 
|---|
| 517 | break; | 
|---|
| 518 | } | 
|---|
| 519 |  | 
|---|
| 520 | return axis.normalize_in_place(); | 
|---|
| 521 | } | 
|---|
| 522 |  | 
|---|
| 523 | template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys) | 
|---|
| 524 | { | 
|---|
| 525 | for (uint32_t i = 0; i < num_indices; i++) | 
|---|
| 526 | pIndices[i] = i; | 
|---|
| 527 |  | 
|---|
| 528 | std::sort( | 
|---|
| 529 | pIndices, | 
|---|
| 530 | pIndices + num_indices, | 
|---|
| 531 | [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; } | 
|---|
| 532 | ); | 
|---|
| 533 | } | 
|---|
| 534 |  | 
|---|
| 535 | // 1-4 byte direct Radix sort. | 
|---|
| 536 | template <typename T> | 
|---|
| 537 | T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size) | 
|---|
| 538 | { | 
|---|
| 539 | assert(key_ofs < sizeof(T)); | 
|---|
| 540 | assert((key_size >= 1) && (key_size <= 4)); | 
|---|
| 541 |  | 
|---|
| 542 | uint32_t hist[256 * 4]; | 
|---|
| 543 |  | 
|---|
| 544 | memset(hist, 0, sizeof(hist[0]) * 256 * key_size); | 
|---|
| 545 |  | 
|---|
| 546 | #define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs)) | 
|---|
| 547 |  | 
|---|
| 548 | if (key_size == 4) | 
|---|
| 549 | { | 
|---|
| 550 | T* p = pBuf0; | 
|---|
| 551 | T* q = pBuf0 + num_vals; | 
|---|
| 552 | for (; p != q; p++) | 
|---|
| 553 | { | 
|---|
| 554 | const uint32_t key = BASISU_GET_KEY(p); | 
|---|
| 555 |  | 
|---|
| 556 | hist[key & 0xFF]++; | 
|---|
| 557 | hist[256 + ((key >> 8) & 0xFF)]++; | 
|---|
| 558 | hist[512 + ((key >> 16) & 0xFF)]++; | 
|---|
| 559 | hist[768 + ((key >> 24) & 0xFF)]++; | 
|---|
| 560 | } | 
|---|
| 561 | } | 
|---|
| 562 | else if (key_size == 3) | 
|---|
| 563 | { | 
|---|
| 564 | T* p = pBuf0; | 
|---|
| 565 | T* q = pBuf0 + num_vals; | 
|---|
| 566 | for (; p != q; p++) | 
|---|
| 567 | { | 
|---|
| 568 | const uint32_t key = BASISU_GET_KEY(p); | 
|---|
| 569 |  | 
|---|
| 570 | hist[key & 0xFF]++; | 
|---|
| 571 | hist[256 + ((key >> 8) & 0xFF)]++; | 
|---|
| 572 | hist[512 + ((key >> 16) & 0xFF)]++; | 
|---|
| 573 | } | 
|---|
| 574 | } | 
|---|
| 575 | else if (key_size == 2) | 
|---|
| 576 | { | 
|---|
| 577 | T* p = pBuf0; | 
|---|
| 578 | T* q = pBuf0 + (num_vals >> 1) * 2; | 
|---|
| 579 |  | 
|---|
| 580 | for (; p != q; p += 2) | 
|---|
| 581 | { | 
|---|
| 582 | const uint32_t key0 = BASISU_GET_KEY(p); | 
|---|
| 583 | const uint32_t key1 = BASISU_GET_KEY(p + 1); | 
|---|
| 584 |  | 
|---|
| 585 | hist[key0 & 0xFF]++; | 
|---|
| 586 | hist[256 + ((key0 >> 8) & 0xFF)]++; | 
|---|
| 587 |  | 
|---|
| 588 | hist[key1 & 0xFF]++; | 
|---|
| 589 | hist[256 + ((key1 >> 8) & 0xFF)]++; | 
|---|
| 590 | } | 
|---|
| 591 |  | 
|---|
| 592 | if (num_vals & 1) | 
|---|
| 593 | { | 
|---|
| 594 | const uint32_t key = BASISU_GET_KEY(p); | 
|---|
| 595 |  | 
|---|
| 596 | hist[key & 0xFF]++; | 
|---|
| 597 | hist[256 + ((key >> 8) & 0xFF)]++; | 
|---|
| 598 | } | 
|---|
| 599 | } | 
|---|
| 600 | else | 
|---|
| 601 | { | 
|---|
| 602 | assert(key_size == 1); | 
|---|
| 603 | if (key_size != 1) | 
|---|
| 604 | return NULL; | 
|---|
| 605 |  | 
|---|
| 606 | T* p = pBuf0; | 
|---|
| 607 | T* q = pBuf0 + (num_vals >> 1) * 2; | 
|---|
| 608 |  | 
|---|
| 609 | for (; p != q; p += 2) | 
|---|
| 610 | { | 
|---|
| 611 | const uint32_t key0 = BASISU_GET_KEY(p); | 
|---|
| 612 | const uint32_t key1 = BASISU_GET_KEY(p + 1); | 
|---|
| 613 |  | 
|---|
| 614 | hist[key0 & 0xFF]++; | 
|---|
| 615 | hist[key1 & 0xFF]++; | 
|---|
| 616 | } | 
|---|
| 617 |  | 
|---|
| 618 | if (num_vals & 1) | 
|---|
| 619 | { | 
|---|
| 620 | const uint32_t key = BASISU_GET_KEY(p); | 
|---|
| 621 | hist[key & 0xFF]++; | 
|---|
| 622 | } | 
|---|
| 623 | } | 
|---|
| 624 |  | 
|---|
| 625 | T* pCur = pBuf0; | 
|---|
| 626 | T* pNew = pBuf1; | 
|---|
| 627 |  | 
|---|
| 628 | for (uint32_t pass = 0; pass < key_size; pass++) | 
|---|
| 629 | { | 
|---|
| 630 | const uint32_t* pHist = &hist[pass << 8]; | 
|---|
| 631 |  | 
|---|
| 632 | uint32_t offsets[256]; | 
|---|
| 633 |  | 
|---|
| 634 | uint32_t cur_ofs = 0; | 
|---|
| 635 | for (uint32_t i = 0; i < 256; i += 2) | 
|---|
| 636 | { | 
|---|
| 637 | offsets[i] = cur_ofs; | 
|---|
| 638 | cur_ofs += pHist[i]; | 
|---|
| 639 |  | 
|---|
| 640 | offsets[i + 1] = cur_ofs; | 
|---|
| 641 | cur_ofs += pHist[i + 1]; | 
|---|
| 642 | } | 
|---|
| 643 |  | 
|---|
| 644 | const uint32_t pass_shift = pass << 3; | 
|---|
| 645 |  | 
|---|
| 646 | T* p = pCur; | 
|---|
| 647 | T* q = pCur + (num_vals >> 1) * 2; | 
|---|
| 648 |  | 
|---|
| 649 | for (; p != q; p += 2) | 
|---|
| 650 | { | 
|---|
| 651 | uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; | 
|---|
| 652 | uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF; | 
|---|
| 653 |  | 
|---|
| 654 | if (c0 == c1) | 
|---|
| 655 | { | 
|---|
| 656 | uint32_t dst_offset0 = offsets[c0]; | 
|---|
| 657 |  | 
|---|
| 658 | offsets[c0] = dst_offset0 + 2; | 
|---|
| 659 |  | 
|---|
| 660 | pNew[dst_offset0] = p[0]; | 
|---|
| 661 | pNew[dst_offset0 + 1] = p[1]; | 
|---|
| 662 | } | 
|---|
| 663 | else | 
|---|
| 664 | { | 
|---|
| 665 | uint32_t dst_offset0 = offsets[c0]++; | 
|---|
| 666 | uint32_t dst_offset1 = offsets[c1]++; | 
|---|
| 667 |  | 
|---|
| 668 | pNew[dst_offset0] = p[0]; | 
|---|
| 669 | pNew[dst_offset1] = p[1]; | 
|---|
| 670 | } | 
|---|
| 671 | } | 
|---|
| 672 |  | 
|---|
| 673 | if (num_vals & 1) | 
|---|
| 674 | { | 
|---|
| 675 | uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF; | 
|---|
| 676 |  | 
|---|
| 677 | uint32_t dst_offset = offsets[c]; | 
|---|
| 678 | offsets[c] = dst_offset + 1; | 
|---|
| 679 |  | 
|---|
| 680 | pNew[dst_offset] = *p; | 
|---|
| 681 | } | 
|---|
| 682 |  | 
|---|
| 683 | T* t = pCur; | 
|---|
| 684 | pCur = pNew; | 
|---|
| 685 | pNew = t; | 
|---|
| 686 | } | 
|---|
| 687 |  | 
|---|
| 688 | return pCur; | 
|---|
| 689 | } | 
|---|
| 690 |  | 
|---|
| 691 | #undef BASISU_GET_KEY | 
|---|
| 692 |  | 
|---|
| 693 | // Very simple job pool with no dependencies. | 
|---|
| 694 | class job_pool | 
|---|
| 695 | { | 
|---|
| 696 | BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool); | 
|---|
| 697 |  | 
|---|
| 698 | public: | 
|---|
| 699 | // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. | 
|---|
| 700 | job_pool(uint32_t num_threads); | 
|---|
| 701 | ~job_pool(); | 
|---|
| 702 |  | 
|---|
| 703 | void add_job(const std::function<void()>& job); | 
|---|
| 704 | void add_job(std::function<void()>&& job); | 
|---|
| 705 |  | 
|---|
| 706 | void wait_for_all(); | 
|---|
| 707 |  | 
|---|
| 708 | size_t get_total_threads() const { return 1 + m_threads.size(); } | 
|---|
| 709 |  | 
|---|
| 710 | private: | 
|---|
| 711 | std::vector<std::thread> m_threads; | 
|---|
| 712 | std::vector<std::function<void()> > m_queue; | 
|---|
| 713 |  | 
|---|
| 714 | std::mutex m_mutex; | 
|---|
| 715 | std::condition_variable m_has_work; | 
|---|
| 716 | std::condition_variable m_no_more_jobs; | 
|---|
| 717 |  | 
|---|
| 718 | uint32_t m_num_active_jobs; | 
|---|
| 719 |  | 
|---|
| 720 | std::atomic<bool> m_kill_flag; | 
|---|
| 721 |  | 
|---|
| 722 | void job_thread(uint32_t index); | 
|---|
| 723 | }; | 
|---|
| 724 |  | 
|---|
| 725 | // Simple 32-bit color class | 
|---|
| 726 |  | 
|---|
| 727 | class color_rgba_i16 | 
|---|
| 728 | { | 
|---|
| 729 | public: | 
|---|
| 730 | union | 
|---|
| 731 | { | 
|---|
| 732 | int16_t m_comps[4]; | 
|---|
| 733 |  | 
|---|
| 734 | struct | 
|---|
| 735 | { | 
|---|
| 736 | int16_t r; | 
|---|
| 737 | int16_t g; | 
|---|
| 738 | int16_t b; | 
|---|
| 739 | int16_t a; | 
|---|
| 740 | }; | 
|---|
| 741 | }; | 
|---|
| 742 |  | 
|---|
| 743 | inline color_rgba_i16() | 
|---|
| 744 | { | 
|---|
| 745 | static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4"); | 
|---|
| 746 | } | 
|---|
| 747 |  | 
|---|
| 748 | inline color_rgba_i16(int sr, int sg, int sb, int sa) | 
|---|
| 749 | { | 
|---|
| 750 | set(sr, sg, sb, sa); | 
|---|
| 751 | } | 
|---|
| 752 |  | 
|---|
| 753 | inline color_rgba_i16 &set(int sr, int sg, int sb, int sa) | 
|---|
| 754 | { | 
|---|
| 755 | m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX); | 
|---|
| 756 | m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX); | 
|---|
| 757 | m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX); | 
|---|
| 758 | m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX); | 
|---|
| 759 | return *this; | 
|---|
| 760 | } | 
|---|
| 761 | }; | 
|---|
| 762 |  | 
|---|
| 763 | class color_rgba | 
|---|
| 764 | { | 
|---|
| 765 | public: | 
|---|
| 766 | union | 
|---|
| 767 | { | 
|---|
| 768 | uint8_t m_comps[4]; | 
|---|
| 769 |  | 
|---|
| 770 | struct | 
|---|
| 771 | { | 
|---|
| 772 | uint8_t r; | 
|---|
| 773 | uint8_t g; | 
|---|
| 774 | uint8_t b; | 
|---|
| 775 | uint8_t a; | 
|---|
| 776 | }; | 
|---|
| 777 | }; | 
|---|
| 778 |  | 
|---|
| 779 | inline color_rgba() | 
|---|
| 780 | { | 
|---|
| 781 | static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); | 
|---|
| 782 | static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)"); | 
|---|
| 783 | } | 
|---|
| 784 |  | 
|---|
| 785 | // Not too hot about this idea. | 
|---|
| 786 | inline color_rgba(const basist::color32& other) : | 
|---|
| 787 | r(other.r), | 
|---|
| 788 | g(other.g), | 
|---|
| 789 | b(other.b), | 
|---|
| 790 | a(other.a) | 
|---|
| 791 | { | 
|---|
| 792 | } | 
|---|
| 793 |  | 
|---|
| 794 | color_rgba& operator= (const basist::color32& rhs) | 
|---|
| 795 | { | 
|---|
| 796 | r = rhs.r; | 
|---|
| 797 | g = rhs.g; | 
|---|
| 798 | b = rhs.b; | 
|---|
| 799 | a = rhs.a; | 
|---|
| 800 | return *this; | 
|---|
| 801 | } | 
|---|
| 802 |  | 
|---|
| 803 | inline color_rgba(int y) | 
|---|
| 804 | { | 
|---|
| 805 | set(y); | 
|---|
| 806 | } | 
|---|
| 807 |  | 
|---|
| 808 | inline color_rgba(int y, int na) | 
|---|
| 809 | { | 
|---|
| 810 | set(y, na); | 
|---|
| 811 | } | 
|---|
| 812 |  | 
|---|
| 813 | inline color_rgba(int sr, int sg, int sb, int sa) | 
|---|
| 814 | { | 
|---|
| 815 | set(sr, sg, sb, sa); | 
|---|
| 816 | } | 
|---|
| 817 |  | 
|---|
| 818 | inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) | 
|---|
| 819 | { | 
|---|
| 820 | set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); | 
|---|
| 821 | } | 
|---|
| 822 |  | 
|---|
| 823 | inline color_rgba& set_noclamp_y(int y) | 
|---|
| 824 | { | 
|---|
| 825 | m_comps[0] = (uint8_t)y; | 
|---|
| 826 | m_comps[1] = (uint8_t)y; | 
|---|
| 827 | m_comps[2] = (uint8_t)y; | 
|---|
| 828 | m_comps[3] = (uint8_t)255; | 
|---|
| 829 | return *this; | 
|---|
| 830 | } | 
|---|
| 831 |  | 
|---|
| 832 | inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) | 
|---|
| 833 | { | 
|---|
| 834 | m_comps[0] = (uint8_t)sr; | 
|---|
| 835 | m_comps[1] = (uint8_t)sg; | 
|---|
| 836 | m_comps[2] = (uint8_t)sb; | 
|---|
| 837 | m_comps[3] = (uint8_t)sa; | 
|---|
| 838 | return *this; | 
|---|
| 839 | } | 
|---|
| 840 |  | 
|---|
| 841 | inline color_rgba &set(int y) | 
|---|
| 842 | { | 
|---|
| 843 | m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255)); | 
|---|
| 844 | m_comps[1] = m_comps[0]; | 
|---|
| 845 | m_comps[2] = m_comps[0]; | 
|---|
| 846 | m_comps[3] = 255; | 
|---|
| 847 | return *this; | 
|---|
| 848 | } | 
|---|
| 849 |  | 
|---|
| 850 | inline color_rgba &set(int y, int na) | 
|---|
| 851 | { | 
|---|
| 852 | m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255)); | 
|---|
| 853 | m_comps[1] = m_comps[0]; | 
|---|
| 854 | m_comps[2] = m_comps[0]; | 
|---|
| 855 | m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255)); | 
|---|
| 856 | return *this; | 
|---|
| 857 | } | 
|---|
| 858 |  | 
|---|
| 859 | inline color_rgba &set(int sr, int sg, int sb, int sa) | 
|---|
| 860 | { | 
|---|
| 861 | m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255)); | 
|---|
| 862 | m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255)); | 
|---|
| 863 | m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255)); | 
|---|
| 864 | m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255)); | 
|---|
| 865 | return *this; | 
|---|
| 866 | } | 
|---|
| 867 |  | 
|---|
| 868 | inline color_rgba &set_rgb(int sr, int sg, int sb) | 
|---|
| 869 | { | 
|---|
| 870 | m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255)); | 
|---|
| 871 | m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255)); | 
|---|
| 872 | m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255)); | 
|---|
| 873 | return *this; | 
|---|
| 874 | } | 
|---|
| 875 |  | 
|---|
| 876 | inline color_rgba &set_rgb(const color_rgba &other) | 
|---|
| 877 | { | 
|---|
| 878 | r = other.r; | 
|---|
| 879 | g = other.g; | 
|---|
| 880 | b = other.b; | 
|---|
| 881 | return *this; | 
|---|
| 882 | } | 
|---|
| 883 |  | 
|---|
| 884 | inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } | 
|---|
| 885 | inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } | 
|---|
| 886 |  | 
|---|
| 887 | inline void clear() | 
|---|
| 888 | { | 
|---|
| 889 | m_comps[0] = 0; | 
|---|
| 890 | m_comps[1] = 0; | 
|---|
| 891 | m_comps[2] = 0; | 
|---|
| 892 | m_comps[3] = 0; | 
|---|
| 893 | } | 
|---|
| 894 |  | 
|---|
| 895 | inline bool operator== (const color_rgba &rhs) const | 
|---|
| 896 | { | 
|---|
| 897 | if (m_comps[0] != rhs.m_comps[0]) return false; | 
|---|
| 898 | if (m_comps[1] != rhs.m_comps[1]) return false; | 
|---|
| 899 | if (m_comps[2] != rhs.m_comps[2]) return false; | 
|---|
| 900 | if (m_comps[3] != rhs.m_comps[3]) return false; | 
|---|
| 901 | return true; | 
|---|
| 902 | } | 
|---|
| 903 |  | 
|---|
| 904 | inline bool operator!= (const color_rgba &rhs) const | 
|---|
| 905 | { | 
|---|
| 906 | return !(*this == rhs); | 
|---|
| 907 | } | 
|---|
| 908 |  | 
|---|
| 909 | inline bool operator<(const color_rgba &rhs) const | 
|---|
| 910 | { | 
|---|
| 911 | for (int i = 0; i < 4; i++) | 
|---|
| 912 | { | 
|---|
| 913 | if (m_comps[i] < rhs.m_comps[i]) | 
|---|
| 914 | return true; | 
|---|
| 915 | else if (m_comps[i] != rhs.m_comps[i]) | 
|---|
| 916 | return false; | 
|---|
| 917 | } | 
|---|
| 918 | return false; | 
|---|
| 919 | } | 
|---|
| 920 |  | 
|---|
| 921 | inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } | 
|---|
| 922 | inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } | 
|---|
| 923 | inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); } | 
|---|
| 924 |  | 
|---|
| 925 | inline basist::color32 get_color32() const | 
|---|
| 926 | { | 
|---|
| 927 | return basist::color32(r, g, b, a); | 
|---|
| 928 | } | 
|---|
| 929 |  | 
|---|
| 930 | static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } | 
|---|
| 931 | static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } | 
|---|
| 932 | }; | 
|---|
| 933 |  | 
|---|
| 934 | typedef basisu::vector<color_rgba> color_rgba_vec; | 
|---|
| 935 |  | 
|---|
| 936 | const color_rgba g_black_color(0, 0, 0, 255); | 
|---|
| 937 | const color_rgba g_black_trans_color(0, 0, 0, 0); | 
|---|
| 938 | const color_rgba g_white_color(255, 255, 255, 255); | 
|---|
| 939 |  | 
|---|
| 940 | inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) | 
|---|
| 941 | { | 
|---|
| 942 | int dr = r0 - r1, dg = g0 - g1, db = b0 - b1; | 
|---|
| 943 | return dr * dr + dg * dg + db * db; | 
|---|
| 944 | } | 
|---|
| 945 |  | 
|---|
| 946 | inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1) | 
|---|
| 947 | { | 
|---|
| 948 | int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1; | 
|---|
| 949 | return dr * dr + dg * dg + db * db + da * da; | 
|---|
| 950 | } | 
|---|
| 951 |  | 
|---|
| 952 | inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha) | 
|---|
| 953 | { | 
|---|
| 954 | if (alpha) | 
|---|
| 955 | return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); | 
|---|
| 956 | else | 
|---|
| 957 | return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); | 
|---|
| 958 | } | 
|---|
| 959 |  | 
|---|
| 960 | // TODO: Allow user to control channel weightings. | 
|---|
| 961 | inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) | 
|---|
| 962 | { | 
|---|
| 963 | if (perceptual) | 
|---|
| 964 | { | 
|---|
| 965 | #if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE | 
|---|
| 966 | const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; | 
|---|
| 967 | const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; | 
|---|
| 968 |  | 
|---|
| 969 | const float cr1 = e1.r - l1; | 
|---|
| 970 | const float cr2 = e2.r - l2; | 
|---|
| 971 |  | 
|---|
| 972 | const float cb1 = e1.b - l1; | 
|---|
| 973 | const float cb2 = e2.b - l2; | 
|---|
| 974 |  | 
|---|
| 975 | const float dl = l1 - l2; | 
|---|
| 976 | const float dcr = cr1 - cr2; | 
|---|
| 977 | const float dcb = cb1 - cb2; | 
|---|
| 978 |  | 
|---|
| 979 | uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb); | 
|---|
| 980 |  | 
|---|
| 981 | if (alpha) | 
|---|
| 982 | { | 
|---|
| 983 | int da = static_cast<int>(e1.a) - static_cast<int>(e2.a); | 
|---|
| 984 | d += static_cast<uint32_t>(128.0f*da*da); | 
|---|
| 985 | } | 
|---|
| 986 |  | 
|---|
| 987 | return d; | 
|---|
| 988 | #elif 1 | 
|---|
| 989 | int dr = e1.r - e2.r; | 
|---|
| 990 | int dg = e1.g - e2.g; | 
|---|
| 991 | int db = e1.b - e2.b; | 
|---|
| 992 |  | 
|---|
| 993 | #if 0 | 
|---|
| 994 | int delta_l = dr * 27 + dg * 92 + db * 9; | 
|---|
| 995 | int delta_cr = dr * 128 - delta_l; | 
|---|
| 996 | int delta_cb = db * 128 - delta_l; | 
|---|
| 997 |  | 
|---|
| 998 | uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) + | 
|---|
| 999 | ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) + | 
|---|
| 1000 | ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U); | 
|---|
| 1001 | #else | 
|---|
| 1002 | int64_t delta_l = dr * 27 + dg * 92 + db * 9; | 
|---|
| 1003 | int64_t delta_cr = dr * 128 - delta_l; | 
|---|
| 1004 | int64_t delta_cb = db * 128 - delta_l; | 
|---|
| 1005 |  | 
|---|
| 1006 | uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) + | 
|---|
| 1007 | ((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) + | 
|---|
| 1008 | ((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U); | 
|---|
| 1009 | #endif | 
|---|
| 1010 |  | 
|---|
| 1011 | if (alpha) | 
|---|
| 1012 | { | 
|---|
| 1013 | int da = (e1.a - e2.a) << 7; | 
|---|
| 1014 | // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring. | 
|---|
| 1015 | id += ((uint32_t)(da * da) >> 7U); | 
|---|
| 1016 | } | 
|---|
| 1017 |  | 
|---|
| 1018 | return id; | 
|---|
| 1019 | #else | 
|---|
| 1020 | int dr = e1.r - e2.r; | 
|---|
| 1021 | int dg = e1.g - e2.g; | 
|---|
| 1022 | int db = e1.b - e2.b; | 
|---|
| 1023 |  | 
|---|
| 1024 | int64_t delta_l = dr * 27 + dg * 92 + db * 9; | 
|---|
| 1025 | int64_t delta_cr = dr * 128 - delta_l; | 
|---|
| 1026 | int64_t delta_cb = db * 128 - delta_l; | 
|---|
| 1027 |  | 
|---|
| 1028 | int64_t id = ((delta_l * delta_l) * 128) + | 
|---|
| 1029 | ((delta_cr * delta_cr) * 26) + | 
|---|
| 1030 | ((delta_cb * delta_cb) * 3); | 
|---|
| 1031 |  | 
|---|
| 1032 | if (alpha) | 
|---|
| 1033 | { | 
|---|
| 1034 | int64_t da = (e1.a - e2.a); | 
|---|
| 1035 | id += (da * da) * 128; | 
|---|
| 1036 | } | 
|---|
| 1037 |  | 
|---|
| 1038 | int d = (id + 8192) >> 14; | 
|---|
| 1039 |  | 
|---|
| 1040 | return d; | 
|---|
| 1041 | #endif | 
|---|
| 1042 | } | 
|---|
| 1043 | else | 
|---|
| 1044 | return color_distance(e1, e2, alpha); | 
|---|
| 1045 | } | 
|---|
| 1046 |  | 
|---|
| 1047 | static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b) | 
|---|
| 1048 | { | 
|---|
| 1049 | const int dl = a.r - b.r; | 
|---|
| 1050 | const int da = a.a - b.a; | 
|---|
| 1051 | return dl * dl + da * da; | 
|---|
| 1052 | } | 
|---|
| 1053 |  | 
|---|
| 1054 | // String helpers | 
|---|
| 1055 |  | 
|---|
| 1056 | inline int string_find_right(const std::string& filename, char c) | 
|---|
| 1057 | { | 
|---|
| 1058 | size_t result = filename.find_last_of(c); | 
|---|
| 1059 | return (result == std::string::npos) ? -1 : (int)result; | 
|---|
| 1060 | } | 
|---|
| 1061 |  | 
|---|
| 1062 | inline std::string string_get_extension(const std::string &filename) | 
|---|
| 1063 | { | 
|---|
| 1064 | int sep = -1; | 
|---|
| 1065 | #ifdef _WIN32 | 
|---|
| 1066 | sep = string_find_right(filename, '\\'); | 
|---|
| 1067 | #endif | 
|---|
| 1068 | if (sep < 0) | 
|---|
| 1069 | sep = string_find_right(filename, '/'); | 
|---|
| 1070 |  | 
|---|
| 1071 | int dot = string_find_right(filename, '.'); | 
|---|
| 1072 | if (dot <= sep) | 
|---|
| 1073 | return ""; | 
|---|
| 1074 |  | 
|---|
| 1075 | std::string result(filename); | 
|---|
| 1076 | result.erase(0, dot + 1); | 
|---|
| 1077 |  | 
|---|
| 1078 | return result; | 
|---|
| 1079 | } | 
|---|
| 1080 |  | 
|---|
| 1081 | inline bool string_remove_extension(std::string &filename) | 
|---|
| 1082 | { | 
|---|
| 1083 | int sep = -1; | 
|---|
| 1084 | #ifdef _WIN32 | 
|---|
| 1085 | sep = string_find_right(filename, '\\'); | 
|---|
| 1086 | #endif | 
|---|
| 1087 | if (sep < 0) | 
|---|
| 1088 | sep = string_find_right(filename, '/'); | 
|---|
| 1089 |  | 
|---|
| 1090 | int dot = string_find_right(filename, '.'); | 
|---|
| 1091 | if ((dot < sep) || (dot < 0)) | 
|---|
| 1092 | return false; | 
|---|
| 1093 |  | 
|---|
| 1094 | filename.resize(dot); | 
|---|
| 1095 |  | 
|---|
| 1096 | return true; | 
|---|
| 1097 | } | 
|---|
| 1098 |  | 
|---|
| 1099 | inline std::string string_format(const char* pFmt, ...) | 
|---|
| 1100 | { | 
|---|
| 1101 | char buf[2048]; | 
|---|
| 1102 |  | 
|---|
| 1103 | va_list args; | 
|---|
| 1104 | va_start(args, pFmt); | 
|---|
| 1105 | #ifdef _WIN32 | 
|---|
| 1106 | vsprintf_s(buf, sizeof(buf), pFmt, args); | 
|---|
| 1107 | #else | 
|---|
| 1108 | vsnprintf(buf, sizeof(buf), pFmt, args); | 
|---|
| 1109 | #endif | 
|---|
| 1110 | va_end(args); | 
|---|
| 1111 |  | 
|---|
| 1112 | return std::string(buf); | 
|---|
| 1113 | } | 
|---|
| 1114 |  | 
|---|
| 1115 | inline std::string string_tolower(const std::string& s) | 
|---|
| 1116 | { | 
|---|
| 1117 | std::string result(s); | 
|---|
| 1118 | for (size_t i = 0; i < result.size(); i++) | 
|---|
| 1119 | result[i] = (char)tolower((int)result[i]); | 
|---|
| 1120 | return result; | 
|---|
| 1121 | } | 
|---|
| 1122 |  | 
|---|
| 1123 | inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc) | 
|---|
| 1124 | { | 
|---|
| 1125 | assert(pDst && pSrc && dst_len); | 
|---|
| 1126 | if (!dst_len) | 
|---|
| 1127 | return pDst; | 
|---|
| 1128 |  | 
|---|
| 1129 | const size_t src_len = strlen(pSrc); | 
|---|
| 1130 | const size_t src_len_plus_terminator = src_len + 1; | 
|---|
| 1131 |  | 
|---|
| 1132 | if (src_len_plus_terminator <= dst_len) | 
|---|
| 1133 | memcpy(pDst, pSrc, src_len_plus_terminator); | 
|---|
| 1134 | else | 
|---|
| 1135 | { | 
|---|
| 1136 | if (dst_len > 1) | 
|---|
| 1137 | memcpy(pDst, pSrc, dst_len - 1); | 
|---|
| 1138 | pDst[dst_len - 1] = '\0'; | 
|---|
| 1139 | } | 
|---|
| 1140 |  | 
|---|
| 1141 | return pDst; | 
|---|
| 1142 | } | 
|---|
| 1143 |  | 
|---|
| 1144 | inline bool string_ends_with(const std::string& s, char c) | 
|---|
| 1145 | { | 
|---|
| 1146 | return (s.size() != 0) && (s.back() == c); | 
|---|
| 1147 | } | 
|---|
| 1148 |  | 
|---|
| 1149 | inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt) | 
|---|
| 1150 | { | 
|---|
| 1151 | #ifdef _MSC_VER | 
|---|
| 1152 | char drive_buf[_MAX_DRIVE] = { 0 }; | 
|---|
| 1153 | char dir_buf[_MAX_DIR] = { 0 }; | 
|---|
| 1154 | char fname_buf[_MAX_FNAME] = { 0 }; | 
|---|
| 1155 | char ext_buf[_MAX_EXT] = { 0 }; | 
|---|
| 1156 |  | 
|---|
| 1157 | errno_t error = _splitpath_s(p, | 
|---|
| 1158 | pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, | 
|---|
| 1159 | pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, | 
|---|
| 1160 | pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, | 
|---|
| 1161 | pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); | 
|---|
| 1162 | if (error != 0) | 
|---|
| 1163 | return false; | 
|---|
| 1164 |  | 
|---|
| 1165 | if (pDrive) *pDrive = drive_buf; | 
|---|
| 1166 | if (pDir) *pDir = dir_buf; | 
|---|
| 1167 | if (pFilename) *pFilename = fname_buf; | 
|---|
| 1168 | if (pExt) *pExt = ext_buf; | 
|---|
| 1169 | return true; | 
|---|
| 1170 | #else | 
|---|
| 1171 | char dirtmp[1024], nametmp[1024]; | 
|---|
| 1172 | strcpy_safe(dirtmp, sizeof(dirtmp), p); | 
|---|
| 1173 | strcpy_safe(nametmp, sizeof(nametmp), p); | 
|---|
| 1174 |  | 
|---|
| 1175 | if (pDrive) | 
|---|
| 1176 | pDrive->resize(0); | 
|---|
| 1177 |  | 
|---|
| 1178 | const char *pDirName = dirname(dirtmp); | 
|---|
| 1179 | const char* pBaseName = basename(nametmp); | 
|---|
| 1180 | if ((!pDirName) || (!pBaseName)) | 
|---|
| 1181 | return false; | 
|---|
| 1182 |  | 
|---|
| 1183 | if (pDir) | 
|---|
| 1184 | { | 
|---|
| 1185 | *pDir = pDirName; | 
|---|
| 1186 | if ((pDir->size()) && (pDir->back() != '/')) | 
|---|
| 1187 | *pDir += "/"; | 
|---|
| 1188 | } | 
|---|
| 1189 |  | 
|---|
| 1190 | if (pFilename) | 
|---|
| 1191 | { | 
|---|
| 1192 | *pFilename = pBaseName; | 
|---|
| 1193 | string_remove_extension(*pFilename); | 
|---|
| 1194 | } | 
|---|
| 1195 |  | 
|---|
| 1196 | if (pExt) | 
|---|
| 1197 | { | 
|---|
| 1198 | *pExt = pBaseName; | 
|---|
| 1199 | *pExt = string_get_extension(*pExt); | 
|---|
| 1200 | if (pExt->size()) | 
|---|
| 1201 | *pExt = "."+ *pExt; | 
|---|
| 1202 | } | 
|---|
| 1203 |  | 
|---|
| 1204 | return true; | 
|---|
| 1205 | #endif | 
|---|
| 1206 | } | 
|---|
| 1207 |  | 
|---|
| 1208 | inline bool is_path_separator(char c) | 
|---|
| 1209 | { | 
|---|
| 1210 | #ifdef _WIN32 | 
|---|
| 1211 | return (c == '/') || (c == '\\'); | 
|---|
| 1212 | #else | 
|---|
| 1213 | return (c == '/'); | 
|---|
| 1214 | #endif | 
|---|
| 1215 | } | 
|---|
| 1216 |  | 
|---|
| 1217 | inline bool is_drive_separator(char c) | 
|---|
| 1218 | { | 
|---|
| 1219 | #ifdef _WIN32 | 
|---|
| 1220 | return (c == ':'); | 
|---|
| 1221 | #else | 
|---|
| 1222 | (void)c; | 
|---|
| 1223 | return false; | 
|---|
| 1224 | #endif | 
|---|
| 1225 | } | 
|---|
| 1226 |  | 
|---|
| 1227 | inline void string_combine_path(std::string &dst, const char *p, const char *q) | 
|---|
| 1228 | { | 
|---|
| 1229 | std::string temp(p); | 
|---|
| 1230 | if (temp.size() && !is_path_separator(q[0])) | 
|---|
| 1231 | { | 
|---|
| 1232 | if (!is_path_separator(temp.back())) | 
|---|
| 1233 | temp.append(1, BASISU_PATH_SEPERATOR_CHAR); | 
|---|
| 1234 | } | 
|---|
| 1235 | temp += q; | 
|---|
| 1236 | dst.swap(temp); | 
|---|
| 1237 | } | 
|---|
| 1238 |  | 
|---|
| 1239 | inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r) | 
|---|
| 1240 | { | 
|---|
| 1241 | string_combine_path(dst, p, q); | 
|---|
| 1242 | string_combine_path(dst, dst.c_str(), r); | 
|---|
| 1243 | } | 
|---|
| 1244 |  | 
|---|
| 1245 | inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) | 
|---|
| 1246 | { | 
|---|
| 1247 | string_combine_path(dst, p, q, r); | 
|---|
| 1248 | if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.')) | 
|---|
| 1249 | dst.append(1, '.'); | 
|---|
| 1250 | dst.append(pExt); | 
|---|
| 1251 | } | 
|---|
| 1252 |  | 
|---|
| 1253 | inline bool string_get_pathname(const char *p, std::string &path) | 
|---|
| 1254 | { | 
|---|
| 1255 | std::string temp_drive, temp_path; | 
|---|
| 1256 | if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL)) | 
|---|
| 1257 | return false; | 
|---|
| 1258 | string_combine_path(path, temp_drive.c_str(), temp_path.c_str()); | 
|---|
| 1259 | return true; | 
|---|
| 1260 | } | 
|---|
| 1261 |  | 
|---|
| 1262 | inline bool string_get_filename(const char *p, std::string &filename) | 
|---|
| 1263 | { | 
|---|
| 1264 | std::string temp_ext; | 
|---|
| 1265 | if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext)) | 
|---|
| 1266 | return false; | 
|---|
| 1267 | filename += temp_ext; | 
|---|
| 1268 | return true; | 
|---|
| 1269 | } | 
|---|
| 1270 |  | 
|---|
| 1271 | class rand | 
|---|
| 1272 | { | 
|---|
| 1273 | std::mt19937 m_mt; | 
|---|
| 1274 |  | 
|---|
| 1275 | public: | 
|---|
| 1276 | rand() {	} | 
|---|
| 1277 |  | 
|---|
| 1278 | rand(uint32_t s) { seed(s); } | 
|---|
| 1279 | void seed(uint32_t s) { m_mt.seed(s); } | 
|---|
| 1280 |  | 
|---|
| 1281 | // between [l,h] | 
|---|
| 1282 | int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); } | 
|---|
| 1283 |  | 
|---|
| 1284 | uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); } | 
|---|
| 1285 |  | 
|---|
| 1286 | bool bit() { return irand(0, 1) == 1; } | 
|---|
| 1287 |  | 
|---|
| 1288 | uint8_t byte() { return static_cast<uint8_t>(urand32()); } | 
|---|
| 1289 |  | 
|---|
| 1290 | // between [l,h) | 
|---|
| 1291 | float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); } | 
|---|
| 1292 |  | 
|---|
| 1293 | float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); } | 
|---|
| 1294 | }; | 
|---|
| 1295 |  | 
|---|
| 1296 | class priority_queue | 
|---|
| 1297 | { | 
|---|
| 1298 | public: | 
|---|
| 1299 | priority_queue() : | 
|---|
| 1300 | m_size(0) | 
|---|
| 1301 | { | 
|---|
| 1302 | } | 
|---|
| 1303 |  | 
|---|
| 1304 | void clear() | 
|---|
| 1305 | { | 
|---|
| 1306 | m_heap.clear(); | 
|---|
| 1307 | m_size = 0; | 
|---|
| 1308 | } | 
|---|
| 1309 |  | 
|---|
| 1310 | void init(uint32_t max_entries, uint32_t first_index, float first_priority) | 
|---|
| 1311 | { | 
|---|
| 1312 | m_heap.resize(max_entries + 1); | 
|---|
| 1313 | m_heap[1].m_index = first_index; | 
|---|
| 1314 | m_heap[1].m_priority = first_priority; | 
|---|
| 1315 | m_size = 1; | 
|---|
| 1316 | } | 
|---|
| 1317 |  | 
|---|
| 1318 | inline uint32_t size() const { return m_size; } | 
|---|
| 1319 |  | 
|---|
| 1320 | inline uint32_t get_top_index() const { return m_heap[1].m_index; } | 
|---|
| 1321 | inline float get_top_priority() const { return m_heap[1].m_priority; } | 
|---|
| 1322 |  | 
|---|
| 1323 | inline void delete_top() | 
|---|
| 1324 | { | 
|---|
| 1325 | assert(m_size > 0); | 
|---|
| 1326 | m_heap[1] = m_heap[m_size]; | 
|---|
| 1327 | m_size--; | 
|---|
| 1328 | if (m_size) | 
|---|
| 1329 | down_heap(1); | 
|---|
| 1330 | } | 
|---|
| 1331 |  | 
|---|
| 1332 | inline void add_heap(uint32_t index, float priority) | 
|---|
| 1333 | { | 
|---|
| 1334 | m_size++; | 
|---|
| 1335 |  | 
|---|
| 1336 | uint32_t k = m_size; | 
|---|
| 1337 |  | 
|---|
| 1338 | if (m_size >= m_heap.size()) | 
|---|
| 1339 | m_heap.resize(m_size + 1); | 
|---|
| 1340 |  | 
|---|
| 1341 | for (;;) | 
|---|
| 1342 | { | 
|---|
| 1343 | uint32_t parent_index = k >> 1; | 
|---|
| 1344 | if ((!parent_index) || (m_heap[parent_index].m_priority > priority)) | 
|---|
| 1345 | break; | 
|---|
| 1346 | m_heap[k] = m_heap[parent_index]; | 
|---|
| 1347 | k = parent_index; | 
|---|
| 1348 | } | 
|---|
| 1349 |  | 
|---|
| 1350 | m_heap[k].m_index = index; | 
|---|
| 1351 | m_heap[k].m_priority = priority; | 
|---|
| 1352 | } | 
|---|
| 1353 |  | 
|---|
| 1354 | private: | 
|---|
| 1355 | struct entry | 
|---|
| 1356 | { | 
|---|
| 1357 | uint32_t m_index; | 
|---|
| 1358 | float m_priority; | 
|---|
| 1359 | }; | 
|---|
| 1360 |  | 
|---|
| 1361 | basisu::vector<entry> m_heap; | 
|---|
| 1362 | uint32_t m_size; | 
|---|
| 1363 |  | 
|---|
| 1364 | // Push down entry at index | 
|---|
| 1365 | inline void down_heap(uint32_t heap_index) | 
|---|
| 1366 | { | 
|---|
| 1367 | uint32_t orig_index = m_heap[heap_index].m_index; | 
|---|
| 1368 | const float orig_priority = m_heap[heap_index].m_priority; | 
|---|
| 1369 |  | 
|---|
| 1370 | uint32_t child_index; | 
|---|
| 1371 | while ((child_index = (heap_index << 1)) <= m_size) | 
|---|
| 1372 | { | 
|---|
| 1373 | if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index; | 
|---|
| 1374 | if (orig_priority > m_heap[child_index].m_priority) | 
|---|
| 1375 | break; | 
|---|
| 1376 | m_heap[heap_index] = m_heap[child_index]; | 
|---|
| 1377 | heap_index = child_index; | 
|---|
| 1378 | } | 
|---|
| 1379 |  | 
|---|
| 1380 | m_heap[heap_index].m_index = orig_index; | 
|---|
| 1381 | m_heap[heap_index].m_priority = orig_priority; | 
|---|
| 1382 | } | 
|---|
| 1383 | }; | 
|---|
| 1384 |  | 
|---|
| 1385 | // Tree structured vector quantization (TSVQ) | 
|---|
| 1386 |  | 
|---|
| 1387 | template <typename TrainingVectorType> | 
|---|
| 1388 | class tree_vector_quant | 
|---|
| 1389 | { | 
|---|
| 1390 | public: | 
|---|
| 1391 | typedef TrainingVectorType training_vec_type; | 
|---|
| 1392 | typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight; | 
|---|
| 1393 | typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs; | 
|---|
| 1394 |  | 
|---|
| 1395 | tree_vector_quant() : | 
|---|
| 1396 | m_next_codebook_index(0) | 
|---|
| 1397 | { | 
|---|
| 1398 | } | 
|---|
| 1399 |  | 
|---|
| 1400 | void clear() | 
|---|
| 1401 | { | 
|---|
| 1402 | clear_vector(m_training_vecs); | 
|---|
| 1403 | clear_vector(m_nodes); | 
|---|
| 1404 | m_next_codebook_index = 0; | 
|---|
| 1405 | } | 
|---|
| 1406 |  | 
|---|
| 1407 | void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); } | 
|---|
| 1408 |  | 
|---|
| 1409 | size_t get_total_training_vecs() const { return m_training_vecs.size(); } | 
|---|
| 1410 | const array_of_weighted_training_vecs &get_training_vecs() const	{ return m_training_vecs; } | 
|---|
| 1411 | array_of_weighted_training_vecs &get_training_vecs()			{ return m_training_vecs; } | 
|---|
| 1412 |  | 
|---|
| 1413 | void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const | 
|---|
| 1414 | { | 
|---|
| 1415 | for (uint32_t i = 0; i < m_nodes.size(); i++) | 
|---|
| 1416 | { | 
|---|
| 1417 | const tsvq_node &n = m_nodes[i]; | 
|---|
| 1418 | if (!n.is_leaf()) | 
|---|
| 1419 | continue; | 
|---|
| 1420 |  | 
|---|
| 1421 | codebook.resize(codebook.size() + 1); | 
|---|
| 1422 | codebook.back() = n.m_training_vecs; | 
|---|
| 1423 | } | 
|---|
| 1424 | } | 
|---|
| 1425 |  | 
|---|
| 1426 | void retrieve(basisu::vector<TrainingVectorType> &codebook) const | 
|---|
| 1427 | { | 
|---|
| 1428 | for (uint32_t i = 0; i < m_nodes.size(); i++) | 
|---|
| 1429 | { | 
|---|
| 1430 | const tsvq_node &n = m_nodes[i]; | 
|---|
| 1431 | if (!n.is_leaf()) | 
|---|
| 1432 | continue; | 
|---|
| 1433 |  | 
|---|
| 1434 | codebook.resize(codebook.size() + 1); | 
|---|
| 1435 | codebook.back() = n.m_origin; | 
|---|
| 1436 | } | 
|---|
| 1437 | } | 
|---|
| 1438 |  | 
|---|
| 1439 | void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const | 
|---|
| 1440 | { | 
|---|
| 1441 | uint_vec node_stack; | 
|---|
| 1442 | node_stack.reserve(512); | 
|---|
| 1443 |  | 
|---|
| 1444 | codebook.resize(0); | 
|---|
| 1445 | codebook.reserve(max_clusters); | 
|---|
| 1446 |  | 
|---|
| 1447 | uint32_t node_index = 0; | 
|---|
| 1448 |  | 
|---|
| 1449 | while (true) | 
|---|
| 1450 | { | 
|---|
| 1451 | const tsvq_node& cur = m_nodes[node_index]; | 
|---|
| 1452 |  | 
|---|
| 1453 | if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters)) | 
|---|
| 1454 | { | 
|---|
| 1455 | codebook.resize(codebook.size() + 1); | 
|---|
| 1456 | codebook.back() = cur.m_training_vecs; | 
|---|
| 1457 |  | 
|---|
| 1458 | if (node_stack.empty()) | 
|---|
| 1459 | break; | 
|---|
| 1460 |  | 
|---|
| 1461 | node_index = node_stack.back(); | 
|---|
| 1462 | node_stack.pop_back(); | 
|---|
| 1463 | continue; | 
|---|
| 1464 | } | 
|---|
| 1465 |  | 
|---|
| 1466 | node_stack.push_back(cur.m_right_index); | 
|---|
| 1467 | node_index = cur.m_left_index; | 
|---|
| 1468 | } | 
|---|
| 1469 | } | 
|---|
| 1470 |  | 
|---|
| 1471 | bool generate(uint32_t max_size) | 
|---|
| 1472 | { | 
|---|
| 1473 | if (!m_training_vecs.size()) | 
|---|
| 1474 | return false; | 
|---|
| 1475 |  | 
|---|
| 1476 | m_next_codebook_index = 0; | 
|---|
| 1477 |  | 
|---|
| 1478 | clear_vector(m_nodes); | 
|---|
| 1479 | m_nodes.reserve(max_size * 2 + 1); | 
|---|
| 1480 |  | 
|---|
| 1481 | m_nodes.push_back(prepare_root()); | 
|---|
| 1482 |  | 
|---|
| 1483 | priority_queue var_heap; | 
|---|
| 1484 | var_heap.init(max_size, 0, m_nodes[0].m_var); | 
|---|
| 1485 |  | 
|---|
| 1486 | basisu::vector<uint32_t> l_children, r_children; | 
|---|
| 1487 |  | 
|---|
| 1488 | // Now split the worst nodes | 
|---|
| 1489 | l_children.reserve(m_training_vecs.size() + 1); | 
|---|
| 1490 | r_children.reserve(m_training_vecs.size() + 1); | 
|---|
| 1491 |  | 
|---|
| 1492 | uint32_t total_leaf_nodes = 1; | 
|---|
| 1493 |  | 
|---|
| 1494 | //interval_timer tm; | 
|---|
| 1495 | //tm.start(); | 
|---|
| 1496 |  | 
|---|
| 1497 | while ((var_heap.size()) && (total_leaf_nodes < max_size)) | 
|---|
| 1498 | { | 
|---|
| 1499 | const uint32_t node_index = var_heap.get_top_index(); | 
|---|
| 1500 | const tsvq_node &node = m_nodes[node_index]; | 
|---|
| 1501 |  | 
|---|
| 1502 | assert(node.m_var == var_heap.get_top_priority()); | 
|---|
| 1503 | assert(node.is_leaf()); | 
|---|
| 1504 |  | 
|---|
| 1505 | var_heap.delete_top(); | 
|---|
| 1506 |  | 
|---|
| 1507 | if (node.m_training_vecs.size() > 1) | 
|---|
| 1508 | { | 
|---|
| 1509 | if (split_node(node_index, var_heap, l_children, r_children)) | 
|---|
| 1510 | { | 
|---|
| 1511 | // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total. | 
|---|
| 1512 | total_leaf_nodes += 1; | 
|---|
| 1513 | } | 
|---|
| 1514 | } | 
|---|
| 1515 | } | 
|---|
| 1516 |  | 
|---|
| 1517 | //debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs()); | 
|---|
| 1518 |  | 
|---|
| 1519 | return true; | 
|---|
| 1520 | } | 
|---|
| 1521 |  | 
|---|
| 1522 | private: | 
|---|
| 1523 | class tsvq_node | 
|---|
| 1524 | { | 
|---|
| 1525 | public: | 
|---|
| 1526 | inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { } | 
|---|
| 1527 |  | 
|---|
| 1528 | // vecs is erased | 
|---|
| 1529 | inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } | 
|---|
| 1530 |  | 
|---|
| 1531 | inline bool is_leaf() const { return m_left_index < 0; } | 
|---|
| 1532 |  | 
|---|
| 1533 | float m_var; | 
|---|
| 1534 | uint64_t m_weight; | 
|---|
| 1535 | TrainingVectorType m_origin; | 
|---|
| 1536 | int32_t m_left_index, m_right_index; | 
|---|
| 1537 | basisu::vector<uint32_t> m_training_vecs; | 
|---|
| 1538 | int m_codebook_index; | 
|---|
| 1539 | }; | 
|---|
| 1540 |  | 
|---|
| 1541 | typedef basisu::vector<tsvq_node> tsvq_node_vec; | 
|---|
| 1542 | tsvq_node_vec m_nodes; | 
|---|
| 1543 |  | 
|---|
| 1544 | array_of_weighted_training_vecs m_training_vecs; | 
|---|
| 1545 |  | 
|---|
| 1546 | uint32_t m_next_codebook_index; | 
|---|
| 1547 |  | 
|---|
| 1548 | tsvq_node prepare_root() const | 
|---|
| 1549 | { | 
|---|
| 1550 | double ttsum = 0.0f; | 
|---|
| 1551 |  | 
|---|
| 1552 | // Prepare root node containing all training vectors | 
|---|
| 1553 | tsvq_node root; | 
|---|
| 1554 | root.m_training_vecs.reserve(m_training_vecs.size()); | 
|---|
| 1555 |  | 
|---|
| 1556 | for (uint32_t i = 0; i < m_training_vecs.size(); i++) | 
|---|
| 1557 | { | 
|---|
| 1558 | const TrainingVectorType &v = m_training_vecs[i].first; | 
|---|
| 1559 | const uint64_t weight = m_training_vecs[i].second; | 
|---|
| 1560 |  | 
|---|
| 1561 | root.m_training_vecs.push_back(i); | 
|---|
| 1562 |  | 
|---|
| 1563 | root.m_origin += (v * static_cast<float>(weight)); | 
|---|
| 1564 | root.m_weight += weight; | 
|---|
| 1565 |  | 
|---|
| 1566 | ttsum += v.dot(v) * weight; | 
|---|
| 1567 | } | 
|---|
| 1568 |  | 
|---|
| 1569 | root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight)); | 
|---|
| 1570 |  | 
|---|
| 1571 | root.m_origin *= (1.0f / root.m_weight); | 
|---|
| 1572 |  | 
|---|
| 1573 | return root; | 
|---|
| 1574 | } | 
|---|
| 1575 |  | 
|---|
| 1576 | bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children) | 
|---|
| 1577 | { | 
|---|
| 1578 | TrainingVectorType l_child_org, r_child_org; | 
|---|
| 1579 | uint64_t l_weight = 0, r_weight = 0; | 
|---|
| 1580 | float l_var = 0.0f, r_var = 0.0f; | 
|---|
| 1581 |  | 
|---|
| 1582 | // Compute initial left/right child origins | 
|---|
| 1583 | if (!prep_split(m_nodes[node_index], l_child_org, r_child_org)) | 
|---|
| 1584 | return false; | 
|---|
| 1585 |  | 
|---|
| 1586 | // Use k-means iterations to refine these children vectors | 
|---|
| 1587 | if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children)) | 
|---|
| 1588 | return false; | 
|---|
| 1589 |  | 
|---|
| 1590 | // Create children | 
|---|
| 1591 | const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1; | 
|---|
| 1592 |  | 
|---|
| 1593 | m_nodes[node_index].m_left_index = l_child_index; | 
|---|
| 1594 | m_nodes[node_index].m_right_index = r_child_index; | 
|---|
| 1595 |  | 
|---|
| 1596 | m_nodes[node_index].m_codebook_index = m_next_codebook_index; | 
|---|
| 1597 | m_next_codebook_index++; | 
|---|
| 1598 |  | 
|---|
| 1599 | m_nodes.resize(m_nodes.size() + 2); | 
|---|
| 1600 |  | 
|---|
| 1601 | tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index]; | 
|---|
| 1602 |  | 
|---|
| 1603 | l_child.set(l_child_org, l_weight, l_var, l_children); | 
|---|
| 1604 | r_child.set(r_child_org, r_weight, r_var, r_children); | 
|---|
| 1605 |  | 
|---|
| 1606 | if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1)) | 
|---|
| 1607 | { | 
|---|
| 1608 | TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first); | 
|---|
| 1609 |  | 
|---|
| 1610 | for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++) | 
|---|
| 1611 | { | 
|---|
| 1612 | if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first)) | 
|---|
| 1613 | { | 
|---|
| 1614 | l_child.m_var = 1e-4f; | 
|---|
| 1615 | break; | 
|---|
| 1616 | } | 
|---|
| 1617 | } | 
|---|
| 1618 | } | 
|---|
| 1619 |  | 
|---|
| 1620 | if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1)) | 
|---|
| 1621 | { | 
|---|
| 1622 | TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first); | 
|---|
| 1623 |  | 
|---|
| 1624 | for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++) | 
|---|
| 1625 | { | 
|---|
| 1626 | if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first)) | 
|---|
| 1627 | { | 
|---|
| 1628 | r_child.m_var = 1e-4f; | 
|---|
| 1629 | break; | 
|---|
| 1630 | } | 
|---|
| 1631 | } | 
|---|
| 1632 | } | 
|---|
| 1633 |  | 
|---|
| 1634 | if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1)) | 
|---|
| 1635 | var_heap.add_heap(l_child_index, l_child.m_var); | 
|---|
| 1636 |  | 
|---|
| 1637 | if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1)) | 
|---|
| 1638 | var_heap.add_heap(r_child_index, r_child.m_var); | 
|---|
| 1639 |  | 
|---|
| 1640 | return true; | 
|---|
| 1641 | } | 
|---|
| 1642 |  | 
|---|
| 1643 | TrainingVectorType compute_split_axis(const tsvq_node &node) const | 
|---|
| 1644 | { | 
|---|
| 1645 | const uint32_t N = TrainingVectorType::num_elements; | 
|---|
| 1646 |  | 
|---|
| 1647 | matrix<N, N, float> cmatrix; | 
|---|
| 1648 |  | 
|---|
| 1649 | if ((N != 16) || (!g_cpu_supports_sse41)) | 
|---|
| 1650 | { | 
|---|
| 1651 | cmatrix.set_zero(); | 
|---|
| 1652 |  | 
|---|
| 1653 | // Compute covariance matrix from weighted input vectors | 
|---|
| 1654 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) | 
|---|
| 1655 | { | 
|---|
| 1656 | const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin); | 
|---|
| 1657 | const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v); | 
|---|
| 1658 |  | 
|---|
| 1659 | for (uint32_t x = 0; x < N; x++) | 
|---|
| 1660 | for (uint32_t y = x; y < N; y++) | 
|---|
| 1661 | cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y]; | 
|---|
| 1662 | } | 
|---|
| 1663 | } | 
|---|
| 1664 | else | 
|---|
| 1665 | { | 
|---|
| 1666 | #if BASISU_SUPPORT_SSE | 
|---|
| 1667 | // Specialize the case with 16x16 matrices, which are quite expensive without SIMD. | 
|---|
| 1668 | // This SSE function takes pointers to void types, so do some sanity checks. | 
|---|
| 1669 | assert(sizeof(TrainingVectorType) == sizeof(float) * 16); | 
|---|
| 1670 | assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>)); | 
|---|
| 1671 | update_covar_matrix_16x16_sse41(node.m_training_vecs.size(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix); | 
|---|
| 1672 | #endif | 
|---|
| 1673 | } | 
|---|
| 1674 |  | 
|---|
| 1675 | const float renorm_scale = 1.0f / node.m_weight; | 
|---|
| 1676 |  | 
|---|
| 1677 | for (uint32_t x = 0; x < N; x++) | 
|---|
| 1678 | for (uint32_t y = x; y < N; y++) | 
|---|
| 1679 | cmatrix[x][y] *= renorm_scale; | 
|---|
| 1680 |  | 
|---|
| 1681 | // Diagonal flip | 
|---|
| 1682 | for (uint32_t x = 0; x < (N - 1); x++) | 
|---|
| 1683 | for (uint32_t y = x + 1; y < N; y++) | 
|---|
| 1684 | cmatrix[y][x] = cmatrix[x][y]; | 
|---|
| 1685 |  | 
|---|
| 1686 | return compute_pca_from_covar<N, TrainingVectorType>(cmatrix); | 
|---|
| 1687 | } | 
|---|
| 1688 |  | 
|---|
| 1689 | bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const | 
|---|
| 1690 | { | 
|---|
| 1691 | //const uint32_t N = TrainingVectorType::num_elements; | 
|---|
| 1692 |  | 
|---|
| 1693 | if (2 == node.m_training_vecs.size()) | 
|---|
| 1694 | { | 
|---|
| 1695 | l_child_result = m_training_vecs[node.m_training_vecs[0]].first; | 
|---|
| 1696 | r_child_result = m_training_vecs[node.m_training_vecs[1]].first; | 
|---|
| 1697 | return true; | 
|---|
| 1698 | } | 
|---|
| 1699 |  | 
|---|
| 1700 | TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f); | 
|---|
| 1701 | double l_weight = 0.0f, r_weight = 0.0f; | 
|---|
| 1702 |  | 
|---|
| 1703 | // Compute initial left/right children | 
|---|
| 1704 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) | 
|---|
| 1705 | { | 
|---|
| 1706 | const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second; | 
|---|
| 1707 |  | 
|---|
| 1708 | const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; | 
|---|
| 1709 |  | 
|---|
| 1710 | double t = (v - node.m_origin).dot(axis); | 
|---|
| 1711 | if (t >= 0.0f) | 
|---|
| 1712 | { | 
|---|
| 1713 | r_child += v * weight; | 
|---|
| 1714 | r_weight += weight; | 
|---|
| 1715 | } | 
|---|
| 1716 | else | 
|---|
| 1717 | { | 
|---|
| 1718 | l_child += v * weight; | 
|---|
| 1719 | l_weight += weight; | 
|---|
| 1720 | } | 
|---|
| 1721 | } | 
|---|
| 1722 |  | 
|---|
| 1723 | if ((l_weight > 0.0f) && (r_weight > 0.0f)) | 
|---|
| 1724 | { | 
|---|
| 1725 | l_child_result = l_child * static_cast<float>(1.0f / l_weight); | 
|---|
| 1726 | r_child_result = r_child * static_cast<float>(1.0f / r_weight); | 
|---|
| 1727 | } | 
|---|
| 1728 | else | 
|---|
| 1729 | { | 
|---|
| 1730 | TrainingVectorType l(1e+20f); | 
|---|
| 1731 | TrainingVectorType h(-1e+20f); | 
|---|
| 1732 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) | 
|---|
| 1733 | { | 
|---|
| 1734 | const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; | 
|---|
| 1735 |  | 
|---|
| 1736 | l = TrainingVectorType::component_min(l, v); | 
|---|
| 1737 | h = TrainingVectorType::component_max(h, v); | 
|---|
| 1738 | } | 
|---|
| 1739 |  | 
|---|
| 1740 | TrainingVectorType r(h - l); | 
|---|
| 1741 |  | 
|---|
| 1742 | float largest_axis_v = 0.0f; | 
|---|
| 1743 | int largest_axis_index = -1; | 
|---|
| 1744 | for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++) | 
|---|
| 1745 | { | 
|---|
| 1746 | if (r[i] > largest_axis_v) | 
|---|
| 1747 | { | 
|---|
| 1748 | largest_axis_v = r[i]; | 
|---|
| 1749 | largest_axis_index = i; | 
|---|
| 1750 | } | 
|---|
| 1751 | } | 
|---|
| 1752 |  | 
|---|
| 1753 | if (largest_axis_index < 0) | 
|---|
| 1754 | return false; | 
|---|
| 1755 |  | 
|---|
| 1756 | basisu::vector<float> keys(node.m_training_vecs.size()); | 
|---|
| 1757 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) | 
|---|
| 1758 | keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index]; | 
|---|
| 1759 |  | 
|---|
| 1760 | uint_vec indices(node.m_training_vecs.size()); | 
|---|
| 1761 | indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]); | 
|---|
| 1762 |  | 
|---|
| 1763 | l_child.set_zero(); | 
|---|
| 1764 | l_weight = 0; | 
|---|
| 1765 |  | 
|---|
| 1766 | r_child.set_zero(); | 
|---|
| 1767 | r_weight = 0; | 
|---|
| 1768 |  | 
|---|
| 1769 | const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2; | 
|---|
| 1770 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) | 
|---|
| 1771 | { | 
|---|
| 1772 | const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second; | 
|---|
| 1773 |  | 
|---|
| 1774 | const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; | 
|---|
| 1775 |  | 
|---|
| 1776 | if (i < half_index) | 
|---|
| 1777 | { | 
|---|
| 1778 | l_child += v * weight; | 
|---|
| 1779 | l_weight += weight; | 
|---|
| 1780 | } | 
|---|
| 1781 | else | 
|---|
| 1782 | { | 
|---|
| 1783 | r_child += v * weight; | 
|---|
| 1784 | r_weight += weight; | 
|---|
| 1785 | } | 
|---|
| 1786 | } | 
|---|
| 1787 |  | 
|---|
| 1788 | if ((l_weight > 0.0f) && (r_weight > 0.0f)) | 
|---|
| 1789 | { | 
|---|
| 1790 | l_child_result = l_child * static_cast<float>(1.0f / l_weight); | 
|---|
| 1791 | r_child_result = r_child * static_cast<float>(1.0f / r_weight); | 
|---|
| 1792 | } | 
|---|
| 1793 | else | 
|---|
| 1794 | { | 
|---|
| 1795 | l_child_result = l; | 
|---|
| 1796 | r_child_result = h; | 
|---|
| 1797 | } | 
|---|
| 1798 | } | 
|---|
| 1799 |  | 
|---|
| 1800 | return true; | 
|---|
| 1801 | } | 
|---|
| 1802 |  | 
|---|
| 1803 | bool refine_split(const tsvq_node &node, | 
|---|
| 1804 | TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children, | 
|---|
| 1805 | TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const | 
|---|
| 1806 | { | 
|---|
| 1807 | l_children.reserve(node.m_training_vecs.size()); | 
|---|
| 1808 | r_children.reserve(node.m_training_vecs.size()); | 
|---|
| 1809 |  | 
|---|
| 1810 | float prev_total_variance = 1e+10f; | 
|---|
| 1811 |  | 
|---|
| 1812 | // Refine left/right children locations using k-means iterations | 
|---|
| 1813 | const uint32_t cMaxIters = 6; | 
|---|
| 1814 | for (uint32_t iter = 0; iter < cMaxIters; iter++) | 
|---|
| 1815 | { | 
|---|
| 1816 | l_children.resize(0); | 
|---|
| 1817 | r_children.resize(0); | 
|---|
| 1818 |  | 
|---|
| 1819 | TrainingVectorType new_l_child(cZero), new_r_child(cZero); | 
|---|
| 1820 |  | 
|---|
| 1821 | double l_ttsum = 0.0f, r_ttsum = 0.0f; | 
|---|
| 1822 |  | 
|---|
| 1823 | l_weight = 0; | 
|---|
| 1824 | r_weight = 0; | 
|---|
| 1825 |  | 
|---|
| 1826 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) | 
|---|
| 1827 | { | 
|---|
| 1828 | const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; | 
|---|
| 1829 | const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; | 
|---|
| 1830 |  | 
|---|
| 1831 | double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v); | 
|---|
| 1832 |  | 
|---|
| 1833 | if (left_dist2 >= right_dist2) | 
|---|
| 1834 | { | 
|---|
| 1835 | new_r_child += (v * static_cast<float>(weight)); | 
|---|
| 1836 | r_weight += weight; | 
|---|
| 1837 |  | 
|---|
| 1838 | r_ttsum += weight * v.dot(v); | 
|---|
| 1839 | r_children.push_back(node.m_training_vecs[i]); | 
|---|
| 1840 | } | 
|---|
| 1841 | else | 
|---|
| 1842 | { | 
|---|
| 1843 | new_l_child += (v * static_cast<float>(weight)); | 
|---|
| 1844 | l_weight += weight; | 
|---|
| 1845 |  | 
|---|
| 1846 | l_ttsum += weight * v.dot(v); | 
|---|
| 1847 | l_children.push_back(node.m_training_vecs[i]); | 
|---|
| 1848 | } | 
|---|
| 1849 | } | 
|---|
| 1850 |  | 
|---|
| 1851 | // Node is unsplittable using the above algorithm - try something else to split it up. | 
|---|
| 1852 | if ((!l_weight) || (!r_weight)) | 
|---|
| 1853 | { | 
|---|
| 1854 | l_children.resize(0); | 
|---|
| 1855 | new_l_child.set(0.0f); | 
|---|
| 1856 | l_ttsum = 0.0f; | 
|---|
| 1857 | l_weight = 0; | 
|---|
| 1858 |  | 
|---|
| 1859 | r_children.resize(0); | 
|---|
| 1860 | new_r_child.set(0.0f); | 
|---|
| 1861 | r_ttsum = 0.0f; | 
|---|
| 1862 | r_weight = 0; | 
|---|
| 1863 |  | 
|---|
| 1864 | TrainingVectorType firstVec; | 
|---|
| 1865 | for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) | 
|---|
| 1866 | { | 
|---|
| 1867 | const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; | 
|---|
| 1868 | const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; | 
|---|
| 1869 |  | 
|---|
| 1870 | if ((!i) || (v == firstVec)) | 
|---|
| 1871 | { | 
|---|
| 1872 | firstVec = v; | 
|---|
| 1873 |  | 
|---|
| 1874 | new_r_child += (v * static_cast<float>(weight)); | 
|---|
| 1875 | r_weight += weight; | 
|---|
| 1876 |  | 
|---|
| 1877 | r_ttsum += weight * v.dot(v); | 
|---|
| 1878 | r_children.push_back(node.m_training_vecs[i]); | 
|---|
| 1879 | } | 
|---|
| 1880 | else | 
|---|
| 1881 | { | 
|---|
| 1882 | new_l_child += (v * static_cast<float>(weight)); | 
|---|
| 1883 | l_weight += weight; | 
|---|
| 1884 |  | 
|---|
| 1885 | l_ttsum += weight * v.dot(v); | 
|---|
| 1886 | l_children.push_back(node.m_training_vecs[i]); | 
|---|
| 1887 | } | 
|---|
| 1888 | } | 
|---|
| 1889 |  | 
|---|
| 1890 | if ((!l_weight) || (!r_weight)) | 
|---|
| 1891 | return false; | 
|---|
| 1892 | } | 
|---|
| 1893 |  | 
|---|
| 1894 | l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight)); | 
|---|
| 1895 | r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight)); | 
|---|
| 1896 |  | 
|---|
| 1897 | new_l_child *= (1.0f / l_weight); | 
|---|
| 1898 | new_r_child *= (1.0f / r_weight); | 
|---|
| 1899 |  | 
|---|
| 1900 | l_child = new_l_child; | 
|---|
| 1901 | r_child = new_r_child; | 
|---|
| 1902 |  | 
|---|
| 1903 | float total_var = l_var + r_var; | 
|---|
| 1904 | const float cGiveupVariance = .00001f; | 
|---|
| 1905 | if (total_var < cGiveupVariance) | 
|---|
| 1906 | break; | 
|---|
| 1907 |  | 
|---|
| 1908 | // Check to see if the variance has settled | 
|---|
| 1909 | const float cVarianceDeltaThresh = .00125f; | 
|---|
| 1910 | if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh) | 
|---|
| 1911 | break; | 
|---|
| 1912 |  | 
|---|
| 1913 | prev_total_variance = total_var; | 
|---|
| 1914 | } | 
|---|
| 1915 |  | 
|---|
| 1916 | return true; | 
|---|
| 1917 | } | 
|---|
| 1918 | }; | 
|---|
| 1919 |  | 
|---|
| 1920 | struct weighted_block_group | 
|---|
| 1921 | { | 
|---|
| 1922 | uint64_t m_total_weight; | 
|---|
| 1923 | uint_vec m_indices; | 
|---|
| 1924 | }; | 
|---|
| 1925 |  | 
|---|
| 1926 | template<typename Quantizer> | 
|---|
| 1927 | bool generate_hierarchical_codebook_threaded_internal(Quantizer& q, | 
|---|
| 1928 | uint32_t max_codebook_size, uint32_t max_parent_codebook_size, | 
|---|
| 1929 | basisu::vector<uint_vec>& codebook, | 
|---|
| 1930 | basisu::vector<uint_vec>& parent_codebook, | 
|---|
| 1931 | uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool) | 
|---|
| 1932 | { | 
|---|
| 1933 | codebook.resize(0); | 
|---|
| 1934 | parent_codebook.resize(0); | 
|---|
| 1935 |  | 
|---|
| 1936 | if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16)) | 
|---|
| 1937 | { | 
|---|
| 1938 | if (!q.generate(max_codebook_size)) | 
|---|
| 1939 | return false; | 
|---|
| 1940 |  | 
|---|
| 1941 | q.retrieve(codebook); | 
|---|
| 1942 |  | 
|---|
| 1943 | if (max_parent_codebook_size) | 
|---|
| 1944 | q.retrieve(max_parent_codebook_size, parent_codebook); | 
|---|
| 1945 |  | 
|---|
| 1946 | return true; | 
|---|
| 1947 | } | 
|---|
| 1948 |  | 
|---|
| 1949 | const uint32_t cMaxThreads = 16; | 
|---|
| 1950 | if (max_threads > cMaxThreads) | 
|---|
| 1951 | max_threads = cMaxThreads; | 
|---|
| 1952 |  | 
|---|
| 1953 | if (!q.generate(max_threads)) | 
|---|
| 1954 | return false; | 
|---|
| 1955 |  | 
|---|
| 1956 | basisu::vector<uint_vec> initial_codebook; | 
|---|
| 1957 |  | 
|---|
| 1958 | q.retrieve(initial_codebook); | 
|---|
| 1959 |  | 
|---|
| 1960 | if (initial_codebook.size() < max_threads) | 
|---|
| 1961 | { | 
|---|
| 1962 | codebook = initial_codebook; | 
|---|
| 1963 |  | 
|---|
| 1964 | if (max_parent_codebook_size) | 
|---|
| 1965 | q.retrieve(max_parent_codebook_size, parent_codebook); | 
|---|
| 1966 |  | 
|---|
| 1967 | return true; | 
|---|
| 1968 | } | 
|---|
| 1969 |  | 
|---|
| 1970 | Quantizer quantizers[cMaxThreads]; | 
|---|
| 1971 |  | 
|---|
| 1972 | bool success_flags[cMaxThreads]; | 
|---|
| 1973 | clear_obj(success_flags); | 
|---|
| 1974 |  | 
|---|
| 1975 | basisu::vector<uint_vec> local_clusters[cMaxThreads]; | 
|---|
| 1976 | basisu::vector<uint_vec> local_parent_clusters[cMaxThreads]; | 
|---|
| 1977 |  | 
|---|
| 1978 | for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) | 
|---|
| 1979 | { | 
|---|
| 1980 | #ifndef __EMSCRIPTEN__ | 
|---|
| 1981 | pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] { | 
|---|
| 1982 | #endif | 
|---|
| 1983 |  | 
|---|
| 1984 | Quantizer& lq = quantizers[thread_iter]; | 
|---|
| 1985 | uint_vec& cluster_indices = initial_codebook[thread_iter]; | 
|---|
| 1986 |  | 
|---|
| 1987 | uint_vec local_to_global(cluster_indices.size()); | 
|---|
| 1988 |  | 
|---|
| 1989 | for (uint32_t i = 0; i < cluster_indices.size(); i++) | 
|---|
| 1990 | { | 
|---|
| 1991 | const uint32_t global_training_vec_index = cluster_indices[i]; | 
|---|
| 1992 | local_to_global[i] = global_training_vec_index; | 
|---|
| 1993 |  | 
|---|
| 1994 | lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second); | 
|---|
| 1995 | } | 
|---|
| 1996 |  | 
|---|
| 1997 | const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs(); | 
|---|
| 1998 |  | 
|---|
| 1999 | success_flags[thread_iter] = lq.generate(max_clusters); | 
|---|
| 2000 |  | 
|---|
| 2001 | if (success_flags[thread_iter]) | 
|---|
| 2002 | { | 
|---|
| 2003 | lq.retrieve(local_clusters[thread_iter]); | 
|---|
| 2004 |  | 
|---|
| 2005 | for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++) | 
|---|
| 2006 | { | 
|---|
| 2007 | for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++) | 
|---|
| 2008 | local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]]; | 
|---|
| 2009 | } | 
|---|
| 2010 |  | 
|---|
| 2011 | if (max_parent_codebook_size) | 
|---|
| 2012 | { | 
|---|
| 2013 | lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]); | 
|---|
| 2014 |  | 
|---|
| 2015 | for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++) | 
|---|
| 2016 | { | 
|---|
| 2017 | for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++) | 
|---|
| 2018 | local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]]; | 
|---|
| 2019 | } | 
|---|
| 2020 | } | 
|---|
| 2021 | } | 
|---|
| 2022 |  | 
|---|
| 2023 | #ifndef __EMSCRIPTEN__ | 
|---|
| 2024 | } ); | 
|---|
| 2025 | #endif | 
|---|
| 2026 |  | 
|---|
| 2027 | } // thread_iter | 
|---|
| 2028 |  | 
|---|
| 2029 | #ifndef __EMSCRIPTEN__ | 
|---|
| 2030 | pJob_pool->wait_for_all(); | 
|---|
| 2031 | #endif | 
|---|
| 2032 |  | 
|---|
| 2033 | uint32_t total_clusters = 0, total_parent_clusters = 0; | 
|---|
| 2034 |  | 
|---|
| 2035 | for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++) | 
|---|
| 2036 | { | 
|---|
| 2037 | if (!success_flags[thread_iter]) | 
|---|
| 2038 | return false; | 
|---|
| 2039 | total_clusters += (uint32_t)local_clusters[thread_iter].size(); | 
|---|
| 2040 | total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size(); | 
|---|
| 2041 | } | 
|---|
| 2042 |  | 
|---|
| 2043 | codebook.reserve(total_clusters); | 
|---|
| 2044 | parent_codebook.reserve(total_parent_clusters); | 
|---|
| 2045 |  | 
|---|
| 2046 | for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) | 
|---|
| 2047 | { | 
|---|
| 2048 | for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++) | 
|---|
| 2049 | { | 
|---|
| 2050 | codebook.resize(codebook.size() + 1); | 
|---|
| 2051 | codebook.back().swap(local_clusters[thread_iter][j]); | 
|---|
| 2052 | } | 
|---|
| 2053 |  | 
|---|
| 2054 | for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++) | 
|---|
| 2055 | { | 
|---|
| 2056 | parent_codebook.resize(parent_codebook.size() + 1); | 
|---|
| 2057 | parent_codebook.back().swap(local_parent_clusters[thread_iter][j]); | 
|---|
| 2058 | } | 
|---|
| 2059 | } | 
|---|
| 2060 |  | 
|---|
| 2061 | return true; | 
|---|
| 2062 | } | 
|---|
| 2063 |  | 
|---|
| 2064 | template<typename Quantizer> | 
|---|
| 2065 | bool generate_hierarchical_codebook_threaded(Quantizer& q, | 
|---|
| 2066 | uint32_t max_codebook_size, uint32_t max_parent_codebook_size, | 
|---|
| 2067 | basisu::vector<uint_vec>& codebook, | 
|---|
| 2068 | basisu::vector<uint_vec>& parent_codebook, | 
|---|
| 2069 | uint32_t max_threads, job_pool *pJob_pool, | 
|---|
| 2070 | bool even_odd_input_pairs_equal) | 
|---|
| 2071 | { | 
|---|
| 2072 | typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher; | 
|---|
| 2073 |  | 
|---|
| 2074 | typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, | 
|---|
| 2075 | training_vec_bit_hasher> group_hash; | 
|---|
| 2076 |  | 
|---|
| 2077 | //interval_timer tm; | 
|---|
| 2078 | //tm.start(); | 
|---|
| 2079 |  | 
|---|
| 2080 | group_hash unique_vecs; | 
|---|
| 2081 |  | 
|---|
| 2082 | unique_vecs.reserve(20000); | 
|---|
| 2083 |  | 
|---|
| 2084 | weighted_block_group g; | 
|---|
| 2085 |  | 
|---|
| 2086 | if (even_odd_input_pairs_equal) | 
|---|
| 2087 | { | 
|---|
| 2088 | g.m_indices.resize(2); | 
|---|
| 2089 |  | 
|---|
| 2090 | assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0); | 
|---|
| 2091 |  | 
|---|
| 2092 | for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2) | 
|---|
| 2093 | { | 
|---|
| 2094 | assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first); | 
|---|
| 2095 |  | 
|---|
| 2096 | g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second; | 
|---|
| 2097 | g.m_indices[0] = i; | 
|---|
| 2098 | g.m_indices[1] = i + 1; | 
|---|
| 2099 |  | 
|---|
| 2100 | auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g)); | 
|---|
| 2101 |  | 
|---|
| 2102 | if (!ins_res.second) | 
|---|
| 2103 | { | 
|---|
| 2104 | (ins_res.first)->second.m_total_weight += g.m_total_weight; | 
|---|
| 2105 | (ins_res.first)->second.m_indices.push_back(i); | 
|---|
| 2106 | (ins_res.first)->second.m_indices.push_back(i + 1); | 
|---|
| 2107 | } | 
|---|
| 2108 | } | 
|---|
| 2109 | } | 
|---|
| 2110 | else | 
|---|
| 2111 | { | 
|---|
| 2112 | g.m_indices.resize(1); | 
|---|
| 2113 |  | 
|---|
| 2114 | for (uint32_t i = 0; i < q.get_training_vecs().size(); i++) | 
|---|
| 2115 | { | 
|---|
| 2116 | g.m_total_weight = q.get_training_vecs()[i].second; | 
|---|
| 2117 | g.m_indices[0] = i; | 
|---|
| 2118 |  | 
|---|
| 2119 | auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g)); | 
|---|
| 2120 |  | 
|---|
| 2121 | if (!ins_res.second) | 
|---|
| 2122 | { | 
|---|
| 2123 | (ins_res.first)->second.m_total_weight += g.m_total_weight; | 
|---|
| 2124 | (ins_res.first)->second.m_indices.push_back(i); | 
|---|
| 2125 | } | 
|---|
| 2126 | } | 
|---|
| 2127 | } | 
|---|
| 2128 |  | 
|---|
| 2129 | //debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs()); | 
|---|
| 2130 | debug_printf( "generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size()); | 
|---|
| 2131 |  | 
|---|
| 2132 | Quantizer group_quant; | 
|---|
| 2133 | typedef typename group_hash::const_iterator group_hash_const_iter; | 
|---|
| 2134 | basisu::vector<group_hash_const_iter> unique_vec_iters; | 
|---|
| 2135 | unique_vec_iters.reserve(unique_vecs.size()); | 
|---|
| 2136 |  | 
|---|
| 2137 | for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter) | 
|---|
| 2138 | { | 
|---|
| 2139 | group_quant.add_training_vec(iter->first, iter->second.m_total_weight); | 
|---|
| 2140 | unique_vec_iters.push_back(iter); | 
|---|
| 2141 | } | 
|---|
| 2142 |  | 
|---|
| 2143 | bool limit_clusterizers = true; | 
|---|
| 2144 | if (unique_vecs.size() <= max_codebook_size) | 
|---|
| 2145 | limit_clusterizers = false; | 
|---|
| 2146 |  | 
|---|
| 2147 | debug_printf( "Limit clusterizers: %u\n", limit_clusterizers); | 
|---|
| 2148 |  | 
|---|
| 2149 | basisu::vector<uint_vec> group_codebook, group_parent_codebook; | 
|---|
| 2150 | bool status = generate_hierarchical_codebook_threaded_internal(group_quant, | 
|---|
| 2151 | max_codebook_size, max_parent_codebook_size, | 
|---|
| 2152 | group_codebook, | 
|---|
| 2153 | group_parent_codebook, | 
|---|
| 2154 | (unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool); | 
|---|
| 2155 |  | 
|---|
| 2156 | if (!status) | 
|---|
| 2157 | return false; | 
|---|
| 2158 |  | 
|---|
| 2159 | codebook.resize(0); | 
|---|
| 2160 | for (uint32_t i = 0; i < group_codebook.size(); i++) | 
|---|
| 2161 | { | 
|---|
| 2162 | codebook.resize(codebook.size() + 1); | 
|---|
| 2163 |  | 
|---|
| 2164 | for (uint32_t j = 0; j < group_codebook[i].size(); j++) | 
|---|
| 2165 | { | 
|---|
| 2166 | const uint32_t group_index = group_codebook[i][j]; | 
|---|
| 2167 |  | 
|---|
| 2168 | typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; | 
|---|
| 2169 | const uint_vec& training_vec_indices = group_iter->second.m_indices; | 
|---|
| 2170 |  | 
|---|
| 2171 | append_vector(codebook.back(), training_vec_indices); | 
|---|
| 2172 | } | 
|---|
| 2173 | } | 
|---|
| 2174 |  | 
|---|
| 2175 | parent_codebook.resize(0); | 
|---|
| 2176 | for (uint32_t i = 0; i < group_parent_codebook.size(); i++) | 
|---|
| 2177 | { | 
|---|
| 2178 | parent_codebook.resize(parent_codebook.size() + 1); | 
|---|
| 2179 |  | 
|---|
| 2180 | for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++) | 
|---|
| 2181 | { | 
|---|
| 2182 | const uint32_t group_index = group_parent_codebook[i][j]; | 
|---|
| 2183 |  | 
|---|
| 2184 | typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; | 
|---|
| 2185 | const uint_vec& training_vec_indices = group_iter->second.m_indices; | 
|---|
| 2186 |  | 
|---|
| 2187 | append_vector(parent_codebook.back(), training_vec_indices); | 
|---|
| 2188 | } | 
|---|
| 2189 | } | 
|---|
| 2190 |  | 
|---|
| 2191 | return true; | 
|---|
| 2192 | } | 
|---|
| 2193 |  | 
|---|
| 2194 | // Canonical Huffman coding | 
|---|
| 2195 |  | 
|---|
| 2196 | class histogram | 
|---|
| 2197 | { | 
|---|
| 2198 | basisu::vector<uint32_t> m_hist; | 
|---|
| 2199 |  | 
|---|
| 2200 | public: | 
|---|
| 2201 | histogram(uint32_t size = 0) { init(size); } | 
|---|
| 2202 |  | 
|---|
| 2203 | void clear() | 
|---|
| 2204 | { | 
|---|
| 2205 | clear_vector(m_hist); | 
|---|
| 2206 | } | 
|---|
| 2207 |  | 
|---|
| 2208 | void init(uint32_t size) | 
|---|
| 2209 | { | 
|---|
| 2210 | m_hist.resize(0); | 
|---|
| 2211 | m_hist.resize(size); | 
|---|
| 2212 | } | 
|---|
| 2213 |  | 
|---|
| 2214 | inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); } | 
|---|
| 2215 |  | 
|---|
| 2216 | inline const uint32_t &operator[] (uint32_t index) const | 
|---|
| 2217 | { | 
|---|
| 2218 | return m_hist[index]; | 
|---|
| 2219 | } | 
|---|
| 2220 |  | 
|---|
| 2221 | inline uint32_t &operator[] (uint32_t index) | 
|---|
| 2222 | { | 
|---|
| 2223 | return m_hist[index]; | 
|---|
| 2224 | } | 
|---|
| 2225 |  | 
|---|
| 2226 | inline void inc(uint32_t index) | 
|---|
| 2227 | { | 
|---|
| 2228 | m_hist[index]++; | 
|---|
| 2229 | } | 
|---|
| 2230 |  | 
|---|
| 2231 | uint64_t get_total() const | 
|---|
| 2232 | { | 
|---|
| 2233 | uint64_t total = 0; | 
|---|
| 2234 | for (uint32_t i = 0; i < m_hist.size(); ++i) | 
|---|
| 2235 | total += m_hist[i]; | 
|---|
| 2236 | return total; | 
|---|
| 2237 | } | 
|---|
| 2238 |  | 
|---|
| 2239 | double get_entropy() const | 
|---|
| 2240 | { | 
|---|
| 2241 | double total = static_cast<double>(get_total()); | 
|---|
| 2242 | if (total == 0.0f) | 
|---|
| 2243 | return 0.0f; | 
|---|
| 2244 |  | 
|---|
| 2245 | const double inv_total = 1.0f / total; | 
|---|
| 2246 | const double neg_inv_log2 = -1.0f / log(2.0f); | 
|---|
| 2247 |  | 
|---|
| 2248 | double e = 0.0f; | 
|---|
| 2249 | for (uint32_t i = 0; i < m_hist.size(); i++) | 
|---|
| 2250 | if (m_hist[i]) | 
|---|
| 2251 | e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]); | 
|---|
| 2252 |  | 
|---|
| 2253 | return e; | 
|---|
| 2254 | } | 
|---|
| 2255 | }; | 
|---|
| 2256 |  | 
|---|
| 2257 | struct sym_freq | 
|---|
| 2258 | { | 
|---|
| 2259 | uint32_t m_key; | 
|---|
| 2260 | uint16_t m_sym_index; | 
|---|
| 2261 | }; | 
|---|
| 2262 |  | 
|---|
| 2263 | sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); | 
|---|
| 2264 | void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms); | 
|---|
| 2265 | void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size); | 
|---|
| 2266 |  | 
|---|
| 2267 | class huffman_encoding_table | 
|---|
| 2268 | { | 
|---|
| 2269 | public: | 
|---|
| 2270 | huffman_encoding_table() | 
|---|
| 2271 | { | 
|---|
| 2272 | } | 
|---|
| 2273 |  | 
|---|
| 2274 | void clear() | 
|---|
| 2275 | { | 
|---|
| 2276 | clear_vector(m_codes); | 
|---|
| 2277 | clear_vector(m_code_sizes); | 
|---|
| 2278 | } | 
|---|
| 2279 |  | 
|---|
| 2280 | bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize) | 
|---|
| 2281 | { | 
|---|
| 2282 | return init(h.size(), &h[0], max_code_size); | 
|---|
| 2283 | } | 
|---|
| 2284 |  | 
|---|
| 2285 | bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); | 
|---|
| 2286 | bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); | 
|---|
| 2287 |  | 
|---|
| 2288 | inline const uint16_vec &get_codes() const { return m_codes; } | 
|---|
| 2289 | inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } | 
|---|
| 2290 |  | 
|---|
| 2291 | uint32_t get_total_used_codes() const | 
|---|
| 2292 | { | 
|---|
| 2293 | for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--) | 
|---|
| 2294 | if (m_code_sizes[i]) | 
|---|
| 2295 | return i + 1; | 
|---|
| 2296 | return 0; | 
|---|
| 2297 | } | 
|---|
| 2298 |  | 
|---|
| 2299 | private: | 
|---|
| 2300 | uint16_vec m_codes; | 
|---|
| 2301 | uint8_vec m_code_sizes; | 
|---|
| 2302 | }; | 
|---|
| 2303 |  | 
|---|
| 2304 | class bitwise_coder | 
|---|
| 2305 | { | 
|---|
| 2306 | public: | 
|---|
| 2307 | bitwise_coder() : | 
|---|
| 2308 | m_bit_buffer(0), | 
|---|
| 2309 | m_bit_buffer_size(0), | 
|---|
| 2310 | m_total_bits(0) | 
|---|
| 2311 | { | 
|---|
| 2312 | } | 
|---|
| 2313 |  | 
|---|
| 2314 | inline void clear() | 
|---|
| 2315 | { | 
|---|
| 2316 | clear_vector(m_bytes); | 
|---|
| 2317 | m_bit_buffer = 0; | 
|---|
| 2318 | m_bit_buffer_size = 0; | 
|---|
| 2319 | m_total_bits = 0; | 
|---|
| 2320 | } | 
|---|
| 2321 |  | 
|---|
| 2322 | inline const uint8_vec &get_bytes() const { return m_bytes; } | 
|---|
| 2323 |  | 
|---|
| 2324 | inline uint64_t get_total_bits() const { return m_total_bits; } | 
|---|
| 2325 | inline void clear_total_bits() { m_total_bits = 0; } | 
|---|
| 2326 |  | 
|---|
| 2327 | inline void init(uint32_t reserve_size = 1024) | 
|---|
| 2328 | { | 
|---|
| 2329 | m_bytes.reserve(reserve_size); | 
|---|
| 2330 | m_bytes.resize(0); | 
|---|
| 2331 |  | 
|---|
| 2332 | m_bit_buffer = 0; | 
|---|
| 2333 | m_bit_buffer_size = 0; | 
|---|
| 2334 | m_total_bits = 0; | 
|---|
| 2335 | } | 
|---|
| 2336 |  | 
|---|
| 2337 | inline uint32_t flush() | 
|---|
| 2338 | { | 
|---|
| 2339 | if (m_bit_buffer_size) | 
|---|
| 2340 | { | 
|---|
| 2341 | m_total_bits += 8 - (m_bit_buffer_size & 7); | 
|---|
| 2342 | append_byte(static_cast<uint8_t>(m_bit_buffer)); | 
|---|
| 2343 |  | 
|---|
| 2344 | m_bit_buffer = 0; | 
|---|
| 2345 | m_bit_buffer_size = 0; | 
|---|
| 2346 |  | 
|---|
| 2347 | return 8; | 
|---|
| 2348 | } | 
|---|
| 2349 |  | 
|---|
| 2350 | return 0; | 
|---|
| 2351 | } | 
|---|
| 2352 |  | 
|---|
| 2353 | inline uint32_t put_bits(uint32_t bits, uint32_t num_bits) | 
|---|
| 2354 | { | 
|---|
| 2355 | assert(num_bits <= 32); | 
|---|
| 2356 | assert(bits < (1ULL << num_bits)); | 
|---|
| 2357 |  | 
|---|
| 2358 | if (!num_bits) | 
|---|
| 2359 | return 0; | 
|---|
| 2360 |  | 
|---|
| 2361 | m_total_bits += num_bits; | 
|---|
| 2362 |  | 
|---|
| 2363 | uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer; | 
|---|
| 2364 | m_bit_buffer_size += num_bits; | 
|---|
| 2365 |  | 
|---|
| 2366 | while (m_bit_buffer_size >= 8) | 
|---|
| 2367 | { | 
|---|
| 2368 | append_byte(static_cast<uint8_t>(v)); | 
|---|
| 2369 | v >>= 8; | 
|---|
| 2370 | m_bit_buffer_size -= 8; | 
|---|
| 2371 | } | 
|---|
| 2372 |  | 
|---|
| 2373 | m_bit_buffer = static_cast<uint8_t>(v); | 
|---|
| 2374 | return num_bits; | 
|---|
| 2375 | } | 
|---|
| 2376 |  | 
|---|
| 2377 | inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab) | 
|---|
| 2378 | { | 
|---|
| 2379 | uint32_t code = tab.get_codes()[sym]; | 
|---|
| 2380 | uint32_t code_size = tab.get_code_sizes()[sym]; | 
|---|
| 2381 | assert(code_size >= 1); | 
|---|
| 2382 | put_bits(code, code_size); | 
|---|
| 2383 | return code_size; | 
|---|
| 2384 | } | 
|---|
| 2385 |  | 
|---|
| 2386 | inline uint32_t put_truncated_binary(uint32_t v, uint32_t n) | 
|---|
| 2387 | { | 
|---|
| 2388 | assert((n >= 2) && (v < n)); | 
|---|
| 2389 |  | 
|---|
| 2390 | uint32_t k = floor_log2i(n); | 
|---|
| 2391 | uint32_t u = (1 << (k + 1)) - n; | 
|---|
| 2392 |  | 
|---|
| 2393 | if (v < u) | 
|---|
| 2394 | return put_bits(v, k); | 
|---|
| 2395 |  | 
|---|
| 2396 | uint32_t x = v + u; | 
|---|
| 2397 | assert((x >> 1) >= u); | 
|---|
| 2398 |  | 
|---|
| 2399 | put_bits(x >> 1, k); | 
|---|
| 2400 | put_bits(x & 1, 1); | 
|---|
| 2401 | return k + 1; | 
|---|
| 2402 | } | 
|---|
| 2403 |  | 
|---|
| 2404 | inline uint32_t put_rice(uint32_t v, uint32_t m) | 
|---|
| 2405 | { | 
|---|
| 2406 | assert(m); | 
|---|
| 2407 |  | 
|---|
| 2408 | const uint64_t start_bits = m_total_bits; | 
|---|
| 2409 |  | 
|---|
| 2410 | uint32_t q = v >> m, r = v & ((1 << m) - 1); | 
|---|
| 2411 |  | 
|---|
| 2412 | // rice coding sanity check | 
|---|
| 2413 | assert(q <= 64); | 
|---|
| 2414 |  | 
|---|
| 2415 | for (; q > 16; q -= 16) | 
|---|
| 2416 | put_bits(0xFFFF, 16); | 
|---|
| 2417 |  | 
|---|
| 2418 | put_bits((1 << q) - 1, q); | 
|---|
| 2419 | put_bits(r << 1, m + 1); | 
|---|
| 2420 |  | 
|---|
| 2421 | return (uint32_t)(m_total_bits - start_bits); | 
|---|
| 2422 | } | 
|---|
| 2423 |  | 
|---|
| 2424 | inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits) | 
|---|
| 2425 | { | 
|---|
| 2426 | assert(chunk_bits); | 
|---|
| 2427 |  | 
|---|
| 2428 | const uint32_t chunk_size = 1 << chunk_bits; | 
|---|
| 2429 | const uint32_t chunk_mask = chunk_size - 1; | 
|---|
| 2430 |  | 
|---|
| 2431 | uint32_t total_bits = 0; | 
|---|
| 2432 |  | 
|---|
| 2433 | for ( ; ; ) | 
|---|
| 2434 | { | 
|---|
| 2435 | uint32_t next_v = v >> chunk_bits; | 
|---|
| 2436 |  | 
|---|
| 2437 | total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1); | 
|---|
| 2438 | if (!next_v) | 
|---|
| 2439 | break; | 
|---|
| 2440 |  | 
|---|
| 2441 | v = next_v; | 
|---|
| 2442 | } | 
|---|
| 2443 |  | 
|---|
| 2444 | return total_bits; | 
|---|
| 2445 | } | 
|---|
| 2446 |  | 
|---|
| 2447 | uint32_t emit_huffman_table(const huffman_encoding_table &tab); | 
|---|
| 2448 |  | 
|---|
| 2449 | private: | 
|---|
| 2450 | uint8_vec m_bytes; | 
|---|
| 2451 | uint32_t m_bit_buffer, m_bit_buffer_size; | 
|---|
| 2452 | uint64_t m_total_bits; | 
|---|
| 2453 |  | 
|---|
| 2454 | void append_byte(uint8_t c) | 
|---|
| 2455 | { | 
|---|
| 2456 | m_bytes.resize(m_bytes.size() + 1); | 
|---|
| 2457 | m_bytes.back() = c; | 
|---|
| 2458 | } | 
|---|
| 2459 |  | 
|---|
| 2460 | static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len); | 
|---|
| 2461 | static void end_zero_run(uint16_vec &syms, uint32_t &run_size); | 
|---|
| 2462 | }; | 
|---|
| 2463 |  | 
|---|
| 2464 | class huff2D | 
|---|
| 2465 | { | 
|---|
| 2466 | public: | 
|---|
| 2467 | huff2D() { } | 
|---|
| 2468 | huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); } | 
|---|
| 2469 |  | 
|---|
| 2470 | inline const histogram &get_histogram() const { return m_histogram; } | 
|---|
| 2471 | inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; } | 
|---|
| 2472 |  | 
|---|
| 2473 | inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) | 
|---|
| 2474 | { | 
|---|
| 2475 | assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); | 
|---|
| 2476 |  | 
|---|
| 2477 | m_bits_per_sym = bits_per_sym; | 
|---|
| 2478 | m_total_syms_per_group = total_syms_per_group; | 
|---|
| 2479 | m_cur_sym_bits = 0; | 
|---|
| 2480 | m_cur_num_syms = 0; | 
|---|
| 2481 | m_decode_syms_remaining = 0; | 
|---|
| 2482 | m_next_decoder_group_index = 0; | 
|---|
| 2483 |  | 
|---|
| 2484 | m_histogram.init(1 << (bits_per_sym * total_syms_per_group)); | 
|---|
| 2485 | } | 
|---|
| 2486 |  | 
|---|
| 2487 | inline void clear() | 
|---|
| 2488 | { | 
|---|
| 2489 | m_group_bits.clear(); | 
|---|
| 2490 |  | 
|---|
| 2491 | m_cur_sym_bits = 0; | 
|---|
| 2492 | m_cur_num_syms = 0; | 
|---|
| 2493 | m_decode_syms_remaining = 0; | 
|---|
| 2494 | m_next_decoder_group_index = 0; | 
|---|
| 2495 | } | 
|---|
| 2496 |  | 
|---|
| 2497 | inline void emit(uint32_t sym) | 
|---|
| 2498 | { | 
|---|
| 2499 | m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym)); | 
|---|
| 2500 | m_cur_num_syms++; | 
|---|
| 2501 |  | 
|---|
| 2502 | if (m_cur_num_syms == m_total_syms_per_group) | 
|---|
| 2503 | flush(); | 
|---|
| 2504 | } | 
|---|
| 2505 |  | 
|---|
| 2506 | inline void flush() | 
|---|
| 2507 | { | 
|---|
| 2508 | if (m_cur_num_syms) | 
|---|
| 2509 | { | 
|---|
| 2510 | m_group_bits.push_back(m_cur_sym_bits); | 
|---|
| 2511 | m_histogram.inc(m_cur_sym_bits); | 
|---|
| 2512 |  | 
|---|
| 2513 | m_cur_sym_bits = 0; | 
|---|
| 2514 | m_cur_num_syms = 0; | 
|---|
| 2515 | } | 
|---|
| 2516 | } | 
|---|
| 2517 |  | 
|---|
| 2518 | inline bool start_encoding(uint32_t code_size_limit = 16) | 
|---|
| 2519 | { | 
|---|
| 2520 | flush(); | 
|---|
| 2521 |  | 
|---|
| 2522 | if (!m_encoding_table.init(m_histogram, code_size_limit)) | 
|---|
| 2523 | return false; | 
|---|
| 2524 |  | 
|---|
| 2525 | m_decode_syms_remaining = 0; | 
|---|
| 2526 | m_next_decoder_group_index = 0; | 
|---|
| 2527 |  | 
|---|
| 2528 | return true; | 
|---|
| 2529 | } | 
|---|
| 2530 |  | 
|---|
| 2531 | inline uint32_t emit_next_sym(bitwise_coder &c) | 
|---|
| 2532 | { | 
|---|
| 2533 | uint32_t bits = 0; | 
|---|
| 2534 |  | 
|---|
| 2535 | if (!m_decode_syms_remaining) | 
|---|
| 2536 | { | 
|---|
| 2537 | bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table); | 
|---|
| 2538 | m_decode_syms_remaining = m_total_syms_per_group; | 
|---|
| 2539 | } | 
|---|
| 2540 |  | 
|---|
| 2541 | m_decode_syms_remaining--; | 
|---|
| 2542 | return bits; | 
|---|
| 2543 | } | 
|---|
| 2544 |  | 
|---|
| 2545 | inline void emit_flush() | 
|---|
| 2546 | { | 
|---|
| 2547 | m_decode_syms_remaining = 0; | 
|---|
| 2548 | } | 
|---|
| 2549 |  | 
|---|
| 2550 | private: | 
|---|
| 2551 | uint_vec m_group_bits; | 
|---|
| 2552 | huffman_encoding_table m_encoding_table; | 
|---|
| 2553 | histogram m_histogram; | 
|---|
| 2554 | uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining; | 
|---|
| 2555 | }; | 
|---|
| 2556 |  | 
|---|
| 2557 | bool huffman_test(int rand_seed); | 
|---|
| 2558 |  | 
|---|
| 2559 | // VQ index reordering | 
|---|
| 2560 |  | 
|---|
| 2561 | class palette_index_reorderer | 
|---|
| 2562 | { | 
|---|
| 2563 | public: | 
|---|
| 2564 | palette_index_reorderer() | 
|---|
| 2565 | { | 
|---|
| 2566 | } | 
|---|
| 2567 |  | 
|---|
| 2568 | void clear() | 
|---|
| 2569 | { | 
|---|
| 2570 | clear_vector(m_hist); | 
|---|
| 2571 | clear_vector(m_total_count_to_picked); | 
|---|
| 2572 | clear_vector(m_entries_picked); | 
|---|
| 2573 | clear_vector(m_entries_to_do); | 
|---|
| 2574 | clear_vector(m_remap_table); | 
|---|
| 2575 | } | 
|---|
| 2576 |  | 
|---|
| 2577 | // returns [0,1] distance of entry i to entry j | 
|---|
| 2578 | typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx); | 
|---|
| 2579 |  | 
|---|
| 2580 | void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); | 
|---|
| 2581 |  | 
|---|
| 2582 | // Table remaps old to new symbol indices | 
|---|
| 2583 | inline const uint_vec &get_remap_table() const { return m_remap_table; } | 
|---|
| 2584 |  | 
|---|
| 2585 | private: | 
|---|
| 2586 | uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table; | 
|---|
| 2587 |  | 
|---|
| 2588 | inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; } | 
|---|
| 2589 | inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } } | 
|---|
| 2590 |  | 
|---|
| 2591 | void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices); | 
|---|
| 2592 | void find_initial(uint32_t num_syms); | 
|---|
| 2593 | void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); | 
|---|
| 2594 | float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); | 
|---|
| 2595 | }; | 
|---|
| 2596 |  | 
|---|
| 2597 | // Simple 32-bit 2D image class | 
|---|
| 2598 |  | 
|---|
| 2599 | class image | 
|---|
| 2600 | { | 
|---|
| 2601 | public: | 
|---|
| 2602 | image() : | 
|---|
| 2603 | m_width(0), m_height(0), m_pitch(0) | 
|---|
| 2604 | { | 
|---|
| 2605 | } | 
|---|
| 2606 |  | 
|---|
| 2607 | image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : | 
|---|
| 2608 | m_width(0), m_height(0), m_pitch(0) | 
|---|
| 2609 | { | 
|---|
| 2610 | resize(w, h, p); | 
|---|
| 2611 | } | 
|---|
| 2612 |  | 
|---|
| 2613 | image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) : | 
|---|
| 2614 | m_width(0), m_height(0), m_pitch(0) | 
|---|
| 2615 | { | 
|---|
| 2616 | init(pImage, width, height, comps); | 
|---|
| 2617 | } | 
|---|
| 2618 |  | 
|---|
| 2619 | image(const image &other) : | 
|---|
| 2620 | m_width(0), m_height(0), m_pitch(0) | 
|---|
| 2621 | { | 
|---|
| 2622 | *this = other; | 
|---|
| 2623 | } | 
|---|
| 2624 |  | 
|---|
| 2625 | image &swap(image &other) | 
|---|
| 2626 | { | 
|---|
| 2627 | std::swap(m_width, other.m_width); | 
|---|
| 2628 | std::swap(m_height, other.m_height); | 
|---|
| 2629 | std::swap(m_pitch, other.m_pitch); | 
|---|
| 2630 | m_pixels.swap(other.m_pixels); | 
|---|
| 2631 | return *this; | 
|---|
| 2632 | } | 
|---|
| 2633 |  | 
|---|
| 2634 | image &operator= (const image &rhs) | 
|---|
| 2635 | { | 
|---|
| 2636 | if (this != &rhs) | 
|---|
| 2637 | { | 
|---|
| 2638 | m_width = rhs.m_width; | 
|---|
| 2639 | m_height = rhs.m_height; | 
|---|
| 2640 | m_pitch = rhs.m_pitch; | 
|---|
| 2641 | m_pixels = rhs.m_pixels; | 
|---|
| 2642 | } | 
|---|
| 2643 | return *this; | 
|---|
| 2644 | } | 
|---|
| 2645 |  | 
|---|
| 2646 | image &clear() | 
|---|
| 2647 | { | 
|---|
| 2648 | m_width = 0; | 
|---|
| 2649 | m_height = 0; | 
|---|
| 2650 | m_pitch = 0; | 
|---|
| 2651 | clear_vector(m_pixels); | 
|---|
| 2652 | return *this; | 
|---|
| 2653 | } | 
|---|
| 2654 |  | 
|---|
| 2655 | image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color) | 
|---|
| 2656 | { | 
|---|
| 2657 | return crop(w, h, p, background); | 
|---|
| 2658 | } | 
|---|
| 2659 |  | 
|---|
| 2660 | image &set_all(const color_rgba &c) | 
|---|
| 2661 | { | 
|---|
| 2662 | for (uint32_t i = 0; i < m_pixels.size(); i++) | 
|---|
| 2663 | m_pixels[i] = c; | 
|---|
| 2664 | return *this; | 
|---|
| 2665 | } | 
|---|
| 2666 |  | 
|---|
| 2667 | void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) | 
|---|
| 2668 | { | 
|---|
| 2669 | assert(comps >= 1 && comps <= 4); | 
|---|
| 2670 |  | 
|---|
| 2671 | resize(width, height); | 
|---|
| 2672 |  | 
|---|
| 2673 | for (uint32_t y = 0; y < height; y++) | 
|---|
| 2674 | { | 
|---|
| 2675 | for (uint32_t x = 0; x < width; x++) | 
|---|
| 2676 | { | 
|---|
| 2677 | const uint8_t *pSrc = &pImage[(x + y * width) * comps]; | 
|---|
| 2678 | color_rgba &dst = (*this)(x, y); | 
|---|
| 2679 |  | 
|---|
| 2680 | if (comps == 1) | 
|---|
| 2681 | { | 
|---|
| 2682 | dst.r = pSrc[0]; | 
|---|
| 2683 | dst.g = pSrc[0]; | 
|---|
| 2684 | dst.b = pSrc[0]; | 
|---|
| 2685 | dst.a = 255; | 
|---|
| 2686 | } | 
|---|
| 2687 | else if (comps == 2) | 
|---|
| 2688 | { | 
|---|
| 2689 | dst.r = pSrc[0]; | 
|---|
| 2690 | dst.g = pSrc[0]; | 
|---|
| 2691 | dst.b = pSrc[0]; | 
|---|
| 2692 | dst.a = pSrc[1]; | 
|---|
| 2693 | } | 
|---|
| 2694 | else | 
|---|
| 2695 | { | 
|---|
| 2696 | dst.r = pSrc[0]; | 
|---|
| 2697 | dst.g = pSrc[1]; | 
|---|
| 2698 | dst.b = pSrc[2]; | 
|---|
| 2699 | if (comps == 4) | 
|---|
| 2700 | dst.a = pSrc[3]; | 
|---|
| 2701 | else | 
|---|
| 2702 | dst.a = 255; | 
|---|
| 2703 | } | 
|---|
| 2704 | } | 
|---|
| 2705 | } | 
|---|
| 2706 | } | 
|---|
| 2707 |  | 
|---|
| 2708 | image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) | 
|---|
| 2709 | { | 
|---|
| 2710 | for (uint32_t iy = 0; iy < h; iy++) | 
|---|
| 2711 | for (uint32_t ix = 0; ix < w; ix++) | 
|---|
| 2712 | set_clipped(x + ix, y + iy, c); | 
|---|
| 2713 | return *this; | 
|---|
| 2714 | } | 
|---|
| 2715 |  | 
|---|
| 2716 | image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c) | 
|---|
| 2717 | { | 
|---|
| 2718 | for (uint32_t iy = 0; iy < h; iy++) | 
|---|
| 2719 | for (uint32_t ix = 0; ix < w; ix++) | 
|---|
| 2720 | set_clipped_alpha(x + ix, y + iy, c); | 
|---|
| 2721 | return *this; | 
|---|
| 2722 | } | 
|---|
| 2723 |  | 
|---|
| 2724 | image &crop_dup_borders(uint32_t w, uint32_t h) | 
|---|
| 2725 | { | 
|---|
| 2726 | const uint32_t orig_w = m_width, orig_h = m_height; | 
|---|
| 2727 |  | 
|---|
| 2728 | crop(w, h); | 
|---|
| 2729 |  | 
|---|
| 2730 | if (orig_w && orig_h) | 
|---|
| 2731 | { | 
|---|
| 2732 | if (m_width > orig_w) | 
|---|
| 2733 | { | 
|---|
| 2734 | for (uint32_t x = orig_w; x < m_width; x++) | 
|---|
| 2735 | for (uint32_t y = 0; y < m_height; y++) | 
|---|
| 2736 | set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); | 
|---|
| 2737 | } | 
|---|
| 2738 |  | 
|---|
| 2739 | if (m_height > orig_h) | 
|---|
| 2740 | { | 
|---|
| 2741 | for (uint32_t y = orig_h; y < m_height; y++) | 
|---|
| 2742 | for (uint32_t x = 0; x < m_width; x++) | 
|---|
| 2743 | set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); | 
|---|
| 2744 | } | 
|---|
| 2745 | } | 
|---|
| 2746 | return *this; | 
|---|
| 2747 | } | 
|---|
| 2748 |  | 
|---|
| 2749 | // pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer). | 
|---|
| 2750 | image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) | 
|---|
| 2751 | { | 
|---|
| 2752 | if (p == UINT32_MAX) | 
|---|
| 2753 | p = w; | 
|---|
| 2754 |  | 
|---|
| 2755 | clear(); | 
|---|
| 2756 |  | 
|---|
| 2757 | if ((!p) || (!w) || (!h)) | 
|---|
| 2758 | return *this; | 
|---|
| 2759 |  | 
|---|
| 2760 | m_pixels.grant_ownership(pPixels, p * h, p * h); | 
|---|
| 2761 |  | 
|---|
| 2762 | m_width = w; | 
|---|
| 2763 | m_height = h; | 
|---|
| 2764 | m_pitch = p; | 
|---|
| 2765 |  | 
|---|
| 2766 | return *this; | 
|---|
| 2767 | } | 
|---|
| 2768 |  | 
|---|
| 2769 | image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true) | 
|---|
| 2770 | { | 
|---|
| 2771 | if (p == UINT32_MAX) | 
|---|
| 2772 | p = w; | 
|---|
| 2773 |  | 
|---|
| 2774 | if ((w == m_width) && (m_height == h) && (m_pitch == p)) | 
|---|
| 2775 | return *this; | 
|---|
| 2776 |  | 
|---|
| 2777 | if ((!w) || (!h) || (!p)) | 
|---|
| 2778 | { | 
|---|
| 2779 | clear(); | 
|---|
| 2780 | return *this; | 
|---|
| 2781 | } | 
|---|
| 2782 |  | 
|---|
| 2783 | color_rgba_vec cur_state; | 
|---|
| 2784 | cur_state.swap(m_pixels); | 
|---|
| 2785 |  | 
|---|
| 2786 | m_pixels.resize(p * h); | 
|---|
| 2787 |  | 
|---|
| 2788 | if (init_image) | 
|---|
| 2789 | { | 
|---|
| 2790 | if (m_width || m_height) | 
|---|
| 2791 | { | 
|---|
| 2792 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 2793 | { | 
|---|
| 2794 | for (uint32_t x = 0; x < w; x++) | 
|---|
| 2795 | { | 
|---|
| 2796 | if ((x < m_width) && (y < m_height)) | 
|---|
| 2797 | m_pixels[x + y * p] = cur_state[x + y * m_pitch]; | 
|---|
| 2798 | else | 
|---|
| 2799 | m_pixels[x + y * p] = background; | 
|---|
| 2800 | } | 
|---|
| 2801 | } | 
|---|
| 2802 | } | 
|---|
| 2803 | else | 
|---|
| 2804 | { | 
|---|
| 2805 | m_pixels.set_all(background); | 
|---|
| 2806 | } | 
|---|
| 2807 | } | 
|---|
| 2808 |  | 
|---|
| 2809 | m_width = w; | 
|---|
| 2810 | m_height = h; | 
|---|
| 2811 | m_pitch = p; | 
|---|
| 2812 |  | 
|---|
| 2813 | return *this; | 
|---|
| 2814 | } | 
|---|
| 2815 |  | 
|---|
| 2816 | inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } | 
|---|
| 2817 | inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } | 
|---|
| 2818 |  | 
|---|
| 2819 | inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } | 
|---|
| 2820 | inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } | 
|---|
| 2821 |  | 
|---|
| 2822 | inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const | 
|---|
| 2823 | { | 
|---|
| 2824 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); | 
|---|
| 2825 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); | 
|---|
| 2826 | return m_pixels[x + y * m_pitch]; | 
|---|
| 2827 | } | 
|---|
| 2828 |  | 
|---|
| 2829 | inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) | 
|---|
| 2830 | { | 
|---|
| 2831 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); | 
|---|
| 2832 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); | 
|---|
| 2833 | return m_pixels[x + y * m_pitch]; | 
|---|
| 2834 | } | 
|---|
| 2835 |  | 
|---|
| 2836 | inline image &set_clipped(int x, int y, const color_rgba &c) | 
|---|
| 2837 | { | 
|---|
| 2838 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) | 
|---|
| 2839 | (*this)(x, y) = c; | 
|---|
| 2840 | return *this; | 
|---|
| 2841 | } | 
|---|
| 2842 |  | 
|---|
| 2843 | inline image& set_clipped_alpha(int x, int y, const color_rgba& c) | 
|---|
| 2844 | { | 
|---|
| 2845 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) | 
|---|
| 2846 | (*this)(x, y).m_comps[3] = c.m_comps[3]; | 
|---|
| 2847 | return *this; | 
|---|
| 2848 | } | 
|---|
| 2849 |  | 
|---|
| 2850 | // Very straightforward blit with full clipping. Not fast, but it works. | 
|---|
| 2851 | image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) | 
|---|
| 2852 | { | 
|---|
| 2853 | for (int y = 0; y < src_h; y++) | 
|---|
| 2854 | { | 
|---|
| 2855 | const int sy = src_y + y; | 
|---|
| 2856 | if (sy < 0) | 
|---|
| 2857 | continue; | 
|---|
| 2858 | else if (sy >= (int)src.get_height()) | 
|---|
| 2859 | break; | 
|---|
| 2860 |  | 
|---|
| 2861 | for (int x = 0; x < src_w; x++) | 
|---|
| 2862 | { | 
|---|
| 2863 | const int sx = src_x + x; | 
|---|
| 2864 | if (sx < 0) | 
|---|
| 2865 | continue; | 
|---|
| 2866 | else if (sx >= (int)src.get_height()) | 
|---|
| 2867 | break; | 
|---|
| 2868 |  | 
|---|
| 2869 | set_clipped(dst_x + x, dst_y + y, src(sx, sy)); | 
|---|
| 2870 | } | 
|---|
| 2871 | } | 
|---|
| 2872 |  | 
|---|
| 2873 | return *this; | 
|---|
| 2874 | } | 
|---|
| 2875 |  | 
|---|
| 2876 | const image &(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const | 
|---|
| 2877 | { | 
|---|
| 2878 | if (((src_x + w) > m_width) || ((src_y + h) > m_height)) | 
|---|
| 2879 | { | 
|---|
| 2880 | // Slower clamping case | 
|---|
| 2881 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 2882 | for (uint32_t x = 0; x < w; x++) | 
|---|
| 2883 | *pDst++ = get_clamped(src_x + x, src_y + y); | 
|---|
| 2884 | } | 
|---|
| 2885 | else | 
|---|
| 2886 | { | 
|---|
| 2887 | const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch]; | 
|---|
| 2888 |  | 
|---|
| 2889 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 2890 | { | 
|---|
| 2891 | memcpy(pDst, pSrc, w * sizeof(color_rgba)); | 
|---|
| 2892 | pSrc += m_pitch; | 
|---|
| 2893 | pDst += w; | 
|---|
| 2894 | } | 
|---|
| 2895 | } | 
|---|
| 2896 |  | 
|---|
| 2897 | return *this; | 
|---|
| 2898 | } | 
|---|
| 2899 |  | 
|---|
| 2900 | image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) | 
|---|
| 2901 | { | 
|---|
| 2902 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 2903 | for (uint32_t x = 0; x < w; x++) | 
|---|
| 2904 | set_clipped(dst_x + x, dst_y + y, *pSrc++); | 
|---|
| 2905 | return *this; | 
|---|
| 2906 | } | 
|---|
| 2907 |  | 
|---|
| 2908 | inline uint32_t get_width() const { return m_width; } | 
|---|
| 2909 | inline uint32_t get_height() const { return m_height; } | 
|---|
| 2910 | inline uint32_t get_pitch() const { return m_pitch; } | 
|---|
| 2911 | inline uint32_t get_total_pixels() const { return m_width * m_height; } | 
|---|
| 2912 |  | 
|---|
| 2913 | inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } | 
|---|
| 2914 | inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } | 
|---|
| 2915 | inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } | 
|---|
| 2916 |  | 
|---|
| 2917 | inline const color_rgba_vec &get_pixels() const { return m_pixels; } | 
|---|
| 2918 | inline color_rgba_vec &get_pixels() { return m_pixels; } | 
|---|
| 2919 |  | 
|---|
| 2920 | inline const color_rgba *get_ptr() const { return &m_pixels[0]; } | 
|---|
| 2921 | inline color_rgba *get_ptr() { return &m_pixels[0]; } | 
|---|
| 2922 |  | 
|---|
| 2923 | bool has_alpha() const | 
|---|
| 2924 | { | 
|---|
| 2925 | for (uint32_t y = 0; y < m_height; ++y) | 
|---|
| 2926 | for (uint32_t x = 0; x < m_width; ++x) | 
|---|
| 2927 | if ((*this)(x, y).a < 255) | 
|---|
| 2928 | return true; | 
|---|
| 2929 |  | 
|---|
| 2930 | return false; | 
|---|
| 2931 | } | 
|---|
| 2932 |  | 
|---|
| 2933 | image &set_alpha(uint8_t a) | 
|---|
| 2934 | { | 
|---|
| 2935 | for (uint32_t y = 0; y < m_height; ++y) | 
|---|
| 2936 | for (uint32_t x = 0; x < m_width; ++x) | 
|---|
| 2937 | (*this)(x, y).a = a; | 
|---|
| 2938 | return *this; | 
|---|
| 2939 | } | 
|---|
| 2940 |  | 
|---|
| 2941 | image &flip_y() | 
|---|
| 2942 | { | 
|---|
| 2943 | for (uint32_t y = 0; y < m_height / 2; ++y) | 
|---|
| 2944 | for (uint32_t x = 0; x < m_width; ++x) | 
|---|
| 2945 | std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); | 
|---|
| 2946 | return *this; | 
|---|
| 2947 | } | 
|---|
| 2948 |  | 
|---|
| 2949 | // TODO: There are many ways to do this, not sure this is the best way. | 
|---|
| 2950 | image &renormalize_normal_map() | 
|---|
| 2951 | { | 
|---|
| 2952 | for (uint32_t y = 0; y < m_height; y++) | 
|---|
| 2953 | { | 
|---|
| 2954 | for (uint32_t x = 0; x < m_width; x++) | 
|---|
| 2955 | { | 
|---|
| 2956 | color_rgba &c = (*this)(x, y); | 
|---|
| 2957 | if ((c.r == 128) && (c.g == 128) && (c.b == 128)) | 
|---|
| 2958 | continue; | 
|---|
| 2959 |  | 
|---|
| 2960 | vec3F v(c.r, c.g, c.b); | 
|---|
| 2961 | v = (v * (2.0f / 255.0f)) - vec3F(1.0f); | 
|---|
| 2962 | v.clamp(-1.0f, 1.0f); | 
|---|
| 2963 |  | 
|---|
| 2964 | float length = v.length(); | 
|---|
| 2965 | const float cValidThresh = .077f; | 
|---|
| 2966 | if (length < cValidThresh) | 
|---|
| 2967 | { | 
|---|
| 2968 | c.set(128, 128, 128, c.a); | 
|---|
| 2969 | } | 
|---|
| 2970 | else if (fabs(length - 1.0f) > cValidThresh) | 
|---|
| 2971 | { | 
|---|
| 2972 | if (length) | 
|---|
| 2973 | v /= length; | 
|---|
| 2974 |  | 
|---|
| 2975 | for (uint32_t i = 0; i < 3; i++) | 
|---|
| 2976 | c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f)); | 
|---|
| 2977 |  | 
|---|
| 2978 | if ((c.g == 128) && (c.r == 128)) | 
|---|
| 2979 | { | 
|---|
| 2980 | if (c.b < 128) | 
|---|
| 2981 | c.b = 0; | 
|---|
| 2982 | else | 
|---|
| 2983 | c.b = 255; | 
|---|
| 2984 | } | 
|---|
| 2985 | } | 
|---|
| 2986 | } | 
|---|
| 2987 | } | 
|---|
| 2988 | return *this; | 
|---|
| 2989 | } | 
|---|
| 2990 |  | 
|---|
| 2991 | void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); | 
|---|
| 2992 |  | 
|---|
| 2993 | private: | 
|---|
| 2994 | uint32_t m_width, m_height, m_pitch;  // all in pixels | 
|---|
| 2995 | color_rgba_vec m_pixels; | 
|---|
| 2996 | }; | 
|---|
| 2997 |  | 
|---|
| 2998 | // Float images | 
|---|
| 2999 |  | 
|---|
| 3000 | typedef basisu::vector<vec4F> vec4F_vec; | 
|---|
| 3001 |  | 
|---|
| 3002 | class imagef | 
|---|
| 3003 | { | 
|---|
| 3004 | public: | 
|---|
| 3005 | imagef() : | 
|---|
| 3006 | m_width(0), m_height(0), m_pitch(0) | 
|---|
| 3007 | { | 
|---|
| 3008 | } | 
|---|
| 3009 |  | 
|---|
| 3010 | imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : | 
|---|
| 3011 | m_width(0), m_height(0), m_pitch(0) | 
|---|
| 3012 | { | 
|---|
| 3013 | resize(w, h, p); | 
|---|
| 3014 | } | 
|---|
| 3015 |  | 
|---|
| 3016 | imagef(const imagef &other) : | 
|---|
| 3017 | m_width(0), m_height(0), m_pitch(0) | 
|---|
| 3018 | { | 
|---|
| 3019 | *this = other; | 
|---|
| 3020 | } | 
|---|
| 3021 |  | 
|---|
| 3022 | imagef &swap(imagef &other) | 
|---|
| 3023 | { | 
|---|
| 3024 | std::swap(m_width, other.m_width); | 
|---|
| 3025 | std::swap(m_height, other.m_height); | 
|---|
| 3026 | std::swap(m_pitch, other.m_pitch); | 
|---|
| 3027 | m_pixels.swap(other.m_pixels); | 
|---|
| 3028 | return *this; | 
|---|
| 3029 | } | 
|---|
| 3030 |  | 
|---|
| 3031 | imagef &operator= (const imagef &rhs) | 
|---|
| 3032 | { | 
|---|
| 3033 | if (this != &rhs) | 
|---|
| 3034 | { | 
|---|
| 3035 | m_width = rhs.m_width; | 
|---|
| 3036 | m_height = rhs.m_height; | 
|---|
| 3037 | m_pitch = rhs.m_pitch; | 
|---|
| 3038 | m_pixels = rhs.m_pixels; | 
|---|
| 3039 | } | 
|---|
| 3040 | return *this; | 
|---|
| 3041 | } | 
|---|
| 3042 |  | 
|---|
| 3043 | imagef &clear() | 
|---|
| 3044 | { | 
|---|
| 3045 | m_width = 0; | 
|---|
| 3046 | m_height = 0; | 
|---|
| 3047 | m_pitch = 0; | 
|---|
| 3048 | clear_vector(m_pixels); | 
|---|
| 3049 | return *this; | 
|---|
| 3050 | } | 
|---|
| 3051 |  | 
|---|
| 3052 | imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0)) | 
|---|
| 3053 | { | 
|---|
| 3054 | const uint32_t width = src.get_width(); | 
|---|
| 3055 | const uint32_t height = src.get_height(); | 
|---|
| 3056 |  | 
|---|
| 3057 | resize(width, height); | 
|---|
| 3058 |  | 
|---|
| 3059 | for (int y = 0; y < (int)height; y++) | 
|---|
| 3060 | { | 
|---|
| 3061 | for (uint32_t x = 0; x < width; x++) | 
|---|
| 3062 | { | 
|---|
| 3063 | const color_rgba &src_pixel = src(x, y); | 
|---|
| 3064 | (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); | 
|---|
| 3065 | } | 
|---|
| 3066 | } | 
|---|
| 3067 |  | 
|---|
| 3068 | return *this; | 
|---|
| 3069 | } | 
|---|
| 3070 |  | 
|---|
| 3071 | imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) | 
|---|
| 3072 | { | 
|---|
| 3073 | return resize(other.get_width(), other.get_height(), p, background); | 
|---|
| 3074 | } | 
|---|
| 3075 |  | 
|---|
| 3076 | imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) | 
|---|
| 3077 | { | 
|---|
| 3078 | return crop(w, h, p, background); | 
|---|
| 3079 | } | 
|---|
| 3080 |  | 
|---|
| 3081 | imagef &set_all(const vec4F &c) | 
|---|
| 3082 | { | 
|---|
| 3083 | for (uint32_t i = 0; i < m_pixels.size(); i++) | 
|---|
| 3084 | m_pixels[i] = c; | 
|---|
| 3085 | return *this; | 
|---|
| 3086 | } | 
|---|
| 3087 |  | 
|---|
| 3088 | imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c) | 
|---|
| 3089 | { | 
|---|
| 3090 | for (uint32_t iy = 0; iy < h; iy++) | 
|---|
| 3091 | for (uint32_t ix = 0; ix < w; ix++) | 
|---|
| 3092 | set_clipped(x + ix, y + iy, c); | 
|---|
| 3093 | return *this; | 
|---|
| 3094 | } | 
|---|
| 3095 |  | 
|---|
| 3096 | imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) | 
|---|
| 3097 | { | 
|---|
| 3098 | if (p == UINT32_MAX) | 
|---|
| 3099 | p = w; | 
|---|
| 3100 |  | 
|---|
| 3101 | if ((w == m_width) && (m_height == h) && (m_pitch == p)) | 
|---|
| 3102 | return *this; | 
|---|
| 3103 |  | 
|---|
| 3104 | if ((!w) || (!h) || (!p)) | 
|---|
| 3105 | { | 
|---|
| 3106 | clear(); | 
|---|
| 3107 | return *this; | 
|---|
| 3108 | } | 
|---|
| 3109 |  | 
|---|
| 3110 | vec4F_vec cur_state; | 
|---|
| 3111 | cur_state.swap(m_pixels); | 
|---|
| 3112 |  | 
|---|
| 3113 | m_pixels.resize(p * h); | 
|---|
| 3114 |  | 
|---|
| 3115 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 3116 | { | 
|---|
| 3117 | for (uint32_t x = 0; x < w; x++) | 
|---|
| 3118 | { | 
|---|
| 3119 | if ((x < m_width) && (y < m_height)) | 
|---|
| 3120 | m_pixels[x + y * p] = cur_state[x + y * m_pitch]; | 
|---|
| 3121 | else | 
|---|
| 3122 | m_pixels[x + y * p] = background; | 
|---|
| 3123 | } | 
|---|
| 3124 | } | 
|---|
| 3125 |  | 
|---|
| 3126 | m_width = w; | 
|---|
| 3127 | m_height = h; | 
|---|
| 3128 | m_pitch = p; | 
|---|
| 3129 |  | 
|---|
| 3130 | return *this; | 
|---|
| 3131 | } | 
|---|
| 3132 |  | 
|---|
| 3133 | inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } | 
|---|
| 3134 | inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } | 
|---|
| 3135 |  | 
|---|
| 3136 | inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } | 
|---|
| 3137 | inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } | 
|---|
| 3138 |  | 
|---|
| 3139 | inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const | 
|---|
| 3140 | { | 
|---|
| 3141 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); | 
|---|
| 3142 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); | 
|---|
| 3143 | return m_pixels[x + y * m_pitch]; | 
|---|
| 3144 | } | 
|---|
| 3145 |  | 
|---|
| 3146 | inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) | 
|---|
| 3147 | { | 
|---|
| 3148 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); | 
|---|
| 3149 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); | 
|---|
| 3150 | return m_pixels[x + y * m_pitch]; | 
|---|
| 3151 | } | 
|---|
| 3152 |  | 
|---|
| 3153 | inline imagef &set_clipped(int x, int y, const vec4F &c) | 
|---|
| 3154 | { | 
|---|
| 3155 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) | 
|---|
| 3156 | (*this)(x, y) = c; | 
|---|
| 3157 | return *this; | 
|---|
| 3158 | } | 
|---|
| 3159 |  | 
|---|
| 3160 | // Very straightforward blit with full clipping. Not fast, but it works. | 
|---|
| 3161 | imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) | 
|---|
| 3162 | { | 
|---|
| 3163 | for (int y = 0; y < src_h; y++) | 
|---|
| 3164 | { | 
|---|
| 3165 | const int sy = src_y + y; | 
|---|
| 3166 | if (sy < 0) | 
|---|
| 3167 | continue; | 
|---|
| 3168 | else if (sy >= (int)src.get_height()) | 
|---|
| 3169 | break; | 
|---|
| 3170 |  | 
|---|
| 3171 | for (int x = 0; x < src_w; x++) | 
|---|
| 3172 | { | 
|---|
| 3173 | const int sx = src_x + x; | 
|---|
| 3174 | if (sx < 0) | 
|---|
| 3175 | continue; | 
|---|
| 3176 | else if (sx >= (int)src.get_height()) | 
|---|
| 3177 | break; | 
|---|
| 3178 |  | 
|---|
| 3179 | set_clipped(dst_x + x, dst_y + y, src(sx, sy)); | 
|---|
| 3180 | } | 
|---|
| 3181 | } | 
|---|
| 3182 |  | 
|---|
| 3183 | return *this; | 
|---|
| 3184 | } | 
|---|
| 3185 |  | 
|---|
| 3186 | const imagef &(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const | 
|---|
| 3187 | { | 
|---|
| 3188 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 3189 | for (uint32_t x = 0; x < w; x++) | 
|---|
| 3190 | *pDst++ = get_clamped(src_x + x, src_y + y); | 
|---|
| 3191 | return *this; | 
|---|
| 3192 | } | 
|---|
| 3193 |  | 
|---|
| 3194 | imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) | 
|---|
| 3195 | { | 
|---|
| 3196 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 3197 | for (uint32_t x = 0; x < w; x++) | 
|---|
| 3198 | set_clipped(dst_x + x, dst_y + y, *pSrc++); | 
|---|
| 3199 | return *this; | 
|---|
| 3200 | } | 
|---|
| 3201 |  | 
|---|
| 3202 | inline uint32_t get_width() const { return m_width; } | 
|---|
| 3203 | inline uint32_t get_height() const { return m_height; } | 
|---|
| 3204 | inline uint32_t get_pitch() const { return m_pitch; } | 
|---|
| 3205 | inline uint32_t get_total_pixels() const { return m_width * m_height; } | 
|---|
| 3206 |  | 
|---|
| 3207 | inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } | 
|---|
| 3208 | inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } | 
|---|
| 3209 | inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } | 
|---|
| 3210 |  | 
|---|
| 3211 | inline const vec4F_vec &get_pixels() const { return m_pixels; } | 
|---|
| 3212 | inline vec4F_vec &get_pixels() { return m_pixels; } | 
|---|
| 3213 |  | 
|---|
| 3214 | inline const vec4F *get_ptr() const { return &m_pixels[0]; } | 
|---|
| 3215 | inline vec4F *get_ptr() { return &m_pixels[0]; } | 
|---|
| 3216 |  | 
|---|
| 3217 | private: | 
|---|
| 3218 | uint32_t m_width, m_height, m_pitch;  // all in pixels | 
|---|
| 3219 | vec4F_vec m_pixels; | 
|---|
| 3220 | }; | 
|---|
| 3221 |  | 
|---|
| 3222 | // Image metrics | 
|---|
| 3223 |  | 
|---|
| 3224 | class image_metrics | 
|---|
| 3225 | { | 
|---|
| 3226 | public: | 
|---|
| 3227 | // TODO: Add ssim | 
|---|
| 3228 | float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; | 
|---|
| 3229 |  | 
|---|
| 3230 | image_metrics() | 
|---|
| 3231 | { | 
|---|
| 3232 | clear(); | 
|---|
| 3233 | } | 
|---|
| 3234 |  | 
|---|
| 3235 | void clear() | 
|---|
| 3236 | { | 
|---|
| 3237 | m_max = 0; | 
|---|
| 3238 | m_mean = 0; | 
|---|
| 3239 | m_mean_squared = 0; | 
|---|
| 3240 | m_rms = 0; | 
|---|
| 3241 | m_psnr = 0; | 
|---|
| 3242 | m_ssim = 0; | 
|---|
| 3243 | } | 
|---|
| 3244 |  | 
|---|
| 3245 | void print(const char *pPrefix = nullptr)	{ printf( "%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr);	} | 
|---|
| 3246 |  | 
|---|
| 3247 | void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false); | 
|---|
| 3248 | }; | 
|---|
| 3249 |  | 
|---|
| 3250 | // Image saving/loading/resampling | 
|---|
| 3251 |  | 
|---|
| 3252 | bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr); | 
|---|
| 3253 | bool load_png(const char* pFilename, image& img); | 
|---|
| 3254 | inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } | 
|---|
| 3255 |  | 
|---|
| 3256 | bool load_tga(const char* pFilename, image& img); | 
|---|
| 3257 | inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } | 
|---|
| 3258 |  | 
|---|
| 3259 | bool load_jpg(const char *pFilename, image& img); | 
|---|
| 3260 | inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } | 
|---|
| 3261 |  | 
|---|
| 3262 | // Currently loads .PNG, .TGA, or .JPG | 
|---|
| 3263 | bool load_image(const char* pFilename, image& img); | 
|---|
| 3264 | inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } | 
|---|
| 3265 |  | 
|---|
| 3266 | uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); | 
|---|
| 3267 | uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); | 
|---|
| 3268 |  | 
|---|
| 3269 | enum | 
|---|
| 3270 | { | 
|---|
| 3271 | cImageSaveGrayscale = 1, | 
|---|
| 3272 | cImageSaveIgnoreAlpha = 2 | 
|---|
| 3273 | }; | 
|---|
| 3274 |  | 
|---|
| 3275 | bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); | 
|---|
| 3276 | inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } | 
|---|
| 3277 |  | 
|---|
| 3278 | bool read_file_to_vec(const char* pFilename, uint8_vec& data); | 
|---|
| 3279 |  | 
|---|
| 3280 | bool write_data_to_file(const char* pFilename, const void* pData, size_t len); | 
|---|
| 3281 |  | 
|---|
| 3282 | inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) {	return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } | 
|---|
| 3283 |  | 
|---|
| 3284 | float linear_to_srgb(float l); | 
|---|
| 3285 | float srgb_to_linear(float s); | 
|---|
| 3286 |  | 
|---|
| 3287 | bool image_resample(const image &src, image &dst, bool srgb = false, | 
|---|
| 3288 | const char *pFilter = "lanczos4", float filter_scale = 1.0f, | 
|---|
| 3289 | bool wrapping = false, | 
|---|
| 3290 | uint32_t first_comp = 0, uint32_t num_comps = 4); | 
|---|
| 3291 |  | 
|---|
| 3292 | // Timing | 
|---|
| 3293 |  | 
|---|
| 3294 | typedef uint64_t timer_ticks; | 
|---|
| 3295 |  | 
|---|
| 3296 | class interval_timer | 
|---|
| 3297 | { | 
|---|
| 3298 | public: | 
|---|
| 3299 | interval_timer(); | 
|---|
| 3300 |  | 
|---|
| 3301 | void start(); | 
|---|
| 3302 | void stop(); | 
|---|
| 3303 |  | 
|---|
| 3304 | double get_elapsed_secs() const; | 
|---|
| 3305 | inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); } | 
|---|
| 3306 |  | 
|---|
| 3307 | static void init(); | 
|---|
| 3308 | static inline timer_ticks get_ticks_per_sec() { return g_freq; } | 
|---|
| 3309 | static timer_ticks get_ticks(); | 
|---|
| 3310 | static double ticks_to_secs(timer_ticks ticks); | 
|---|
| 3311 | static inline double ticks_to_ms(timer_ticks ticks) {	return ticks_to_secs(ticks) * 1000.0f; } | 
|---|
| 3312 |  | 
|---|
| 3313 | private: | 
|---|
| 3314 | static timer_ticks g_init_ticks, g_freq; | 
|---|
| 3315 | static double g_timer_freq; | 
|---|
| 3316 |  | 
|---|
| 3317 | timer_ticks m_start_time, m_stop_time; | 
|---|
| 3318 |  | 
|---|
| 3319 | bool m_started, m_stopped; | 
|---|
| 3320 | }; | 
|---|
| 3321 |  | 
|---|
| 3322 | // 2D array | 
|---|
| 3323 |  | 
|---|
| 3324 | template<typename T> | 
|---|
| 3325 | class vector2D | 
|---|
| 3326 | { | 
|---|
| 3327 | typedef basisu::vector<T> TVec; | 
|---|
| 3328 |  | 
|---|
| 3329 | uint32_t m_width, m_height; | 
|---|
| 3330 | TVec m_values; | 
|---|
| 3331 |  | 
|---|
| 3332 | public: | 
|---|
| 3333 | vector2D() : | 
|---|
| 3334 | m_width(0), | 
|---|
| 3335 | m_height(0) | 
|---|
| 3336 | { | 
|---|
| 3337 | } | 
|---|
| 3338 |  | 
|---|
| 3339 | vector2D(uint32_t w, uint32_t h) : | 
|---|
| 3340 | m_width(0), | 
|---|
| 3341 | m_height(0) | 
|---|
| 3342 | { | 
|---|
| 3343 | resize(w, h); | 
|---|
| 3344 | } | 
|---|
| 3345 |  | 
|---|
| 3346 | vector2D(const vector2D &other) | 
|---|
| 3347 | { | 
|---|
| 3348 | *this = other; | 
|---|
| 3349 | } | 
|---|
| 3350 |  | 
|---|
| 3351 | vector2D &operator= (const vector2D &other) | 
|---|
| 3352 | { | 
|---|
| 3353 | if (this != &other) | 
|---|
| 3354 | { | 
|---|
| 3355 | m_width = other.m_width; | 
|---|
| 3356 | m_height = other.m_height; | 
|---|
| 3357 | m_values = other.m_values; | 
|---|
| 3358 | } | 
|---|
| 3359 | return *this; | 
|---|
| 3360 | } | 
|---|
| 3361 |  | 
|---|
| 3362 | inline bool operator== (const vector2D &rhs) const | 
|---|
| 3363 | { | 
|---|
| 3364 | return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values); | 
|---|
| 3365 | } | 
|---|
| 3366 |  | 
|---|
| 3367 | inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); } | 
|---|
| 3368 |  | 
|---|
| 3369 | inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } | 
|---|
| 3370 | inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } | 
|---|
| 3371 |  | 
|---|
| 3372 | inline const T &operator[] (uint32_t i) const { return m_values[i]; } | 
|---|
| 3373 | inline T &operator[] (uint32_t i) { return m_values[i]; } | 
|---|
| 3374 |  | 
|---|
| 3375 | inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } | 
|---|
| 3376 | inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } | 
|---|
| 3377 |  | 
|---|
| 3378 | void clear() | 
|---|
| 3379 | { | 
|---|
| 3380 | m_width = 0; | 
|---|
| 3381 | m_height = 0; | 
|---|
| 3382 | m_values.clear(); | 
|---|
| 3383 | } | 
|---|
| 3384 |  | 
|---|
| 3385 | void set_all(const T&val) | 
|---|
| 3386 | { | 
|---|
| 3387 | vector_set_all(m_values, val); | 
|---|
| 3388 | } | 
|---|
| 3389 |  | 
|---|
| 3390 | inline const T* get_ptr() const { return &m_values[0]; } | 
|---|
| 3391 | inline T* get_ptr() { return &m_values[0]; } | 
|---|
| 3392 |  | 
|---|
| 3393 | vector2D &resize(uint32_t new_width, uint32_t new_height) | 
|---|
| 3394 | { | 
|---|
| 3395 | if ((m_width == new_width) && (m_height == new_height)) | 
|---|
| 3396 | return *this; | 
|---|
| 3397 |  | 
|---|
| 3398 | TVec oldVals(new_width * new_height); | 
|---|
| 3399 | oldVals.swap(m_values); | 
|---|
| 3400 |  | 
|---|
| 3401 | const uint32_t w = minimum(m_width, new_width); | 
|---|
| 3402 | const uint32_t h = minimum(m_height, new_height); | 
|---|
| 3403 |  | 
|---|
| 3404 | if ((w) && (h)) | 
|---|
| 3405 | { | 
|---|
| 3406 | for (uint32_t y = 0; y < h; y++) | 
|---|
| 3407 | for (uint32_t x = 0; x < w; x++) | 
|---|
| 3408 | m_values[x + y * new_width] = oldVals[x + y * m_width]; | 
|---|
| 3409 | } | 
|---|
| 3410 |  | 
|---|
| 3411 | m_width = new_width; | 
|---|
| 3412 | m_height = new_height; | 
|---|
| 3413 |  | 
|---|
| 3414 | return *this; | 
|---|
| 3415 | } | 
|---|
| 3416 | }; | 
|---|
| 3417 |  | 
|---|
| 3418 | inline FILE *fopen_safe(const char *pFilename, const char *pMode) | 
|---|
| 3419 | { | 
|---|
| 3420 | #ifdef _WIN32 | 
|---|
| 3421 | FILE *pFile = nullptr; | 
|---|
| 3422 | fopen_s(&pFile, pFilename, pMode); | 
|---|
| 3423 | return pFile; | 
|---|
| 3424 | #else | 
|---|
| 3425 | return fopen(pFilename, pMode); | 
|---|
| 3426 | #endif | 
|---|
| 3427 | } | 
|---|
| 3428 |  | 
|---|
| 3429 | void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); | 
|---|
| 3430 |  | 
|---|
| 3431 | const uint32_t cPixelBlockWidth = 4; | 
|---|
| 3432 | const uint32_t cPixelBlockHeight = 4; | 
|---|
| 3433 | const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight; | 
|---|
| 3434 |  | 
|---|
| 3435 | struct pixel_block | 
|---|
| 3436 | { | 
|---|
| 3437 | color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] | 
|---|
| 3438 |  | 
|---|
| 3439 | inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } | 
|---|
| 3440 | inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } | 
|---|
| 3441 |  | 
|---|
| 3442 | inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; } | 
|---|
| 3443 | inline color_rgba* get_ptr() { return &m_pixels[0][0]; } | 
|---|
| 3444 |  | 
|---|
| 3445 | inline void clear() { clear_obj(*this); } | 
|---|
| 3446 |  | 
|---|
| 3447 | inline bool operator== (const pixel_block& rhs) const | 
|---|
| 3448 | { | 
|---|
| 3449 | return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; | 
|---|
| 3450 | } | 
|---|
| 3451 | }; | 
|---|
| 3452 | typedef basisu::vector<pixel_block> pixel_block_vec; | 
|---|
| 3453 |  | 
|---|
| 3454 | } // namespace basisu | 
|---|
| 3455 |  | 
|---|
| 3456 |  | 
|---|
| 3457 |  | 
|---|