1 | // basisu_enc.h |
2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // http://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | #pragma once |
16 | #include "../transcoder/basisu.h" |
17 | #include "../transcoder/basisu_transcoder_internal.h" |
18 | |
19 | #include <mutex> |
20 | #include <atomic> |
21 | #include <condition_variable> |
22 | #include <functional> |
23 | #include <thread> |
24 | #include <unordered_map> |
25 | #include <ostream> |
26 | |
27 | #if !defined(_WIN32) || defined(__MINGW32__) |
28 | #include <libgen.h> |
29 | #endif |
30 | |
31 | // This module is really just a huge grab bag of classes and helper functions needed by the encoder. |
32 | |
33 | // If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time. |
34 | #define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0) |
35 | |
36 | #if BASISU_SUPPORT_SSE |
37 | // Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors. |
38 | extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16); |
39 | #endif |
40 | |
41 | namespace basisu |
42 | { |
43 | extern uint8_t g_hamming_dist[256]; |
44 | extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; |
45 | |
46 | // true if basisu_encoder_init() has been called and returned. |
47 | extern bool g_library_initialized; |
48 | |
49 | // Encoder library initialization. |
50 | // This function MUST be called before encoding anything! |
51 | void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false); |
52 | void basisu_encoder_deinit(); |
53 | |
54 | // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 |
55 | extern void detect_sse41(); |
56 | |
57 | #if BASISU_SUPPORT_SSE |
58 | extern bool g_cpu_supports_sse41; |
59 | #else |
60 | const bool g_cpu_supports_sse41 = false; |
61 | #endif |
62 | |
63 | void error_vprintf(const char* pFmt, va_list args); |
64 | void error_printf(const char *pFmt, ...); |
65 | |
66 | // Helpers |
67 | |
68 | inline uint8_t clamp255(int32_t i) |
69 | { |
70 | return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); |
71 | } |
72 | |
73 | inline int32_t clampi(int32_t value, int32_t low, int32_t high) |
74 | { |
75 | if (value < low) |
76 | value = low; |
77 | else if (value > high) |
78 | value = high; |
79 | return value; |
80 | } |
81 | |
82 | inline uint8_t mul_8(uint32_t v, uint32_t a) |
83 | { |
84 | v = v * a + 128; |
85 | return (uint8_t)((v + (v >> 8)) >> 8); |
86 | } |
87 | |
88 | inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) |
89 | { |
90 | assert(codesize <= 64); |
91 | uint64_t bits = 0; |
92 | uint32_t total_bits = 0; |
93 | |
94 | while (total_bits < codesize) |
95 | { |
96 | uint32_t byte_bit_offset = bit_offset & 7; |
97 | uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); |
98 | |
99 | uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; |
100 | byte_bits &= ((1 << bits_to_read) - 1); |
101 | |
102 | bits |= ((uint64_t)(byte_bits) << total_bits); |
103 | |
104 | total_bits += bits_to_read; |
105 | bit_offset += bits_to_read; |
106 | } |
107 | |
108 | return bits; |
109 | } |
110 | |
111 | inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) |
112 | { |
113 | assert(codesize <= 32); |
114 | uint32_t bits = 0; |
115 | uint32_t total_bits = 0; |
116 | |
117 | while (total_bits < codesize) |
118 | { |
119 | uint32_t byte_bit_offset = bit_offset & 7; |
120 | uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); |
121 | |
122 | uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; |
123 | byte_bits &= ((1 << bits_to_read) - 1); |
124 | |
125 | bits |= (byte_bits << total_bits); |
126 | |
127 | total_bits += bits_to_read; |
128 | bit_offset += bits_to_read; |
129 | } |
130 | |
131 | return bits; |
132 | } |
133 | |
134 | // Hashing |
135 | |
136 | inline uint32_t bitmix32c(uint32_t v) |
137 | { |
138 | v = (v + 0x7ed55d16) + (v << 12); |
139 | v = (v ^ 0xc761c23c) ^ (v >> 19); |
140 | v = (v + 0x165667b1) + (v << 5); |
141 | v = (v + 0xd3a2646c) ^ (v << 9); |
142 | v = (v + 0xfd7046c5) + (v << 3); |
143 | v = (v ^ 0xb55a4f09) ^ (v >> 16); |
144 | return v; |
145 | } |
146 | |
147 | inline uint32_t bitmix32(uint32_t v) |
148 | { |
149 | v -= (v << 6); |
150 | v ^= (v >> 17); |
151 | v -= (v << 9); |
152 | v ^= (v << 4); |
153 | v -= (v << 3); |
154 | v ^= (v << 10); |
155 | v ^= (v >> 15); |
156 | return v; |
157 | } |
158 | |
159 | inline uint32_t wang_hash(uint32_t seed) |
160 | { |
161 | seed = (seed ^ 61) ^ (seed >> 16); |
162 | seed *= 9; |
163 | seed = seed ^ (seed >> 4); |
164 | seed *= 0x27d4eb2d; |
165 | seed = seed ^ (seed >> 15); |
166 | return seed; |
167 | } |
168 | |
169 | uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); |
170 | |
	// Hashes an arbitrary Key's raw object bytes via hash_hsieh(), for use as the
	// Hash template argument of std::unordered_map/std::unordered_set.
	// NOTE(review): hashes the in-memory representation directly, so Key should be
	// trivially copyable with no padding bytes that can differ between equal keys -
	// confirm for each Key type used with this.
	template <typename Key>
	struct bit_hasher
	{
		std::size_t operator()(const Key& k) const
		{
			return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));
		}
	};
179 | |
180 | class running_stat |
181 | { |
182 | public: |
183 | running_stat() { clear(); } |
184 | |
185 | void clear() |
186 | { |
187 | m_n = 0; |
188 | m_total = 0; |
189 | m_old_m = 0; |
190 | m_new_m = 0; |
191 | m_old_s = 0; |
192 | m_new_s = 0; |
193 | m_min = 0; |
194 | m_max = 0; |
195 | } |
196 | |
197 | void push(double x) |
198 | { |
199 | m_n++; |
200 | m_total += x; |
201 | if (m_n == 1) |
202 | { |
203 | m_old_m = m_new_m = x; |
204 | m_old_s = 0.0; |
205 | m_min = x; |
206 | m_max = x; |
207 | } |
208 | else |
209 | { |
210 | // See Knuth TAOCP vol 2, 3rd edition, page 232 |
211 | m_new_m = m_old_m + (x - m_old_m) / m_n; |
212 | m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m); |
213 | m_old_m = m_new_m; |
214 | m_old_s = m_new_s; |
215 | m_min = basisu::minimum(x, m_min); |
216 | m_max = basisu::maximum(x, m_max); |
217 | } |
218 | } |
219 | |
220 | uint32_t get_num() const |
221 | { |
222 | return m_n; |
223 | } |
224 | |
225 | double get_total() const |
226 | { |
227 | return m_total; |
228 | } |
229 | |
230 | double get_mean() const |
231 | { |
232 | return (m_n > 0) ? m_new_m : 0.0; |
233 | } |
234 | |
235 | // Returns sample variance |
236 | double get_variance() const |
237 | { |
238 | return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0); |
239 | } |
240 | |
241 | double get_std_dev() const |
242 | { |
243 | return sqrt(get_variance()); |
244 | } |
245 | |
246 | double get_min() const |
247 | { |
248 | return m_min; |
249 | } |
250 | |
251 | double get_max() const |
252 | { |
253 | return m_max; |
254 | } |
255 | |
256 | private: |
257 | uint32_t m_n; |
258 | double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max; |
259 | }; |
260 | |
261 | // Linear algebra |
262 | |
263 | template <uint32_t N, typename T> |
264 | class vec |
265 | { |
266 | protected: |
267 | T m_v[N]; |
268 | |
269 | public: |
270 | enum { num_elements = N }; |
271 | |
272 | inline vec() { } |
273 | inline vec(eZero) { set_zero(); } |
274 | |
275 | explicit inline vec(T val) { set(val); } |
276 | inline vec(T v0, T v1) { set(v0, v1); } |
277 | inline vec(T v0, T v1, T v2) { set(v0, v1, v2); } |
278 | inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); } |
279 | inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; } |
280 | template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); } |
281 | |
282 | inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; } |
283 | inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; } |
284 | |
285 | inline T getX() const { return m_v[0]; } |
286 | inline T getY() const { static_assert(N >= 2, "N too small" ); return m_v[1]; } |
287 | inline T getZ() const { static_assert(N >= 3, "N too small" ); return m_v[2]; } |
288 | inline T getW() const { static_assert(N >= 4, "N too small" ); return m_v[3]; } |
289 | |
290 | inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; } |
291 | inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } |
292 | |
293 | inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } |
294 | |
295 | template <uint32_t OtherN, typename OtherT> |
296 | inline vec &set(const vec<OtherN, OtherT> &other) |
297 | { |
298 | uint32_t i; |
299 | if ((const void *)(&other) == (const void *)(this)) |
300 | return *this; |
301 | const uint32_t m = minimum(OtherN, N); |
302 | for (i = 0; i < m; i++) |
303 | m_v[i] = static_cast<T>(other[i]); |
304 | for (; i < N; i++) |
305 | m_v[i] = 0; |
306 | return *this; |
307 | } |
308 | |
309 | inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; } |
310 | inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; } |
311 | inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; } |
312 | |
313 | inline vec &set(T v0, T v1) |
314 | { |
315 | m_v[0] = v0; |
316 | if (N >= 2) |
317 | { |
318 | m_v[1] = v1; |
319 | clear_elements(2, N); |
320 | } |
321 | return *this; |
322 | } |
323 | |
324 | inline vec &set(T v0, T v1, T v2) |
325 | { |
326 | m_v[0] = v0; |
327 | if (N >= 2) |
328 | { |
329 | m_v[1] = v1; |
330 | if (N >= 3) |
331 | { |
332 | m_v[2] = v2; |
333 | clear_elements(3, N); |
334 | } |
335 | } |
336 | return *this; |
337 | } |
338 | |
339 | inline vec &set(T v0, T v1, T v2, T v3) |
340 | { |
341 | m_v[0] = v0; |
342 | if (N >= 2) |
343 | { |
344 | m_v[1] = v1; |
345 | if (N >= 3) |
346 | { |
347 | m_v[2] = v2; |
348 | |
349 | if (N >= 4) |
350 | { |
351 | m_v[3] = v3; |
352 | clear_elements(5, N); |
353 | } |
354 | } |
355 | } |
356 | return *this; |
357 | } |
358 | |
359 | inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; } |
360 | template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; } |
361 | |
362 | inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); } |
363 | inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); } |
364 | |
365 | inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } |
366 | inline vec operator+ () const { return *this; } |
367 | inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } |
368 | inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; } |
369 | inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; } |
370 | inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } |
371 | inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } |
372 | inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } |
373 | |
374 | friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } |
375 | friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } |
376 | friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } |
377 | friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } |
378 | friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } |
379 | friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } |
380 | |
381 | static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } |
382 | |
383 | inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } |
384 | |
385 | inline T norm() const { return dot_product(*this, *this); } |
386 | inline T length() const { return sqrt(norm()); } |
387 | |
388 | inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; } |
389 | inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; } |
390 | |
391 | inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); } |
392 | inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); } |
393 | |
394 | inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } |
395 | |
396 | inline vec &clamp(T l, T h) |
397 | { |
398 | for (uint32_t i = 0; i < N; i++) |
399 | m_v[i] = basisu::clamp(m_v[i], l, h); |
400 | return *this; |
401 | } |
402 | |
403 | static vec component_min(const vec& a, const vec& b) |
404 | { |
405 | vec res; |
406 | for (uint32_t i = 0; i < N; i++) |
407 | res[i] = minimum(a[i], b[i]); |
408 | return res; |
409 | } |
410 | |
411 | static vec component_max(const vec& a, const vec& b) |
412 | { |
413 | vec res; |
414 | for (uint32_t i = 0; i < N; i++) |
415 | res[i] = maximum(a[i], b[i]); |
416 | return res; |
417 | } |
418 | }; |
419 | |
420 | typedef vec<4, double> vec4D; |
421 | typedef vec<3, double> vec3D; |
422 | typedef vec<2, double> vec2D; |
423 | typedef vec<1, double> vec1D; |
424 | |
425 | typedef vec<4, float> vec4F; |
426 | typedef vec<3, float> vec3F; |
427 | typedef vec<2, float> vec2F; |
428 | typedef vec<1, float> vec1F; |
429 | |
430 | typedef vec<16, float> vec16F; |
431 | |
432 | template <uint32_t Rows, uint32_t Cols, typename T> |
433 | class matrix |
434 | { |
435 | public: |
436 | typedef vec<Rows, T> col_vec; |
437 | typedef vec<Cols, T> row_vec; |
438 | |
439 | typedef T scalar_type; |
440 | |
441 | enum { rows = Rows, cols = Cols }; |
442 | |
443 | protected: |
444 | row_vec m_r[Rows]; |
445 | |
446 | public: |
447 | inline matrix() {} |
448 | inline matrix(eZero) { set_zero(); } |
449 | inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; } |
450 | inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; } |
451 | |
452 | inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } |
453 | inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } |
454 | |
455 | inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; } |
456 | inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; } |
457 | |
458 | inline matrix &set_zero() |
459 | { |
460 | for (uint32_t i = 0; i < Rows; i++) |
461 | m_r[i].set_zero(); |
462 | return *this; |
463 | } |
464 | |
465 | inline matrix &set_identity() |
466 | { |
467 | for (uint32_t i = 0; i < Rows; i++) |
468 | { |
469 | m_r[i].set_zero(); |
470 | if (i < Cols) |
471 | m_r[i][i] = 1.0f; |
472 | } |
473 | return *this; |
474 | } |
475 | }; |
476 | |
	// Approximates the principal axis (dominant eigenvector) of the NxN covariance matrix
	// "cmatrix" using up to 8 rounds of power iteration, returning it normalized to unit length.
	// NOTE(review): cmatrix is taken by non-const reference but is not modified here.
	template<uint32_t N, typename VectorType>
	inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)
	{
		VectorType axis;
		if (N == 1)
			axis.set(1.0f);
		else
		{
			// Deterministic, non-degenerate starting guess: components ramp from .75 to 1.25.
			for (uint32_t i = 0; i < N; i++)
				axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1)));
		}

		VectorType prev_axis(axis);

		// Power iterations
		for (uint32_t power_iter = 0; power_iter < 8; power_iter++)
		{
			// trial_axis = cmatrix * axis, accumulated in doubles; track the largest |component|.
			VectorType trial_axis;
			double max_sum = 0;

			for (uint32_t i = 0; i < N; i++)
			{
				double sum = 0;
				for (uint32_t j = 0; j < N; j++)
					sum += cmatrix[i][j] * axis[j];

				trial_axis[i] = static_cast<float>(sum);

				max_sum = maximum(fabs(sum), max_sum);
			}

			// Rescale so the largest component has magnitude ~1 (keeps iterates well-conditioned).
			if (max_sum != 0.0f)
				trial_axis *= static_cast<float>(1.0f / max_sum);

			// Convergence test deliberately compares trial_axis against the axis from the
			// previous iteration (prev_axis), before prev_axis/axis are rotated below.
			VectorType delta_axis(prev_axis - trial_axis);

			prev_axis = axis;
			axis = trial_axis;

			if (delta_axis.norm() < .0024f)
				break;
		}

		return axis.normalize_in_place();
	}
522 | |
523 | template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys) |
524 | { |
525 | for (uint32_t i = 0; i < num_indices; i++) |
526 | pIndices[i] = i; |
527 | |
528 | std::sort( |
529 | pIndices, |
530 | pIndices + num_indices, |
531 | [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; } |
532 | ); |
533 | } |
534 | |
	// 1-4 byte direct Radix sort.
	// Sorts num_vals records in pBuf0 by an unsigned little-endian integer key embedded
	// key_ofs bytes into each record, key_size bytes wide (1-4). pBuf1 is scratch space of
	// the same size. Returns whichever buffer (pBuf0 or pBuf1) holds the sorted output
	// after the final pass. Histograms for all passes are gathered in a single up-front
	// scan; inner loops are manually unrolled 2x, hence the duplicated even/odd handling.
	template <typename T>
	T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size)
	{
		assert(key_ofs < sizeof(T));
		assert((key_size >= 1) && (key_size <= 4));

		// One 256-entry histogram per pass (one pass per key byte).
		uint32_t hist[256 * 4];

		memset(hist, 0, sizeof(hist[0]) * 256 * key_size);

// Reads the key as an unaligned 32-bit load; bytes above key_size are ignored by the
// per-pass shifts/masks below.
// NOTE(review): assumes the platform tolerates unaligned uint32_t loads and that at
// least 4 bytes are readable at key_ofs within each record - confirm for new uses.
#define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs))

		// Histogram pass: count byte occurrences for every key byte in one sweep.
		if (key_size == 4)
		{
			T* p = pBuf0;
			T* q = pBuf0 + num_vals;
			for (; p != q; p++)
			{
				const uint32_t key = BASISU_GET_KEY(p);

				hist[key & 0xFF]++;
				hist[256 + ((key >> 8) & 0xFF)]++;
				hist[512 + ((key >> 16) & 0xFF)]++;
				hist[768 + ((key >> 24) & 0xFF)]++;
			}
		}
		else if (key_size == 3)
		{
			T* p = pBuf0;
			T* q = pBuf0 + num_vals;
			for (; p != q; p++)
			{
				const uint32_t key = BASISU_GET_KEY(p);

				hist[key & 0xFF]++;
				hist[256 + ((key >> 8) & 0xFF)]++;
				hist[512 + ((key >> 16) & 0xFF)]++;
			}
		}
		else if (key_size == 2)
		{
			// 2x unrolled: process pairs, then the odd trailing element (if any).
			T* p = pBuf0;
			T* q = pBuf0 + (num_vals >> 1) * 2;

			for (; p != q; p += 2)
			{
				const uint32_t key0 = BASISU_GET_KEY(p);
				const uint32_t key1 = BASISU_GET_KEY(p + 1);

				hist[key0 & 0xFF]++;
				hist[256 + ((key0 >> 8) & 0xFF)]++;

				hist[key1 & 0xFF]++;
				hist[256 + ((key1 >> 8) & 0xFF)]++;
			}

			if (num_vals & 1)
			{
				const uint32_t key = BASISU_GET_KEY(p);

				hist[key & 0xFF]++;
				hist[256 + ((key >> 8) & 0xFF)]++;
			}
		}
		else
		{
			assert(key_size == 1);
			if (key_size != 1)
				return NULL;

			T* p = pBuf0;
			T* q = pBuf0 + (num_vals >> 1) * 2;

			for (; p != q; p += 2)
			{
				const uint32_t key0 = BASISU_GET_KEY(p);
				const uint32_t key1 = BASISU_GET_KEY(p + 1);

				hist[key0 & 0xFF]++;
				hist[key1 & 0xFF]++;
			}

			if (num_vals & 1)
			{
				const uint32_t key = BASISU_GET_KEY(p);
				hist[key & 0xFF]++;
			}
		}

		// Scatter passes: ping-pong between the two buffers, one pass per key byte (LSB first).
		T* pCur = pBuf0;
		T* pNew = pBuf1;

		for (uint32_t pass = 0; pass < key_size; pass++)
		{
			const uint32_t* pHist = &hist[pass << 8];

			// Prefix-sum the histogram into starting offsets for each byte value.
			uint32_t offsets[256];

			uint32_t cur_ofs = 0;
			for (uint32_t i = 0; i < 256; i += 2)
			{
				offsets[i] = cur_ofs;
				cur_ofs += pHist[i];

				offsets[i + 1] = cur_ofs;
				cur_ofs += pHist[i + 1];
			}

			const uint32_t pass_shift = pass << 3;

			T* p = pCur;
			T* q = pCur + (num_vals >> 1) * 2;

			for (; p != q; p += 2)
			{
				uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
				uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF;

				// Fast path when both elements land in the same bucket.
				if (c0 == c1)
				{
					uint32_t dst_offset0 = offsets[c0];

					offsets[c0] = dst_offset0 + 2;

					pNew[dst_offset0] = p[0];
					pNew[dst_offset0 + 1] = p[1];
				}
				else
				{
					uint32_t dst_offset0 = offsets[c0]++;
					uint32_t dst_offset1 = offsets[c1]++;

					pNew[dst_offset0] = p[0];
					pNew[dst_offset1] = p[1];
				}
			}

			// Handle the odd trailing element.
			if (num_vals & 1)
			{
				uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;

				uint32_t dst_offset = offsets[c];
				offsets[c] = dst_offset + 1;

				pNew[dst_offset] = *p;
			}

			// Swap source/destination buffers for the next pass.
			T* t = pCur;
			pCur = pNew;
			pNew = t;
		}

		return pCur;
	}
690 | |
691 | #undef BASISU_GET_KEY |
692 | |
	// Very simple job pool with no dependencies.
	// Declarations only - the method bodies live in the corresponding .cpp file.
	class job_pool
	{
		BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool);

	public:
		// num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc.
		job_pool(uint32_t num_threads);
		~job_pool();

		// Queues a job (callable with no arguments/result) for execution by a pool thread.
		void add_job(const std::function<void()>& job);
		void add_job(std::function<void()>&& job);

		// Blocks until all queued and in-flight jobs have completed.
		void wait_for_all();

		// Total thread count: the calling thread plus the spawned worker threads.
		size_t get_total_threads() const { return 1 + m_threads.size(); }

	private:
		std::vector<std::thread> m_threads;				// worker threads (excludes the calling thread)
		std::vector<std::function<void()> > m_queue;	// pending jobs; presumably guarded by m_mutex - see .cpp

		std::mutex m_mutex;
		std::condition_variable m_has_work;				// presumably signaled when a job is queued - see .cpp
		std::condition_variable m_no_more_jobs;			// presumably signaled when the pool drains - see .cpp

		uint32_t m_num_active_jobs;						// jobs queued or currently executing

		std::atomic<bool> m_kill_flag;					// set to ask worker threads to exit

		void job_thread(uint32_t index);
	};
724 | |
725 | // Simple 32-bit color class |
726 | |
727 | class color_rgba_i16 |
728 | { |
729 | public: |
730 | union |
731 | { |
732 | int16_t m_comps[4]; |
733 | |
734 | struct |
735 | { |
736 | int16_t r; |
737 | int16_t g; |
738 | int16_t b; |
739 | int16_t a; |
740 | }; |
741 | }; |
742 | |
743 | inline color_rgba_i16() |
744 | { |
745 | static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4" ); |
746 | } |
747 | |
748 | inline color_rgba_i16(int sr, int sg, int sb, int sa) |
749 | { |
750 | set(sr, sg, sb, sa); |
751 | } |
752 | |
753 | inline color_rgba_i16 &set(int sr, int sg, int sb, int sa) |
754 | { |
755 | m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX); |
756 | m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX); |
757 | m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX); |
758 | m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX); |
759 | return *this; |
760 | } |
761 | }; |
762 | |
	// 32-bit RGBA color, 8 bits per component. Components are accessible either via
	// m_comps[0..3] (0=r, 1=g, 2=b, 3=a) or as the .r/.g/.b/.a members of the anonymous struct.
	class color_rgba
	{
	public:
		union
		{
			uint8_t m_comps[4];

			struct
			{
				uint8_t r;
				uint8_t g;
				uint8_t b;
				uint8_t a;
			};
		};

		// Intentionally leaves the components uninitialized; only validates layout assumptions.
		inline color_rgba()
		{
			static_assert(sizeof(*this) == 4, "sizeof(*this) != 4" );
			static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)" );
		}

		// Not too hot about this idea.
		// Implicit conversion from the transcoder's color32 type.
		inline color_rgba(const basist::color32& other) :
			r(other.r),
			g(other.g),
			b(other.b),
			a(other.a)
		{
		}

		color_rgba& operator= (const basist::color32& rhs)
		{
			r = rhs.r;
			g = rhs.g;
			b = rhs.b;
			a = rhs.a;
			return *this;
		}

		// Grayscale: r=g=b=clamped y, a=255.
		inline color_rgba(int y)
		{
			set(y);
		}

		// Grayscale with explicit alpha (both clamped).
		inline color_rgba(int y, int na)
		{
			set(y, na);
		}

		inline color_rgba(int sr, int sg, int sb, int sa)
		{
			set(sr, sg, sb, sa);
		}

		// No clamping - caller guarantees the inputs are already valid bytes.
		inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
		{
			set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);
		}

		// Grayscale without clamping; alpha forced to 255.
		inline color_rgba& set_noclamp_y(int y)
		{
			m_comps[0] = (uint8_t)y;
			m_comps[1] = (uint8_t)y;
			m_comps[2] = (uint8_t)y;
			m_comps[3] = (uint8_t)255;
			return *this;
		}

		inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
		{
			m_comps[0] = (uint8_t)sr;
			m_comps[1] = (uint8_t)sg;
			m_comps[2] = (uint8_t)sb;
			m_comps[3] = (uint8_t)sa;
			return *this;
		}

		// Grayscale set: r=g=b=clamp(y, 0, 255), alpha forced to 255.
		inline color_rgba &set(int y)
		{
			m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
			m_comps[1] = m_comps[0];
			m_comps[2] = m_comps[0];
			m_comps[3] = 255;
			return *this;
		}

		inline color_rgba &set(int y, int na)
		{
			m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
			m_comps[1] = m_comps[0];
			m_comps[2] = m_comps[0];
			m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));
			return *this;
		}

		inline color_rgba &set(int sr, int sg, int sb, int sa)
		{
			m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
			m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
			m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
			m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));
			return *this;
		}

		// Sets RGB (clamped); alpha is left unchanged.
		inline color_rgba &set_rgb(int sr, int sg, int sb)
		{
			m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
			m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
			m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
			return *this;
		}

		// Copies RGB from another color; alpha is left unchanged.
		inline color_rgba &set_rgb(const color_rgba &other)
		{
			r = other.r;
			g = other.g;
			b = other.b;
			return *this;
		}

		inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }
		inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }

		// Sets all four components (including alpha) to 0.
		inline void clear()
		{
			m_comps[0] = 0;
			m_comps[1] = 0;
			m_comps[2] = 0;
			m_comps[3] = 0;
		}

		inline bool operator== (const color_rgba &rhs) const
		{
			if (m_comps[0] != rhs.m_comps[0]) return false;
			if (m_comps[1] != rhs.m_comps[1]) return false;
			if (m_comps[2] != rhs.m_comps[2]) return false;
			if (m_comps[3] != rhs.m_comps[3]) return false;
			return true;
		}

		inline bool operator!= (const color_rgba &rhs) const
		{
			return !(*this == rhs);
		}

		// Lexicographic ordering over (r, g, b, a), for use in ordered containers.
		inline bool operator<(const color_rgba &rhs) const
		{
			for (int i = 0; i < 4; i++)
			{
				if (m_comps[i] < rhs.m_comps[i])
					return true;
				else if (m_comps[i] != rhs.m_comps[i])
					return false;
			}
			return false;
		}

		// 16-bit fixed-point REC 601 luma (~.299r + .587g + .114b), rounded.
		inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; }
		// 16-bit fixed-point REC 709 luma (~.2126r + .7152g + .0722b), rounded.
		inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }
		inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }

		// Converts to the transcoder's color32 type.
		inline basist::color32 get_color32() const
		{
			return basist::color32(r, g, b, a);
		}

		// Component-wise min/max of two colors.
		static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
		static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
	};
933 | |
934 | typedef basisu::vector<color_rgba> color_rgba_vec; |
935 | |
936 | const color_rgba g_black_color(0, 0, 0, 255); |
937 | const color_rgba g_black_trans_color(0, 0, 0, 0); |
938 | const color_rgba g_white_color(255, 255, 255, 255); |
939 | |
940 | inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) |
941 | { |
942 | int dr = r0 - r1, dg = g0 - g1, db = b0 - b1; |
943 | return dr * dr + dg * dg + db * db; |
944 | } |
945 | |
946 | inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1) |
947 | { |
948 | int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1; |
949 | return dr * dr + dg * dg + db * db + da * da; |
950 | } |
951 | |
952 | inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha) |
953 | { |
954 | if (alpha) |
955 | return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); |
956 | else |
957 | return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); |
958 | } |
959 | |
	// TODO: Allow user to control channel weightings.
	// Squared color error between e1 and e2. In perceptual mode the error is computed in a
	// luma/chroma-style space with luma weighted most heavily; otherwise it's plain squared
	// RGB(A) distance. The preprocessor branches select precision/speed variants.
	inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)
	{
		if (perceptual)
		{
#if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE
			// Float path: REC 709-style luma plus Cr/Cb-style chroma deltas with per-channel weights.
			const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;
			const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;

			const float cr1 = e1.r - l1;
			const float cr2 = e2.r - l2;

			const float cb1 = e1.b - l1;
			const float cb2 = e2.b - l2;

			const float dl = l1 - l2;
			const float dcr = cr1 - cr2;
			const float dcb = cb1 - cb2;

			uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);

			if (alpha)
			{
				int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);
				d += static_cast<uint32_t>(128.0f*da*da);
			}

			return d;
#elif 1
			// Default fixed-point path: 7-bit fixed-point luma/chroma deltas.
			int dr = e1.r - e2.r;
			int dg = e1.g - e2.g;
			int db = e1.b - e2.b;

#if 0
			// Disabled 32-bit-int variant, kept for reference.
			int delta_l = dr * 27 + dg * 92 + db * 9;
			int delta_cr = dr * 128 - delta_l;
			int delta_cb = db * 128 - delta_l;

			uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) +
				((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) +
				((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U);
#else
			// 64-bit intermediates so the squared terms cannot overflow before the shifts.
			int64_t delta_l = dr * 27 + dg * 92 + db * 9;
			int64_t delta_cr = dr * 128 - delta_l;
			int64_t delta_cb = db * 128 - delta_l;

			uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) +
				((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) +
				((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U);
#endif

			if (alpha)
			{
				int da = (e1.a - e2.a) << 7;
				// This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring.
				id += ((uint32_t)(da * da) >> 7U);
			}

			return id;
#else
			// Alternate disabled variant: full 64-bit accumulation with a final rounded rescale.
			int dr = e1.r - e2.r;
			int dg = e1.g - e2.g;
			int db = e1.b - e2.b;

			int64_t delta_l = dr * 27 + dg * 92 + db * 9;
			int64_t delta_cr = dr * 128 - delta_l;
			int64_t delta_cb = db * 128 - delta_l;

			int64_t id = ((delta_l * delta_l) * 128) +
				((delta_cr * delta_cr) * 26) +
				((delta_cb * delta_cb) * 3);

			if (alpha)
			{
				int64_t da = (e1.a - e2.a);
				id += (da * da) * 128;
			}

			int d = (id + 8192) >> 14;

			return d;
#endif
		}
		else
			return color_distance(e1, e2, alpha);
	}
1046 | |
1047 | static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b) |
1048 | { |
1049 | const int dl = a.r - b.r; |
1050 | const int da = a.a - b.a; |
1051 | return dl * dl + da * da; |
1052 | } |
1053 | |
1054 | // String helpers |
1055 | |
1056 | inline int string_find_right(const std::string& filename, char c) |
1057 | { |
1058 | size_t result = filename.find_last_of(c); |
1059 | return (result == std::string::npos) ? -1 : (int)result; |
1060 | } |
1061 | |
1062 | inline std::string string_get_extension(const std::string &filename) |
1063 | { |
1064 | int sep = -1; |
1065 | #ifdef _WIN32 |
1066 | sep = string_find_right(filename, '\\'); |
1067 | #endif |
1068 | if (sep < 0) |
1069 | sep = string_find_right(filename, '/'); |
1070 | |
1071 | int dot = string_find_right(filename, '.'); |
1072 | if (dot <= sep) |
1073 | return "" ; |
1074 | |
1075 | std::string result(filename); |
1076 | result.erase(0, dot + 1); |
1077 | |
1078 | return result; |
1079 | } |
1080 | |
1081 | inline bool string_remove_extension(std::string &filename) |
1082 | { |
1083 | int sep = -1; |
1084 | #ifdef _WIN32 |
1085 | sep = string_find_right(filename, '\\'); |
1086 | #endif |
1087 | if (sep < 0) |
1088 | sep = string_find_right(filename, '/'); |
1089 | |
1090 | int dot = string_find_right(filename, '.'); |
1091 | if ((dot < sep) || (dot < 0)) |
1092 | return false; |
1093 | |
1094 | filename.resize(dot); |
1095 | |
1096 | return true; |
1097 | } |
1098 | |
1099 | inline std::string string_format(const char* pFmt, ...) |
1100 | { |
1101 | char buf[2048]; |
1102 | |
1103 | va_list args; |
1104 | va_start(args, pFmt); |
1105 | #ifdef _WIN32 |
1106 | vsprintf_s(buf, sizeof(buf), pFmt, args); |
1107 | #else |
1108 | vsnprintf(buf, sizeof(buf), pFmt, args); |
1109 | #endif |
1110 | va_end(args); |
1111 | |
1112 | return std::string(buf); |
1113 | } |
1114 | |
1115 | inline std::string string_tolower(const std::string& s) |
1116 | { |
1117 | std::string result(s); |
1118 | for (size_t i = 0; i < result.size(); i++) |
1119 | result[i] = (char)tolower((int)result[i]); |
1120 | return result; |
1121 | } |
1122 | |
1123 | inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc) |
1124 | { |
1125 | assert(pDst && pSrc && dst_len); |
1126 | if (!dst_len) |
1127 | return pDst; |
1128 | |
1129 | const size_t src_len = strlen(pSrc); |
1130 | const size_t src_len_plus_terminator = src_len + 1; |
1131 | |
1132 | if (src_len_plus_terminator <= dst_len) |
1133 | memcpy(pDst, pSrc, src_len_plus_terminator); |
1134 | else |
1135 | { |
1136 | if (dst_len > 1) |
1137 | memcpy(pDst, pSrc, dst_len - 1); |
1138 | pDst[dst_len - 1] = '\0'; |
1139 | } |
1140 | |
1141 | return pDst; |
1142 | } |
1143 | |
1144 | inline bool string_ends_with(const std::string& s, char c) |
1145 | { |
1146 | return (s.size() != 0) && (s.back() == c); |
1147 | } |
1148 | |
	// Splits path p into drive, directory (with trailing separator), filename
	// (without extension), and extension (with leading '.'). Any output pointer
	// may be NULL to skip that component. Returns false on failure.
	inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)
	{
#ifdef _MSC_VER
		char drive_buf[_MAX_DRIVE] = { 0 };
		char dir_buf[_MAX_DIR] = { 0 };
		char fname_buf[_MAX_FNAME] = { 0 };
		char ext_buf[_MAX_EXT] = { 0 };

		errno_t error = _splitpath_s(p,
			pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
			pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
			pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
			pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
		if (error != 0)
			return false;

		if (pDrive) *pDrive = drive_buf;
		if (pDir) *pDir = dir_buf;
		if (pFilename) *pFilename = fname_buf;
		if (pExt) *pExt = ext_buf;
		return true;
#else
		// POSIX dirname()/basename() may modify their argument, so work on copies.
		char dirtmp[1024], nametmp[1024];
		strcpy_safe(dirtmp, sizeof(dirtmp), p);
		strcpy_safe(nametmp, sizeof(nametmp), p);

		// No drive concept outside Windows.
		if (pDrive)
			pDrive->resize(0);

		const char *pDirName = dirname(dirtmp);
		const char* pBaseName = basename(nametmp);
		if ((!pDirName) || (!pBaseName))
			return false;

		if (pDir)
		{
			*pDir = pDirName;
			// Guarantee a trailing slash so the result can be directly prepended to a filename.
			if ((pDir->size()) && (pDir->back() != '/'))
				*pDir += "/" ;
		}

		if (pFilename)
		{
			*pFilename = pBaseName;
			string_remove_extension(*pFilename);
		}

		if (pExt)
		{
			*pExt = pBaseName;
			*pExt = string_get_extension(*pExt);
			if (pExt->size())
				*pExt = "." + *pExt;
		}

		return true;
#endif
	}
1207 | |
1208 | inline bool is_path_separator(char c) |
1209 | { |
1210 | #ifdef _WIN32 |
1211 | return (c == '/') || (c == '\\'); |
1212 | #else |
1213 | return (c == '/'); |
1214 | #endif |
1215 | } |
1216 | |
1217 | inline bool is_drive_separator(char c) |
1218 | { |
1219 | #ifdef _WIN32 |
1220 | return (c == ':'); |
1221 | #else |
1222 | (void)c; |
1223 | return false; |
1224 | #endif |
1225 | } |
1226 | |
1227 | inline void string_combine_path(std::string &dst, const char *p, const char *q) |
1228 | { |
1229 | std::string temp(p); |
1230 | if (temp.size() && !is_path_separator(q[0])) |
1231 | { |
1232 | if (!is_path_separator(temp.back())) |
1233 | temp.append(1, BASISU_PATH_SEPERATOR_CHAR); |
1234 | } |
1235 | temp += q; |
1236 | dst.swap(temp); |
1237 | } |
1238 | |
1239 | inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r) |
1240 | { |
1241 | string_combine_path(dst, p, q); |
1242 | string_combine_path(dst, dst.c_str(), r); |
1243 | } |
1244 | |
1245 | inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) |
1246 | { |
1247 | string_combine_path(dst, p, q, r); |
1248 | if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.')) |
1249 | dst.append(1, '.'); |
1250 | dst.append(pExt); |
1251 | } |
1252 | |
1253 | inline bool string_get_pathname(const char *p, std::string &path) |
1254 | { |
1255 | std::string temp_drive, temp_path; |
1256 | if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL)) |
1257 | return false; |
1258 | string_combine_path(path, temp_drive.c_str(), temp_path.c_str()); |
1259 | return true; |
1260 | } |
1261 | |
1262 | inline bool string_get_filename(const char *p, std::string &filename) |
1263 | { |
1264 | std::string temp_ext; |
1265 | if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext)) |
1266 | return false; |
1267 | filename += temp_ext; |
1268 | return true; |
1269 | } |
1270 | |
1271 | class rand |
1272 | { |
1273 | std::mt19937 m_mt; |
1274 | |
1275 | public: |
1276 | rand() { } |
1277 | |
1278 | rand(uint32_t s) { seed(s); } |
1279 | void seed(uint32_t s) { m_mt.seed(s); } |
1280 | |
1281 | // between [l,h] |
1282 | int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); } |
1283 | |
1284 | uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); } |
1285 | |
1286 | bool bit() { return irand(0, 1) == 1; } |
1287 | |
1288 | uint8_t byte() { return static_cast<uint8_t>(urand32()); } |
1289 | |
1290 | // between [l,h) |
1291 | float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); } |
1292 | |
1293 | float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); } |
1294 | }; |
1295 | |
	// Max-heap priority queue of (index, priority) pairs. The heap array is
	// 1-based: slot 0 is unused and the highest-priority entry lives at m_heap[1].
	class priority_queue
	{
	public:
		priority_queue() :
			m_size(0)
		{
		}

		// Empties the queue and releases the heap storage.
		void clear()
		{
			m_heap.clear();
			m_size = 0;
		}

		// Resets the queue to hold up to max_entries items, seeded with one entry.
		void init(uint32_t max_entries, uint32_t first_index, float first_priority)
		{
			m_heap.resize(max_entries + 1);
			m_heap[1].m_index = first_index;
			m_heap[1].m_priority = first_priority;
			m_size = 1;
		}

		inline uint32_t size() const { return m_size; }

		// The entry with the highest priority is always at slot 1.
		inline uint32_t get_top_index() const { return m_heap[1].m_index; }
		inline float get_top_priority() const { return m_heap[1].m_priority; }

		// Removes the top entry by moving the last entry to the root and sifting it down.
		inline void delete_top()
		{
			assert(m_size > 0);
			m_heap[1] = m_heap[m_size];
			m_size--;
			if (m_size)
				down_heap(1);
		}

		// Inserts a new entry, sifting it up until its parent has a higher priority.
		// Grows the heap array on demand.
		inline void add_heap(uint32_t index, float priority)
		{
			m_size++;

			uint32_t k = m_size;

			if (m_size >= m_heap.size())
				m_heap.resize(m_size + 1);

			for (;;)
			{
				uint32_t parent_index = k >> 1;
				if ((!parent_index) || (m_heap[parent_index].m_priority > priority))
					break;
				m_heap[k] = m_heap[parent_index];
				k = parent_index;
			}

			m_heap[k].m_index = index;
			m_heap[k].m_priority = priority;
		}

	private:
		struct entry
		{
			uint32_t m_index;
			float m_priority;
		};

		basisu::vector<entry> m_heap;
		uint32_t m_size;

		// Push down entry at index: repeatedly swap with the larger-priority child
		// until the heap property holds again.
		inline void down_heap(uint32_t heap_index)
		{
			uint32_t orig_index = m_heap[heap_index].m_index;
			const float orig_priority = m_heap[heap_index].m_priority;

			uint32_t child_index;
			while ((child_index = (heap_index << 1)) <= m_size)
			{
				// Pick the larger of the two children (if the right child exists).
				if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;
				if (orig_priority > m_heap[child_index].m_priority)
					break;
				m_heap[heap_index] = m_heap[child_index];
				heap_index = child_index;
			}

			m_heap[heap_index].m_index = orig_index;
			m_heap[heap_index].m_priority = orig_priority;
		}
	};
1384 | |
1385 | // Tree structured vector quantization (TSVQ) |
1386 | |
	// Builds a codebook by recursively splitting the weighted training set:
	// starting from one root cluster, the leaf with the highest variance is
	// repeatedly split in two (initial split along the PCA axis, refined with
	// k-means iterations) until the requested codebook size is reached.
	template <typename TrainingVectorType>
	class tree_vector_quant
	{
	public:
		typedef TrainingVectorType training_vec_type;
		typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight;
		typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs;

		tree_vector_quant() :
			m_next_codebook_index(0)
		{
		}

		void clear()
		{
			clear_vector(m_training_vecs);
			clear_vector(m_nodes);
			m_next_codebook_index = 0;
		}

		// Adds one weighted training vector; call before generate().
		void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }

		size_t get_total_training_vecs() const { return m_training_vecs.size(); }
		const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
		array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }

		// Appends one entry per leaf node: the training-vector indices in that cluster.
		void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
		{
			for (uint32_t i = 0; i < m_nodes.size(); i++)
			{
				const tsvq_node &n = m_nodes[i];
				if (!n.is_leaf())
					continue;

				codebook.resize(codebook.size() + 1);
				codebook.back() = n.m_training_vecs;
			}
		}

		// Appends one entry per leaf node: the cluster's centroid.
		void retrieve(basisu::vector<TrainingVectorType> &codebook) const
		{
			for (uint32_t i = 0; i < m_nodes.size(); i++)
			{
				const tsvq_node &n = m_nodes[i];
				if (!n.is_leaf())
					continue;

				codebook.resize(codebook.size() + 1);
				codebook.back() = n.m_origin;
			}
		}

		// Depth-first walk that emits at most max_clusters clusters: internal nodes
		// whose codebook index is past the budget are emitted whole (their full
		// index list) instead of being descended into.
		void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
		{
			uint_vec node_stack;
			node_stack.reserve(512);

			codebook.resize(0);
			codebook.reserve(max_clusters);

			uint32_t node_index = 0;

			while (true)
			{
				const tsvq_node& cur = m_nodes[node_index];

				if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
				{
					codebook.resize(codebook.size() + 1);
					codebook.back() = cur.m_training_vecs;

					if (node_stack.empty())
						break;

					node_index = node_stack.back();
					node_stack.pop_back();
					continue;
				}

				node_stack.push_back(cur.m_right_index);
				node_index = cur.m_left_index;
			}
		}

		// Builds the tree by repeatedly splitting the highest-variance leaf until
		// max_size leaves exist or no splittable leaf remains. Returns false only
		// when there are no training vectors.
		bool generate(uint32_t max_size)
		{
			if (!m_training_vecs.size())
				return false;

			m_next_codebook_index = 0;

			clear_vector(m_nodes);
			m_nodes.reserve(max_size * 2 + 1);

			m_nodes.push_back(prepare_root());

			priority_queue var_heap;
			var_heap.init(max_size, 0, m_nodes[0].m_var);

			basisu::vector<uint32_t> l_children, r_children;

			// Now split the worst nodes
			l_children.reserve(m_training_vecs.size() + 1);
			r_children.reserve(m_training_vecs.size() + 1);

			uint32_t total_leaf_nodes = 1;

			//interval_timer tm;
			//tm.start();

			while ((var_heap.size()) && (total_leaf_nodes < max_size))
			{
				const uint32_t node_index = var_heap.get_top_index();
				// NOTE(review): this reference may be invalidated by split_node's
				// m_nodes.resize(); it is not used after the split_node call below.
				const tsvq_node &node = m_nodes[node_index];

				assert(node.m_var == var_heap.get_top_priority());
				assert(node.is_leaf());

				var_heap.delete_top();

				if (node.m_training_vecs.size() > 1)
				{
					if (split_node(node_index, var_heap, l_children, r_children))
					{
						// This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
						total_leaf_nodes += 1;
					}
				}
			}

			//debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs());

			return true;
		}

	private:
		// One node of the binary split tree. Leaves hold a cluster (centroid,
		// weight, variance, member indices); internal nodes hold child indices.
		class tsvq_node
		{
		public:
			// NOTE(review): m_var is left uninitialized here; prepare_root()/set()
			// assign it before it is read.
			inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }

			// vecs is erased
			inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }

			inline bool is_leaf() const { return m_left_index < 0; }

			float m_var;
			uint64_t m_weight;
			TrainingVectorType m_origin;
			int32_t m_left_index, m_right_index;
			basisu::vector<uint32_t> m_training_vecs;
			int m_codebook_index;
		};

		typedef basisu::vector<tsvq_node> tsvq_node_vec;
		tsvq_node_vec m_nodes;

		array_of_weighted_training_vecs m_training_vecs;

		uint32_t m_next_codebook_index;

		// Builds the root node: all training vectors, weighted centroid, and the
		// total variance about that centroid.
		tsvq_node prepare_root() const
		{
			double ttsum = 0.0f;

			// Prepare root node containing all training vectors
			tsvq_node root;
			root.m_training_vecs.reserve(m_training_vecs.size());

			for (uint32_t i = 0; i < m_training_vecs.size(); i++)
			{
				const TrainingVectorType &v = m_training_vecs[i].first;
				const uint64_t weight = m_training_vecs[i].second;

				root.m_training_vecs.push_back(i);

				root.m_origin += (v * static_cast<float>(weight));
				root.m_weight += weight;

				ttsum += v.dot(v) * weight;
			}

			// var = sum(w*|v|^2) - |sum(w*v)|^2 / sum(w)
			root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));

			root.m_origin *= (1.0f / root.m_weight);

			return root;
		}

		// Splits the leaf at node_index into two children, pushing any child that
		// can be split further onto var_heap. Returns false if the node couldn't
		// be split. l_children/r_children are scratch buffers owned by the caller.
		bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children)
		{
			TrainingVectorType l_child_org, r_child_org;
			uint64_t l_weight = 0, r_weight = 0;
			float l_var = 0.0f, r_var = 0.0f;

			// Compute initial left/right child origins
			if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))
				return false;

			// Use k-means iterations to refine these children vectors
			if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
				return false;

			// Create children
			const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;

			m_nodes[node_index].m_left_index = l_child_index;
			m_nodes[node_index].m_right_index = r_child_index;

			// Codebook indices record the order in which nodes were split (used by
			// the budgeted retrieve() overload).
			m_nodes[node_index].m_codebook_index = m_next_codebook_index;
			m_next_codebook_index++;

			m_nodes.resize(m_nodes.size() + 2);

			tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];

			l_child.set(l_child_org, l_weight, l_var, l_children);
			r_child.set(r_child_org, r_weight, r_var, r_children);

			// If a child's variance underflowed to <= 0 but it still holds distinct
			// vectors, give it a tiny positive variance so it remains splittable.
			if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))
			{
				TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);

				for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)
				{
					if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))
					{
						l_child.m_var = 1e-4f;
						break;
					}
				}
			}

			if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))
			{
				TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);

				for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)
				{
					if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))
					{
						r_child.m_var = 1e-4f;
						break;
					}
				}
			}

			if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
				var_heap.add_heap(l_child_index, l_child.m_var);

			if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
				var_heap.add_heap(r_child_index, r_child.m_var);

			return true;
		}

		// Principal axis of the node's weighted covariance matrix, via PCA.
		TrainingVectorType compute_split_axis(const tsvq_node &node) const
		{
			const uint32_t N = TrainingVectorType::num_elements;

			matrix<N, N, float> cmatrix;

			if ((N != 16) || (!g_cpu_supports_sse41))
			{
				cmatrix.set_zero();

				// Compute covariance matrix from weighted input vectors
				for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
				{
					const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
					const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v);

					// Only the upper triangle is accumulated; mirrored below.
					for (uint32_t x = 0; x < N; x++)
						for (uint32_t y = x; y < N; y++)
							cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
				}
			}
			else
			{
#if BASISU_SUPPORT_SSE
				// Specialize the case with 16x16 matrices, which are quite expensive without SIMD.
				// This SSE function takes pointers to void types, so do some sanity checks.
				assert(sizeof(TrainingVectorType) == sizeof(float) * 16);
				assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>));
				update_covar_matrix_16x16_sse41(node.m_training_vecs.size(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
#endif
			}

			const float renorm_scale = 1.0f / node.m_weight;

			for (uint32_t x = 0; x < N; x++)
				for (uint32_t y = x; y < N; y++)
					cmatrix[x][y] *= renorm_scale;

			// Diagonal flip
			for (uint32_t x = 0; x < (N - 1); x++)
				for (uint32_t y = x + 1; y < N; y++)
					cmatrix[y][x] = cmatrix[x][y];

			return compute_pca_from_covar<N, TrainingVectorType>(cmatrix);
		}

		// Computes initial left/right child centroids for a split: partition the
		// node's vectors by which side of the PCA axis they fall on, with a
		// fallback partition along the largest-extent component when one side
		// ends up empty. Returns false if no split is possible.
		bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
		{
			//const uint32_t N = TrainingVectorType::num_elements;

			// Two vectors: trivially one per child.
			if (2 == node.m_training_vecs.size())
			{
				l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
				r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
				return true;
			}

			TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
			double l_weight = 0.0f, r_weight = 0.0f;

			// Compute initial left/right children
			for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
			{
				const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;

				const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;

				// Sign of the projection onto the principal axis picks the side.
				double t = (v - node.m_origin).dot(axis);
				if (t >= 0.0f)
				{
					r_child += v * weight;
					r_weight += weight;
				}
				else
				{
					l_child += v * weight;
					l_weight += weight;
				}
			}

			if ((l_weight > 0.0f) && (r_weight > 0.0f))
			{
				l_child_result = l_child * static_cast<float>(1.0f / l_weight);
				r_child_result = r_child * static_cast<float>(1.0f / r_weight);
			}
			else
			{
				// PCA split failed (all vectors landed on one side): fall back to
				// splitting along the component with the largest extent.
				TrainingVectorType l(1e+20f);
				TrainingVectorType h(-1e+20f);
				for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
				{
					const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;

					l = TrainingVectorType::component_min(l, v);
					h = TrainingVectorType::component_max(h, v);
				}

				TrainingVectorType r(h - l);

				float largest_axis_v = 0.0f;
				int largest_axis_index = -1;
				for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)
				{
					if (r[i] > largest_axis_v)
					{
						largest_axis_v = r[i];
						largest_axis_index = i;
					}
				}

				// All vectors identical on every axis - unsplittable.
				if (largest_axis_index < 0)
					return false;

				basisu::vector<float> keys(node.m_training_vecs.size());
				for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
					keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];

				// NOTE(review): the sorted 'indices' are never read below - the
				// split uses original order (i < half_index), not the sort result.
				// Confirm whether indices[i] was intended here.
				uint_vec indices(node.m_training_vecs.size());
				indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);

				l_child.set_zero();
				l_weight = 0;

				r_child.set_zero();
				r_weight = 0;

				const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;
				for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
				{
					const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;

					const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;

					if (i < half_index)
					{
						l_child += v * weight;
						l_weight += weight;
					}
					else
					{
						r_child += v * weight;
						r_weight += weight;
					}
				}

				if ((l_weight > 0.0f) && (r_weight > 0.0f))
				{
					l_child_result = l_child * static_cast<float>(1.0f / l_weight);
					r_child_result = r_child * static_cast<float>(1.0f / r_weight);
				}
				else
				{
					// Last resort: use the component-wise min/max corners.
					l_child_result = l;
					r_child_result = h;
				}
			}

			return true;
		}

		// Refines the two child centroids with up to cMaxIters k-means iterations,
		// producing each child's centroid, total weight, variance, and member list.
		// Returns false only when the node cannot be split at all.
		bool refine_split(const tsvq_node &node,
			TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children,
			TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const
		{
			l_children.reserve(node.m_training_vecs.size());
			r_children.reserve(node.m_training_vecs.size());

			float prev_total_variance = 1e+10f;

			// Refine left/right children locations using k-means iterations
			const uint32_t cMaxIters = 6;
			for (uint32_t iter = 0; iter < cMaxIters; iter++)
			{
				l_children.resize(0);
				r_children.resize(0);

				TrainingVectorType new_l_child(cZero), new_r_child(cZero);

				double l_ttsum = 0.0f, r_ttsum = 0.0f;

				l_weight = 0;
				r_weight = 0;

				// Assign each vector to the nearer child centroid.
				for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
				{
					const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
					const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;

					double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);

					if (left_dist2 >= right_dist2)
					{
						new_r_child += (v * static_cast<float>(weight));
						r_weight += weight;

						r_ttsum += weight * v.dot(v);
						r_children.push_back(node.m_training_vecs[i]);
					}
					else
					{
						new_l_child += (v * static_cast<float>(weight));
						l_weight += weight;

						l_ttsum += weight * v.dot(v);
						l_children.push_back(node.m_training_vecs[i]);
					}
				}

				// Node is unsplittable using the above algorithm - try something else to split it up.
				if ((!l_weight) || (!r_weight))
				{
					l_children.resize(0);
					new_l_child.set(0.0f);
					l_ttsum = 0.0f;
					l_weight = 0;

					r_children.resize(0);
					new_r_child.set(0.0f);
					r_ttsum = 0.0f;
					r_weight = 0;

					// Fallback: vectors equal to the first vector go right, all others go left.
					TrainingVectorType firstVec;
					for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
					{
						const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
						const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;

						if ((!i) || (v == firstVec))
						{
							firstVec = v;

							new_r_child += (v * static_cast<float>(weight));
							r_weight += weight;

							r_ttsum += weight * v.dot(v);
							r_children.push_back(node.m_training_vecs[i]);
						}
						else
						{
							new_l_child += (v * static_cast<float>(weight));
							l_weight += weight;

							l_ttsum += weight * v.dot(v);
							l_children.push_back(node.m_training_vecs[i]);
						}
					}

					if ((!l_weight) || (!r_weight))
						return false;
				}

				// var = sum(w*|v|^2) - |sum(w*v)|^2 / sum(w), computed before normalizing.
				l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
				r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));

				new_l_child *= (1.0f / l_weight);
				new_r_child *= (1.0f / r_weight);

				l_child = new_l_child;
				r_child = new_r_child;

				float total_var = l_var + r_var;
				const float cGiveupVariance = .00001f;
				if (total_var < cGiveupVariance)
					break;

				// Check to see if the variance has settled
				const float cVarianceDeltaThresh = .00125f;
				if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
					break;

				prev_total_variance = total_var;
			}

			return true;
		}
	};
1919 | |
	// A group of indices together with their combined weight.
	struct weighted_block_group
	{
		uint64_t m_total_weight; // sum of the weights of all members in m_indices
		uint_vec m_indices;      // indices of the group's members
	};
1925 | |
1926 | template<typename Quantizer> |
1927 | bool generate_hierarchical_codebook_threaded_internal(Quantizer& q, |
1928 | uint32_t max_codebook_size, uint32_t max_parent_codebook_size, |
1929 | basisu::vector<uint_vec>& codebook, |
1930 | basisu::vector<uint_vec>& parent_codebook, |
1931 | uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool) |
1932 | { |
1933 | codebook.resize(0); |
1934 | parent_codebook.resize(0); |
1935 | |
1936 | if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16)) |
1937 | { |
1938 | if (!q.generate(max_codebook_size)) |
1939 | return false; |
1940 | |
1941 | q.retrieve(codebook); |
1942 | |
1943 | if (max_parent_codebook_size) |
1944 | q.retrieve(max_parent_codebook_size, parent_codebook); |
1945 | |
1946 | return true; |
1947 | } |
1948 | |
1949 | const uint32_t cMaxThreads = 16; |
1950 | if (max_threads > cMaxThreads) |
1951 | max_threads = cMaxThreads; |
1952 | |
1953 | if (!q.generate(max_threads)) |
1954 | return false; |
1955 | |
1956 | basisu::vector<uint_vec> initial_codebook; |
1957 | |
1958 | q.retrieve(initial_codebook); |
1959 | |
1960 | if (initial_codebook.size() < max_threads) |
1961 | { |
1962 | codebook = initial_codebook; |
1963 | |
1964 | if (max_parent_codebook_size) |
1965 | q.retrieve(max_parent_codebook_size, parent_codebook); |
1966 | |
1967 | return true; |
1968 | } |
1969 | |
1970 | Quantizer quantizers[cMaxThreads]; |
1971 | |
1972 | bool success_flags[cMaxThreads]; |
1973 | clear_obj(success_flags); |
1974 | |
1975 | basisu::vector<uint_vec> local_clusters[cMaxThreads]; |
1976 | basisu::vector<uint_vec> local_parent_clusters[cMaxThreads]; |
1977 | |
1978 | for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) |
1979 | { |
1980 | #ifndef __EMSCRIPTEN__ |
1981 | pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] { |
1982 | #endif |
1983 | |
1984 | Quantizer& lq = quantizers[thread_iter]; |
1985 | uint_vec& cluster_indices = initial_codebook[thread_iter]; |
1986 | |
1987 | uint_vec local_to_global(cluster_indices.size()); |
1988 | |
1989 | for (uint32_t i = 0; i < cluster_indices.size(); i++) |
1990 | { |
1991 | const uint32_t global_training_vec_index = cluster_indices[i]; |
1992 | local_to_global[i] = global_training_vec_index; |
1993 | |
1994 | lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second); |
1995 | } |
1996 | |
1997 | const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs(); |
1998 | |
1999 | success_flags[thread_iter] = lq.generate(max_clusters); |
2000 | |
2001 | if (success_flags[thread_iter]) |
2002 | { |
2003 | lq.retrieve(local_clusters[thread_iter]); |
2004 | |
2005 | for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++) |
2006 | { |
2007 | for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++) |
2008 | local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]]; |
2009 | } |
2010 | |
2011 | if (max_parent_codebook_size) |
2012 | { |
2013 | lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]); |
2014 | |
2015 | for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++) |
2016 | { |
2017 | for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++) |
2018 | local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]]; |
2019 | } |
2020 | } |
2021 | } |
2022 | |
2023 | #ifndef __EMSCRIPTEN__ |
2024 | } ); |
2025 | #endif |
2026 | |
2027 | } // thread_iter |
2028 | |
2029 | #ifndef __EMSCRIPTEN__ |
2030 | pJob_pool->wait_for_all(); |
2031 | #endif |
2032 | |
2033 | uint32_t total_clusters = 0, total_parent_clusters = 0; |
2034 | |
2035 | for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++) |
2036 | { |
2037 | if (!success_flags[thread_iter]) |
2038 | return false; |
2039 | total_clusters += (uint32_t)local_clusters[thread_iter].size(); |
2040 | total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size(); |
2041 | } |
2042 | |
2043 | codebook.reserve(total_clusters); |
2044 | parent_codebook.reserve(total_parent_clusters); |
2045 | |
2046 | for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) |
2047 | { |
2048 | for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++) |
2049 | { |
2050 | codebook.resize(codebook.size() + 1); |
2051 | codebook.back().swap(local_clusters[thread_iter][j]); |
2052 | } |
2053 | |
2054 | for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++) |
2055 | { |
2056 | parent_codebook.resize(parent_codebook.size() + 1); |
2057 | parent_codebook.back().swap(local_parent_clusters[thread_iter][j]); |
2058 | } |
2059 | } |
2060 | |
2061 | return true; |
2062 | } |
2063 | |
	// Deduplicates q's training vectors (summing the weights of identical vectors), quantizes
	// the unique set (threaded via pJob_pool when large enough), then expands the resulting
	// group indices back to the original training vector indices in codebook/parent_codebook.
	// If even_odd_input_pairs_equal is true, the caller guarantees the training vectors come in
	// identical even/odd pairs (vec[i] == vec[i+1]), so only every other vector is hashed.
	// Returns false if quantization fails.
	template<typename Quantizer>
	bool generate_hierarchical_codebook_threaded(Quantizer& q,
		uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
		basisu::vector<uint_vec>& codebook,
		basisu::vector<uint_vec>& parent_codebook,
		uint32_t max_threads, job_pool *pJob_pool,
		bool even_odd_input_pairs_equal)
	{
		typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher;

		// Maps a unique training vector to the group of original training vector indices that
		// share it, plus the group's combined weight.
		typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,
			training_vec_bit_hasher> group_hash;

		//interval_timer tm;
		//tm.start();

		group_hash unique_vecs;

		unique_vecs.reserve(20000);

		// Reused for every insertion attempt; on a hash hit its contents are merged into the
		// existing entry instead.
		weighted_block_group g;

		if (even_odd_input_pairs_equal)
		{
			g.m_indices.resize(2);

			assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0);

			for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2)
			{
				assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first);

				g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second;
				g.m_indices[0] = i;
				g.m_indices[1] = i + 1;

				auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));

				// Already present: accumulate the pair's weight and indices into the group.
				if (!ins_res.second)
				{
					(ins_res.first)->second.m_total_weight += g.m_total_weight;
					(ins_res.first)->second.m_indices.push_back(i);
					(ins_res.first)->second.m_indices.push_back(i + 1);
				}
			}
		}
		else
		{
			g.m_indices.resize(1);

			for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)
			{
				g.m_total_weight = q.get_training_vecs()[i].second;
				g.m_indices[0] = i;

				auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));

				if (!ins_res.second)
				{
					(ins_res.first)->second.m_total_weight += g.m_total_weight;
					(ins_res.first)->second.m_indices.push_back(i);
				}
			}
		}

		//debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs());
		debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n" , q.get_total_training_vecs(), (uint32_t)unique_vecs.size());

		// Build a second quantizer over only the unique vectors, remembering each hash entry's
		// iterator so group indices can be mapped back to original training vector indices.
		Quantizer group_quant;
		typedef typename group_hash::const_iterator group_hash_const_iter;
		basisu::vector<group_hash_const_iter> unique_vec_iters;
		unique_vec_iters.reserve(unique_vecs.size());

		for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)
		{
			group_quant.add_training_vec(iter->first, iter->second.m_total_weight);
			unique_vec_iters.push_back(iter);
		}

		// Only cap per-thread cluster counts when there are more unique vectors than codebook slots.
		bool limit_clusterizers = true;
		if (unique_vecs.size() <= max_codebook_size)
			limit_clusterizers = false;

		debug_printf("Limit clusterizers: %u\n" , limit_clusterizers);

		// Small inputs are quantized on a single thread; threading only pays off on large sets.
		basisu::vector<uint_vec> group_codebook, group_parent_codebook;
		bool status = generate_hierarchical_codebook_threaded_internal(group_quant,
			max_codebook_size, max_parent_codebook_size,
			group_codebook,
			group_parent_codebook,
			(unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);

		if (!status)
			return false;

		// Expand each group index back into the full list of original training vector indices.
		codebook.resize(0);
		for (uint32_t i = 0; i < group_codebook.size(); i++)
		{
			codebook.resize(codebook.size() + 1);

			for (uint32_t j = 0; j < group_codebook[i].size(); j++)
			{
				const uint32_t group_index = group_codebook[i][j];

				typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
				const uint_vec& training_vec_indices = group_iter->second.m_indices;

				append_vector(codebook.back(), training_vec_indices);
			}
		}

		// Same expansion for the (optional) parent codebook.
		parent_codebook.resize(0);
		for (uint32_t i = 0; i < group_parent_codebook.size(); i++)
		{
			parent_codebook.resize(parent_codebook.size() + 1);

			for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)
			{
				const uint32_t group_index = group_parent_codebook[i][j];

				typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
				const uint_vec& training_vec_indices = group_iter->second.m_indices;

				append_vector(parent_codebook.back(), training_vec_indices);
			}
		}

		return true;
	}
2193 | |
2194 | // Canonical Huffman coding |
2195 | |
2196 | class histogram |
2197 | { |
2198 | basisu::vector<uint32_t> m_hist; |
2199 | |
2200 | public: |
2201 | histogram(uint32_t size = 0) { init(size); } |
2202 | |
2203 | void clear() |
2204 | { |
2205 | clear_vector(m_hist); |
2206 | } |
2207 | |
2208 | void init(uint32_t size) |
2209 | { |
2210 | m_hist.resize(0); |
2211 | m_hist.resize(size); |
2212 | } |
2213 | |
2214 | inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); } |
2215 | |
2216 | inline const uint32_t &operator[] (uint32_t index) const |
2217 | { |
2218 | return m_hist[index]; |
2219 | } |
2220 | |
2221 | inline uint32_t &operator[] (uint32_t index) |
2222 | { |
2223 | return m_hist[index]; |
2224 | } |
2225 | |
2226 | inline void inc(uint32_t index) |
2227 | { |
2228 | m_hist[index]++; |
2229 | } |
2230 | |
2231 | uint64_t get_total() const |
2232 | { |
2233 | uint64_t total = 0; |
2234 | for (uint32_t i = 0; i < m_hist.size(); ++i) |
2235 | total += m_hist[i]; |
2236 | return total; |
2237 | } |
2238 | |
2239 | double get_entropy() const |
2240 | { |
2241 | double total = static_cast<double>(get_total()); |
2242 | if (total == 0.0f) |
2243 | return 0.0f; |
2244 | |
2245 | const double inv_total = 1.0f / total; |
2246 | const double neg_inv_log2 = -1.0f / log(2.0f); |
2247 | |
2248 | double e = 0.0f; |
2249 | for (uint32_t i = 0; i < m_hist.size(); i++) |
2250 | if (m_hist[i]) |
2251 | e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]); |
2252 | |
2253 | return e; |
2254 | } |
2255 | }; |
2256 | |
	// Symbol/frequency pair used while constructing canonical Huffman codes.
	struct sym_freq
	{
		uint32_t m_key;       // symbol frequency (the radix sort key)
		uint16_t m_sym_index; // original symbol index
	};

	// Radix sorts num_syms sym_freq records by m_key using pSyms0/pSyms1 as ping-pong buffers;
	// returns the buffer holding the sorted result.
	sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
	// In-place computes minimum-redundancy (Huffman) code lengths from frequency-sorted symbols.
	void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
	// Adjusts the code-length histogram pNum_codes so no code exceeds max_code_size bits.
	void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);
2266 | |
2267 | class huffman_encoding_table |
2268 | { |
2269 | public: |
2270 | huffman_encoding_table() |
2271 | { |
2272 | } |
2273 | |
2274 | void clear() |
2275 | { |
2276 | clear_vector(m_codes); |
2277 | clear_vector(m_code_sizes); |
2278 | } |
2279 | |
2280 | bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize) |
2281 | { |
2282 | return init(h.size(), &h[0], max_code_size); |
2283 | } |
2284 | |
2285 | bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); |
2286 | bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); |
2287 | |
2288 | inline const uint16_vec &get_codes() const { return m_codes; } |
2289 | inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } |
2290 | |
2291 | uint32_t get_total_used_codes() const |
2292 | { |
2293 | for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--) |
2294 | if (m_code_sizes[i]) |
2295 | return i + 1; |
2296 | return 0; |
2297 | } |
2298 | |
2299 | private: |
2300 | uint16_vec m_codes; |
2301 | uint8_vec m_code_sizes; |
2302 | }; |
2303 | |
	// LSB-first bitstream writer with helpers for Huffman, truncated binary, Rice and VLC codes.
	// Bits accumulate in m_bit_buffer and are appended to m_bytes a byte at a time.
	class bitwise_coder
	{
	public:
		bitwise_coder() :
			m_bit_buffer(0),
			m_bit_buffer_size(0),
			m_total_bits(0)
		{
		}

		inline void clear()
		{
			clear_vector(m_bytes);
			m_bit_buffer = 0;
			m_bit_buffer_size = 0;
			m_total_bits = 0;
		}

		inline const uint8_vec &get_bytes() const { return m_bytes; }

		inline uint64_t get_total_bits() const { return m_total_bits; }
		inline void clear_total_bits() { m_total_bits = 0; }

		inline void init(uint32_t reserve_size = 1024)
		{
			m_bytes.reserve(reserve_size);
			m_bytes.resize(0);

			m_bit_buffer = 0;
			m_bit_buffer_size = 0;
			m_total_bits = 0;
		}

		// Pads the stream to a byte boundary with zero bits and emits the final partial byte.
		// Returns the number of bits written to the stream (8 or 0).
		inline uint32_t flush()
		{
			if (m_bit_buffer_size)
			{
				// Account for the zero padding bits that round the stream up to a whole byte.
				m_total_bits += 8 - (m_bit_buffer_size & 7);
				append_byte(static_cast<uint8_t>(m_bit_buffer));

				m_bit_buffer = 0;
				m_bit_buffer_size = 0;

				return 8;
			}

			return 0;
		}

		// Writes the low num_bits of "bits" (LSB first); num_bits may be 0..32. Returns num_bits.
		inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)
		{
			assert(num_bits <= 32);
			assert(bits < (1ULL << num_bits));

			if (!num_bits)
				return 0;

			m_total_bits += num_bits;

			// Merge the new bits above the pending buffered bits, then drain whole bytes.
			uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer;
			m_bit_buffer_size += num_bits;

			while (m_bit_buffer_size >= 8)
			{
				append_byte(static_cast<uint8_t>(v));
				v >>= 8;
				m_bit_buffer_size -= 8;
			}

			m_bit_buffer = static_cast<uint8_t>(v);
			return num_bits;
		}

		// Writes symbol "sym" using the given Huffman table; returns the code length in bits.
		inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)
		{
			uint32_t code = tab.get_codes()[sym];
			uint32_t code_size = tab.get_code_sizes()[sym];
			assert(code_size >= 1);
			put_bits(code, code_size);
			return code_size;
		}

		// Truncated binary code for v in [0, n): the first u = 2^(k+1)-n values use k bits,
		// the rest use k+1 bits. Returns the number of bits written.
		inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)
		{
			assert((n >= 2) && (v < n));

			uint32_t k = floor_log2i(n);
			uint32_t u = (1 << (k + 1)) - n;

			if (v < u)
				return put_bits(v, k);

			uint32_t x = v + u;
			assert((x >> 1) >= u);

			put_bits(x >> 1, k);
			put_bits(x & 1, 1);
			return k + 1;
		}

		// Rice code with parameter m: quotient q = v >> m in unary (q ones then a zero),
		// followed by the low m remainder bits. Returns the number of bits written.
		inline uint32_t put_rice(uint32_t v, uint32_t m)
		{
			assert(m);

			const uint64_t start_bits = m_total_bits;

			uint32_t q = v >> m, r = v & ((1 << m) - 1);

			// rice coding sanity check
			assert(q <= 64);

			// Emit the unary quotient in chunks of 16 one-bits.
			for (; q > 16; q -= 16)
				put_bits(0xFFFF, 16);

			put_bits((1 << q) - 1, q);
			// Low bit 0 terminates the unary run; the remainder r occupies the next m bits.
			put_bits(r << 1, m + 1);

			return (uint32_t)(m_total_bits - start_bits);
		}

		// Variable-length code: chunk_bits data bits plus a continuation bit per chunk,
		// least significant chunk first. Returns the number of bits written.
		inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)
		{
			assert(chunk_bits);

			const uint32_t chunk_size = 1 << chunk_bits;
			const uint32_t chunk_mask = chunk_size - 1;

			uint32_t total_bits = 0;

			for ( ; ; )
			{
				uint32_t next_v = v >> chunk_bits;

				// The top bit of each chunk flags whether another chunk follows.
				total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);
				if (!next_v)
					break;

				v = next_v;
			}

			return total_bits;
		}

		// Serializes a Huffman table (code lengths) into the stream; returns bits written.
		uint32_t emit_huffman_table(const huffman_encoding_table &tab);

	private:
		uint8_vec m_bytes;                            // completed output bytes
		uint32_t m_bit_buffer, m_bit_buffer_size;     // pending bits (< 8) not yet byte-aligned
		uint64_t m_total_bits;                        // total bits written, including flush padding

		void append_byte(uint8_t c)
		{
			m_bytes.resize(m_bytes.size() + 1);
			m_bytes.back() = c;
		}

		static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
		static void end_zero_run(uint16_vec &syms, uint32_t &run_size);
	};
2463 | |
2464 | class huff2D |
2465 | { |
2466 | public: |
2467 | huff2D() { } |
2468 | huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); } |
2469 | |
2470 | inline const histogram &get_histogram() const { return m_histogram; } |
2471 | inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; } |
2472 | |
2473 | inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) |
2474 | { |
2475 | assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); |
2476 | |
2477 | m_bits_per_sym = bits_per_sym; |
2478 | m_total_syms_per_group = total_syms_per_group; |
2479 | m_cur_sym_bits = 0; |
2480 | m_cur_num_syms = 0; |
2481 | m_decode_syms_remaining = 0; |
2482 | m_next_decoder_group_index = 0; |
2483 | |
2484 | m_histogram.init(1 << (bits_per_sym * total_syms_per_group)); |
2485 | } |
2486 | |
2487 | inline void clear() |
2488 | { |
2489 | m_group_bits.clear(); |
2490 | |
2491 | m_cur_sym_bits = 0; |
2492 | m_cur_num_syms = 0; |
2493 | m_decode_syms_remaining = 0; |
2494 | m_next_decoder_group_index = 0; |
2495 | } |
2496 | |
2497 | inline void emit(uint32_t sym) |
2498 | { |
2499 | m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym)); |
2500 | m_cur_num_syms++; |
2501 | |
2502 | if (m_cur_num_syms == m_total_syms_per_group) |
2503 | flush(); |
2504 | } |
2505 | |
2506 | inline void flush() |
2507 | { |
2508 | if (m_cur_num_syms) |
2509 | { |
2510 | m_group_bits.push_back(m_cur_sym_bits); |
2511 | m_histogram.inc(m_cur_sym_bits); |
2512 | |
2513 | m_cur_sym_bits = 0; |
2514 | m_cur_num_syms = 0; |
2515 | } |
2516 | } |
2517 | |
2518 | inline bool start_encoding(uint32_t code_size_limit = 16) |
2519 | { |
2520 | flush(); |
2521 | |
2522 | if (!m_encoding_table.init(m_histogram, code_size_limit)) |
2523 | return false; |
2524 | |
2525 | m_decode_syms_remaining = 0; |
2526 | m_next_decoder_group_index = 0; |
2527 | |
2528 | return true; |
2529 | } |
2530 | |
2531 | inline uint32_t emit_next_sym(bitwise_coder &c) |
2532 | { |
2533 | uint32_t bits = 0; |
2534 | |
2535 | if (!m_decode_syms_remaining) |
2536 | { |
2537 | bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table); |
2538 | m_decode_syms_remaining = m_total_syms_per_group; |
2539 | } |
2540 | |
2541 | m_decode_syms_remaining--; |
2542 | return bits; |
2543 | } |
2544 | |
2545 | inline void emit_flush() |
2546 | { |
2547 | m_decode_syms_remaining = 0; |
2548 | } |
2549 | |
2550 | private: |
2551 | uint_vec m_group_bits; |
2552 | huffman_encoding_table m_encoding_table; |
2553 | histogram m_histogram; |
2554 | uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining; |
2555 | }; |
2556 | |
2557 | bool huffman_test(int rand_seed); |
2558 | |
2559 | // VQ index reordering |
2560 | |
	// Reorders palette/codebook indices so that symbols which frequently appear near each
	// other (per the index co-occurrence histogram) receive nearby indices, improving
	// downstream compression. Produces an old-to-new index remap table.
	class palette_index_reorderer
	{
	public:
		palette_index_reorderer()
		{
		}

		void clear()
		{
			clear_vector(m_hist);
			clear_vector(m_total_count_to_picked);
			clear_vector(m_entries_picked);
			clear_vector(m_entries_to_do);
			clear_vector(m_remap_table);
		}

		// returns [0,1] distance of entry i to entry j
		typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);

		// Builds the remap table from num_indices palette indices; pDist_func (optional,
		// weighted by dist_func_weight) biases ordering by entry similarity.
		void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);

		// Table remaps old to new symbol indices
		inline const uint_vec &get_remap_table() const { return m_remap_table; }

	private:
		uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;

		// m_hist stores the co-occurrence counts in the upper triangle of an n x n matrix;
		// get_hist swaps (i, j) so either argument order reads the same cell.
		inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; }
		// Only counts valid, distinct pairs with i < j, matching get_hist's upper-triangle layout.
		inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }

		void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);
		void find_initial(uint32_t num_syms);
		void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
		float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
	};
2596 | |
2597 | // Simple 32-bit 2D image class |
2598 | |
2599 | class image |
2600 | { |
2601 | public: |
2602 | image() : |
2603 | m_width(0), m_height(0), m_pitch(0) |
2604 | { |
2605 | } |
2606 | |
2607 | image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : |
2608 | m_width(0), m_height(0), m_pitch(0) |
2609 | { |
2610 | resize(w, h, p); |
2611 | } |
2612 | |
2613 | image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) : |
2614 | m_width(0), m_height(0), m_pitch(0) |
2615 | { |
2616 | init(pImage, width, height, comps); |
2617 | } |
2618 | |
2619 | image(const image &other) : |
2620 | m_width(0), m_height(0), m_pitch(0) |
2621 | { |
2622 | *this = other; |
2623 | } |
2624 | |
2625 | image &swap(image &other) |
2626 | { |
2627 | std::swap(m_width, other.m_width); |
2628 | std::swap(m_height, other.m_height); |
2629 | std::swap(m_pitch, other.m_pitch); |
2630 | m_pixels.swap(other.m_pixels); |
2631 | return *this; |
2632 | } |
2633 | |
2634 | image &operator= (const image &rhs) |
2635 | { |
2636 | if (this != &rhs) |
2637 | { |
2638 | m_width = rhs.m_width; |
2639 | m_height = rhs.m_height; |
2640 | m_pitch = rhs.m_pitch; |
2641 | m_pixels = rhs.m_pixels; |
2642 | } |
2643 | return *this; |
2644 | } |
2645 | |
2646 | image &clear() |
2647 | { |
2648 | m_width = 0; |
2649 | m_height = 0; |
2650 | m_pitch = 0; |
2651 | clear_vector(m_pixels); |
2652 | return *this; |
2653 | } |
2654 | |
2655 | image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color) |
2656 | { |
2657 | return crop(w, h, p, background); |
2658 | } |
2659 | |
2660 | image &set_all(const color_rgba &c) |
2661 | { |
2662 | for (uint32_t i = 0; i < m_pixels.size(); i++) |
2663 | m_pixels[i] = c; |
2664 | return *this; |
2665 | } |
2666 | |
2667 | void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) |
2668 | { |
2669 | assert(comps >= 1 && comps <= 4); |
2670 | |
2671 | resize(width, height); |
2672 | |
2673 | for (uint32_t y = 0; y < height; y++) |
2674 | { |
2675 | for (uint32_t x = 0; x < width; x++) |
2676 | { |
2677 | const uint8_t *pSrc = &pImage[(x + y * width) * comps]; |
2678 | color_rgba &dst = (*this)(x, y); |
2679 | |
2680 | if (comps == 1) |
2681 | { |
2682 | dst.r = pSrc[0]; |
2683 | dst.g = pSrc[0]; |
2684 | dst.b = pSrc[0]; |
2685 | dst.a = 255; |
2686 | } |
2687 | else if (comps == 2) |
2688 | { |
2689 | dst.r = pSrc[0]; |
2690 | dst.g = pSrc[0]; |
2691 | dst.b = pSrc[0]; |
2692 | dst.a = pSrc[1]; |
2693 | } |
2694 | else |
2695 | { |
2696 | dst.r = pSrc[0]; |
2697 | dst.g = pSrc[1]; |
2698 | dst.b = pSrc[2]; |
2699 | if (comps == 4) |
2700 | dst.a = pSrc[3]; |
2701 | else |
2702 | dst.a = 255; |
2703 | } |
2704 | } |
2705 | } |
2706 | } |
2707 | |
2708 | image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) |
2709 | { |
2710 | for (uint32_t iy = 0; iy < h; iy++) |
2711 | for (uint32_t ix = 0; ix < w; ix++) |
2712 | set_clipped(x + ix, y + iy, c); |
2713 | return *this; |
2714 | } |
2715 | |
2716 | image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c) |
2717 | { |
2718 | for (uint32_t iy = 0; iy < h; iy++) |
2719 | for (uint32_t ix = 0; ix < w; ix++) |
2720 | set_clipped_alpha(x + ix, y + iy, c); |
2721 | return *this; |
2722 | } |
2723 | |
2724 | image &crop_dup_borders(uint32_t w, uint32_t h) |
2725 | { |
2726 | const uint32_t orig_w = m_width, orig_h = m_height; |
2727 | |
2728 | crop(w, h); |
2729 | |
2730 | if (orig_w && orig_h) |
2731 | { |
2732 | if (m_width > orig_w) |
2733 | { |
2734 | for (uint32_t x = orig_w; x < m_width; x++) |
2735 | for (uint32_t y = 0; y < m_height; y++) |
2736 | set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); |
2737 | } |
2738 | |
2739 | if (m_height > orig_h) |
2740 | { |
2741 | for (uint32_t y = orig_h; y < m_height; y++) |
2742 | for (uint32_t x = 0; x < m_width; x++) |
2743 | set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U))); |
2744 | } |
2745 | } |
2746 | return *this; |
2747 | } |
2748 | |
2749 | // pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer). |
2750 | image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) |
2751 | { |
2752 | if (p == UINT32_MAX) |
2753 | p = w; |
2754 | |
2755 | clear(); |
2756 | |
2757 | if ((!p) || (!w) || (!h)) |
2758 | return *this; |
2759 | |
2760 | m_pixels.grant_ownership(pPixels, p * h, p * h); |
2761 | |
2762 | m_width = w; |
2763 | m_height = h; |
2764 | m_pitch = p; |
2765 | |
2766 | return *this; |
2767 | } |
2768 | |
2769 | image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true) |
2770 | { |
2771 | if (p == UINT32_MAX) |
2772 | p = w; |
2773 | |
2774 | if ((w == m_width) && (m_height == h) && (m_pitch == p)) |
2775 | return *this; |
2776 | |
2777 | if ((!w) || (!h) || (!p)) |
2778 | { |
2779 | clear(); |
2780 | return *this; |
2781 | } |
2782 | |
2783 | color_rgba_vec cur_state; |
2784 | cur_state.swap(m_pixels); |
2785 | |
2786 | m_pixels.resize(p * h); |
2787 | |
2788 | if (init_image) |
2789 | { |
2790 | if (m_width || m_height) |
2791 | { |
2792 | for (uint32_t y = 0; y < h; y++) |
2793 | { |
2794 | for (uint32_t x = 0; x < w; x++) |
2795 | { |
2796 | if ((x < m_width) && (y < m_height)) |
2797 | m_pixels[x + y * p] = cur_state[x + y * m_pitch]; |
2798 | else |
2799 | m_pixels[x + y * p] = background; |
2800 | } |
2801 | } |
2802 | } |
2803 | else |
2804 | { |
2805 | m_pixels.set_all(background); |
2806 | } |
2807 | } |
2808 | |
2809 | m_width = w; |
2810 | m_height = h; |
2811 | m_pitch = p; |
2812 | |
2813 | return *this; |
2814 | } |
2815 | |
2816 | inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
2817 | inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
2818 | |
2819 | inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
2820 | inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
2821 | |
2822 | inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const |
2823 | { |
2824 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
2825 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
2826 | return m_pixels[x + y * m_pitch]; |
2827 | } |
2828 | |
2829 | inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) |
2830 | { |
2831 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
2832 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
2833 | return m_pixels[x + y * m_pitch]; |
2834 | } |
2835 | |
2836 | inline image &set_clipped(int x, int y, const color_rgba &c) |
2837 | { |
2838 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) |
2839 | (*this)(x, y) = c; |
2840 | return *this; |
2841 | } |
2842 | |
2843 | inline image& set_clipped_alpha(int x, int y, const color_rgba& c) |
2844 | { |
2845 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) |
2846 | (*this)(x, y).m_comps[3] = c.m_comps[3]; |
2847 | return *this; |
2848 | } |
2849 | |
2850 | // Very straightforward blit with full clipping. Not fast, but it works. |
2851 | image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) |
2852 | { |
2853 | for (int y = 0; y < src_h; y++) |
2854 | { |
2855 | const int sy = src_y + y; |
2856 | if (sy < 0) |
2857 | continue; |
2858 | else if (sy >= (int)src.get_height()) |
2859 | break; |
2860 | |
2861 | for (int x = 0; x < src_w; x++) |
2862 | { |
2863 | const int sx = src_x + x; |
2864 | if (sx < 0) |
2865 | continue; |
2866 | else if (sx >= (int)src.get_height()) |
2867 | break; |
2868 | |
2869 | set_clipped(dst_x + x, dst_y + y, src(sx, sy)); |
2870 | } |
2871 | } |
2872 | |
2873 | return *this; |
2874 | } |
2875 | |
2876 | const image &(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const |
2877 | { |
2878 | if (((src_x + w) > m_width) || ((src_y + h) > m_height)) |
2879 | { |
2880 | // Slower clamping case |
2881 | for (uint32_t y = 0; y < h; y++) |
2882 | for (uint32_t x = 0; x < w; x++) |
2883 | *pDst++ = get_clamped(src_x + x, src_y + y); |
2884 | } |
2885 | else |
2886 | { |
2887 | const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch]; |
2888 | |
2889 | for (uint32_t y = 0; y < h; y++) |
2890 | { |
2891 | memcpy(pDst, pSrc, w * sizeof(color_rgba)); |
2892 | pSrc += m_pitch; |
2893 | pDst += w; |
2894 | } |
2895 | } |
2896 | |
2897 | return *this; |
2898 | } |
2899 | |
2900 | image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) |
2901 | { |
2902 | for (uint32_t y = 0; y < h; y++) |
2903 | for (uint32_t x = 0; x < w; x++) |
2904 | set_clipped(dst_x + x, dst_y + y, *pSrc++); |
2905 | return *this; |
2906 | } |
2907 | |
2908 | inline uint32_t get_width() const { return m_width; } |
2909 | inline uint32_t get_height() const { return m_height; } |
2910 | inline uint32_t get_pitch() const { return m_pitch; } |
2911 | inline uint32_t get_total_pixels() const { return m_width * m_height; } |
2912 | |
2913 | inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } |
2914 | inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } |
2915 | inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } |
2916 | |
2917 | inline const color_rgba_vec &get_pixels() const { return m_pixels; } |
2918 | inline color_rgba_vec &get_pixels() { return m_pixels; } |
2919 | |
2920 | inline const color_rgba *get_ptr() const { return &m_pixels[0]; } |
2921 | inline color_rgba *get_ptr() { return &m_pixels[0]; } |
2922 | |
2923 | bool has_alpha() const |
2924 | { |
2925 | for (uint32_t y = 0; y < m_height; ++y) |
2926 | for (uint32_t x = 0; x < m_width; ++x) |
2927 | if ((*this)(x, y).a < 255) |
2928 | return true; |
2929 | |
2930 | return false; |
2931 | } |
2932 | |
2933 | image &set_alpha(uint8_t a) |
2934 | { |
2935 | for (uint32_t y = 0; y < m_height; ++y) |
2936 | for (uint32_t x = 0; x < m_width; ++x) |
2937 | (*this)(x, y).a = a; |
2938 | return *this; |
2939 | } |
2940 | |
2941 | image &flip_y() |
2942 | { |
2943 | for (uint32_t y = 0; y < m_height / 2; ++y) |
2944 | for (uint32_t x = 0; x < m_width; ++x) |
2945 | std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); |
2946 | return *this; |
2947 | } |
2948 | |
		// TODO: There are many ways to do this, not sure this is the best way.
		// Treats the image as an RGB-encoded unit normal map ([0,255] mapped to [-1,1]) and
		// snaps or renormalizes texels whose decoded vector length has drifted too far from 1.
		// Alpha is never modified.
		image &renormalize_normal_map()
		{
			for (uint32_t y = 0; y < m_height; y++)
			{
				for (uint32_t x = 0; x < m_width; x++)
				{
					color_rgba &c = (*this)(x, y);
					// (128,128,128) decodes to (near) the zero vector; leave it as the canonical "no normal" texel.
					if ((c.r == 128) && (c.g == 128) && (c.b == 128))
						continue;

					// Decode [0,255] to [-1,1].
					vec3F v(c.r, c.g, c.b);
					v = (v * (2.0f / 255.0f)) - vec3F(1.0f);
					v.clamp(-1.0f, 1.0f);

					float length = v.length();
					const float cValidThresh = .077f;
					if (length < cValidThresh)
					{
						// Too short to carry a meaningful direction - snap to the zero-vector encoding.
						c.set(128, 128, 128, c.a);
					}
					else if (fabs(length - 1.0f) > cValidThresh)
					{
						// Length outside tolerance: renormalize and re-encode back to [0,255] with rounding.
						if (length)
							v /= length;

						for (uint32_t i = 0; i < 3; i++)
							c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f));

						// If X and Y re-encoded to exactly 128 (zero), force Z to a pole so the
						// texel doesn't land on the reserved (128,128,128) zero encoding.
						if ((c.g == 128) && (c.r == 128))
						{
							if (c.b < 128)
								c.b = 0;
							else
								c.b = 255;
						}
					}
				}
			}
			return *this;
		}
2990 | |
2991 | void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); |
2992 | |
2993 | private: |
2994 | uint32_t m_width, m_height, m_pitch; // all in pixels |
2995 | color_rgba_vec m_pixels; |
2996 | }; |
2997 | |
2998 | // Float images |
2999 | |
3000 | typedef basisu::vector<vec4F> vec4F_vec; |
3001 | |
3002 | class imagef |
3003 | { |
3004 | public: |
3005 | imagef() : |
3006 | m_width(0), m_height(0), m_pitch(0) |
3007 | { |
3008 | } |
3009 | |
3010 | imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : |
3011 | m_width(0), m_height(0), m_pitch(0) |
3012 | { |
3013 | resize(w, h, p); |
3014 | } |
3015 | |
3016 | imagef(const imagef &other) : |
3017 | m_width(0), m_height(0), m_pitch(0) |
3018 | { |
3019 | *this = other; |
3020 | } |
3021 | |
3022 | imagef &swap(imagef &other) |
3023 | { |
3024 | std::swap(m_width, other.m_width); |
3025 | std::swap(m_height, other.m_height); |
3026 | std::swap(m_pitch, other.m_pitch); |
3027 | m_pixels.swap(other.m_pixels); |
3028 | return *this; |
3029 | } |
3030 | |
3031 | imagef &operator= (const imagef &rhs) |
3032 | { |
3033 | if (this != &rhs) |
3034 | { |
3035 | m_width = rhs.m_width; |
3036 | m_height = rhs.m_height; |
3037 | m_pitch = rhs.m_pitch; |
3038 | m_pixels = rhs.m_pixels; |
3039 | } |
3040 | return *this; |
3041 | } |
3042 | |
3043 | imagef &clear() |
3044 | { |
3045 | m_width = 0; |
3046 | m_height = 0; |
3047 | m_pitch = 0; |
3048 | clear_vector(m_pixels); |
3049 | return *this; |
3050 | } |
3051 | |
3052 | imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0)) |
3053 | { |
3054 | const uint32_t width = src.get_width(); |
3055 | const uint32_t height = src.get_height(); |
3056 | |
3057 | resize(width, height); |
3058 | |
3059 | for (int y = 0; y < (int)height; y++) |
3060 | { |
3061 | for (uint32_t x = 0; x < width; x++) |
3062 | { |
3063 | const color_rgba &src_pixel = src(x, y); |
3064 | (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); |
3065 | } |
3066 | } |
3067 | |
3068 | return *this; |
3069 | } |
3070 | |
3071 | imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) |
3072 | { |
3073 | return resize(other.get_width(), other.get_height(), p, background); |
3074 | } |
3075 | |
3076 | imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) |
3077 | { |
3078 | return crop(w, h, p, background); |
3079 | } |
3080 | |
3081 | imagef &set_all(const vec4F &c) |
3082 | { |
3083 | for (uint32_t i = 0; i < m_pixels.size(); i++) |
3084 | m_pixels[i] = c; |
3085 | return *this; |
3086 | } |
3087 | |
3088 | imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c) |
3089 | { |
3090 | for (uint32_t iy = 0; iy < h; iy++) |
3091 | for (uint32_t ix = 0; ix < w; ix++) |
3092 | set_clipped(x + ix, y + iy, c); |
3093 | return *this; |
3094 | } |
3095 | |
3096 | imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) |
3097 | { |
3098 | if (p == UINT32_MAX) |
3099 | p = w; |
3100 | |
3101 | if ((w == m_width) && (m_height == h) && (m_pitch == p)) |
3102 | return *this; |
3103 | |
3104 | if ((!w) || (!h) || (!p)) |
3105 | { |
3106 | clear(); |
3107 | return *this; |
3108 | } |
3109 | |
3110 | vec4F_vec cur_state; |
3111 | cur_state.swap(m_pixels); |
3112 | |
3113 | m_pixels.resize(p * h); |
3114 | |
3115 | for (uint32_t y = 0; y < h; y++) |
3116 | { |
3117 | for (uint32_t x = 0; x < w; x++) |
3118 | { |
3119 | if ((x < m_width) && (y < m_height)) |
3120 | m_pixels[x + y * p] = cur_state[x + y * m_pitch]; |
3121 | else |
3122 | m_pixels[x + y * p] = background; |
3123 | } |
3124 | } |
3125 | |
3126 | m_width = w; |
3127 | m_height = h; |
3128 | m_pitch = p; |
3129 | |
3130 | return *this; |
3131 | } |
3132 | |
3133 | inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
3134 | inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } |
3135 | |
3136 | inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
3137 | inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); } |
3138 | |
3139 | inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const |
3140 | { |
3141 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
3142 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
3143 | return m_pixels[x + y * m_pitch]; |
3144 | } |
3145 | |
3146 | inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) |
3147 | { |
3148 | x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); |
3149 | y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); |
3150 | return m_pixels[x + y * m_pitch]; |
3151 | } |
3152 | |
3153 | inline imagef &set_clipped(int x, int y, const vec4F &c) |
3154 | { |
3155 | if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) |
3156 | (*this)(x, y) = c; |
3157 | return *this; |
3158 | } |
3159 | |
3160 | // Very straightforward blit with full clipping. Not fast, but it works. |
3161 | imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) |
3162 | { |
3163 | for (int y = 0; y < src_h; y++) |
3164 | { |
3165 | const int sy = src_y + y; |
3166 | if (sy < 0) |
3167 | continue; |
3168 | else if (sy >= (int)src.get_height()) |
3169 | break; |
3170 | |
3171 | for (int x = 0; x < src_w; x++) |
3172 | { |
3173 | const int sx = src_x + x; |
3174 | if (sx < 0) |
3175 | continue; |
3176 | else if (sx >= (int)src.get_height()) |
3177 | break; |
3178 | |
3179 | set_clipped(dst_x + x, dst_y + y, src(sx, sy)); |
3180 | } |
3181 | } |
3182 | |
3183 | return *this; |
3184 | } |
3185 | |
3186 | const imagef &(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const |
3187 | { |
3188 | for (uint32_t y = 0; y < h; y++) |
3189 | for (uint32_t x = 0; x < w; x++) |
3190 | *pDst++ = get_clamped(src_x + x, src_y + y); |
3191 | return *this; |
3192 | } |
3193 | |
3194 | imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) |
3195 | { |
3196 | for (uint32_t y = 0; y < h; y++) |
3197 | for (uint32_t x = 0; x < w; x++) |
3198 | set_clipped(dst_x + x, dst_y + y, *pSrc++); |
3199 | return *this; |
3200 | } |
3201 | |
3202 | inline uint32_t get_width() const { return m_width; } |
3203 | inline uint32_t get_height() const { return m_height; } |
3204 | inline uint32_t get_pitch() const { return m_pitch; } |
3205 | inline uint32_t get_total_pixels() const { return m_width * m_height; } |
3206 | |
3207 | inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } |
3208 | inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } |
3209 | inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } |
3210 | |
3211 | inline const vec4F_vec &get_pixels() const { return m_pixels; } |
3212 | inline vec4F_vec &get_pixels() { return m_pixels; } |
3213 | |
3214 | inline const vec4F *get_ptr() const { return &m_pixels[0]; } |
3215 | inline vec4F *get_ptr() { return &m_pixels[0]; } |
3216 | |
3217 | private: |
3218 | uint32_t m_width, m_height, m_pitch; // all in pixels |
3219 | vec4F_vec m_pixels; |
3220 | }; |
3221 | |
3222 | // Image metrics |
3223 | |
3224 | class image_metrics |
3225 | { |
3226 | public: |
3227 | // TODO: Add ssim |
3228 | float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; |
3229 | |
3230 | image_metrics() |
3231 | { |
3232 | clear(); |
3233 | } |
3234 | |
3235 | void clear() |
3236 | { |
3237 | m_max = 0; |
3238 | m_mean = 0; |
3239 | m_mean_squared = 0; |
3240 | m_rms = 0; |
3241 | m_psnr = 0; |
3242 | m_ssim = 0; |
3243 | } |
3244 | |
3245 | void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n" , pPrefix ? pPrefix : "" , m_max, m_mean, m_rms, m_psnr); } |
3246 | |
3247 | void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false); |
3248 | }; |
3249 | |
3250 | // Image saving/loading/resampling |
3251 | |
	// Decodes a PNG from a memory buffer; pFilename is optional and only for diagnostics.
	bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr);
	bool load_png(const char* pFilename, image& img);
	inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); }

	bool load_tga(const char* pFilename, image& img);
	inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }

	bool load_jpg(const char *pFilename, image& img);
	inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }

	// Currently loads .PNG, .TGA, or .JPG
	bool load_image(const char* pFilename, image& img);
	inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }

	// Decodes a TGA into a returned pixel buffer and its dimensions/channel count.
	// NOTE(review): presumably the returned buffer is heap-allocated and owned by the caller - confirm in the .cpp.
	uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
	uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);

	// Flag bits for save_png()'s image_save_flags parameter.
	enum
	{
		cImageSaveGrayscale = 1,
		cImageSaveIgnoreAlpha = 2
	};

	bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0);
	inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }

	// Whole-file read/write helpers; return false on I/O failure.
	bool read_file_to_vec(const char* pFilename, uint8_vec& data);

	bool write_data_to_file(const char* pFilename, const void* pData, size_t len);

	inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "" , 0); }

	// sRGB <-> linear transfer function helpers.
	float linear_to_srgb(float l);
	float srgb_to_linear(float s);

	// Resamples src into dst using the named filter kernel.
	bool image_resample(const image &src, image &dst, bool srgb = false,
		const char *pFilter = "lanczos4" , float filter_scale = 1.0f,
		bool wrapping = false,
		uint32_t first_comp = 0, uint32_t num_comps = 4);
3291 | |
3292 | // Timing |
3293 | |
3294 | typedef uint64_t timer_ticks; |
3295 | |
	// Simple start/stop wall-clock interval timer built on a platform tick counter.
	// (Member definitions live in the .cpp; behavior notes below are hedged accordingly.)
	class interval_timer
	{
	public:
		interval_timer();

		// Begins (or restarts) timing.
		void start();
		// Stops timing.
		void stop();

		// Elapsed seconds for the current interval.
		// NOTE(review): exact stopped-vs-running semantics are defined in the .cpp - confirm there.
		double get_elapsed_secs() const;
		inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); }

		// One-time global initialization of the tick frequency.
		static void init();
		static inline timer_ticks get_ticks_per_sec() { return g_freq; }
		static timer_ticks get_ticks();
		static double ticks_to_secs(timer_ticks ticks);
		static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; }

	private:
		// Global tick origin and frequency, set up by init().
		static timer_ticks g_init_ticks, g_freq;
		static double g_timer_freq;

		timer_ticks m_start_time, m_stop_time;

		bool m_started, m_stopped;
	};
3321 | |
3322 | // 2D array |
3323 | |
3324 | template<typename T> |
3325 | class vector2D |
3326 | { |
3327 | typedef basisu::vector<T> TVec; |
3328 | |
3329 | uint32_t m_width, m_height; |
3330 | TVec m_values; |
3331 | |
3332 | public: |
3333 | vector2D() : |
3334 | m_width(0), |
3335 | m_height(0) |
3336 | { |
3337 | } |
3338 | |
3339 | vector2D(uint32_t w, uint32_t h) : |
3340 | m_width(0), |
3341 | m_height(0) |
3342 | { |
3343 | resize(w, h); |
3344 | } |
3345 | |
3346 | vector2D(const vector2D &other) |
3347 | { |
3348 | *this = other; |
3349 | } |
3350 | |
3351 | vector2D &operator= (const vector2D &other) |
3352 | { |
3353 | if (this != &other) |
3354 | { |
3355 | m_width = other.m_width; |
3356 | m_height = other.m_height; |
3357 | m_values = other.m_values; |
3358 | } |
3359 | return *this; |
3360 | } |
3361 | |
3362 | inline bool operator== (const vector2D &rhs) const |
3363 | { |
3364 | return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values); |
3365 | } |
3366 | |
3367 | inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); } |
3368 | |
3369 | inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } |
3370 | inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } |
3371 | |
3372 | inline const T &operator[] (uint32_t i) const { return m_values[i]; } |
3373 | inline T &operator[] (uint32_t i) { return m_values[i]; } |
3374 | |
3375 | inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } |
3376 | inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); } |
3377 | |
3378 | void clear() |
3379 | { |
3380 | m_width = 0; |
3381 | m_height = 0; |
3382 | m_values.clear(); |
3383 | } |
3384 | |
3385 | void set_all(const T&val) |
3386 | { |
3387 | vector_set_all(m_values, val); |
3388 | } |
3389 | |
3390 | inline const T* get_ptr() const { return &m_values[0]; } |
3391 | inline T* get_ptr() { return &m_values[0]; } |
3392 | |
3393 | vector2D &resize(uint32_t new_width, uint32_t new_height) |
3394 | { |
3395 | if ((m_width == new_width) && (m_height == new_height)) |
3396 | return *this; |
3397 | |
3398 | TVec oldVals(new_width * new_height); |
3399 | oldVals.swap(m_values); |
3400 | |
3401 | const uint32_t w = minimum(m_width, new_width); |
3402 | const uint32_t h = minimum(m_height, new_height); |
3403 | |
3404 | if ((w) && (h)) |
3405 | { |
3406 | for (uint32_t y = 0; y < h; y++) |
3407 | for (uint32_t x = 0; x < w; x++) |
3408 | m_values[x + y * new_width] = oldVals[x + y * m_width]; |
3409 | } |
3410 | |
3411 | m_width = new_width; |
3412 | m_height = new_height; |
3413 | |
3414 | return *this; |
3415 | } |
3416 | }; |
3417 | |
3418 | inline FILE *fopen_safe(const char *pFilename, const char *pMode) |
3419 | { |
3420 | #ifdef _WIN32 |
3421 | FILE *pFile = nullptr; |
3422 | fopen_s(&pFile, pFilename, pMode); |
3423 | return pFile; |
3424 | #else |
3425 | return fopen(pFilename, pMode); |
3426 | #endif |
3427 | } |
3428 | |
3429 | void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); |
3430 | |
3431 | const uint32_t cPixelBlockWidth = 4; |
3432 | const uint32_t cPixelBlockHeight = 4; |
3433 | const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight; |
3434 | |
3435 | struct pixel_block |
3436 | { |
3437 | color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] |
3438 | |
3439 | inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } |
3440 | inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } |
3441 | |
3442 | inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; } |
3443 | inline color_rgba* get_ptr() { return &m_pixels[0][0]; } |
3444 | |
3445 | inline void clear() { clear_obj(*this); } |
3446 | |
3447 | inline bool operator== (const pixel_block& rhs) const |
3448 | { |
3449 | return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0; |
3450 | } |
3451 | }; |
3452 | typedef basisu::vector<pixel_block> pixel_block_vec; |
3453 | |
3454 | } // namespace basisu |
3455 | |
3456 | |
3457 | |