1// basisu_enc.h
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15#pragma once
16#include "../transcoder/basisu.h"
17#include "../transcoder/basisu_transcoder_internal.h"
18
19#include <mutex>
20#include <atomic>
21#include <condition_variable>
22#include <functional>
23#include <thread>
24#include <unordered_map>
25#include <ostream>
26
27#if !defined(_WIN32) || defined(__MINGW32__)
28#include <libgen.h>
29#endif
30
31// This module is really just a huge grab bag of classes and helper functions needed by the encoder.
32
33// If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time.
34#define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0)
35
36#if BASISU_SUPPORT_SSE
37// Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors.
38extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16);
39#endif
40
41namespace basisu
42{
43 extern uint8_t g_hamming_dist[256];
44 extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8];
45
46 // true if basisu_encoder_init() has been called and returned.
47 extern bool g_library_initialized;
48
49 // Encoder library initialization.
50 // This function MUST be called before encoding anything!
51 void basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
52 void basisu_encoder_deinit();
53
54 // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1
55 extern void detect_sse41();
56
57#if BASISU_SUPPORT_SSE
58 extern bool g_cpu_supports_sse41;
59#else
60 const bool g_cpu_supports_sse41 = false;
61#endif
62
63 void error_vprintf(const char* pFmt, va_list args);
64 void error_printf(const char *pFmt, ...);
65
66 // Helpers
67
68 inline uint8_t clamp255(int32_t i)
69 {
70 return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
71 }
72
73 inline int32_t clampi(int32_t value, int32_t low, int32_t high)
74 {
75 if (value < low)
76 value = low;
77 else if (value > high)
78 value = high;
79 return value;
80 }
81
82 inline uint8_t mul_8(uint32_t v, uint32_t a)
83 {
84 v = v * a + 128;
85 return (uint8_t)((v + (v >> 8)) >> 8);
86 }
87
88 inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
89 {
90 assert(codesize <= 64);
91 uint64_t bits = 0;
92 uint32_t total_bits = 0;
93
94 while (total_bits < codesize)
95 {
96 uint32_t byte_bit_offset = bit_offset & 7;
97 uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
98
99 uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
100 byte_bits &= ((1 << bits_to_read) - 1);
101
102 bits |= ((uint64_t)(byte_bits) << total_bits);
103
104 total_bits += bits_to_read;
105 bit_offset += bits_to_read;
106 }
107
108 return bits;
109 }
110
111 inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
112 {
113 assert(codesize <= 32);
114 uint32_t bits = 0;
115 uint32_t total_bits = 0;
116
117 while (total_bits < codesize)
118 {
119 uint32_t byte_bit_offset = bit_offset & 7;
120 uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
121
122 uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
123 byte_bits &= ((1 << bits_to_read) - 1);
124
125 bits |= (byte_bits << total_bits);
126
127 total_bits += bits_to_read;
128 bit_offset += bits_to_read;
129 }
130
131 return bits;
132 }
133
134 // Hashing
135
136 inline uint32_t bitmix32c(uint32_t v)
137 {
138 v = (v + 0x7ed55d16) + (v << 12);
139 v = (v ^ 0xc761c23c) ^ (v >> 19);
140 v = (v + 0x165667b1) + (v << 5);
141 v = (v + 0xd3a2646c) ^ (v << 9);
142 v = (v + 0xfd7046c5) + (v << 3);
143 v = (v ^ 0xb55a4f09) ^ (v >> 16);
144 return v;
145 }
146
147 inline uint32_t bitmix32(uint32_t v)
148 {
149 v -= (v << 6);
150 v ^= (v >> 17);
151 v -= (v << 9);
152 v ^= (v << 4);
153 v -= (v << 3);
154 v ^= (v << 10);
155 v ^= (v >> 15);
156 return v;
157 }
158
159 inline uint32_t wang_hash(uint32_t seed)
160 {
161 seed = (seed ^ 61) ^ (seed >> 16);
162 seed *= 9;
163 seed = seed ^ (seed >> 4);
164 seed *= 0x27d4eb2d;
165 seed = seed ^ (seed >> 15);
166 return seed;
167 }
168
169 uint32_t hash_hsieh(const uint8_t* pBuf, size_t len);
170
171 template <typename Key>
172 struct bit_hasher
173 {
174 std::size_t operator()(const Key& k) const
175 {
176 return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));
177 }
178 };
179
180 class running_stat
181 {
182 public:
183 running_stat() { clear(); }
184
185 void clear()
186 {
187 m_n = 0;
188 m_total = 0;
189 m_old_m = 0;
190 m_new_m = 0;
191 m_old_s = 0;
192 m_new_s = 0;
193 m_min = 0;
194 m_max = 0;
195 }
196
197 void push(double x)
198 {
199 m_n++;
200 m_total += x;
201 if (m_n == 1)
202 {
203 m_old_m = m_new_m = x;
204 m_old_s = 0.0;
205 m_min = x;
206 m_max = x;
207 }
208 else
209 {
210 // See Knuth TAOCP vol 2, 3rd edition, page 232
211 m_new_m = m_old_m + (x - m_old_m) / m_n;
212 m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m);
213 m_old_m = m_new_m;
214 m_old_s = m_new_s;
215 m_min = basisu::minimum(x, m_min);
216 m_max = basisu::maximum(x, m_max);
217 }
218 }
219
220 uint32_t get_num() const
221 {
222 return m_n;
223 }
224
225 double get_total() const
226 {
227 return m_total;
228 }
229
230 double get_mean() const
231 {
232 return (m_n > 0) ? m_new_m : 0.0;
233 }
234
235 // Returns sample variance
236 double get_variance() const
237 {
238 return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0);
239 }
240
241 double get_std_dev() const
242 {
243 return sqrt(get_variance());
244 }
245
246 double get_min() const
247 {
248 return m_min;
249 }
250
251 double get_max() const
252 {
253 return m_max;
254 }
255
256 private:
257 uint32_t m_n;
258 double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max;
259 };
260
261 // Linear algebra
262
263 template <uint32_t N, typename T>
264 class vec
265 {
266 protected:
267 T m_v[N];
268
269 public:
270 enum { num_elements = N };
271
272 inline vec() { }
273 inline vec(eZero) { set_zero(); }
274
275 explicit inline vec(T val) { set(val); }
276 inline vec(T v0, T v1) { set(v0, v1); }
277 inline vec(T v0, T v1, T v2) { set(v0, v1, v2); }
278 inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); }
279 inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }
280 template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }
281
282 inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
283 inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }
284
285 inline T getX() const { return m_v[0]; }
286 inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; }
287 inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; }
288 inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }
289
290 inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; }
291 inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
292
293 inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
294
295 template <uint32_t OtherN, typename OtherT>
296 inline vec &set(const vec<OtherN, OtherT> &other)
297 {
298 uint32_t i;
299 if ((const void *)(&other) == (const void *)(this))
300 return *this;
301 const uint32_t m = minimum(OtherN, N);
302 for (i = 0; i < m; i++)
303 m_v[i] = static_cast<T>(other[i]);
304 for (; i < N; i++)
305 m_v[i] = 0;
306 return *this;
307 }
308
309 inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; }
310 inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; }
311 inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; }
312
313 inline vec &set(T v0, T v1)
314 {
315 m_v[0] = v0;
316 if (N >= 2)
317 {
318 m_v[1] = v1;
319 clear_elements(2, N);
320 }
321 return *this;
322 }
323
324 inline vec &set(T v0, T v1, T v2)
325 {
326 m_v[0] = v0;
327 if (N >= 2)
328 {
329 m_v[1] = v1;
330 if (N >= 3)
331 {
332 m_v[2] = v2;
333 clear_elements(3, N);
334 }
335 }
336 return *this;
337 }
338
339 inline vec &set(T v0, T v1, T v2, T v3)
340 {
341 m_v[0] = v0;
342 if (N >= 2)
343 {
344 m_v[1] = v1;
345 if (N >= 3)
346 {
347 m_v[2] = v2;
348
349 if (N >= 4)
350 {
351 m_v[3] = v3;
352 clear_elements(5, N);
353 }
354 }
355 }
356 return *this;
357 }
358
359 inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }
360 template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }
361
362 inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }
363 inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }
364
365 inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }
366 inline vec operator+ () const { return *this; }
367 inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }
368 inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }
369 inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }
370 inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }
371 inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }
372 inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }
373
374 friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }
375 friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }
376 friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }
377 friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }
378 friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }
379 friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }
380
381 static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }
382
383 inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }
384
385 inline T norm() const { return dot_product(*this, *this); }
386 inline T length() const { return sqrt(norm()); }
387
388 inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }
389 inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; }
390
391 inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
392 inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
393
394 inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
395
396 inline vec &clamp(T l, T h)
397 {
398 for (uint32_t i = 0; i < N; i++)
399 m_v[i] = basisu::clamp(m_v[i], l, h);
400 return *this;
401 }
402
403 static vec component_min(const vec& a, const vec& b)
404 {
405 vec res;
406 for (uint32_t i = 0; i < N; i++)
407 res[i] = minimum(a[i], b[i]);
408 return res;
409 }
410
411 static vec component_max(const vec& a, const vec& b)
412 {
413 vec res;
414 for (uint32_t i = 0; i < N; i++)
415 res[i] = maximum(a[i], b[i]);
416 return res;
417 }
418 };
419
420 typedef vec<4, double> vec4D;
421 typedef vec<3, double> vec3D;
422 typedef vec<2, double> vec2D;
423 typedef vec<1, double> vec1D;
424
425 typedef vec<4, float> vec4F;
426 typedef vec<3, float> vec3F;
427 typedef vec<2, float> vec2F;
428 typedef vec<1, float> vec1F;
429
430 typedef vec<16, float> vec16F;
431
432 template <uint32_t Rows, uint32_t Cols, typename T>
433 class matrix
434 {
435 public:
436 typedef vec<Rows, T> col_vec;
437 typedef vec<Cols, T> row_vec;
438
439 typedef T scalar_type;
440
441 enum { rows = Rows, cols = Cols };
442
443 protected:
444 row_vec m_r[Rows];
445
446 public:
447 inline matrix() {}
448 inline matrix(eZero) { set_zero(); }
449 inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; }
450 inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; }
451
452 inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
453 inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
454
455 inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; }
456 inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; }
457
458 inline matrix &set_zero()
459 {
460 for (uint32_t i = 0; i < Rows; i++)
461 m_r[i].set_zero();
462 return *this;
463 }
464
465 inline matrix &set_identity()
466 {
467 for (uint32_t i = 0; i < Rows; i++)
468 {
469 m_r[i].set_zero();
470 if (i < Cols)
471 m_r[i][i] = 1.0f;
472 }
473 return *this;
474 }
475 };
476
477 template<uint32_t N, typename VectorType>
478 inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)
479 {
480 VectorType axis;
481 if (N == 1)
482 axis.set(1.0f);
483 else
484 {
485 for (uint32_t i = 0; i < N; i++)
486 axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1)));
487 }
488
489 VectorType prev_axis(axis);
490
491 // Power iterations
492 for (uint32_t power_iter = 0; power_iter < 8; power_iter++)
493 {
494 VectorType trial_axis;
495 double max_sum = 0;
496
497 for (uint32_t i = 0; i < N; i++)
498 {
499 double sum = 0;
500 for (uint32_t j = 0; j < N; j++)
501 sum += cmatrix[i][j] * axis[j];
502
503 trial_axis[i] = static_cast<float>(sum);
504
505 max_sum = maximum(fabs(sum), max_sum);
506 }
507
508 if (max_sum != 0.0f)
509 trial_axis *= static_cast<float>(1.0f / max_sum);
510
511 VectorType delta_axis(prev_axis - trial_axis);
512
513 prev_axis = axis;
514 axis = trial_axis;
515
516 if (delta_axis.norm() < .0024f)
517 break;
518 }
519
520 return axis.normalize_in_place();
521 }
522
523 template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys)
524 {
525 for (uint32_t i = 0; i < num_indices; i++)
526 pIndices[i] = i;
527
528 std::sort(
529 pIndices,
530 pIndices + num_indices,
531 [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; }
532 );
533 }
534
535 // 1-4 byte direct Radix sort.
536 template <typename T>
537 T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size)
538 {
539 assert(key_ofs < sizeof(T));
540 assert((key_size >= 1) && (key_size <= 4));
541
542 uint32_t hist[256 * 4];
543
544 memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
545
546#define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs))
547
548 if (key_size == 4)
549 {
550 T* p = pBuf0;
551 T* q = pBuf0 + num_vals;
552 for (; p != q; p++)
553 {
554 const uint32_t key = BASISU_GET_KEY(p);
555
556 hist[key & 0xFF]++;
557 hist[256 + ((key >> 8) & 0xFF)]++;
558 hist[512 + ((key >> 16) & 0xFF)]++;
559 hist[768 + ((key >> 24) & 0xFF)]++;
560 }
561 }
562 else if (key_size == 3)
563 {
564 T* p = pBuf0;
565 T* q = pBuf0 + num_vals;
566 for (; p != q; p++)
567 {
568 const uint32_t key = BASISU_GET_KEY(p);
569
570 hist[key & 0xFF]++;
571 hist[256 + ((key >> 8) & 0xFF)]++;
572 hist[512 + ((key >> 16) & 0xFF)]++;
573 }
574 }
575 else if (key_size == 2)
576 {
577 T* p = pBuf0;
578 T* q = pBuf0 + (num_vals >> 1) * 2;
579
580 for (; p != q; p += 2)
581 {
582 const uint32_t key0 = BASISU_GET_KEY(p);
583 const uint32_t key1 = BASISU_GET_KEY(p + 1);
584
585 hist[key0 & 0xFF]++;
586 hist[256 + ((key0 >> 8) & 0xFF)]++;
587
588 hist[key1 & 0xFF]++;
589 hist[256 + ((key1 >> 8) & 0xFF)]++;
590 }
591
592 if (num_vals & 1)
593 {
594 const uint32_t key = BASISU_GET_KEY(p);
595
596 hist[key & 0xFF]++;
597 hist[256 + ((key >> 8) & 0xFF)]++;
598 }
599 }
600 else
601 {
602 assert(key_size == 1);
603 if (key_size != 1)
604 return NULL;
605
606 T* p = pBuf0;
607 T* q = pBuf0 + (num_vals >> 1) * 2;
608
609 for (; p != q; p += 2)
610 {
611 const uint32_t key0 = BASISU_GET_KEY(p);
612 const uint32_t key1 = BASISU_GET_KEY(p + 1);
613
614 hist[key0 & 0xFF]++;
615 hist[key1 & 0xFF]++;
616 }
617
618 if (num_vals & 1)
619 {
620 const uint32_t key = BASISU_GET_KEY(p);
621 hist[key & 0xFF]++;
622 }
623 }
624
625 T* pCur = pBuf0;
626 T* pNew = pBuf1;
627
628 for (uint32_t pass = 0; pass < key_size; pass++)
629 {
630 const uint32_t* pHist = &hist[pass << 8];
631
632 uint32_t offsets[256];
633
634 uint32_t cur_ofs = 0;
635 for (uint32_t i = 0; i < 256; i += 2)
636 {
637 offsets[i] = cur_ofs;
638 cur_ofs += pHist[i];
639
640 offsets[i + 1] = cur_ofs;
641 cur_ofs += pHist[i + 1];
642 }
643
644 const uint32_t pass_shift = pass << 3;
645
646 T* p = pCur;
647 T* q = pCur + (num_vals >> 1) * 2;
648
649 for (; p != q; p += 2)
650 {
651 uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
652 uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF;
653
654 if (c0 == c1)
655 {
656 uint32_t dst_offset0 = offsets[c0];
657
658 offsets[c0] = dst_offset0 + 2;
659
660 pNew[dst_offset0] = p[0];
661 pNew[dst_offset0 + 1] = p[1];
662 }
663 else
664 {
665 uint32_t dst_offset0 = offsets[c0]++;
666 uint32_t dst_offset1 = offsets[c1]++;
667
668 pNew[dst_offset0] = p[0];
669 pNew[dst_offset1] = p[1];
670 }
671 }
672
673 if (num_vals & 1)
674 {
675 uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
676
677 uint32_t dst_offset = offsets[c];
678 offsets[c] = dst_offset + 1;
679
680 pNew[dst_offset] = *p;
681 }
682
683 T* t = pCur;
684 pCur = pNew;
685 pNew = t;
686 }
687
688 return pCur;
689 }
690
691#undef BASISU_GET_KEY
692
693 // Very simple job pool with no dependencies.
694 class job_pool
695 {
696 BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool);
697
698 public:
699 // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc.
700 job_pool(uint32_t num_threads);
701 ~job_pool();
702
703 void add_job(const std::function<void()>& job);
704 void add_job(std::function<void()>&& job);
705
706 void wait_for_all();
707
708 size_t get_total_threads() const { return 1 + m_threads.size(); }
709
710 private:
711 std::vector<std::thread> m_threads;
712 std::vector<std::function<void()> > m_queue;
713
714 std::mutex m_mutex;
715 std::condition_variable m_has_work;
716 std::condition_variable m_no_more_jobs;
717
718 uint32_t m_num_active_jobs;
719
720 std::atomic<bool> m_kill_flag;
721
722 void job_thread(uint32_t index);
723 };
724
725 // Simple 32-bit color class
726
727 class color_rgba_i16
728 {
729 public:
730 union
731 {
732 int16_t m_comps[4];
733
734 struct
735 {
736 int16_t r;
737 int16_t g;
738 int16_t b;
739 int16_t a;
740 };
741 };
742
743 inline color_rgba_i16()
744 {
745 static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4");
746 }
747
748 inline color_rgba_i16(int sr, int sg, int sb, int sa)
749 {
750 set(sr, sg, sb, sa);
751 }
752
753 inline color_rgba_i16 &set(int sr, int sg, int sb, int sa)
754 {
755 m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX);
756 m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX);
757 m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX);
758 m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX);
759 return *this;
760 }
761 };
762
763 class color_rgba
764 {
765 public:
766 union
767 {
768 uint8_t m_comps[4];
769
770 struct
771 {
772 uint8_t r;
773 uint8_t g;
774 uint8_t b;
775 uint8_t a;
776 };
777 };
778
779 inline color_rgba()
780 {
781 static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");
782 static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)");
783 }
784
785 // Not too hot about this idea.
786 inline color_rgba(const basist::color32& other) :
787 r(other.r),
788 g(other.g),
789 b(other.b),
790 a(other.a)
791 {
792 }
793
794 color_rgba& operator= (const basist::color32& rhs)
795 {
796 r = rhs.r;
797 g = rhs.g;
798 b = rhs.b;
799 a = rhs.a;
800 return *this;
801 }
802
803 inline color_rgba(int y)
804 {
805 set(y);
806 }
807
808 inline color_rgba(int y, int na)
809 {
810 set(y, na);
811 }
812
813 inline color_rgba(int sr, int sg, int sb, int sa)
814 {
815 set(sr, sg, sb, sa);
816 }
817
818 inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
819 {
820 set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);
821 }
822
823 inline color_rgba& set_noclamp_y(int y)
824 {
825 m_comps[0] = (uint8_t)y;
826 m_comps[1] = (uint8_t)y;
827 m_comps[2] = (uint8_t)y;
828 m_comps[3] = (uint8_t)255;
829 return *this;
830 }
831
832 inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
833 {
834 m_comps[0] = (uint8_t)sr;
835 m_comps[1] = (uint8_t)sg;
836 m_comps[2] = (uint8_t)sb;
837 m_comps[3] = (uint8_t)sa;
838 return *this;
839 }
840
841 inline color_rgba &set(int y)
842 {
843 m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
844 m_comps[1] = m_comps[0];
845 m_comps[2] = m_comps[0];
846 m_comps[3] = 255;
847 return *this;
848 }
849
850 inline color_rgba &set(int y, int na)
851 {
852 m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
853 m_comps[1] = m_comps[0];
854 m_comps[2] = m_comps[0];
855 m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));
856 return *this;
857 }
858
859 inline color_rgba &set(int sr, int sg, int sb, int sa)
860 {
861 m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
862 m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
863 m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
864 m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));
865 return *this;
866 }
867
868 inline color_rgba &set_rgb(int sr, int sg, int sb)
869 {
870 m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
871 m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
872 m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
873 return *this;
874 }
875
876 inline color_rgba &set_rgb(const color_rgba &other)
877 {
878 r = other.r;
879 g = other.g;
880 b = other.b;
881 return *this;
882 }
883
884 inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }
885 inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }
886
887 inline void clear()
888 {
889 m_comps[0] = 0;
890 m_comps[1] = 0;
891 m_comps[2] = 0;
892 m_comps[3] = 0;
893 }
894
895 inline bool operator== (const color_rgba &rhs) const
896 {
897 if (m_comps[0] != rhs.m_comps[0]) return false;
898 if (m_comps[1] != rhs.m_comps[1]) return false;
899 if (m_comps[2] != rhs.m_comps[2]) return false;
900 if (m_comps[3] != rhs.m_comps[3]) return false;
901 return true;
902 }
903
904 inline bool operator!= (const color_rgba &rhs) const
905 {
906 return !(*this == rhs);
907 }
908
909 inline bool operator<(const color_rgba &rhs) const
910 {
911 for (int i = 0; i < 4; i++)
912 {
913 if (m_comps[i] < rhs.m_comps[i])
914 return true;
915 else if (m_comps[i] != rhs.m_comps[i])
916 return false;
917 }
918 return false;
919 }
920
921 inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; }
922 inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }
923 inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }
924
925 inline basist::color32 get_color32() const
926 {
927 return basist::color32(r, g, b, a);
928 }
929
930 static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
931 static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
932 };
933
934 typedef basisu::vector<color_rgba> color_rgba_vec;
935
936 const color_rgba g_black_color(0, 0, 0, 255);
937 const color_rgba g_black_trans_color(0, 0, 0, 0);
938 const color_rgba g_white_color(255, 255, 255, 255);
939
940 inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1)
941 {
942 int dr = r0 - r1, dg = g0 - g1, db = b0 - b1;
943 return dr * dr + dg * dg + db * db;
944 }
945
946 inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1)
947 {
948 int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1;
949 return dr * dr + dg * dg + db * db + da * da;
950 }
951
952 inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha)
953 {
954 if (alpha)
955 return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a);
956 else
957 return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b);
958 }
959
960 // TODO: Allow user to control channel weightings.
961 inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)
962 {
963 if (perceptual)
964 {
965#if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE
966 const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;
967 const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;
968
969 const float cr1 = e1.r - l1;
970 const float cr2 = e2.r - l2;
971
972 const float cb1 = e1.b - l1;
973 const float cb2 = e2.b - l2;
974
975 const float dl = l1 - l2;
976 const float dcr = cr1 - cr2;
977 const float dcb = cb1 - cb2;
978
979 uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);
980
981 if (alpha)
982 {
983 int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);
984 d += static_cast<uint32_t>(128.0f*da*da);
985 }
986
987 return d;
988#elif 1
989 int dr = e1.r - e2.r;
990 int dg = e1.g - e2.g;
991 int db = e1.b - e2.b;
992
993#if 0
994 int delta_l = dr * 27 + dg * 92 + db * 9;
995 int delta_cr = dr * 128 - delta_l;
996 int delta_cb = db * 128 - delta_l;
997
998 uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) +
999 ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) +
1000 ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U);
1001#else
1002 int64_t delta_l = dr * 27 + dg * 92 + db * 9;
1003 int64_t delta_cr = dr * 128 - delta_l;
1004 int64_t delta_cb = db * 128 - delta_l;
1005
1006 uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) +
1007 ((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) +
1008 ((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U);
1009#endif
1010
1011 if (alpha)
1012 {
1013 int da = (e1.a - e2.a) << 7;
1014 // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring.
1015 id += ((uint32_t)(da * da) >> 7U);
1016 }
1017
1018 return id;
1019#else
1020 int dr = e1.r - e2.r;
1021 int dg = e1.g - e2.g;
1022 int db = e1.b - e2.b;
1023
1024 int64_t delta_l = dr * 27 + dg * 92 + db * 9;
1025 int64_t delta_cr = dr * 128 - delta_l;
1026 int64_t delta_cb = db * 128 - delta_l;
1027
1028 int64_t id = ((delta_l * delta_l) * 128) +
1029 ((delta_cr * delta_cr) * 26) +
1030 ((delta_cb * delta_cb) * 3);
1031
1032 if (alpha)
1033 {
1034 int64_t da = (e1.a - e2.a);
1035 id += (da * da) * 128;
1036 }
1037
1038 int d = (id + 8192) >> 14;
1039
1040 return d;
1041#endif
1042 }
1043 else
1044 return color_distance(e1, e2, alpha);
1045 }
1046
1047 static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b)
1048 {
1049 const int dl = a.r - b.r;
1050 const int da = a.a - b.a;
1051 return dl * dl + da * da;
1052 }
1053
1054 // String helpers
1055
1056 inline int string_find_right(const std::string& filename, char c)
1057 {
1058 size_t result = filename.find_last_of(c);
1059 return (result == std::string::npos) ? -1 : (int)result;
1060 }
1061
1062 inline std::string string_get_extension(const std::string &filename)
1063 {
1064 int sep = -1;
1065#ifdef _WIN32
1066 sep = string_find_right(filename, '\\');
1067#endif
1068 if (sep < 0)
1069 sep = string_find_right(filename, '/');
1070
1071 int dot = string_find_right(filename, '.');
1072 if (dot <= sep)
1073 return "";
1074
1075 std::string result(filename);
1076 result.erase(0, dot + 1);
1077
1078 return result;
1079 }
1080
1081 inline bool string_remove_extension(std::string &filename)
1082 {
1083 int sep = -1;
1084#ifdef _WIN32
1085 sep = string_find_right(filename, '\\');
1086#endif
1087 if (sep < 0)
1088 sep = string_find_right(filename, '/');
1089
1090 int dot = string_find_right(filename, '.');
1091 if ((dot < sep) || (dot < 0))
1092 return false;
1093
1094 filename.resize(dot);
1095
1096 return true;
1097 }
1098
1099 inline std::string string_format(const char* pFmt, ...)
1100 {
1101 char buf[2048];
1102
1103 va_list args;
1104 va_start(args, pFmt);
1105#ifdef _WIN32
1106 vsprintf_s(buf, sizeof(buf), pFmt, args);
1107#else
1108 vsnprintf(buf, sizeof(buf), pFmt, args);
1109#endif
1110 va_end(args);
1111
1112 return std::string(buf);
1113 }
1114
1115 inline std::string string_tolower(const std::string& s)
1116 {
1117 std::string result(s);
1118 for (size_t i = 0; i < result.size(); i++)
1119 result[i] = (char)tolower((int)result[i]);
1120 return result;
1121 }
1122
1123 inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc)
1124 {
1125 assert(pDst && pSrc && dst_len);
1126 if (!dst_len)
1127 return pDst;
1128
1129 const size_t src_len = strlen(pSrc);
1130 const size_t src_len_plus_terminator = src_len + 1;
1131
1132 if (src_len_plus_terminator <= dst_len)
1133 memcpy(pDst, pSrc, src_len_plus_terminator);
1134 else
1135 {
1136 if (dst_len > 1)
1137 memcpy(pDst, pSrc, dst_len - 1);
1138 pDst[dst_len - 1] = '\0';
1139 }
1140
1141 return pDst;
1142 }
1143
1144 inline bool string_ends_with(const std::string& s, char c)
1145 {
1146 return (s.size() != 0) && (s.back() == c);
1147 }
1148
1149 inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)
1150 {
1151#ifdef _MSC_VER
1152 char drive_buf[_MAX_DRIVE] = { 0 };
1153 char dir_buf[_MAX_DIR] = { 0 };
1154 char fname_buf[_MAX_FNAME] = { 0 };
1155 char ext_buf[_MAX_EXT] = { 0 };
1156
1157 errno_t error = _splitpath_s(p,
1158 pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
1159 pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
1160 pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
1161 pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
1162 if (error != 0)
1163 return false;
1164
1165 if (pDrive) *pDrive = drive_buf;
1166 if (pDir) *pDir = dir_buf;
1167 if (pFilename) *pFilename = fname_buf;
1168 if (pExt) *pExt = ext_buf;
1169 return true;
1170#else
1171 char dirtmp[1024], nametmp[1024];
1172 strcpy_safe(dirtmp, sizeof(dirtmp), p);
1173 strcpy_safe(nametmp, sizeof(nametmp), p);
1174
1175 if (pDrive)
1176 pDrive->resize(0);
1177
1178 const char *pDirName = dirname(dirtmp);
1179 const char* pBaseName = basename(nametmp);
1180 if ((!pDirName) || (!pBaseName))
1181 return false;
1182
1183 if (pDir)
1184 {
1185 *pDir = pDirName;
1186 if ((pDir->size()) && (pDir->back() != '/'))
1187 *pDir += "/";
1188 }
1189
1190 if (pFilename)
1191 {
1192 *pFilename = pBaseName;
1193 string_remove_extension(*pFilename);
1194 }
1195
1196 if (pExt)
1197 {
1198 *pExt = pBaseName;
1199 *pExt = string_get_extension(*pExt);
1200 if (pExt->size())
1201 *pExt = "." + *pExt;
1202 }
1203
1204 return true;
1205#endif
1206 }
1207
1208 inline bool is_path_separator(char c)
1209 {
1210#ifdef _WIN32
1211 return (c == '/') || (c == '\\');
1212#else
1213 return (c == '/');
1214#endif
1215 }
1216
1217 inline bool is_drive_separator(char c)
1218 {
1219#ifdef _WIN32
1220 return (c == ':');
1221#else
1222 (void)c;
1223 return false;
1224#endif
1225 }
1226
1227 inline void string_combine_path(std::string &dst, const char *p, const char *q)
1228 {
1229 std::string temp(p);
1230 if (temp.size() && !is_path_separator(q[0]))
1231 {
1232 if (!is_path_separator(temp.back()))
1233 temp.append(1, BASISU_PATH_SEPERATOR_CHAR);
1234 }
1235 temp += q;
1236 dst.swap(temp);
1237 }
1238
1239 inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r)
1240 {
1241 string_combine_path(dst, p, q);
1242 string_combine_path(dst, dst.c_str(), r);
1243 }
1244
1245 inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt)
1246 {
1247 string_combine_path(dst, p, q, r);
1248 if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.'))
1249 dst.append(1, '.');
1250 dst.append(pExt);
1251 }
1252
1253 inline bool string_get_pathname(const char *p, std::string &path)
1254 {
1255 std::string temp_drive, temp_path;
1256 if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL))
1257 return false;
1258 string_combine_path(path, temp_drive.c_str(), temp_path.c_str());
1259 return true;
1260 }
1261
1262 inline bool string_get_filename(const char *p, std::string &filename)
1263 {
1264 std::string temp_ext;
1265 if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext))
1266 return false;
1267 filename += temp_ext;
1268 return true;
1269 }
1270
1271 class rand
1272 {
1273 std::mt19937 m_mt;
1274
1275 public:
1276 rand() { }
1277
1278 rand(uint32_t s) { seed(s); }
1279 void seed(uint32_t s) { m_mt.seed(s); }
1280
1281 // between [l,h]
1282 int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); }
1283
1284 uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); }
1285
1286 bool bit() { return irand(0, 1) == 1; }
1287
1288 uint8_t byte() { return static_cast<uint8_t>(urand32()); }
1289
1290 // between [l,h)
1291 float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); }
1292
1293 float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); }
1294 };
1295
1296 class priority_queue
1297 {
1298 public:
1299 priority_queue() :
1300 m_size(0)
1301 {
1302 }
1303
1304 void clear()
1305 {
1306 m_heap.clear();
1307 m_size = 0;
1308 }
1309
1310 void init(uint32_t max_entries, uint32_t first_index, float first_priority)
1311 {
1312 m_heap.resize(max_entries + 1);
1313 m_heap[1].m_index = first_index;
1314 m_heap[1].m_priority = first_priority;
1315 m_size = 1;
1316 }
1317
1318 inline uint32_t size() const { return m_size; }
1319
1320 inline uint32_t get_top_index() const { return m_heap[1].m_index; }
1321 inline float get_top_priority() const { return m_heap[1].m_priority; }
1322
1323 inline void delete_top()
1324 {
1325 assert(m_size > 0);
1326 m_heap[1] = m_heap[m_size];
1327 m_size--;
1328 if (m_size)
1329 down_heap(1);
1330 }
1331
1332 inline void add_heap(uint32_t index, float priority)
1333 {
1334 m_size++;
1335
1336 uint32_t k = m_size;
1337
1338 if (m_size >= m_heap.size())
1339 m_heap.resize(m_size + 1);
1340
1341 for (;;)
1342 {
1343 uint32_t parent_index = k >> 1;
1344 if ((!parent_index) || (m_heap[parent_index].m_priority > priority))
1345 break;
1346 m_heap[k] = m_heap[parent_index];
1347 k = parent_index;
1348 }
1349
1350 m_heap[k].m_index = index;
1351 m_heap[k].m_priority = priority;
1352 }
1353
1354 private:
1355 struct entry
1356 {
1357 uint32_t m_index;
1358 float m_priority;
1359 };
1360
1361 basisu::vector<entry> m_heap;
1362 uint32_t m_size;
1363
1364 // Push down entry at index
1365 inline void down_heap(uint32_t heap_index)
1366 {
1367 uint32_t orig_index = m_heap[heap_index].m_index;
1368 const float orig_priority = m_heap[heap_index].m_priority;
1369
1370 uint32_t child_index;
1371 while ((child_index = (heap_index << 1)) <= m_size)
1372 {
1373 if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;
1374 if (orig_priority > m_heap[child_index].m_priority)
1375 break;
1376 m_heap[heap_index] = m_heap[child_index];
1377 heap_index = child_index;
1378 }
1379
1380 m_heap[heap_index].m_index = orig_index;
1381 m_heap[heap_index].m_priority = orig_priority;
1382 }
1383 };
1384
1385 // Tree structured vector quantization (TSVQ)
1386
1387 template <typename TrainingVectorType>
1388 class tree_vector_quant
1389 {
1390 public:
1391 typedef TrainingVectorType training_vec_type;
1392 typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight;
1393 typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs;
1394
1395 tree_vector_quant() :
1396 m_next_codebook_index(0)
1397 {
1398 }
1399
1400 void clear()
1401 {
1402 clear_vector(m_training_vecs);
1403 clear_vector(m_nodes);
1404 m_next_codebook_index = 0;
1405 }
1406
1407 void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }
1408
1409 size_t get_total_training_vecs() const { return m_training_vecs.size(); }
1410 const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
1411 array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }
1412
1413 void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
1414 {
1415 for (uint32_t i = 0; i < m_nodes.size(); i++)
1416 {
1417 const tsvq_node &n = m_nodes[i];
1418 if (!n.is_leaf())
1419 continue;
1420
1421 codebook.resize(codebook.size() + 1);
1422 codebook.back() = n.m_training_vecs;
1423 }
1424 }
1425
1426 void retrieve(basisu::vector<TrainingVectorType> &codebook) const
1427 {
1428 for (uint32_t i = 0; i < m_nodes.size(); i++)
1429 {
1430 const tsvq_node &n = m_nodes[i];
1431 if (!n.is_leaf())
1432 continue;
1433
1434 codebook.resize(codebook.size() + 1);
1435 codebook.back() = n.m_origin;
1436 }
1437 }
1438
1439 void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
1440 {
1441 uint_vec node_stack;
1442 node_stack.reserve(512);
1443
1444 codebook.resize(0);
1445 codebook.reserve(max_clusters);
1446
1447 uint32_t node_index = 0;
1448
1449 while (true)
1450 {
1451 const tsvq_node& cur = m_nodes[node_index];
1452
1453 if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
1454 {
1455 codebook.resize(codebook.size() + 1);
1456 codebook.back() = cur.m_training_vecs;
1457
1458 if (node_stack.empty())
1459 break;
1460
1461 node_index = node_stack.back();
1462 node_stack.pop_back();
1463 continue;
1464 }
1465
1466 node_stack.push_back(cur.m_right_index);
1467 node_index = cur.m_left_index;
1468 }
1469 }
1470
1471 bool generate(uint32_t max_size)
1472 {
1473 if (!m_training_vecs.size())
1474 return false;
1475
1476 m_next_codebook_index = 0;
1477
1478 clear_vector(m_nodes);
1479 m_nodes.reserve(max_size * 2 + 1);
1480
1481 m_nodes.push_back(prepare_root());
1482
1483 priority_queue var_heap;
1484 var_heap.init(max_size, 0, m_nodes[0].m_var);
1485
1486 basisu::vector<uint32_t> l_children, r_children;
1487
1488 // Now split the worst nodes
1489 l_children.reserve(m_training_vecs.size() + 1);
1490 r_children.reserve(m_training_vecs.size() + 1);
1491
1492 uint32_t total_leaf_nodes = 1;
1493
1494 //interval_timer tm;
1495 //tm.start();
1496
1497 while ((var_heap.size()) && (total_leaf_nodes < max_size))
1498 {
1499 const uint32_t node_index = var_heap.get_top_index();
1500 const tsvq_node &node = m_nodes[node_index];
1501
1502 assert(node.m_var == var_heap.get_top_priority());
1503 assert(node.is_leaf());
1504
1505 var_heap.delete_top();
1506
1507 if (node.m_training_vecs.size() > 1)
1508 {
1509 if (split_node(node_index, var_heap, l_children, r_children))
1510 {
1511 // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
1512 total_leaf_nodes += 1;
1513 }
1514 }
1515 }
1516
1517 //debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs());
1518
1519 return true;
1520 }
1521
1522 private:
1523 class tsvq_node
1524 {
1525 public:
1526 inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }
1527
1528 // vecs is erased
1529 inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }
1530
1531 inline bool is_leaf() const { return m_left_index < 0; }
1532
1533 float m_var;
1534 uint64_t m_weight;
1535 TrainingVectorType m_origin;
1536 int32_t m_left_index, m_right_index;
1537 basisu::vector<uint32_t> m_training_vecs;
1538 int m_codebook_index;
1539 };
1540
1541 typedef basisu::vector<tsvq_node> tsvq_node_vec;
1542 tsvq_node_vec m_nodes;
1543
1544 array_of_weighted_training_vecs m_training_vecs;
1545
1546 uint32_t m_next_codebook_index;
1547
1548 tsvq_node prepare_root() const
1549 {
1550 double ttsum = 0.0f;
1551
1552 // Prepare root node containing all training vectors
1553 tsvq_node root;
1554 root.m_training_vecs.reserve(m_training_vecs.size());
1555
1556 for (uint32_t i = 0; i < m_training_vecs.size(); i++)
1557 {
1558 const TrainingVectorType &v = m_training_vecs[i].first;
1559 const uint64_t weight = m_training_vecs[i].second;
1560
1561 root.m_training_vecs.push_back(i);
1562
1563 root.m_origin += (v * static_cast<float>(weight));
1564 root.m_weight += weight;
1565
1566 ttsum += v.dot(v) * weight;
1567 }
1568
1569 root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));
1570
1571 root.m_origin *= (1.0f / root.m_weight);
1572
1573 return root;
1574 }
1575
1576 bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children)
1577 {
1578 TrainingVectorType l_child_org, r_child_org;
1579 uint64_t l_weight = 0, r_weight = 0;
1580 float l_var = 0.0f, r_var = 0.0f;
1581
1582 // Compute initial left/right child origins
1583 if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))
1584 return false;
1585
1586 // Use k-means iterations to refine these children vectors
1587 if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
1588 return false;
1589
1590 // Create children
1591 const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;
1592
1593 m_nodes[node_index].m_left_index = l_child_index;
1594 m_nodes[node_index].m_right_index = r_child_index;
1595
1596 m_nodes[node_index].m_codebook_index = m_next_codebook_index;
1597 m_next_codebook_index++;
1598
1599 m_nodes.resize(m_nodes.size() + 2);
1600
1601 tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];
1602
1603 l_child.set(l_child_org, l_weight, l_var, l_children);
1604 r_child.set(r_child_org, r_weight, r_var, r_children);
1605
1606 if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))
1607 {
1608 TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);
1609
1610 for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)
1611 {
1612 if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))
1613 {
1614 l_child.m_var = 1e-4f;
1615 break;
1616 }
1617 }
1618 }
1619
1620 if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))
1621 {
1622 TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);
1623
1624 for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)
1625 {
1626 if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))
1627 {
1628 r_child.m_var = 1e-4f;
1629 break;
1630 }
1631 }
1632 }
1633
1634 if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
1635 var_heap.add_heap(l_child_index, l_child.m_var);
1636
1637 if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
1638 var_heap.add_heap(r_child_index, r_child.m_var);
1639
1640 return true;
1641 }
1642
1643 TrainingVectorType compute_split_axis(const tsvq_node &node) const
1644 {
1645 const uint32_t N = TrainingVectorType::num_elements;
1646
1647 matrix<N, N, float> cmatrix;
1648
1649 if ((N != 16) || (!g_cpu_supports_sse41))
1650 {
1651 cmatrix.set_zero();
1652
1653 // Compute covariance matrix from weighted input vectors
1654 for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1655 {
1656 const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
1657 const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v);
1658
1659 for (uint32_t x = 0; x < N; x++)
1660 for (uint32_t y = x; y < N; y++)
1661 cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
1662 }
1663 }
1664 else
1665 {
1666#if BASISU_SUPPORT_SSE
1667 // Specialize the case with 16x16 matrices, which are quite expensive without SIMD.
1668 // This SSE function takes pointers to void types, so do some sanity checks.
1669 assert(sizeof(TrainingVectorType) == sizeof(float) * 16);
1670 assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>));
1671 update_covar_matrix_16x16_sse41(node.m_training_vecs.size(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
1672#endif
1673 }
1674
1675 const float renorm_scale = 1.0f / node.m_weight;
1676
1677 for (uint32_t x = 0; x < N; x++)
1678 for (uint32_t y = x; y < N; y++)
1679 cmatrix[x][y] *= renorm_scale;
1680
1681 // Diagonal flip
1682 for (uint32_t x = 0; x < (N - 1); x++)
1683 for (uint32_t y = x + 1; y < N; y++)
1684 cmatrix[y][x] = cmatrix[x][y];
1685
1686 return compute_pca_from_covar<N, TrainingVectorType>(cmatrix);
1687 }
1688
1689 bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
1690 {
1691 //const uint32_t N = TrainingVectorType::num_elements;
1692
1693 if (2 == node.m_training_vecs.size())
1694 {
1695 l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
1696 r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
1697 return true;
1698 }
1699
1700 TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
1701 double l_weight = 0.0f, r_weight = 0.0f;
1702
1703 // Compute initial left/right children
1704 for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1705 {
1706 const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
1707
1708 const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
1709
1710 double t = (v - node.m_origin).dot(axis);
1711 if (t >= 0.0f)
1712 {
1713 r_child += v * weight;
1714 r_weight += weight;
1715 }
1716 else
1717 {
1718 l_child += v * weight;
1719 l_weight += weight;
1720 }
1721 }
1722
1723 if ((l_weight > 0.0f) && (r_weight > 0.0f))
1724 {
1725 l_child_result = l_child * static_cast<float>(1.0f / l_weight);
1726 r_child_result = r_child * static_cast<float>(1.0f / r_weight);
1727 }
1728 else
1729 {
1730 TrainingVectorType l(1e+20f);
1731 TrainingVectorType h(-1e+20f);
1732 for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1733 {
1734 const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
1735
1736 l = TrainingVectorType::component_min(l, v);
1737 h = TrainingVectorType::component_max(h, v);
1738 }
1739
1740 TrainingVectorType r(h - l);
1741
1742 float largest_axis_v = 0.0f;
1743 int largest_axis_index = -1;
1744 for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)
1745 {
1746 if (r[i] > largest_axis_v)
1747 {
1748 largest_axis_v = r[i];
1749 largest_axis_index = i;
1750 }
1751 }
1752
1753 if (largest_axis_index < 0)
1754 return false;
1755
1756 basisu::vector<float> keys(node.m_training_vecs.size());
1757 for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1758 keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];
1759
1760 uint_vec indices(node.m_training_vecs.size());
1761 indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);
1762
1763 l_child.set_zero();
1764 l_weight = 0;
1765
1766 r_child.set_zero();
1767 r_weight = 0;
1768
1769 const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;
1770 for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1771 {
1772 const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
1773
1774 const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
1775
1776 if (i < half_index)
1777 {
1778 l_child += v * weight;
1779 l_weight += weight;
1780 }
1781 else
1782 {
1783 r_child += v * weight;
1784 r_weight += weight;
1785 }
1786 }
1787
1788 if ((l_weight > 0.0f) && (r_weight > 0.0f))
1789 {
1790 l_child_result = l_child * static_cast<float>(1.0f / l_weight);
1791 r_child_result = r_child * static_cast<float>(1.0f / r_weight);
1792 }
1793 else
1794 {
1795 l_child_result = l;
1796 r_child_result = h;
1797 }
1798 }
1799
1800 return true;
1801 }
1802
1803 bool refine_split(const tsvq_node &node,
1804 TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children,
1805 TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const
1806 {
1807 l_children.reserve(node.m_training_vecs.size());
1808 r_children.reserve(node.m_training_vecs.size());
1809
1810 float prev_total_variance = 1e+10f;
1811
1812 // Refine left/right children locations using k-means iterations
1813 const uint32_t cMaxIters = 6;
1814 for (uint32_t iter = 0; iter < cMaxIters; iter++)
1815 {
1816 l_children.resize(0);
1817 r_children.resize(0);
1818
1819 TrainingVectorType new_l_child(cZero), new_r_child(cZero);
1820
1821 double l_ttsum = 0.0f, r_ttsum = 0.0f;
1822
1823 l_weight = 0;
1824 r_weight = 0;
1825
1826 for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1827 {
1828 const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
1829 const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
1830
1831 double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);
1832
1833 if (left_dist2 >= right_dist2)
1834 {
1835 new_r_child += (v * static_cast<float>(weight));
1836 r_weight += weight;
1837
1838 r_ttsum += weight * v.dot(v);
1839 r_children.push_back(node.m_training_vecs[i]);
1840 }
1841 else
1842 {
1843 new_l_child += (v * static_cast<float>(weight));
1844 l_weight += weight;
1845
1846 l_ttsum += weight * v.dot(v);
1847 l_children.push_back(node.m_training_vecs[i]);
1848 }
1849 }
1850
1851 // Node is unsplittable using the above algorithm - try something else to split it up.
1852 if ((!l_weight) || (!r_weight))
1853 {
1854 l_children.resize(0);
1855 new_l_child.set(0.0f);
1856 l_ttsum = 0.0f;
1857 l_weight = 0;
1858
1859 r_children.resize(0);
1860 new_r_child.set(0.0f);
1861 r_ttsum = 0.0f;
1862 r_weight = 0;
1863
1864 TrainingVectorType firstVec;
1865 for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1866 {
1867 const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
1868 const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
1869
1870 if ((!i) || (v == firstVec))
1871 {
1872 firstVec = v;
1873
1874 new_r_child += (v * static_cast<float>(weight));
1875 r_weight += weight;
1876
1877 r_ttsum += weight * v.dot(v);
1878 r_children.push_back(node.m_training_vecs[i]);
1879 }
1880 else
1881 {
1882 new_l_child += (v * static_cast<float>(weight));
1883 l_weight += weight;
1884
1885 l_ttsum += weight * v.dot(v);
1886 l_children.push_back(node.m_training_vecs[i]);
1887 }
1888 }
1889
1890 if ((!l_weight) || (!r_weight))
1891 return false;
1892 }
1893
1894 l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
1895 r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));
1896
1897 new_l_child *= (1.0f / l_weight);
1898 new_r_child *= (1.0f / r_weight);
1899
1900 l_child = new_l_child;
1901 r_child = new_r_child;
1902
1903 float total_var = l_var + r_var;
1904 const float cGiveupVariance = .00001f;
1905 if (total_var < cGiveupVariance)
1906 break;
1907
1908 // Check to see if the variance has settled
1909 const float cVarianceDeltaThresh = .00125f;
1910 if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
1911 break;
1912
1913 prev_total_variance = total_var;
1914 }
1915
1916 return true;
1917 }
1918 };
1919
1920 struct weighted_block_group
1921 {
1922 uint64_t m_total_weight;
1923 uint_vec m_indices;
1924 };
1925
1926 template<typename Quantizer>
1927 bool generate_hierarchical_codebook_threaded_internal(Quantizer& q,
1928 uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
1929 basisu::vector<uint_vec>& codebook,
1930 basisu::vector<uint_vec>& parent_codebook,
1931 uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool)
1932 {
1933 codebook.resize(0);
1934 parent_codebook.resize(0);
1935
1936 if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16))
1937 {
1938 if (!q.generate(max_codebook_size))
1939 return false;
1940
1941 q.retrieve(codebook);
1942
1943 if (max_parent_codebook_size)
1944 q.retrieve(max_parent_codebook_size, parent_codebook);
1945
1946 return true;
1947 }
1948
1949 const uint32_t cMaxThreads = 16;
1950 if (max_threads > cMaxThreads)
1951 max_threads = cMaxThreads;
1952
1953 if (!q.generate(max_threads))
1954 return false;
1955
1956 basisu::vector<uint_vec> initial_codebook;
1957
1958 q.retrieve(initial_codebook);
1959
1960 if (initial_codebook.size() < max_threads)
1961 {
1962 codebook = initial_codebook;
1963
1964 if (max_parent_codebook_size)
1965 q.retrieve(max_parent_codebook_size, parent_codebook);
1966
1967 return true;
1968 }
1969
1970 Quantizer quantizers[cMaxThreads];
1971
1972 bool success_flags[cMaxThreads];
1973 clear_obj(success_flags);
1974
1975 basisu::vector<uint_vec> local_clusters[cMaxThreads];
1976 basisu::vector<uint_vec> local_parent_clusters[cMaxThreads];
1977
1978 for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
1979 {
1980#ifndef __EMSCRIPTEN__
1981 pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {
1982#endif
1983
1984 Quantizer& lq = quantizers[thread_iter];
1985 uint_vec& cluster_indices = initial_codebook[thread_iter];
1986
1987 uint_vec local_to_global(cluster_indices.size());
1988
1989 for (uint32_t i = 0; i < cluster_indices.size(); i++)
1990 {
1991 const uint32_t global_training_vec_index = cluster_indices[i];
1992 local_to_global[i] = global_training_vec_index;
1993
1994 lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second);
1995 }
1996
1997 const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs();
1998
1999 success_flags[thread_iter] = lq.generate(max_clusters);
2000
2001 if (success_flags[thread_iter])
2002 {
2003 lq.retrieve(local_clusters[thread_iter]);
2004
2005 for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++)
2006 {
2007 for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++)
2008 local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]];
2009 }
2010
2011 if (max_parent_codebook_size)
2012 {
2013 lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]);
2014
2015 for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++)
2016 {
2017 for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++)
2018 local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]];
2019 }
2020 }
2021 }
2022
2023#ifndef __EMSCRIPTEN__
2024 } );
2025#endif
2026
2027 } // thread_iter
2028
2029#ifndef __EMSCRIPTEN__
2030 pJob_pool->wait_for_all();
2031#endif
2032
2033 uint32_t total_clusters = 0, total_parent_clusters = 0;
2034
2035 for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++)
2036 {
2037 if (!success_flags[thread_iter])
2038 return false;
2039 total_clusters += (uint32_t)local_clusters[thread_iter].size();
2040 total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size();
2041 }
2042
2043 codebook.reserve(total_clusters);
2044 parent_codebook.reserve(total_parent_clusters);
2045
2046 for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
2047 {
2048 for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++)
2049 {
2050 codebook.resize(codebook.size() + 1);
2051 codebook.back().swap(local_clusters[thread_iter][j]);
2052 }
2053
2054 for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++)
2055 {
2056 parent_codebook.resize(parent_codebook.size() + 1);
2057 parent_codebook.back().swap(local_parent_clusters[thread_iter][j]);
2058 }
2059 }
2060
2061 return true;
2062 }
2063
2064 template<typename Quantizer>
2065 bool generate_hierarchical_codebook_threaded(Quantizer& q,
2066 uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
2067 basisu::vector<uint_vec>& codebook,
2068 basisu::vector<uint_vec>& parent_codebook,
2069 uint32_t max_threads, job_pool *pJob_pool,
2070 bool even_odd_input_pairs_equal)
2071 {
2072 typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher;
2073
2074 typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,
2075 training_vec_bit_hasher> group_hash;
2076
2077 //interval_timer tm;
2078 //tm.start();
2079
2080 group_hash unique_vecs;
2081
2082 unique_vecs.reserve(20000);
2083
2084 weighted_block_group g;
2085
2086 if (even_odd_input_pairs_equal)
2087 {
2088 g.m_indices.resize(2);
2089
2090 assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0);
2091
2092 for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2)
2093 {
2094 assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first);
2095
2096 g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second;
2097 g.m_indices[0] = i;
2098 g.m_indices[1] = i + 1;
2099
2100 auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));
2101
2102 if (!ins_res.second)
2103 {
2104 (ins_res.first)->second.m_total_weight += g.m_total_weight;
2105 (ins_res.first)->second.m_indices.push_back(i);
2106 (ins_res.first)->second.m_indices.push_back(i + 1);
2107 }
2108 }
2109 }
2110 else
2111 {
2112 g.m_indices.resize(1);
2113
2114 for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)
2115 {
2116 g.m_total_weight = q.get_training_vecs()[i].second;
2117 g.m_indices[0] = i;
2118
2119 auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));
2120
2121 if (!ins_res.second)
2122 {
2123 (ins_res.first)->second.m_total_weight += g.m_total_weight;
2124 (ins_res.first)->second.m_indices.push_back(i);
2125 }
2126 }
2127 }
2128
2129 //debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs());
2130 debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size());
2131
2132 Quantizer group_quant;
2133 typedef typename group_hash::const_iterator group_hash_const_iter;
2134 basisu::vector<group_hash_const_iter> unique_vec_iters;
2135 unique_vec_iters.reserve(unique_vecs.size());
2136
2137 for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)
2138 {
2139 group_quant.add_training_vec(iter->first, iter->second.m_total_weight);
2140 unique_vec_iters.push_back(iter);
2141 }
2142
2143 bool limit_clusterizers = true;
2144 if (unique_vecs.size() <= max_codebook_size)
2145 limit_clusterizers = false;
2146
2147 debug_printf("Limit clusterizers: %u\n", limit_clusterizers);
2148
2149 basisu::vector<uint_vec> group_codebook, group_parent_codebook;
2150 bool status = generate_hierarchical_codebook_threaded_internal(group_quant,
2151 max_codebook_size, max_parent_codebook_size,
2152 group_codebook,
2153 group_parent_codebook,
2154 (unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);
2155
2156 if (!status)
2157 return false;
2158
2159 codebook.resize(0);
2160 for (uint32_t i = 0; i < group_codebook.size(); i++)
2161 {
2162 codebook.resize(codebook.size() + 1);
2163
2164 for (uint32_t j = 0; j < group_codebook[i].size(); j++)
2165 {
2166 const uint32_t group_index = group_codebook[i][j];
2167
2168 typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
2169 const uint_vec& training_vec_indices = group_iter->second.m_indices;
2170
2171 append_vector(codebook.back(), training_vec_indices);
2172 }
2173 }
2174
2175 parent_codebook.resize(0);
2176 for (uint32_t i = 0; i < group_parent_codebook.size(); i++)
2177 {
2178 parent_codebook.resize(parent_codebook.size() + 1);
2179
2180 for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)
2181 {
2182 const uint32_t group_index = group_parent_codebook[i][j];
2183
2184 typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
2185 const uint_vec& training_vec_indices = group_iter->second.m_indices;
2186
2187 append_vector(parent_codebook.back(), training_vec_indices);
2188 }
2189 }
2190
2191 return true;
2192 }
2193
2194 // Canonical Huffman coding
2195
2196 class histogram
2197 {
2198 basisu::vector<uint32_t> m_hist;
2199
2200 public:
2201 histogram(uint32_t size = 0) { init(size); }
2202
2203 void clear()
2204 {
2205 clear_vector(m_hist);
2206 }
2207
2208 void init(uint32_t size)
2209 {
2210 m_hist.resize(0);
2211 m_hist.resize(size);
2212 }
2213
2214 inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); }
2215
2216 inline const uint32_t &operator[] (uint32_t index) const
2217 {
2218 return m_hist[index];
2219 }
2220
2221 inline uint32_t &operator[] (uint32_t index)
2222 {
2223 return m_hist[index];
2224 }
2225
2226 inline void inc(uint32_t index)
2227 {
2228 m_hist[index]++;
2229 }
2230
2231 uint64_t get_total() const
2232 {
2233 uint64_t total = 0;
2234 for (uint32_t i = 0; i < m_hist.size(); ++i)
2235 total += m_hist[i];
2236 return total;
2237 }
2238
2239 double get_entropy() const
2240 {
2241 double total = static_cast<double>(get_total());
2242 if (total == 0.0f)
2243 return 0.0f;
2244
2245 const double inv_total = 1.0f / total;
2246 const double neg_inv_log2 = -1.0f / log(2.0f);
2247
2248 double e = 0.0f;
2249 for (uint32_t i = 0; i < m_hist.size(); i++)
2250 if (m_hist[i])
2251 e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]);
2252
2253 return e;
2254 }
2255 };
2256
2257 struct sym_freq
2258 {
2259 uint32_t m_key;
2260 uint16_t m_sym_index;
2261 };
2262
2263 sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
2264 void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
2265 void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);
2266
2267 class huffman_encoding_table
2268 {
2269 public:
2270 huffman_encoding_table()
2271 {
2272 }
2273
2274 void clear()
2275 {
2276 clear_vector(m_codes);
2277 clear_vector(m_code_sizes);
2278 }
2279
2280 bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)
2281 {
2282 return init(h.size(), &h[0], max_code_size);
2283 }
2284
2285 bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);
2286 bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);
2287
2288 inline const uint16_vec &get_codes() const { return m_codes; }
2289 inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }
2290
2291 uint32_t get_total_used_codes() const
2292 {
2293 for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--)
2294 if (m_code_sizes[i])
2295 return i + 1;
2296 return 0;
2297 }
2298
2299 private:
2300 uint16_vec m_codes;
2301 uint8_vec m_code_sizes;
2302 };
2303
2304 class bitwise_coder
2305 {
2306 public:
2307 bitwise_coder() :
2308 m_bit_buffer(0),
2309 m_bit_buffer_size(0),
2310 m_total_bits(0)
2311 {
2312 }
2313
2314 inline void clear()
2315 {
2316 clear_vector(m_bytes);
2317 m_bit_buffer = 0;
2318 m_bit_buffer_size = 0;
2319 m_total_bits = 0;
2320 }
2321
2322 inline const uint8_vec &get_bytes() const { return m_bytes; }
2323
2324 inline uint64_t get_total_bits() const { return m_total_bits; }
2325 inline void clear_total_bits() { m_total_bits = 0; }
2326
2327 inline void init(uint32_t reserve_size = 1024)
2328 {
2329 m_bytes.reserve(reserve_size);
2330 m_bytes.resize(0);
2331
2332 m_bit_buffer = 0;
2333 m_bit_buffer_size = 0;
2334 m_total_bits = 0;
2335 }
2336
2337 inline uint32_t flush()
2338 {
2339 if (m_bit_buffer_size)
2340 {
2341 m_total_bits += 8 - (m_bit_buffer_size & 7);
2342 append_byte(static_cast<uint8_t>(m_bit_buffer));
2343
2344 m_bit_buffer = 0;
2345 m_bit_buffer_size = 0;
2346
2347 return 8;
2348 }
2349
2350 return 0;
2351 }
2352
2353 inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)
2354 {
2355 assert(num_bits <= 32);
2356 assert(bits < (1ULL << num_bits));
2357
2358 if (!num_bits)
2359 return 0;
2360
2361 m_total_bits += num_bits;
2362
2363 uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer;
2364 m_bit_buffer_size += num_bits;
2365
2366 while (m_bit_buffer_size >= 8)
2367 {
2368 append_byte(static_cast<uint8_t>(v));
2369 v >>= 8;
2370 m_bit_buffer_size -= 8;
2371 }
2372
2373 m_bit_buffer = static_cast<uint8_t>(v);
2374 return num_bits;
2375 }
2376
2377 inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)
2378 {
2379 uint32_t code = tab.get_codes()[sym];
2380 uint32_t code_size = tab.get_code_sizes()[sym];
2381 assert(code_size >= 1);
2382 put_bits(code, code_size);
2383 return code_size;
2384 }
2385
2386 inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)
2387 {
2388 assert((n >= 2) && (v < n));
2389
2390 uint32_t k = floor_log2i(n);
2391 uint32_t u = (1 << (k + 1)) - n;
2392
2393 if (v < u)
2394 return put_bits(v, k);
2395
2396 uint32_t x = v + u;
2397 assert((x >> 1) >= u);
2398
2399 put_bits(x >> 1, k);
2400 put_bits(x & 1, 1);
2401 return k + 1;
2402 }
2403
2404 inline uint32_t put_rice(uint32_t v, uint32_t m)
2405 {
2406 assert(m);
2407
2408 const uint64_t start_bits = m_total_bits;
2409
2410 uint32_t q = v >> m, r = v & ((1 << m) - 1);
2411
2412 // rice coding sanity check
2413 assert(q <= 64);
2414
2415 for (; q > 16; q -= 16)
2416 put_bits(0xFFFF, 16);
2417
2418 put_bits((1 << q) - 1, q);
2419 put_bits(r << 1, m + 1);
2420
2421 return (uint32_t)(m_total_bits - start_bits);
2422 }
2423
2424 inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)
2425 {
2426 assert(chunk_bits);
2427
2428 const uint32_t chunk_size = 1 << chunk_bits;
2429 const uint32_t chunk_mask = chunk_size - 1;
2430
2431 uint32_t total_bits = 0;
2432
2433 for ( ; ; )
2434 {
2435 uint32_t next_v = v >> chunk_bits;
2436
2437 total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);
2438 if (!next_v)
2439 break;
2440
2441 v = next_v;
2442 }
2443
2444 return total_bits;
2445 }
2446
2447 uint32_t emit_huffman_table(const huffman_encoding_table &tab);
2448
2449 private:
2450 uint8_vec m_bytes;
2451 uint32_t m_bit_buffer, m_bit_buffer_size;
2452 uint64_t m_total_bits;
2453
2454 void append_byte(uint8_t c)
2455 {
2456 m_bytes.resize(m_bytes.size() + 1);
2457 m_bytes.back() = c;
2458 }
2459
2460 static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
2461 static void end_zero_run(uint16_vec &syms, uint32_t &run_size);
2462 };
2463
2464 class huff2D
2465 {
2466 public:
2467 huff2D() { }
2468 huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); }
2469
2470 inline const histogram &get_histogram() const { return m_histogram; }
2471 inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; }
2472
2473 inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group)
2474 {
2475 assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1);
2476
2477 m_bits_per_sym = bits_per_sym;
2478 m_total_syms_per_group = total_syms_per_group;
2479 m_cur_sym_bits = 0;
2480 m_cur_num_syms = 0;
2481 m_decode_syms_remaining = 0;
2482 m_next_decoder_group_index = 0;
2483
2484 m_histogram.init(1 << (bits_per_sym * total_syms_per_group));
2485 }
2486
2487 inline void clear()
2488 {
2489 m_group_bits.clear();
2490
2491 m_cur_sym_bits = 0;
2492 m_cur_num_syms = 0;
2493 m_decode_syms_remaining = 0;
2494 m_next_decoder_group_index = 0;
2495 }
2496
2497 inline void emit(uint32_t sym)
2498 {
2499 m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym));
2500 m_cur_num_syms++;
2501
2502 if (m_cur_num_syms == m_total_syms_per_group)
2503 flush();
2504 }
2505
2506 inline void flush()
2507 {
2508 if (m_cur_num_syms)
2509 {
2510 m_group_bits.push_back(m_cur_sym_bits);
2511 m_histogram.inc(m_cur_sym_bits);
2512
2513 m_cur_sym_bits = 0;
2514 m_cur_num_syms = 0;
2515 }
2516 }
2517
2518 inline bool start_encoding(uint32_t code_size_limit = 16)
2519 {
2520 flush();
2521
2522 if (!m_encoding_table.init(m_histogram, code_size_limit))
2523 return false;
2524
2525 m_decode_syms_remaining = 0;
2526 m_next_decoder_group_index = 0;
2527
2528 return true;
2529 }
2530
2531 inline uint32_t emit_next_sym(bitwise_coder &c)
2532 {
2533 uint32_t bits = 0;
2534
2535 if (!m_decode_syms_remaining)
2536 {
2537 bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table);
2538 m_decode_syms_remaining = m_total_syms_per_group;
2539 }
2540
2541 m_decode_syms_remaining--;
2542 return bits;
2543 }
2544
2545 inline void emit_flush()
2546 {
2547 m_decode_syms_remaining = 0;
2548 }
2549
2550 private:
2551 uint_vec m_group_bits;
2552 huffman_encoding_table m_encoding_table;
2553 histogram m_histogram;
2554 uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining;
2555 };
2556
2557 bool huffman_test(int rand_seed);
2558
2559 // VQ index reordering
2560
2561 class palette_index_reorderer
2562 {
2563 public:
2564 palette_index_reorderer()
2565 {
2566 }
2567
2568 void clear()
2569 {
2570 clear_vector(m_hist);
2571 clear_vector(m_total_count_to_picked);
2572 clear_vector(m_entries_picked);
2573 clear_vector(m_entries_to_do);
2574 clear_vector(m_remap_table);
2575 }
2576
2577 // returns [0,1] distance of entry i to entry j
2578 typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);
2579
2580 void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
2581
2582 // Table remaps old to new symbol indices
2583 inline const uint_vec &get_remap_table() const { return m_remap_table; }
2584
2585 private:
2586 uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;
2587
2588 inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; }
2589 inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }
2590
2591 void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);
2592 void find_initial(uint32_t num_syms);
2593 void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
2594 float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
2595 };
2596
2597 // Simple 32-bit 2D image class
2598
2599 class image
2600 {
2601 public:
2602 image() :
2603 m_width(0), m_height(0), m_pitch(0)
2604 {
2605 }
2606
2607 image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
2608 m_width(0), m_height(0), m_pitch(0)
2609 {
2610 resize(w, h, p);
2611 }
2612
2613 image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) :
2614 m_width(0), m_height(0), m_pitch(0)
2615 {
2616 init(pImage, width, height, comps);
2617 }
2618
2619 image(const image &other) :
2620 m_width(0), m_height(0), m_pitch(0)
2621 {
2622 *this = other;
2623 }
2624
2625 image &swap(image &other)
2626 {
2627 std::swap(m_width, other.m_width);
2628 std::swap(m_height, other.m_height);
2629 std::swap(m_pitch, other.m_pitch);
2630 m_pixels.swap(other.m_pixels);
2631 return *this;
2632 }
2633
2634 image &operator= (const image &rhs)
2635 {
2636 if (this != &rhs)
2637 {
2638 m_width = rhs.m_width;
2639 m_height = rhs.m_height;
2640 m_pitch = rhs.m_pitch;
2641 m_pixels = rhs.m_pixels;
2642 }
2643 return *this;
2644 }
2645
2646 image &clear()
2647 {
2648 m_width = 0;
2649 m_height = 0;
2650 m_pitch = 0;
2651 clear_vector(m_pixels);
2652 return *this;
2653 }
2654
2655 image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)
2656 {
2657 return crop(w, h, p, background);
2658 }
2659
2660 image &set_all(const color_rgba &c)
2661 {
2662 for (uint32_t i = 0; i < m_pixels.size(); i++)
2663 m_pixels[i] = c;
2664 return *this;
2665 }
2666
2667 void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps)
2668 {
2669 assert(comps >= 1 && comps <= 4);
2670
2671 resize(width, height);
2672
2673 for (uint32_t y = 0; y < height; y++)
2674 {
2675 for (uint32_t x = 0; x < width; x++)
2676 {
2677 const uint8_t *pSrc = &pImage[(x + y * width) * comps];
2678 color_rgba &dst = (*this)(x, y);
2679
2680 if (comps == 1)
2681 {
2682 dst.r = pSrc[0];
2683 dst.g = pSrc[0];
2684 dst.b = pSrc[0];
2685 dst.a = 255;
2686 }
2687 else if (comps == 2)
2688 {
2689 dst.r = pSrc[0];
2690 dst.g = pSrc[0];
2691 dst.b = pSrc[0];
2692 dst.a = pSrc[1];
2693 }
2694 else
2695 {
2696 dst.r = pSrc[0];
2697 dst.g = pSrc[1];
2698 dst.b = pSrc[2];
2699 if (comps == 4)
2700 dst.a = pSrc[3];
2701 else
2702 dst.a = 255;
2703 }
2704 }
2705 }
2706 }
2707
2708 image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c)
2709 {
2710 for (uint32_t iy = 0; iy < h; iy++)
2711 for (uint32_t ix = 0; ix < w; ix++)
2712 set_clipped(x + ix, y + iy, c);
2713 return *this;
2714 }
2715
2716 image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c)
2717 {
2718 for (uint32_t iy = 0; iy < h; iy++)
2719 for (uint32_t ix = 0; ix < w; ix++)
2720 set_clipped_alpha(x + ix, y + iy, c);
2721 return *this;
2722 }
2723
2724 image &crop_dup_borders(uint32_t w, uint32_t h)
2725 {
2726 const uint32_t orig_w = m_width, orig_h = m_height;
2727
2728 crop(w, h);
2729
2730 if (orig_w && orig_h)
2731 {
2732 if (m_width > orig_w)
2733 {
2734 for (uint32_t x = orig_w; x < m_width; x++)
2735 for (uint32_t y = 0; y < m_height; y++)
2736 set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
2737 }
2738
2739 if (m_height > orig_h)
2740 {
2741 for (uint32_t y = orig_h; y < m_height; y++)
2742 for (uint32_t x = 0; x < m_width; x++)
2743 set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
2744 }
2745 }
2746 return *this;
2747 }
2748
2749 // pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer).
2750 image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX)
2751 {
2752 if (p == UINT32_MAX)
2753 p = w;
2754
2755 clear();
2756
2757 if ((!p) || (!w) || (!h))
2758 return *this;
2759
2760 m_pixels.grant_ownership(pPixels, p * h, p * h);
2761
2762 m_width = w;
2763 m_height = h;
2764 m_pitch = p;
2765
2766 return *this;
2767 }
2768
2769 image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true)
2770 {
2771 if (p == UINT32_MAX)
2772 p = w;
2773
2774 if ((w == m_width) && (m_height == h) && (m_pitch == p))
2775 return *this;
2776
2777 if ((!w) || (!h) || (!p))
2778 {
2779 clear();
2780 return *this;
2781 }
2782
2783 color_rgba_vec cur_state;
2784 cur_state.swap(m_pixels);
2785
2786 m_pixels.resize(p * h);
2787
2788 if (init_image)
2789 {
2790 if (m_width || m_height)
2791 {
2792 for (uint32_t y = 0; y < h; y++)
2793 {
2794 for (uint32_t x = 0; x < w; x++)
2795 {
2796 if ((x < m_width) && (y < m_height))
2797 m_pixels[x + y * p] = cur_state[x + y * m_pitch];
2798 else
2799 m_pixels[x + y * p] = background;
2800 }
2801 }
2802 }
2803 else
2804 {
2805 m_pixels.set_all(background);
2806 }
2807 }
2808
2809 m_width = w;
2810 m_height = h;
2811 m_pitch = p;
2812
2813 return *this;
2814 }
2815
2816 inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
2817 inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
2818
2819 inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
2820 inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
2821
2822 inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
2823 {
2824 x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
2825 y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
2826 return m_pixels[x + y * m_pitch];
2827 }
2828
2829 inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
2830 {
2831 x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
2832 y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
2833 return m_pixels[x + y * m_pitch];
2834 }
2835
2836 inline image &set_clipped(int x, int y, const color_rgba &c)
2837 {
2838 if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
2839 (*this)(x, y) = c;
2840 return *this;
2841 }
2842
2843 inline image& set_clipped_alpha(int x, int y, const color_rgba& c)
2844 {
2845 if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
2846 (*this)(x, y).m_comps[3] = c.m_comps[3];
2847 return *this;
2848 }
2849
2850 // Very straightforward blit with full clipping. Not fast, but it works.
2851 image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
2852 {
2853 for (int y = 0; y < src_h; y++)
2854 {
2855 const int sy = src_y + y;
2856 if (sy < 0)
2857 continue;
2858 else if (sy >= (int)src.get_height())
2859 break;
2860
2861 for (int x = 0; x < src_w; x++)
2862 {
2863 const int sx = src_x + x;
2864 if (sx < 0)
2865 continue;
2866 else if (sx >= (int)src.get_height())
2867 break;
2868
2869 set_clipped(dst_x + x, dst_y + y, src(sx, sy));
2870 }
2871 }
2872
2873 return *this;
2874 }
2875
2876 const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
2877 {
2878 if (((src_x + w) > m_width) || ((src_y + h) > m_height))
2879 {
2880 // Slower clamping case
2881 for (uint32_t y = 0; y < h; y++)
2882 for (uint32_t x = 0; x < w; x++)
2883 *pDst++ = get_clamped(src_x + x, src_y + y);
2884 }
2885 else
2886 {
2887 const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch];
2888
2889 for (uint32_t y = 0; y < h; y++)
2890 {
2891 memcpy(pDst, pSrc, w * sizeof(color_rgba));
2892 pSrc += m_pitch;
2893 pDst += w;
2894 }
2895 }
2896
2897 return *this;
2898 }
2899
2900 image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
2901 {
2902 for (uint32_t y = 0; y < h; y++)
2903 for (uint32_t x = 0; x < w; x++)
2904 set_clipped(dst_x + x, dst_y + y, *pSrc++);
2905 return *this;
2906 }
2907
2908 inline uint32_t get_width() const { return m_width; }
2909 inline uint32_t get_height() const { return m_height; }
2910 inline uint32_t get_pitch() const { return m_pitch; }
2911 inline uint32_t get_total_pixels() const { return m_width * m_height; }
2912
2913 inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
2914 inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
2915 inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
2916
2917 inline const color_rgba_vec &get_pixels() const { return m_pixels; }
2918 inline color_rgba_vec &get_pixels() { return m_pixels; }
2919
2920 inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
2921 inline color_rgba *get_ptr() { return &m_pixels[0]; }
2922
2923 bool has_alpha() const
2924 {
2925 for (uint32_t y = 0; y < m_height; ++y)
2926 for (uint32_t x = 0; x < m_width; ++x)
2927 if ((*this)(x, y).a < 255)
2928 return true;
2929
2930 return false;
2931 }
2932
2933 image &set_alpha(uint8_t a)
2934 {
2935 for (uint32_t y = 0; y < m_height; ++y)
2936 for (uint32_t x = 0; x < m_width; ++x)
2937 (*this)(x, y).a = a;
2938 return *this;
2939 }
2940
2941 image &flip_y()
2942 {
2943 for (uint32_t y = 0; y < m_height / 2; ++y)
2944 for (uint32_t x = 0; x < m_width; ++x)
2945 std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
2946 return *this;
2947 }
2948
2949 // TODO: There are many ways to do this, not sure this is the best way.
2950 image &renormalize_normal_map()
2951 {
2952 for (uint32_t y = 0; y < m_height; y++)
2953 {
2954 for (uint32_t x = 0; x < m_width; x++)
2955 {
2956 color_rgba &c = (*this)(x, y);
2957 if ((c.r == 128) && (c.g == 128) && (c.b == 128))
2958 continue;
2959
2960 vec3F v(c.r, c.g, c.b);
2961 v = (v * (2.0f / 255.0f)) - vec3F(1.0f);
2962 v.clamp(-1.0f, 1.0f);
2963
2964 float length = v.length();
2965 const float cValidThresh = .077f;
2966 if (length < cValidThresh)
2967 {
2968 c.set(128, 128, 128, c.a);
2969 }
2970 else if (fabs(length - 1.0f) > cValidThresh)
2971 {
2972 if (length)
2973 v /= length;
2974
2975 for (uint32_t i = 0; i < 3; i++)
2976 c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f));
2977
2978 if ((c.g == 128) && (c.r == 128))
2979 {
2980 if (c.b < 128)
2981 c.b = 0;
2982 else
2983 c.b = 255;
2984 }
2985 }
2986 }
2987 }
2988 return *this;
2989 }
2990
2991 void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...);
2992
2993 private:
2994 uint32_t m_width, m_height, m_pitch; // all in pixels
2995 color_rgba_vec m_pixels;
2996 };
2997
2998 // Float images
2999
3000 typedef basisu::vector<vec4F> vec4F_vec;
3001
3002 class imagef
3003 {
3004 public:
3005 imagef() :
3006 m_width(0), m_height(0), m_pitch(0)
3007 {
3008 }
3009
3010 imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
3011 m_width(0), m_height(0), m_pitch(0)
3012 {
3013 resize(w, h, p);
3014 }
3015
3016 imagef(const imagef &other) :
3017 m_width(0), m_height(0), m_pitch(0)
3018 {
3019 *this = other;
3020 }
3021
3022 imagef &swap(imagef &other)
3023 {
3024 std::swap(m_width, other.m_width);
3025 std::swap(m_height, other.m_height);
3026 std::swap(m_pitch, other.m_pitch);
3027 m_pixels.swap(other.m_pixels);
3028 return *this;
3029 }
3030
3031 imagef &operator= (const imagef &rhs)
3032 {
3033 if (this != &rhs)
3034 {
3035 m_width = rhs.m_width;
3036 m_height = rhs.m_height;
3037 m_pitch = rhs.m_pitch;
3038 m_pixels = rhs.m_pixels;
3039 }
3040 return *this;
3041 }
3042
3043 imagef &clear()
3044 {
3045 m_width = 0;
3046 m_height = 0;
3047 m_pitch = 0;
3048 clear_vector(m_pixels);
3049 return *this;
3050 }
3051
3052 imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0))
3053 {
3054 const uint32_t width = src.get_width();
3055 const uint32_t height = src.get_height();
3056
3057 resize(width, height);
3058
3059 for (int y = 0; y < (int)height; y++)
3060 {
3061 for (uint32_t x = 0; x < width; x++)
3062 {
3063 const color_rgba &src_pixel = src(x, y);
3064 (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]);
3065 }
3066 }
3067
3068 return *this;
3069 }
3070
3071 imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
3072 {
3073 return resize(other.get_width(), other.get_height(), p, background);
3074 }
3075
3076 imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
3077 {
3078 return crop(w, h, p, background);
3079 }
3080
3081 imagef &set_all(const vec4F &c)
3082 {
3083 for (uint32_t i = 0; i < m_pixels.size(); i++)
3084 m_pixels[i] = c;
3085 return *this;
3086 }
3087
3088 imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c)
3089 {
3090 for (uint32_t iy = 0; iy < h; iy++)
3091 for (uint32_t ix = 0; ix < w; ix++)
3092 set_clipped(x + ix, y + iy, c);
3093 return *this;
3094 }
3095
3096 imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1))
3097 {
3098 if (p == UINT32_MAX)
3099 p = w;
3100
3101 if ((w == m_width) && (m_height == h) && (m_pitch == p))
3102 return *this;
3103
3104 if ((!w) || (!h) || (!p))
3105 {
3106 clear();
3107 return *this;
3108 }
3109
3110 vec4F_vec cur_state;
3111 cur_state.swap(m_pixels);
3112
3113 m_pixels.resize(p * h);
3114
3115 for (uint32_t y = 0; y < h; y++)
3116 {
3117 for (uint32_t x = 0; x < w; x++)
3118 {
3119 if ((x < m_width) && (y < m_height))
3120 m_pixels[x + y * p] = cur_state[x + y * m_pitch];
3121 else
3122 m_pixels[x + y * p] = background;
3123 }
3124 }
3125
3126 m_width = w;
3127 m_height = h;
3128 m_pitch = p;
3129
3130 return *this;
3131 }
3132
3133 inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
3134 inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
3135
3136 inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
3137 inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
3138
3139 inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
3140 {
3141 x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
3142 y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
3143 return m_pixels[x + y * m_pitch];
3144 }
3145
3146 inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
3147 {
3148 x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
3149 y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
3150 return m_pixels[x + y * m_pitch];
3151 }
3152
3153 inline imagef &set_clipped(int x, int y, const vec4F &c)
3154 {
3155 if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
3156 (*this)(x, y) = c;
3157 return *this;
3158 }
3159
3160 // Very straightforward blit with full clipping. Not fast, but it works.
3161 imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
3162 {
3163 for (int y = 0; y < src_h; y++)
3164 {
3165 const int sy = src_y + y;
3166 if (sy < 0)
3167 continue;
3168 else if (sy >= (int)src.get_height())
3169 break;
3170
3171 for (int x = 0; x < src_w; x++)
3172 {
3173 const int sx = src_x + x;
3174 if (sx < 0)
3175 continue;
3176 else if (sx >= (int)src.get_height())
3177 break;
3178
3179 set_clipped(dst_x + x, dst_y + y, src(sx, sy));
3180 }
3181 }
3182
3183 return *this;
3184 }
3185
3186 const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
3187 {
3188 for (uint32_t y = 0; y < h; y++)
3189 for (uint32_t x = 0; x < w; x++)
3190 *pDst++ = get_clamped(src_x + x, src_y + y);
3191 return *this;
3192 }
3193
3194 imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
3195 {
3196 for (uint32_t y = 0; y < h; y++)
3197 for (uint32_t x = 0; x < w; x++)
3198 set_clipped(dst_x + x, dst_y + y, *pSrc++);
3199 return *this;
3200 }
3201
3202 inline uint32_t get_width() const { return m_width; }
3203 inline uint32_t get_height() const { return m_height; }
3204 inline uint32_t get_pitch() const { return m_pitch; }
3205 inline uint32_t get_total_pixels() const { return m_width * m_height; }
3206
3207 inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
3208 inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
3209 inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
3210
3211 inline const vec4F_vec &get_pixels() const { return m_pixels; }
3212 inline vec4F_vec &get_pixels() { return m_pixels; }
3213
3214 inline const vec4F *get_ptr() const { return &m_pixels[0]; }
3215 inline vec4F *get_ptr() { return &m_pixels[0]; }
3216
3217 private:
3218 uint32_t m_width, m_height, m_pitch; // all in pixels
3219 vec4F_vec m_pixels;
3220 };
3221
3222 // Image metrics
3223
3224 class image_metrics
3225 {
3226 public:
3227 // TODO: Add ssim
3228 float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
3229
3230 image_metrics()
3231 {
3232 clear();
3233 }
3234
3235 void clear()
3236 {
3237 m_max = 0;
3238 m_mean = 0;
3239 m_mean_squared = 0;
3240 m_rms = 0;
3241 m_psnr = 0;
3242 m_ssim = 0;
3243 }
3244
3245 void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
3246
3247 void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
3248 };
3249
3250 // Image saving/loading/resampling
3251
3252 bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr);
3253 bool load_png(const char* pFilename, image& img);
3254 inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); }
3255
3256 bool load_tga(const char* pFilename, image& img);
3257 inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }
3258
3259 bool load_jpg(const char *pFilename, image& img);
3260 inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }
3261
3262 // Currently loads .PNG, .TGA, or .JPG
3263 bool load_image(const char* pFilename, image& img);
3264 inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }
3265
3266 uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
3267 uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
3268
3269 enum
3270 {
3271 cImageSaveGrayscale = 1,
3272 cImageSaveIgnoreAlpha = 2
3273 };
3274
3275 bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0);
3276 inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }
3277
3278 bool read_file_to_vec(const char* pFilename, uint8_vec& data);
3279
3280 bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
3281
3282 inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
3283
3284 float linear_to_srgb(float l);
3285 float srgb_to_linear(float s);
3286
3287 bool image_resample(const image &src, image &dst, bool srgb = false,
3288 const char *pFilter = "lanczos4", float filter_scale = 1.0f,
3289 bool wrapping = false,
3290 uint32_t first_comp = 0, uint32_t num_comps = 4);
3291
3292 // Timing
3293
3294 typedef uint64_t timer_ticks;
3295
3296 class interval_timer
3297 {
3298 public:
3299 interval_timer();
3300
3301 void start();
3302 void stop();
3303
3304 double get_elapsed_secs() const;
3305 inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); }
3306
3307 static void init();
3308 static inline timer_ticks get_ticks_per_sec() { return g_freq; }
3309 static timer_ticks get_ticks();
3310 static double ticks_to_secs(timer_ticks ticks);
3311 static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; }
3312
3313 private:
3314 static timer_ticks g_init_ticks, g_freq;
3315 static double g_timer_freq;
3316
3317 timer_ticks m_start_time, m_stop_time;
3318
3319 bool m_started, m_stopped;
3320 };
3321
3322 // 2D array
3323
3324 template<typename T>
3325 class vector2D
3326 {
3327 typedef basisu::vector<T> TVec;
3328
3329 uint32_t m_width, m_height;
3330 TVec m_values;
3331
3332 public:
3333 vector2D() :
3334 m_width(0),
3335 m_height(0)
3336 {
3337 }
3338
3339 vector2D(uint32_t w, uint32_t h) :
3340 m_width(0),
3341 m_height(0)
3342 {
3343 resize(w, h);
3344 }
3345
3346 vector2D(const vector2D &other)
3347 {
3348 *this = other;
3349 }
3350
3351 vector2D &operator= (const vector2D &other)
3352 {
3353 if (this != &other)
3354 {
3355 m_width = other.m_width;
3356 m_height = other.m_height;
3357 m_values = other.m_values;
3358 }
3359 return *this;
3360 }
3361
3362 inline bool operator== (const vector2D &rhs) const
3363 {
3364 return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values);
3365 }
3366
3367 inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); }
3368
3369 inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
3370 inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
3371
3372 inline const T &operator[] (uint32_t i) const { return m_values[i]; }
3373 inline T &operator[] (uint32_t i) { return m_values[i]; }
3374
3375 inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
3376 inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width), clamp<int>(y, 0, m_height)); }
3377
3378 void clear()
3379 {
3380 m_width = 0;
3381 m_height = 0;
3382 m_values.clear();
3383 }
3384
3385 void set_all(const T&val)
3386 {
3387 vector_set_all(m_values, val);
3388 }
3389
3390 inline const T* get_ptr() const { return &m_values[0]; }
3391 inline T* get_ptr() { return &m_values[0]; }
3392
3393 vector2D &resize(uint32_t new_width, uint32_t new_height)
3394 {
3395 if ((m_width == new_width) && (m_height == new_height))
3396 return *this;
3397
3398 TVec oldVals(new_width * new_height);
3399 oldVals.swap(m_values);
3400
3401 const uint32_t w = minimum(m_width, new_width);
3402 const uint32_t h = minimum(m_height, new_height);
3403
3404 if ((w) && (h))
3405 {
3406 for (uint32_t y = 0; y < h; y++)
3407 for (uint32_t x = 0; x < w; x++)
3408 m_values[x + y * new_width] = oldVals[x + y * m_width];
3409 }
3410
3411 m_width = new_width;
3412 m_height = new_height;
3413
3414 return *this;
3415 }
3416 };
3417
3418 inline FILE *fopen_safe(const char *pFilename, const char *pMode)
3419 {
3420#ifdef _WIN32
3421 FILE *pFile = nullptr;
3422 fopen_s(&pFile, pFilename, pMode);
3423 return pFile;
3424#else
3425 return fopen(pFilename, pMode);
3426#endif
3427 }
3428
3429 void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1);
3430
3431 const uint32_t cPixelBlockWidth = 4;
3432 const uint32_t cPixelBlockHeight = 4;
3433 const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight;
3434
3435 struct pixel_block
3436 {
3437 color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
3438
3439 inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
3440 inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
3441
3442 inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; }
3443 inline color_rgba* get_ptr() { return &m_pixels[0][0]; }
3444
3445 inline void clear() { clear_obj(*this); }
3446
3447 inline bool operator== (const pixel_block& rhs) const
3448 {
3449 return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
3450 }
3451 };
3452 typedef basisu::vector<pixel_block> pixel_block_vec;
3453
3454} // namespace basisu
3455
3456
3457