1// basisu_enc.cpp
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15#include "basisu_enc.h"
16#include "basisu_resampler.h"
17#include "basisu_resampler_filters.h"
18#include "basisu_etc.h"
19#include "../transcoder/basisu_transcoder.h"
20#include "basisu_bc7enc.h"
21#include "jpgd.h"
22#include "pvpngreader.h"
23#include "basisu_opencl.h"
24#include <vector>
25
26#define MINIZ_HEADER_FILE_ONLY
27#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
28#include "basisu_miniz.h"
29
30#if defined(_WIN32)
31// For QueryPerformanceCounter/QueryPerformanceFrequency
32#define WIN32_LEAN_AND_MEAN
33#include <windows.h>
34#endif
35
36namespace basisu
37{
38 uint64_t interval_timer::g_init_ticks, interval_timer::g_freq;
39 double interval_timer::g_timer_freq;
40#if BASISU_SUPPORT_SSE
41 bool g_cpu_supports_sse41;
42#endif
43
// g_hamming_dist[i] is the number of set bits in the byte value i.
// The 256 entries are generated with the classic recursive bit-count macros:
// each level emits the pattern n, n+1, n+1, n+2 for the next two index bits.
#define BASISU_HAMMING_B2(n) (n), (n) + 1, (n) + 1, (n) + 2
#define BASISU_HAMMING_B4(n) BASISU_HAMMING_B2(n), BASISU_HAMMING_B2((n) + 1), BASISU_HAMMING_B2((n) + 1), BASISU_HAMMING_B2((n) + 2)
#define BASISU_HAMMING_B6(n) BASISU_HAMMING_B4(n), BASISU_HAMMING_B4((n) + 1), BASISU_HAMMING_B4((n) + 1), BASISU_HAMMING_B4((n) + 2)

uint8_t g_hamming_dist[256] =
{
    BASISU_HAMMING_B6(0), BASISU_HAMMING_B6(1), BASISU_HAMMING_B6(1), BASISU_HAMMING_B6(2)
};

#undef BASISU_HAMMING_B6
#undef BASISU_HAMMING_B4
#undef BASISU_HAMMING_B2
63
// Public Domain 8x8 bitmap font, taken from:
// https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h
// One entry per character code 0x20..0x7F (index = code - 32), 8 bytes per glyph.
const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] =
{
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x20 space
    { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00 }, // 0x21 !
    { 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x22 double quote
    { 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00 }, // 0x23 #
    { 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00 }, // 0x24 $
    { 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00 }, // 0x25 %
    { 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00 }, // 0x26 &
    { 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x27 single quote
    { 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00 }, // 0x28 (
    { 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00 }, // 0x29 )
    { 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00 }, // 0x2A *
    { 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00 }, // 0x2B +
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06 }, // 0x2C ,
    { 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00 }, // 0x2D -
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00 }, // 0x2E .
    { 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00 }, // 0x2F /
    { 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00 }, // 0x30 0
    { 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00 }, // 0x31 1
    { 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00 }, // 0x32 2
    { 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00 }, // 0x33 3
    { 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00 }, // 0x34 4
    { 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00 }, // 0x35 5
    { 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00 }, // 0x36 6
    { 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00 }, // 0x37 7
    { 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00 }, // 0x38 8
    { 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00 }, // 0x39 9
    { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00 }, // 0x3A :
    { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06 }, // 0x3B ;
    { 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00 }, // 0x3C <
    { 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00 }, // 0x3D =
    { 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00 }, // 0x3E >
    { 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00 }, // 0x3F ?
    { 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00 }, // 0x40 @
    { 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00 }, // 0x41 A
    { 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00 }, // 0x42 B
    { 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00 }, // 0x43 C
    { 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00 }, // 0x44 D
    { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00 }, // 0x45 E
    { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00 }, // 0x46 F
    { 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00 }, // 0x47 G
    { 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00 }, // 0x48 H
    { 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x49 I
    { 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00 }, // 0x4A J
    { 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00 }, // 0x4B K
    { 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00 }, // 0x4C L
    { 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00 }, // 0x4D M
    { 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00 }, // 0x4E N
    { 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00 }, // 0x4F O
    { 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00 }, // 0x50 P
    { 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00 }, // 0x51 Q
    { 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00 }, // 0x52 R
    { 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00 }, // 0x53 S
    { 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x54 T
    { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00 }, // 0x55 U
    { 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00 }, // 0x56 V
    { 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00 }, // 0x57 W
    { 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00 }, // 0x58 X
    { 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x59 Y
    { 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00 }, // 0x5A Z
    { 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00 }, // 0x5B [
    { 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00 }, // 0x5C backslash
    { 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00 }, // 0x5D ]
    { 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00 }, // 0x5E ^
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF }, // 0x5F _
    { 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x60 backtick
    { 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00 }, // 0x61 a
    { 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00 }, // 0x62 b
    { 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00 }, // 0x63 c
    { 0x38, 0x30, 0x30, 0x3E, 0x33, 0x33, 0x6E, 0x00 }, // 0x64 d
    { 0x00, 0x00, 0x1E, 0x33, 0x3F, 0x03, 0x1E, 0x00 }, // 0x65 e
    { 0x1C, 0x36, 0x06, 0x0F, 0x06, 0x06, 0x0F, 0x00 }, // 0x66 f
    { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F }, // 0x67 g
    { 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00 }, // 0x68 h
    { 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x69 i
    { 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E }, // 0x6A j
    { 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00 }, // 0x6B k
    { 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x6C l
    { 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00 }, // 0x6D m
    { 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00 }, // 0x6E n
    { 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00 }, // 0x6F o
    { 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F }, // 0x70 p
    { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78 }, // 0x71 q
    { 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00 }, // 0x72 r
    { 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00 }, // 0x73 s
    { 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00 }, // 0x74 t
    { 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00 }, // 0x75 u
    { 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00 }, // 0x76 v
    { 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00 }, // 0x77 w
    { 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00 }, // 0x78 x
    { 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F }, // 0x79 y
    { 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00 }, // 0x7A z
    { 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 0x00 }, // 0x7B {
    { 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00 }, // 0x7C |
    { 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00 }, // 0x7D }
    { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x7E ~
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }  // 0x7F (blank)
};
165
166 bool g_library_initialized;
167 std::mutex g_encoder_init_mutex;
168
169 // Encoder library initialization (just call once at startup)
170 void basisu_encoder_init(bool use_opencl, bool opencl_force_serialization)
171 {
172 std::lock_guard<std::mutex> lock(g_encoder_init_mutex);
173
174 if (g_library_initialized)
175 return;
176
177 detect_sse41();
178
179 basist::basisu_transcoder_init();
180 pack_etc1_solid_color_init();
181 //uastc_init();
182 bc7enc_compress_block_init(); // must be after uastc_init()
183
184 // Don't bother initializing the OpenCL module at all if it's been completely disabled.
185 if (use_opencl)
186 {
187 opencl_init(opencl_force_serialization);
188 }
189
190 interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports
191
192 g_library_initialized = true;
193 }
194
195 void basisu_encoder_deinit()
196 {
197 opencl_deinit();
198
199 g_library_initialized = false;
200 }
201
// Formats a printf-style message and writes it to stderr prefixed with "ERROR: ".
// pFmt: printf-style format string.
// args: argument list matching pFmt (the caller owns va_start/va_end).
// Messages longer than the internal 8 KB buffer are truncated.
void error_vprintf(const char* pFmt, va_list args)
{
    char buf[8192];

    // vsnprintf truncates and NUL-terminates on every platform. The previous
    // Windows-only vsprintf_s call invoked the CRT invalid-parameter handler
    // (terminating the process by default) when the message did not fit.
    const int result = vsnprintf(buf, sizeof(buf), pFmt, args);
    if (result < 0)
        buf[0] = '\0'; // formatting failed: buffer contents are unspecified, print just the prefix

    fprintf(stderr, "ERROR: %s", buf);
}
214
215 void error_printf(const char *pFmt, ...)
216 {
217 va_list args;
218 va_start(args, pFmt);
219 error_vprintf(pFmt, args);
220 va_end(args);
221 }
222
223#if defined(_WIN32)
224 inline void query_counter(timer_ticks* pTicks)
225 {
226 QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(pTicks));
227 }
228 inline void query_counter_frequency(timer_ticks* pTicks)
229 {
230 QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks));
231 }
232#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__EMSCRIPTEN__)
233#include <sys/time.h>
234 inline void query_counter(timer_ticks* pTicks)
235 {
236 struct timeval cur_time;
237 gettimeofday(&cur_time, NULL);
238 *pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec);
239 }
240 inline void query_counter_frequency(timer_ticks* pTicks)
241 {
242 *pTicks = 1000000;
243 }
244#elif defined(__GNUC__)
245#include <sys/timex.h>
246 inline void query_counter(timer_ticks* pTicks)
247 {
248 struct timeval cur_time;
249 gettimeofday(&cur_time, NULL);
250 *pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec);
251 }
252 inline void query_counter_frequency(timer_ticks* pTicks)
253 {
254 *pTicks = 1000000;
255 }
256#else
257#error TODO
258#endif
259
260 interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false)
261 {
262 if (!g_timer_freq)
263 init();
264 }
265
266 void interval_timer::start()
267 {
268 query_counter(&m_start_time);
269 m_started = true;
270 m_stopped = false;
271 }
272
273 void interval_timer::stop()
274 {
275 assert(m_started);
276 query_counter(&m_stop_time);
277 m_stopped = true;
278 }
279
280 double interval_timer::get_elapsed_secs() const
281 {
282 assert(m_started);
283 if (!m_started)
284 return 0;
285
286 timer_ticks stop_time = m_stop_time;
287 if (!m_stopped)
288 query_counter(&stop_time);
289
290 timer_ticks delta = stop_time - m_start_time;
291 return delta * g_timer_freq;
292 }
293
294 void interval_timer::init()
295 {
296 if (!g_timer_freq)
297 {
298 query_counter_frequency(&g_freq);
299 g_timer_freq = 1.0f / g_freq;
300 query_counter(&g_init_ticks);
301 }
302 }
303
304 timer_ticks interval_timer::get_ticks()
305 {
306 if (!g_timer_freq)
307 init();
308 timer_ticks ticks;
309 query_counter(&ticks);
310 return ticks - g_init_ticks;
311 }
312
313 double interval_timer::ticks_to_secs(timer_ticks ticks)
314 {
315 if (!g_timer_freq)
316 init();
317 return ticks * g_timer_freq;
318 }
319
320 const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000;
321
322 bool load_tga(const char* pFilename, image& img)
323 {
324 int w = 0, h = 0, n_chans = 0;
325 uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans);
326
327 if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4)))
328 {
329 error_printf("Failed loading .TGA image \"%s\"!\n", pFilename);
330
331 if (pImage_data)
332 free(pImage_data);
333
334 return false;
335 }
336
337 if (sizeof(void *) == sizeof(uint32_t))
338 {
339 if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE)
340 {
341 error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h);
342
343 if (pImage_data)
344 free(pImage_data);
345
346 return false;
347 }
348 }
349
350 img.resize(w, h);
351
352 const uint8_t *pSrc = pImage_data;
353 for (int y = 0; y < h; y++)
354 {
355 color_rgba *pDst = &img(0, y);
356
357 for (int x = 0; x < w; x++)
358 {
359 pDst->r = pSrc[0];
360 pDst->g = pSrc[1];
361 pDst->b = pSrc[2];
362 pDst->a = (n_chans == 3) ? 255 : pSrc[3];
363
364 pSrc += n_chans;
365 ++pDst;
366 }
367 }
368
369 free(pImage_data);
370
371 return true;
372 }
373
374 bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename)
375 {
376 interval_timer tm;
377 tm.start();
378
379 if (!buf_size)
380 return false;
381
382 uint32_t width = 0, height = 0, num_chans = 0;
383 void* pImage = pv_png::load_png(pBuf, buf_size, 4, width, height, num_chans);
384 if (!pBuf)
385 {
386 error_printf("pv_png::load_png failed while loading image \"%s\"\n", pFilename);
387 return false;
388 }
389
390 img.grant_ownership(reinterpret_cast<color_rgba*>(pImage), width, height);
391
392 //debug_printf("Total load_png() time: %3.3f secs\n", tm.get_elapsed_secs());
393
394 return true;
395 }
396
397 bool load_png(const char* pFilename, image& img)
398 {
399 uint8_vec buffer;
400 if (!read_file_to_vec(pFilename, buffer))
401 {
402 error_printf("load_png: Failed reading file \"%s\"!\n", pFilename);
403 return false;
404 }
405
406 return load_png(buffer.data(), buffer.size(), img, pFilename);
407 }
408
409 bool load_jpg(const char *pFilename, image& img)
410 {
411 int width = 0, height = 0, actual_comps = 0;
412 uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
413 if (!pImage_data)
414 return false;
415
416 img.init(pImage_data, width, height, 4);
417
418 free(pImage_data);
419
420 return true;
421 }
422
423 bool load_image(const char* pFilename, image& img)
424 {
425 std::string ext(string_get_extension(std::string(pFilename)));
426
427 if (ext.length() == 0)
428 return false;
429
430 const char *pExt = ext.c_str();
431
432 if (strcasecmp(pExt, "png") == 0)
433 return load_png(pFilename, img);
434 if (strcasecmp(pExt, "tga") == 0)
435 return load_tga(pFilename, img);
436 if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) )
437 return load_jpg(pFilename, img);
438
439 return false;
440 }
441
442 bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp)
443 {
444 if (!img.get_total_pixels())
445 return false;
446
447 void* pPNG_data = nullptr;
448 size_t PNG_data_size = 0;
449
450 if (image_save_flags & cImageSaveGrayscale)
451 {
452 uint8_vec g_pixels(img.get_total_pixels());
453 uint8_t* pDst = &g_pixels[0];
454
455 for (uint32_t y = 0; y < img.get_height(); y++)
456 for (uint32_t x = 0; x < img.get_width(); x++)
457 *pDst++ = img(x, y)[grayscale_comp];
458
459 pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(g_pixels.data(), img.get_width(), img.get_height(), 1, &PNG_data_size, 1, false);
460 }
461 else
462 {
463 bool has_alpha = false;
464
465 if ((image_save_flags & cImageSaveIgnoreAlpha) == 0)
466 has_alpha = img.has_alpha();
467
468 if (!has_alpha)
469 {
470 uint8_vec rgb_pixels(img.get_total_pixels() * 3);
471 uint8_t* pDst = &rgb_pixels[0];
472
473 for (uint32_t y = 0; y < img.get_height(); y++)
474 {
475 const color_rgba* pSrc = &img(0, y);
476 for (uint32_t x = 0; x < img.get_width(); x++)
477 {
478 pDst[0] = pSrc->r;
479 pDst[1] = pSrc->g;
480 pDst[2] = pSrc->b;
481
482 pSrc++;
483 pDst += 3;
484 }
485 }
486
487 pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(rgb_pixels.data(), img.get_width(), img.get_height(), 3, &PNG_data_size, 1, false);
488 }
489 else
490 {
491 pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(img.get_ptr(), img.get_width(), img.get_height(), 4, &PNG_data_size, 1, false);
492 }
493 }
494
495 if (!pPNG_data)
496 return false;
497
498 bool status = write_data_to_file(pFilename, pPNG_data, PNG_data_size);
499 if (!status)
500 {
501 error_printf("save_png: Failed writing to filename \"%s\"!\n", pFilename);
502 }
503
504 free(pPNG_data);
505
506 return status;
507 }
508
509 bool read_file_to_vec(const char* pFilename, uint8_vec& data)
510 {
511 FILE* pFile = nullptr;
512#ifdef _WIN32
513 fopen_s(&pFile, pFilename, "rb");
514#else
515 pFile = fopen(pFilename, "rb");
516#endif
517 if (!pFile)
518 return false;
519
520 fseek(pFile, 0, SEEK_END);
521#ifdef _WIN32
522 int64_t filesize = _ftelli64(pFile);
523#else
524 int64_t filesize = ftello(pFile);
525#endif
526 if (filesize < 0)
527 {
528 fclose(pFile);
529 return false;
530 }
531 fseek(pFile, 0, SEEK_SET);
532
533 if (sizeof(size_t) == sizeof(uint32_t))
534 {
535 if (filesize > 0x70000000)
536 {
537 // File might be too big to load safely in one alloc
538 fclose(pFile);
539 return false;
540 }
541 }
542
543 if (!data.try_resize((size_t)filesize))
544 {
545 fclose(pFile);
546 return false;
547 }
548
549 if (filesize)
550 {
551 if (fread(&data[0], 1, (size_t)filesize, pFile) != (size_t)filesize)
552 {
553 fclose(pFile);
554 return false;
555 }
556 }
557
558 fclose(pFile);
559 return true;
560 }
561
// Creates (or truncates) pFilename and writes len bytes from pData to it.
// A zero len produces an empty file. Returns true only if the file was
// opened, every byte was written, and the file closed without error.
bool write_data_to_file(const char* pFilename, const void* pData, size_t len)
{
    FILE* pFile = nullptr;
#ifdef _WIN32
    fopen_s(&pFile, pFilename, "wb");
#else
    pFile = fopen(pFilename, "wb");
#endif
    if (pFile == nullptr)
        return false;

    // Nothing is written when len is zero.
    const bool wrote_everything = (len == 0) || (fwrite(pData, 1, len, pFile) == len);

    // The file is always closed; a close failure only matters if the write succeeded.
    const bool closed_cleanly = (fclose(pFile) != EOF);

    return wrote_everything && closed_cleanly;
}
584
585 float linear_to_srgb(float l)
586 {
587 assert(l >= 0.0f && l <= 1.0f);
588 if (l < .0031308f)
589 return saturate(l * 12.92f);
590 else
591 return saturate(1.055f * powf(l, 1.0f/2.4f) - .055f);
592 }
593
594 float srgb_to_linear(float s)
595 {
596 assert(s >= 0.0f && s <= 1.0f);
597 if (s < .04045f)
598 return saturate(s * (1.0f/12.92f));
599 else
600 return saturate(powf((s + .055f) * (1.0f/1.055f), 2.4f));
601 }
602
603 bool image_resample(const image &src, image &dst, bool srgb,
604 const char *pFilter, float filter_scale,
605 bool wrapping,
606 uint32_t first_comp, uint32_t num_comps)
607 {
608 assert((first_comp + num_comps) <= 4);
609
610 const int cMaxComps = 4;
611
612 const uint32_t src_w = src.get_width(), src_h = src.get_height();
613 const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();
614
615 if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)
616 {
617 printf("Image is too large!\n");
618 return false;
619 }
620
621 if (!src_w || !src_h || !dst_w || !dst_h)
622 return false;
623
624 if ((num_comps < 1) || (num_comps > cMaxComps))
625 return false;
626
627 if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))
628 {
629 printf("Image is too large!\n");
630 return false;
631 }
632
633 if ((src_w == dst_w) && (src_h == dst_h))
634 {
635 dst = src;
636 return true;
637 }
638
639 float srgb_to_linear_table[256];
640 if (srgb)
641 {
642 for (int i = 0; i < 256; ++i)
643 srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f));
644 }
645
646 const int LINEAR_TO_SRGB_TABLE_SIZE = 8192;
647 uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];
648
649 if (srgb)
650 {
651 for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
652 linear_to_srgb_table[i] = (uint8_t)clamp<int>((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255);
653 }
654
655 std::vector<float> samples[cMaxComps];
656 Resampler *resamplers[cMaxComps];
657
658 resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h,
659 wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,
660 pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0);
661 samples[0].resize(src_w);
662
663 for (uint32_t i = 1; i < num_comps; ++i)
664 {
665 resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h,
666 wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,
667 pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0);
668 samples[i].resize(src_w);
669 }
670
671 uint32_t dst_y = 0;
672
673 for (uint32_t src_y = 0; src_y < src_h; ++src_y)
674 {
675 const color_rgba *pSrc = &src(0, src_y);
676
677 // Put source lines into resampler(s)
678 for (uint32_t x = 0; x < src_w; ++x)
679 {
680 for (uint32_t c = 0; c < num_comps; ++c)
681 {
682 const uint32_t comp_index = first_comp + c;
683 const uint32_t v = (*pSrc)[comp_index];
684
685 if (!srgb || (comp_index == 3))
686 samples[c][x] = v * (1.0f / 255.0f);
687 else
688 samples[c][x] = srgb_to_linear_table[v];
689 }
690
691 pSrc++;
692 }
693
694 for (uint32_t c = 0; c < num_comps; ++c)
695 {
696 if (!resamplers[c]->put_line(&samples[c][0]))
697 {
698 for (uint32_t i = 0; i < num_comps; i++)
699 delete resamplers[i];
700 return false;
701 }
702 }
703
704 // Now retrieve any output lines
705 for (;;)
706 {
707 uint32_t c;
708 for (c = 0; c < num_comps; ++c)
709 {
710 const uint32_t comp_index = first_comp + c;
711
712 const float *pOutput_samples = resamplers[c]->get_line();
713 if (!pOutput_samples)
714 break;
715
716 const bool linear_flag = !srgb || (comp_index == 3);
717
718 color_rgba *pDst = &dst(0, dst_y);
719
720 for (uint32_t x = 0; x < dst_w; x++)
721 {
722 // TODO: Add dithering
723 if (linear_flag)
724 {
725 int j = (int)(255.0f * pOutput_samples[x] + .5f);
726 (*pDst)[comp_index] = (uint8_t)clamp<int>(j, 0, 255);
727 }
728 else
729 {
730 int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f);
731 (*pDst)[comp_index] = linear_to_srgb_table[clamp<int>(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 1)];
732 }
733
734 pDst++;
735 }
736 }
737 if (c < num_comps)
738 break;
739
740 ++dst_y;
741 }
742 }
743
744 for (uint32_t i = 0; i < num_comps; ++i)
745 delete resamplers[i];
746
747 return true;
748 }
749
750 void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms)
751 {
752 // See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen
753 if (!num_syms)
754 return;
755
756 if (1 == num_syms)
757 {
758 A[0].m_key = 1;
759 return;
760 }
761
762 A[0].m_key += A[1].m_key;
763
764 int s = 2, r = 0, next;
765 for (next = 1; next < (num_syms - 1); ++next)
766 {
767 if ((s >= num_syms) || (A[r].m_key < A[s].m_key))
768 {
769 A[next].m_key = A[r].m_key;
770 A[r].m_key = next;
771 ++r;
772 }
773 else
774 {
775 A[next].m_key = A[s].m_key;
776 ++s;
777 }
778
779 if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key))
780 {
781 A[next].m_key = A[next].m_key + A[r].m_key;
782 A[r].m_key = next;
783 ++r;
784 }
785 else
786 {
787 A[next].m_key = A[next].m_key + A[s].m_key;
788 ++s;
789 }
790 }
791 A[num_syms - 2].m_key = 0;
792
793 for (next = num_syms - 3; next >= 0; --next)
794 {
795 A[next].m_key = 1 + A[A[next].m_key].m_key;
796 }
797
798 int num_avail = 1, num_used = 0, depth = 0;
799 r = num_syms - 2;
800 next = num_syms - 1;
801 while (num_avail > 0)
802 {
803 for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r )
804 ;
805
806 for ( ; num_avail > num_used; --next, --num_avail)
807 A[next].m_key = depth;
808
809 num_avail = 2 * num_used;
810 num_used = 0;
811 ++depth;
812 }
813 }
814
815 void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
816 {
817 int i;
818 uint32_t total = 0;
819 if (code_list_len <= 1)
820 return;
821
822 for (i = max_code_size + 1; i <= cHuffmanMaxSupportedInternalCodeSize; i++)
823 pNum_codes[max_code_size] += pNum_codes[i];
824
825 for (i = max_code_size; i > 0; i--)
826 total += (((uint32_t)pNum_codes[i]) << (max_code_size - i));
827
828 while (total != (1UL << max_code_size))
829 {
830 pNum_codes[max_code_size]--;
831 for (i = max_code_size - 1; i > 0; i--)
832 {
833 if (pNum_codes[i])
834 {
835 pNum_codes[i]--;
836 pNum_codes[i + 1] += 2;
837 break;
838 }
839 }
840
841 total--;
842 }
843 }
844
845 sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1)
846 {
847 uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2];
848 sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1;
849
850 clear_obj(hist);
851
852 for (i = 0; i < num_syms; i++)
853 {
854 uint32_t freq = pSyms0[i].m_key;
855
856 // We scale all input frequencies to 16-bits.
857 assert(freq <= UINT16_MAX);
858
859 hist[freq & 0xFF]++;
860 hist[256 + ((freq >> 8) & 0xFF)]++;
861 }
862
863 while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256]))
864 total_passes--;
865
866 for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
867 {
868 const uint32_t *pHist = &hist[pass << 8];
869 uint32_t offsets[256], cur_ofs = 0;
870 for (i = 0; i < 256; i++)
871 {
872 offsets[i] = cur_ofs;
873 cur_ofs += pHist[i];
874 }
875
876 for (i = 0; i < num_syms; i++)
877 pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
878
879 sym_freq *t = pCur_syms;
880 pCur_syms = pNew_syms;
881 pNew_syms = t;
882 }
883
884 return pCur_syms;
885 }
886
887 bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size)
888 {
889 if (max_code_size > cHuffmanMaxSupportedCodeSize)
890 return false;
891 if ((!num_syms) || (num_syms > cHuffmanMaxSyms))
892 return false;
893
894 uint32_t total_used_syms = 0;
895 for (uint32_t i = 0; i < num_syms; i++)
896 if (pFreq[i])
897 total_used_syms++;
898
899 if (!total_used_syms)
900 return false;
901
902 std::vector<sym_freq> sym_freq0(total_used_syms), sym_freq1(total_used_syms);
903 for (uint32_t i = 0, j = 0; i < num_syms; i++)
904 {
905 if (pFreq[i])
906 {
907 sym_freq0[j].m_key = pFreq[i];
908 sym_freq0[j++].m_sym_index = static_cast<uint16_t>(i);
909 }
910 }
911
912 sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]);
913
914 canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms);
915
916 int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1];
917 clear_obj(num_codes);
918
919 for (uint32_t i = 0; i < total_used_syms; i++)
920 {
921 if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize)
922 return false;
923
924 num_codes[pSym_freq[i].m_key]++;
925 }
926
927 canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size);
928
929 m_code_sizes.resize(0);
930 m_code_sizes.resize(num_syms);
931
932 m_codes.resize(0);
933 m_codes.resize(num_syms);
934
935 for (uint32_t i = 1, j = total_used_syms; i <= max_code_size; i++)
936 for (uint32_t l = num_codes[i]; l > 0; l--)
937 m_code_sizes[pSym_freq[--j].m_sym_index] = static_cast<uint8_t>(i);
938
939 uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1];
940
941 next_code[1] = 0;
942 for (uint32_t j = 0, i = 2; i <= max_code_size; i++)
943 next_code[i] = j = ((j + num_codes[i - 1]) << 1);
944
945 for (uint32_t i = 0; i < num_syms; i++)
946 {
947 uint32_t rev_code = 0, code, code_size;
948 if ((code_size = m_code_sizes[i]) == 0)
949 continue;
950 if (code_size > cHuffmanMaxSupportedInternalCodeSize)
951 return false;
952 code = next_code[code_size]++;
953 for (uint32_t l = code_size; l > 0; l--, code >>= 1)
954 rev_code = (rev_code << 1) | (code & 1);
955 m_codes[i] = static_cast<uint16_t>(rev_code);
956 }
957
958 return true;
959 }
960
961 bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size)
962 {
963 if ((!num_syms) || (num_syms > cHuffmanMaxSyms))
964 return false;
965
966 uint16_vec sym_freq(num_syms);
967
968 uint32_t max_freq = 0;
969 for (uint32_t i = 0; i < num_syms; i++)
970 max_freq = maximum(max_freq, pSym_freq[i]);
971
972 if (max_freq < UINT16_MAX)
973 {
974 for (uint32_t i = 0; i < num_syms; i++)
975 sym_freq[i] = static_cast<uint16_t>(pSym_freq[i]);
976 }
977 else
978 {
979 for (uint32_t i = 0; i < num_syms; i++)
980 {
981 if (pSym_freq[i])
982 {
983 uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq);
984 sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534));
985 }
986 }
987 }
988
989 return init(num_syms, &sym_freq[0], max_code_size);
990 }
991
992 void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len)
993 {
994 if (run_size)
995 {
996 if (run_size < cHuffmanSmallRepeatSizeMin)
997 {
998 while (run_size--)
999 syms.push_back(static_cast<uint16_t>(len));
1000 }
1001 else if (run_size <= cHuffmanSmallRepeatSizeMax)
1002 {
1003 syms.push_back(static_cast<uint16_t>(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6)));
1004 }
1005 else
1006 {
1007 assert((run_size >= cHuffmanBigRepeatSizeMin) && (run_size <= cHuffmanBigRepeatSizeMax));
1008 syms.push_back(static_cast<uint16_t>(cHuffmanBigRepeatCode | ((run_size - cHuffmanBigRepeatSizeMin) << 6)));
1009 }
1010 }
1011
1012 run_size = 0;
1013 }
1014
1015 void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size)
1016 {
1017 if (run_size)
1018 {
1019 if (run_size < cHuffmanSmallZeroRunSizeMin)
1020 {
1021 while (run_size--)
1022 syms.push_back(0);
1023 }
1024 else if (run_size <= cHuffmanSmallZeroRunSizeMax)
1025 {
1026 syms.push_back(static_cast<uint16_t>(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6)));
1027 }
1028 else
1029 {
1030 assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax));
1031 syms.push_back(static_cast<uint16_t>(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6)));
1032 }
1033 }
1034
1035 run_size = 0;
1036 }
1037
1038 uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab)
1039 {
1040 const uint64_t start_bits = m_total_bits;
1041
1042 const uint8_vec &code_sizes = tab.get_code_sizes();
1043
1044 uint32_t total_used = tab.get_total_used_codes();
1045 put_bits(total_used, cHuffmanMaxSymsLog2);
1046
1047 if (!total_used)
1048 return 0;
1049
1050 uint16_vec syms;
1051 syms.reserve(total_used + 16);
1052
1053 uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0;
1054
1055 for (uint32_t i = 0; i <= total_used; ++i)
1056 {
1057 const uint32_t code_len = (i == total_used) ? 0xFF : code_sizes[i];
1058 assert((code_len == 0xFF) || (code_len <= 16));
1059
1060 if (code_len)
1061 {
1062 end_zero_run(syms, zero_run_size);
1063
1064 if (code_len != prev_code_len)
1065 {
1066 end_nonzero_run(syms, nonzero_run_size, prev_code_len);
1067 if (code_len != 0xFF)
1068 syms.push_back(static_cast<uint16_t>(code_len));
1069 }
1070 else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax)
1071 end_nonzero_run(syms, nonzero_run_size, prev_code_len);
1072 }
1073 else
1074 {
1075 end_nonzero_run(syms, nonzero_run_size, prev_code_len);
1076
1077 if (++zero_run_size == cHuffmanBigZeroRunSizeMax)
1078 end_zero_run(syms, zero_run_size);
1079 }
1080
1081 prev_code_len = code_len;
1082 }
1083
1084 histogram h(cHuffmanTotalCodelengthCodes);
1085 for (uint32_t i = 0; i < syms.size(); i++)
1086 h.inc(syms[i] & 63);
1087
1088 huffman_encoding_table ct;
1089 if (!ct.init(h, 7))
1090 return 0;
1091
1092 assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes);
1093
1094 uint32_t total_codelength_codes;
1095 for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--)
1096 if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]])
1097 break;
1098
1099 assert(total_codelength_codes);
1100
1101 put_bits(total_codelength_codes, 5);
1102 for (uint32_t i = 0; i < total_codelength_codes; i++)
1103 put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3);
1104
1105 for (uint32_t i = 0; i < syms.size(); ++i)
1106 {
1107 const uint32_t l = syms[i] & 63, e = syms[i] >> 6;
1108
1109 put_code(l, ct);
1110
1111 if (l == cHuffmanSmallZeroRunCode)
1112 put_bits(e, cHuffmanSmallZeroRunExtraBits);
1113 else if (l == cHuffmanBigZeroRunCode)
1114 put_bits(e, cHuffmanBigZeroRunExtraBits);
1115 else if (l == cHuffmanSmallRepeatCode)
1116 put_bits(e, cHuffmanSmallRepeatExtraBits);
1117 else if (l == cHuffmanBigRepeatCode)
1118 put_bits(e, cHuffmanBigRepeatExtraBits);
1119 }
1120
1121 return (uint32_t)(m_total_bits - start_bits);
1122 }
1123
1124 bool huffman_test(int rand_seed)
1125 {
1126 histogram h(19);
1127
1128 // Feed in a fibonacci sequence to force large codesizes
1129 h[0] += 1; h[1] += 1; h[2] += 2; h[3] += 3;
1130 h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21;
1131 h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144;
1132 h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987;
1133 h[16] += 1597; h[17] += 2584; h[18] += 4181;
1134
1135 huffman_encoding_table etab;
1136 etab.init(h, 16);
1137
1138 {
1139 bitwise_coder c;
1140 c.init(1024);
1141
1142 c.emit_huffman_table(etab);
1143 for (int i = 0; i < 19; i++)
1144 c.put_code(i, etab);
1145
1146 c.flush();
1147
1148 basist::bitwise_decoder d;
1149 d.init(&c.get_bytes()[0], static_cast<uint32_t>(c.get_bytes().size()));
1150
1151 basist::huffman_decoding_table dtab;
1152 bool success = d.read_huffman_table(dtab);
1153 if (!success)
1154 {
1155 assert(0);
1156 printf("Failure 5\n");
1157 return false;
1158 }
1159
1160 for (uint32_t i = 0; i < 19; i++)
1161 {
1162 uint32_t s = d.decode_huffman(dtab);
1163 if (s != i)
1164 {
1165 assert(0);
1166 printf("Failure 5\n");
1167 return false;
1168 }
1169 }
1170 }
1171
1172 basisu::rand r;
1173 r.seed(rand_seed);
1174
1175 for (int iter = 0; iter < 500000; iter++)
1176 {
1177 printf("%u\n", iter);
1178
1179 uint32_t max_sym = r.irand(0, 8193);
1180 uint32_t num_codes = r.irand(1, 10000);
1181 uint_vec syms(num_codes);
1182
1183 for (uint32_t i = 0; i < num_codes; i++)
1184 {
1185 if (r.bit())
1186 syms[i] = r.irand(0, max_sym);
1187 else
1188 {
1189 int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum<int>(1, max_sym / 2)) + .5f);
1190 s = basisu::clamp<int>(s, 0, max_sym);
1191
1192 syms[i] = s;
1193 }
1194
1195 }
1196
1197 histogram h1(max_sym + 1);
1198 for (uint32_t i = 0; i < num_codes; i++)
1199 h1[syms[i]]++;
1200
1201 huffman_encoding_table etab2;
1202 if (!etab2.init(h1, 16))
1203 {
1204 assert(0);
1205 printf("Failed 0\n");
1206 return false;
1207 }
1208
1209 bitwise_coder c;
1210 c.init(1024);
1211
1212 c.emit_huffman_table(etab2);
1213
1214 for (uint32_t i = 0; i < num_codes; i++)
1215 c.put_code(syms[i], etab2);
1216
1217 c.flush();
1218
1219 basist::bitwise_decoder d;
1220 d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size());
1221
1222 basist::huffman_decoding_table dtab;
1223 bool success = d.read_huffman_table(dtab);
1224 if (!success)
1225 {
1226 assert(0);
1227 printf("Failed 2\n");
1228 return false;
1229 }
1230
1231 for (uint32_t i = 0; i < num_codes; i++)
1232 {
1233 uint32_t s = d.decode_huffman(dtab);
1234 if (s != syms[i])
1235 {
1236 assert(0);
1237 printf("Failed 4\n");
1238 return false;
1239 }
1240 }
1241
1242 }
1243 return true;
1244 }
1245
1246 void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)
1247 {
1248 assert((num_syms > 0) && (num_indices > 0));
1249 assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f));
1250
1251 clear();
1252
1253 m_remap_table.resize(num_syms);
1254 m_entries_picked.reserve(num_syms);
1255 m_total_count_to_picked.resize(num_syms);
1256
1257 if (num_indices <= 1)
1258 return;
1259
1260 prepare_hist(num_syms, num_indices, pIndices);
1261 find_initial(num_syms);
1262
1263 while (m_entries_to_do.size())
1264 {
1265 // Find the best entry to move into the picked list.
1266 uint32_t best_entry;
1267 double best_count;
1268 find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight);
1269
1270 // We now have chosen an entry to place in the picked list, now determine which side it goes on.
1271 const uint32_t entry_to_move = m_entries_to_do[best_entry];
1272
1273 float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight);
1274
1275 // Put entry_to_move either on the "left" or "right" side of the picked entries
1276 if (side <= 0)
1277 m_entries_picked.push_back(entry_to_move);
1278 else
1279 m_entries_picked.insert(m_entries_picked.begin(), entry_to_move);
1280
1281 // Erase best_entry from the todo list
1282 m_entries_to_do.erase(m_entries_to_do.begin() + best_entry);
1283
1284 // We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry
1285 for (uint32_t i = 0; i < m_entries_to_do.size(); i++)
1286 m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], entry_to_move, num_syms);
1287 }
1288
1289 for (uint32_t i = 0; i < num_syms; i++)
1290 m_remap_table[m_entries_picked[i]] = i;
1291 }
1292
1293 void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices)
1294 {
1295 m_hist.resize(0);
1296 m_hist.resize(num_syms * num_syms);
1297
1298 for (uint32_t i = 0; i < num_indices; i++)
1299 {
1300 const uint32_t idx = pIndices[i];
1301 inc_hist(idx, (i < (num_indices - 1)) ? pIndices[i + 1] : -1, num_syms);
1302 inc_hist(idx, (i > 0) ? pIndices[i - 1] : -1, num_syms);
1303 }
1304 }
1305
1306 void palette_index_reorderer::find_initial(uint32_t num_syms)
1307 {
1308 uint32_t max_count = 0, max_index = 0;
1309 for (uint32_t i = 0; i < num_syms * num_syms; i++)
1310 if (m_hist[i] > max_count)
1311 max_count = m_hist[i], max_index = i;
1312
1313 uint32_t a = max_index / num_syms, b = max_index % num_syms;
1314
1315 m_entries_picked.push_back(a);
1316 m_entries_picked.push_back(b);
1317
1318 for (uint32_t i = 0; i < num_syms; i++)
1319 if ((i != b) && (i != a))
1320 m_entries_to_do.push_back(i);
1321
1322 for (uint32_t i = 0; i < m_entries_to_do.size(); i++)
1323 for (uint32_t j = 0; j < m_entries_picked.size(); j++)
1324 m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms);
1325 }
1326
1327 void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)
1328 {
1329 best_entry = 0;
1330 best_count = 0;
1331
1332 for (uint32_t i = 0; i < m_entries_to_do.size(); i++)
1333 {
1334 const uint32_t u = m_entries_to_do[i];
1335 double total_count = m_total_count_to_picked[u];
1336
1337 if (pDist_func)
1338 {
1339 float w = maximum<float>((*pDist_func)(u, m_entries_picked.front(), pCtx), (*pDist_func)(u, m_entries_picked.back(), pCtx));
1340 assert((w >= 0.0f) && (w <= 1.0f));
1341 total_count = (total_count + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w);
1342 }
1343
1344 if (total_count <= best_count)
1345 continue;
1346
1347 best_entry = i;
1348 best_count = total_count;
1349 }
1350 }
1351
1352 float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)
1353 {
1354 float which_side = 0;
1355
1356 int l_count = 0, r_count = 0;
1357 for (uint32_t j = 0; j < m_entries_picked.size(); j++)
1358 {
1359 const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1));
1360 which_side += static_cast<float>(r * count);
1361 if (r >= 0)
1362 l_count += r * count;
1363 else
1364 r_count += -r * count;
1365 }
1366
1367 if (pDist_func)
1368 {
1369 float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx));
1370 float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx));
1371 which_side = w_left * l_count - w_right * r_count;
1372 }
1373 return which_side;
1374 }
1375
1376 void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)
1377 {
1378 assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
1379
1380 const uint32_t width = basisu::minimum(a.get_width(), b.get_width());
1381 const uint32_t height = basisu::minimum(a.get_height(), b.get_height());
1382
1383 double hist[256];
1384 clear_obj(hist);
1385
1386 for (uint32_t y = 0; y < height; y++)
1387 {
1388 for (uint32_t x = 0; x < width; x++)
1389 {
1390 const color_rgba &ca = a(x, y), &cb = b(x, y);
1391
1392 if (total_chans)
1393 {
1394 for (uint32_t c = 0; c < total_chans; c++)
1395 hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++;
1396 }
1397 else
1398 {
1399 if (use_601_luma)
1400 hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++;
1401 else
1402 hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++;
1403 }
1404 }
1405 }
1406
1407 m_max = 0;
1408 double sum = 0.0f, sum2 = 0.0f;
1409 for (uint32_t i = 0; i < 256; i++)
1410 {
1411 if (hist[i])
1412 {
1413 m_max = basisu::maximum<float>(m_max, (float)i);
1414 double v = i * hist[i];
1415 sum += v;
1416 sum2 += i * v;
1417 }
1418 }
1419
1420 double total_values = (double)width * (double)height;
1421 if (avg_comp_error)
1422 total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);
1423
1424 m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0);
1425 m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0f * 255.0f);
1426 m_rms = (float)sqrt(m_mean_squared);
1427 m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0f, 0.0f, 100.0f) : 100.0f;
1428 }
1429
1430 void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed)
1431 {
1432 rand r(seed);
1433
1434 uint8_t *pDst = static_cast<uint8_t *>(pBuf);
1435
1436 while (size >= sizeof(uint32_t))
1437 {
1438 *(uint32_t *)pDst = r.urand32();
1439 pDst += sizeof(uint32_t);
1440 size -= sizeof(uint32_t);
1441 }
1442
1443 while (size)
1444 {
1445 *pDst++ = r.byte();
1446 size--;
1447 }
1448 }
1449
1450 uint32_t hash_hsieh(const uint8_t *pBuf, size_t len)
1451 {
1452 if (!pBuf || !len)
1453 return 0;
1454
1455 uint32_t h = static_cast<uint32_t>(len);
1456
1457 const uint32_t bytes_left = len & 3;
1458 len >>= 2;
1459
1460 while (len--)
1461 {
1462 const uint16_t *pWords = reinterpret_cast<const uint16_t *>(pBuf);
1463
1464 h += pWords[0];
1465
1466 const uint32_t t = (pWords[1] << 11) ^ h;
1467 h = (h << 16) ^ t;
1468
1469 pBuf += sizeof(uint32_t);
1470
1471 h += h >> 11;
1472 }
1473
1474 switch (bytes_left)
1475 {
1476 case 1:
1477 h += *reinterpret_cast<const signed char*>(pBuf);
1478 h ^= h << 10;
1479 h += h >> 1;
1480 break;
1481 case 2:
1482 h += *reinterpret_cast<const uint16_t *>(pBuf);
1483 h ^= h << 11;
1484 h += h >> 17;
1485 break;
1486 case 3:
1487 h += *reinterpret_cast<const uint16_t *>(pBuf);
1488 h ^= h << 16;
1489 h ^= (static_cast<signed char>(pBuf[sizeof(uint16_t)])) << 18;
1490 h += h >> 11;
1491 break;
1492 default:
1493 break;
1494 }
1495
1496 h ^= h << 3;
1497 h += h >> 5;
1498 h ^= h << 4;
1499 h += h >> 17;
1500 h ^= h << 25;
1501 h += h >> 6;
1502
1503 return h;
1504 }
1505
1506 job_pool::job_pool(uint32_t num_threads) :
1507 m_num_active_jobs(0),
1508 m_kill_flag(false)
1509 {
1510 assert(num_threads >= 1U);
1511
1512 debug_printf("job_pool::job_pool: %u total threads\n", num_threads);
1513
1514 if (num_threads > 1)
1515 {
1516 m_threads.resize(num_threads - 1);
1517
1518 for (int i = 0; i < ((int)num_threads - 1); i++)
1519 m_threads[i] = std::thread([this, i] { job_thread(i); });
1520 }
1521 }
1522
1523 job_pool::~job_pool()
1524 {
1525 debug_printf("job_pool::~job_pool\n");
1526
1527 // Notify all workers that they need to die right now.
1528 m_kill_flag = true;
1529
1530 m_has_work.notify_all();
1531
1532 // Wait for all workers to die.
1533 for (uint32_t i = 0; i < m_threads.size(); i++)
1534 m_threads[i].join();
1535 }
1536
1537 void job_pool::add_job(const std::function<void()>& job)
1538 {
1539 std::unique_lock<std::mutex> lock(m_mutex);
1540
1541 m_queue.emplace_back(job);
1542
1543 const size_t queue_size = m_queue.size();
1544
1545 lock.unlock();
1546
1547 if (queue_size > 1)
1548 m_has_work.notify_one();
1549 }
1550
1551 void job_pool::add_job(std::function<void()>&& job)
1552 {
1553 std::unique_lock<std::mutex> lock(m_mutex);
1554
1555 m_queue.emplace_back(std::move(job));
1556
1557 const size_t queue_size = m_queue.size();
1558
1559 lock.unlock();
1560
1561 if (queue_size > 1)
1562 {
1563 m_has_work.notify_one();
1564 }
1565 }
1566
1567 void job_pool::wait_for_all()
1568 {
1569 std::unique_lock<std::mutex> lock(m_mutex);
1570
1571 // Drain the job queue on the calling thread.
1572 while (!m_queue.empty())
1573 {
1574 std::function<void()> job(m_queue.back());
1575 m_queue.pop_back();
1576
1577 lock.unlock();
1578
1579 job();
1580
1581 lock.lock();
1582 }
1583
1584 // The queue is empty, now wait for all active jobs to finish up.
1585 m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } );
1586 }
1587
1588 void job_pool::job_thread(uint32_t index)
1589 {
1590 BASISU_NOTE_UNUSED(index);
1591 //debug_printf("job_pool::job_thread: starting %u\n", index);
1592
1593 while (true)
1594 {
1595 std::unique_lock<std::mutex> lock(m_mutex);
1596
1597 // Wait for any jobs to be issued.
1598 m_has_work.wait(lock, [this] { return m_kill_flag || m_queue.size(); } );
1599
1600 // Check to see if we're supposed to exit.
1601 if (m_kill_flag)
1602 break;
1603
1604 // Get the job and execute it.
1605 std::function<void()> job(m_queue.back());
1606 m_queue.pop_back();
1607
1608 ++m_num_active_jobs;
1609
1610 lock.unlock();
1611
1612 job();
1613
1614 lock.lock();
1615
1616 --m_num_active_jobs;
1617
1618 // Now check if there are no more jobs remaining.
1619 const bool all_done = m_queue.empty() && !m_num_active_jobs;
1620
1621 lock.unlock();
1622
1623 if (all_done)
1624 m_no_more_jobs.notify_all();
1625 }
1626
1627 //debug_printf("job_pool::job_thread: exiting\n");
1628 }
1629
1630 // .TGA image loading
1631 #pragma pack(push)
1632 #pragma pack(1)
1633 struct tga_header
1634 {
1635 uint8_t m_id_len;
1636 uint8_t m_cmap;
1637 uint8_t m_type;
1638 packed_uint<2> m_cmap_first;
1639 packed_uint<2> m_cmap_len;
1640 uint8_t m_cmap_bpp;
1641 packed_uint<2> m_x_org;
1642 packed_uint<2> m_y_org;
1643 packed_uint<2> m_width;
1644 packed_uint<2> m_height;
1645 uint8_t m_depth;
1646 uint8_t m_desc;
1647 };
1648 #pragma pack(pop)
1649
1650 const uint32_t MAX_TGA_IMAGE_SIZE = 16384;
1651
1652 enum tga_image_type
1653 {
1654 cITPalettized = 1,
1655 cITRGB = 2,
1656 cITGrayscale = 3
1657 };
1658
1659 uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans)
1660 {
1661 width = 0;
1662 height = 0;
1663 n_chans = 0;
1664
1665 if (buf_size <= sizeof(tga_header))
1666 return nullptr;
1667
1668 const tga_header &hdr = *reinterpret_cast<const tga_header *>(pBuf);
1669
1670 if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE))
1671 return nullptr;
1672
1673 if (hdr.m_desc >> 6)
1674 return nullptr;
1675
1676 // Simple validation
1677 if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1))
1678 return nullptr;
1679
1680 if (hdr.m_cmap)
1681 {
1682 if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32))
1683 return nullptr;
1684
1685 // Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either.
1686 if (hdr.m_cmap_first != 0)
1687 return nullptr;
1688 }
1689
1690 const bool x_flipped = (hdr.m_desc & 0x10) != 0;
1691 const bool y_flipped = (hdr.m_desc & 0x20) == 0;
1692
1693 bool rle_flag = false;
1694 int file_image_type = hdr.m_type;
1695 if (file_image_type > 8)
1696 {
1697 file_image_type -= 8;
1698 rle_flag = true;
1699 }
1700
1701 const tga_image_type image_type = static_cast<tga_image_type>(file_image_type);
1702
1703 switch (file_image_type)
1704 {
1705 case cITRGB:
1706 if (hdr.m_depth == 8)
1707 return nullptr;
1708 break;
1709 case cITPalettized:
1710 if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0))
1711 return nullptr;
1712 break;
1713 case cITGrayscale:
1714 if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0))
1715 return nullptr;
1716 if ((hdr.m_depth != 8) && (hdr.m_depth != 16))
1717 return nullptr;
1718 break;
1719 default:
1720 return nullptr;
1721 }
1722
1723 uint32_t tga_bytes_per_pixel = 0;
1724
1725 switch (hdr.m_depth)
1726 {
1727 case 32:
1728 tga_bytes_per_pixel = 4;
1729 n_chans = 4;
1730 break;
1731 case 24:
1732 tga_bytes_per_pixel = 3;
1733 n_chans = 3;
1734 break;
1735 case 16:
1736 case 15:
1737 tga_bytes_per_pixel = 2;
1738 // For compatibility with stb_image_write.h
1739 n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 4 : 3;
1740 break;
1741 case 8:
1742 tga_bytes_per_pixel = 1;
1743 // For palettized RGBA support, which both FreeImage and stb_image support.
1744 n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 4 : 3;
1745 break;
1746 default:
1747 return nullptr;
1748 }
1749
1750 //const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel;
1751
1752 const uint8_t *pSrc = pBuf + sizeof(tga_header);
1753 uint32_t bytes_remaining = buf_size - sizeof(tga_header);
1754
1755 if (hdr.m_id_len)
1756 {
1757 if (bytes_remaining < hdr.m_id_len)
1758 return nullptr;
1759 pSrc += hdr.m_id_len;
1760 bytes_remaining += hdr.m_id_len;
1761 }
1762
1763 color_rgba pal[256];
1764 for (uint32_t i = 0; i < 256; i++)
1765 pal[i].set(0, 0, 0, 255);
1766
1767 if ((hdr.m_cmap) && (hdr.m_cmap_len))
1768 {
1769 if (image_type == cITPalettized)
1770 {
1771 // Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years).
1772 if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) )
1773 return nullptr;
1774
1775 if (hdr.m_cmap_bpp == 32)
1776 {
1777 const uint32_t pal_size = hdr.m_cmap_len * 4;
1778 if (bytes_remaining < pal_size)
1779 return nullptr;
1780
1781 for (uint32_t i = 0; i < hdr.m_cmap_len; i++)
1782 {
1783 pal[i].r = pSrc[i * 4 + 2];
1784 pal[i].g = pSrc[i * 4 + 1];
1785 pal[i].b = pSrc[i * 4 + 0];
1786 pal[i].a = pSrc[i * 4 + 3];
1787 }
1788
1789 bytes_remaining -= pal_size;
1790 pSrc += pal_size;
1791 }
1792 else if (hdr.m_cmap_bpp == 24)
1793 {
1794 const uint32_t pal_size = hdr.m_cmap_len * 3;
1795 if (bytes_remaining < pal_size)
1796 return nullptr;
1797
1798 for (uint32_t i = 0; i < hdr.m_cmap_len; i++)
1799 {
1800 pal[i].r = pSrc[i * 3 + 2];
1801 pal[i].g = pSrc[i * 3 + 1];
1802 pal[i].b = pSrc[i * 3 + 0];
1803 pal[i].a = 255;
1804 }
1805
1806 bytes_remaining -= pal_size;
1807 pSrc += pal_size;
1808 }
1809 else
1810 {
1811 const uint32_t pal_size = hdr.m_cmap_len * 2;
1812 if (bytes_remaining < pal_size)
1813 return nullptr;
1814
1815 for (uint32_t i = 0; i < hdr.m_cmap_len; i++)
1816 {
1817 const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8);
1818
1819 pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31;
1820 pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31;
1821 pal[i].b = ((v & 31) * 255 + 15) / 31;
1822 pal[i].a = 255;
1823 }
1824
1825 bytes_remaining -= pal_size;
1826 pSrc += pal_size;
1827 }
1828 }
1829 else
1830 {
1831 const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len;
1832 if (bytes_remaining < bytes_to_skip)
1833 return nullptr;
1834 pSrc += bytes_to_skip;
1835 bytes_remaining += bytes_to_skip;
1836 }
1837 }
1838
1839 width = hdr.m_width;
1840 height = hdr.m_height;
1841
1842 const uint32_t source_pitch = width * tga_bytes_per_pixel;
1843 const uint32_t dest_pitch = width * n_chans;
1844
1845 uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height);
1846 if (!pImage)
1847 return nullptr;
1848
1849 std::vector<uint8_t> input_line_buf;
1850 if (rle_flag)
1851 input_line_buf.resize(source_pitch);
1852
1853 int run_type = 0, run_remaining = 0;
1854 uint8_t run_pixel[4];
1855 memset(run_pixel, 0, sizeof(run_pixel));
1856
1857 for (int y = 0; y < height; y++)
1858 {
1859 const uint8_t *pLine_data;
1860
1861 if (rle_flag)
1862 {
1863 int pixels_remaining = width;
1864 uint8_t *pDst = &input_line_buf[0];
1865
1866 do
1867 {
1868 if (!run_remaining)
1869 {
1870 if (bytes_remaining < 1)
1871 {
1872 free(pImage);
1873 return nullptr;
1874 }
1875
1876 int v = *pSrc++;
1877 bytes_remaining--;
1878
1879 run_type = v & 0x80;
1880 run_remaining = (v & 0x7F) + 1;
1881
1882 if (run_type)
1883 {
1884 if (bytes_remaining < tga_bytes_per_pixel)
1885 {
1886 free(pImage);
1887 return nullptr;
1888 }
1889
1890 memcpy(run_pixel, pSrc, tga_bytes_per_pixel);
1891 pSrc += tga_bytes_per_pixel;
1892 bytes_remaining -= tga_bytes_per_pixel;
1893 }
1894 }
1895
1896 const uint32_t n = basisu::minimum<uint32_t>(pixels_remaining, run_remaining);
1897 pixels_remaining -= n;
1898 run_remaining -= n;
1899
1900 if (run_type)
1901 {
1902 for (uint32_t i = 0; i < n; i++)
1903 for (uint32_t j = 0; j < tga_bytes_per_pixel; j++)
1904 *pDst++ = run_pixel[j];
1905 }
1906 else
1907 {
1908 const uint32_t bytes_wanted = n * tga_bytes_per_pixel;
1909
1910 if (bytes_remaining < bytes_wanted)
1911 {
1912 free(pImage);
1913 return nullptr;
1914 }
1915
1916 memcpy(pDst, pSrc, bytes_wanted);
1917 pDst += bytes_wanted;
1918
1919 pSrc += bytes_wanted;
1920 bytes_remaining -= bytes_wanted;
1921 }
1922
1923 } while (pixels_remaining);
1924
1925 assert((pDst - &input_line_buf[0]) == width * tga_bytes_per_pixel);
1926
1927 pLine_data = &input_line_buf[0];
1928 }
1929 else
1930 {
1931 if (bytes_remaining < source_pitch)
1932 {
1933 free(pImage);
1934 return nullptr;
1935 }
1936
1937 pLine_data = pSrc;
1938 bytes_remaining -= source_pitch;
1939 pSrc += source_pitch;
1940 }
1941
1942 // Convert to 24bpp RGB or 32bpp RGBA.
1943 uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0);
1944 const int dst_stride = x_flipped ? -((int)n_chans) : n_chans;
1945
1946 switch (hdr.m_depth)
1947 {
1948 case 32:
1949 assert(tga_bytes_per_pixel == 4 && n_chans == 4);
1950 for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride)
1951 {
1952 pDst[0] = pLine_data[2];
1953 pDst[1] = pLine_data[1];
1954 pDst[2] = pLine_data[0];
1955 pDst[3] = pLine_data[3];
1956 }
1957 break;
1958 case 24:
1959 assert(tga_bytes_per_pixel == 3 && n_chans == 3);
1960 for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride)
1961 {
1962 pDst[0] = pLine_data[2];
1963 pDst[1] = pLine_data[1];
1964 pDst[2] = pLine_data[0];
1965 }
1966 break;
1967 case 16:
1968 case 15:
1969 if (image_type == cITRGB)
1970 {
1971 assert(tga_bytes_per_pixel == 2 && n_chans == 3);
1972 for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride)
1973 {
1974 const uint32_t v = pLine_data[0] | (pLine_data[1] << 8);
1975 pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31;
1976 pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31;
1977 pDst[2] = ((v & 31) * 255 + 15) / 31;
1978 }
1979 }
1980 else
1981 {
1982 assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4);
1983 for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride)
1984 {
1985 pDst[0] = pLine_data[0];
1986 pDst[1] = pLine_data[0];
1987 pDst[2] = pLine_data[0];
1988 pDst[3] = pLine_data[1];
1989 }
1990 }
1991 break;
1992 case 8:
1993 assert(tga_bytes_per_pixel == 1);
1994 if (image_type == cITPalettized)
1995 {
1996 if (hdr.m_cmap_bpp == 32)
1997 {
1998 assert(n_chans == 4);
1999 for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)
2000 {
2001 const uint32_t c = *pLine_data;
2002 pDst[0] = pal[c].r;
2003 pDst[1] = pal[c].g;
2004 pDst[2] = pal[c].b;
2005 pDst[3] = pal[c].a;
2006 }
2007 }
2008 else
2009 {
2010 assert(n_chans == 3);
2011 for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)
2012 {
2013 const uint32_t c = *pLine_data;
2014 pDst[0] = pal[c].r;
2015 pDst[1] = pal[c].g;
2016 pDst[2] = pal[c].b;
2017 }
2018 }
2019 }
2020 else
2021 {
2022 assert(n_chans == 3);
2023 for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)
2024 {
2025 const uint8_t c = *pLine_data;
2026 pDst[0] = c;
2027 pDst[1] = c;
2028 pDst[2] = c;
2029 }
2030 }
2031 break;
2032 default:
2033 assert(0);
2034 break;
2035 }
2036 } // y
2037
2038 return pImage;
2039 }
2040
2041 uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans)
2042 {
2043 width = height = n_chans = 0;
2044
2045 uint8_vec filedata;
2046 if (!read_file_to_vec(pFilename, filedata))
2047 return nullptr;
2048
2049 if (!filedata.size() || (filedata.size() > UINT32_MAX))
2050 return nullptr;
2051
2052 return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans);
2053 }
2054
2055 void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...)
2056 {
2057 char buf[2048];
2058
2059 va_list args;
2060 va_start(args, pFmt);
2061#ifdef _WIN32
2062 vsprintf_s(buf, sizeof(buf), pFmt, args);
2063#else
2064 vsnprintf(buf, sizeof(buf), pFmt, args);
2065#endif
2066 va_end(args);
2067
2068 const char* p = buf;
2069
2070 const uint32_t orig_x_ofs = x_ofs;
2071
2072 while (*p)
2073 {
2074 uint8_t c = *p++;
2075 if ((c < 32) || (c > 127))
2076 c = '.';
2077
2078 const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0];
2079
2080 for (uint32_t y = 0; y < 8; y++)
2081 {
2082 uint32_t row_bits = pGlpyh[y];
2083 for (uint32_t x = 0; x < 8; x++)
2084 {
2085 const uint32_t q = row_bits & (1 << x);
2086
2087 const color_rgba* pColor = q ? &fg : pBG;
2088 if (!pColor)
2089 continue;
2090
2091 if (alpha_only)
2092 fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);
2093 else
2094 fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);
2095 }
2096 }
2097
2098 x_ofs += 8 * scale_x;
2099 if ((x_ofs + 8 * scale_x) > m_width)
2100 {
2101 x_ofs = orig_x_ofs;
2102 y_ofs += 8 * scale_y;
2103 }
2104 }
2105 }
2106
2107} // namespace basisu
2108