basisu_enc.cpp source code [Godot/thirdparty/basis_universal/encoder/basisu_enc.cpp]

1	// basisu_enc.cpp
2	// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3	//
4	// Licensed under the Apache License, Version 2.0 (the "License");
5	// you may not use this file except in compliance with the License.
6	// You may obtain a copy of the License at
7	//
8	// http://www.apache.org/licenses/LICENSE-2.0
9	//
10	// Unless required by applicable law or agreed to in writing, software
11	// distributed under the License is distributed on an "AS IS" BASIS,
12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	// See the License for the specific language governing permissions and
14	// limitations under the License.
15	#include "basisu_enc.h"
16	#include "basisu_resampler.h"
17	#include "basisu_resampler_filters.h"
18	#include "basisu_etc.h"
19	#include "../transcoder/basisu_transcoder.h"
20	#include "basisu_bc7enc.h"
21	#include "jpgd.h"
22	#include "pvpngreader.h"
23	#include "basisu_opencl.h"
24	#include <vector>
25
26	#define MINIZ_HEADER_FILE_ONLY
27	#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
28	#include "basisu_miniz.h"
29
30	#if defined(_WIN32)
31	// For QueryPerformanceCounter/QueryPerformanceFrequency
32	#define WIN32_LEAN_AND_MEAN
33	#include <windows.h>
34	#endif
35
36	namespace basisu
37	{
38	uint64_t interval_timer::g_init_ticks, interval_timer::g_freq;
39	double interval_timer::g_timer_freq;
40	#if BASISU_SUPPORT_SSE
41	bool g_cpu_supports_sse41;
42	#endif
43
// g_hamming_dist[v] is the number of set bits in the byte v (one row per high nibble).
uint8_t g_hamming_dist[256] =
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00-0x0F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10-0x1F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20-0x2F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30-0x3F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40-0x4F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50-0x5F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60-0x6F
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70-0x7F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80-0x8F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90-0x9F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xA0-0xAF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xB0-0xBF
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xC0-0xCF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xD0-0xDF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xE0-0xEF
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8  // 0xF0-0xFF
};
63
// Public Domain 8x8 bitmap font, taken from:
// https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h
// One entry per code point U+0020..U+007F, eight bytes per entry.
const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] =
{
    {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, // U+0020 (space)
    {0x18,0x3C,0x3C,0x18,0x18,0x00,0x18,0x00}, // U+0021 (!)
    {0x36,0x36,0x00,0x00,0x00,0x00,0x00,0x00}, // U+0022 (")
    {0x36,0x36,0x7F,0x36,0x7F,0x36,0x36,0x00}, // U+0023 (#)
    {0x0C,0x3E,0x03,0x1E,0x30,0x1F,0x0C,0x00}, // U+0024 ($)
    {0x00,0x63,0x33,0x18,0x0C,0x66,0x63,0x00}, // U+0025 (%)
    {0x1C,0x36,0x1C,0x6E,0x3B,0x33,0x6E,0x00}, // U+0026 (&)
    {0x06,0x06,0x03,0x00,0x00,0x00,0x00,0x00}, // U+0027 (')
    {0x18,0x0C,0x06,0x06,0x06,0x0C,0x18,0x00}, // U+0028 (()
    {0x06,0x0C,0x18,0x18,0x18,0x0C,0x06,0x00}, // U+0029 ())
    {0x00,0x66,0x3C,0xFF,0x3C,0x66,0x00,0x00}, // U+002A (*)
    {0x00,0x0C,0x0C,0x3F,0x0C,0x0C,0x00,0x00}, // U+002B (+)
    {0x00,0x00,0x00,0x00,0x00,0x0C,0x0C,0x06}, // U+002C (,)
    {0x00,0x00,0x00,0x3F,0x00,0x00,0x00,0x00}, // U+002D (-)
    {0x00,0x00,0x00,0x00,0x00,0x0C,0x0C,0x00}, // U+002E (.)
    {0x60,0x30,0x18,0x0C,0x06,0x03,0x01,0x00}, // U+002F (/)
    {0x3E,0x63,0x73,0x7B,0x6F,0x67,0x3E,0x00}, // U+0030 (0)
    {0x0C,0x0E,0x0C,0x0C,0x0C,0x0C,0x3F,0x00}, // U+0031 (1)
    {0x1E,0x33,0x30,0x1C,0x06,0x33,0x3F,0x00}, // U+0032 (2)
    {0x1E,0x33,0x30,0x1C,0x30,0x33,0x1E,0x00}, // U+0033 (3)
    {0x38,0x3C,0x36,0x33,0x7F,0x30,0x78,0x00}, // U+0034 (4)
    {0x3F,0x03,0x1F,0x30,0x30,0x33,0x1E,0x00}, // U+0035 (5)
    {0x1C,0x06,0x03,0x1F,0x33,0x33,0x1E,0x00}, // U+0036 (6)
    {0x3F,0x33,0x30,0x18,0x0C,0x0C,0x0C,0x00}, // U+0037 (7)
    {0x1E,0x33,0x33,0x1E,0x33,0x33,0x1E,0x00}, // U+0038 (8)
    {0x1E,0x33,0x33,0x3E,0x30,0x18,0x0E,0x00}, // U+0039 (9)
    {0x00,0x0C,0x0C,0x00,0x00,0x0C,0x0C,0x00}, // U+003A (:)
    {0x00,0x0C,0x0C,0x00,0x00,0x0C,0x0C,0x06}, // U+003B (;)
    {0x18,0x0C,0x06,0x03,0x06,0x0C,0x18,0x00}, // U+003C (<)
    {0x00,0x00,0x3F,0x00,0x00,0x3F,0x00,0x00}, // U+003D (=)
    {0x06,0x0C,0x18,0x30,0x18,0x0C,0x06,0x00}, // U+003E (>)
    {0x1E,0x33,0x30,0x18,0x0C,0x00,0x0C,0x00}, // U+003F (?)
    {0x3E,0x63,0x7B,0x7B,0x7B,0x03,0x1E,0x00}, // U+0040 (@)
    {0x0C,0x1E,0x33,0x33,0x3F,0x33,0x33,0x00}, // U+0041 (A)
    {0x3F,0x66,0x66,0x3E,0x66,0x66,0x3F,0x00}, // U+0042 (B)
    {0x3C,0x66,0x03,0x03,0x03,0x66,0x3C,0x00}, // U+0043 (C)
    {0x1F,0x36,0x66,0x66,0x66,0x36,0x1F,0x00}, // U+0044 (D)
    {0x7F,0x46,0x16,0x1E,0x16,0x46,0x7F,0x00}, // U+0045 (E)
    {0x7F,0x46,0x16,0x1E,0x16,0x06,0x0F,0x00}, // U+0046 (F)
    {0x3C,0x66,0x03,0x03,0x73,0x66,0x7C,0x00}, // U+0047 (G)
    {0x33,0x33,0x33,0x3F,0x33,0x33,0x33,0x00}, // U+0048 (H)
    {0x1E,0x0C,0x0C,0x0C,0x0C,0x0C,0x1E,0x00}, // U+0049 (I)
    {0x78,0x30,0x30,0x30,0x33,0x33,0x1E,0x00}, // U+004A (J)
    {0x67,0x66,0x36,0x1E,0x36,0x66,0x67,0x00}, // U+004B (K)
    {0x0F,0x06,0x06,0x06,0x46,0x66,0x7F,0x00}, // U+004C (L)
    {0x63,0x77,0x7F,0x7F,0x6B,0x63,0x63,0x00}, // U+004D (M)
    {0x63,0x67,0x6F,0x7B,0x73,0x63,0x63,0x00}, // U+004E (N)
    {0x1C,0x36,0x63,0x63,0x63,0x36,0x1C,0x00}, // U+004F (O)
    {0x3F,0x66,0x66,0x3E,0x06,0x06,0x0F,0x00}, // U+0050 (P)
    {0x1E,0x33,0x33,0x33,0x3B,0x1E,0x38,0x00}, // U+0051 (Q)
    {0x3F,0x66,0x66,0x3E,0x36,0x66,0x67,0x00}, // U+0052 (R)
    {0x1E,0x33,0x07,0x0E,0x38,0x33,0x1E,0x00}, // U+0053 (S)
    {0x3F,0x2D,0x0C,0x0C,0x0C,0x0C,0x1E,0x00}, // U+0054 (T)
    {0x33,0x33,0x33,0x33,0x33,0x33,0x3F,0x00}, // U+0055 (U)
    {0x33,0x33,0x33,0x33,0x33,0x1E,0x0C,0x00}, // U+0056 (V)
    {0x63,0x63,0x63,0x6B,0x7F,0x77,0x63,0x00}, // U+0057 (W)
    {0x63,0x63,0x36,0x1C,0x1C,0x36,0x63,0x00}, // U+0058 (X)
    {0x33,0x33,0x33,0x1E,0x0C,0x0C,0x1E,0x00}, // U+0059 (Y)
    {0x7F,0x63,0x31,0x18,0x4C,0x66,0x7F,0x00}, // U+005A (Z)
    {0x1E,0x06,0x06,0x06,0x06,0x06,0x1E,0x00}, // U+005B ([)
    {0x03,0x06,0x0C,0x18,0x30,0x60,0x40,0x00}, // U+005C (backslash)
    {0x1E,0x18,0x18,0x18,0x18,0x18,0x1E,0x00}, // U+005D (])
    {0x08,0x1C,0x36,0x63,0x00,0x00,0x00,0x00}, // U+005E (^)
    {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xFF}, // U+005F (_)
    {0x0C,0x0C,0x18,0x00,0x00,0x00,0x00,0x00}, // U+0060 (`)
    {0x00,0x00,0x1E,0x30,0x3E,0x33,0x6E,0x00}, // U+0061 (a)
    {0x07,0x06,0x06,0x3E,0x66,0x66,0x3B,0x00}, // U+0062 (b)
    {0x00,0x00,0x1E,0x33,0x03,0x33,0x1E,0x00}, // U+0063 (c)
    {0x38,0x30,0x30,0x3E,0x33,0x33,0x6E,0x00}, // U+0064 (d)
    {0x00,0x00,0x1E,0x33,0x3F,0x03,0x1E,0x00}, // U+0065 (e)
    {0x1C,0x36,0x06,0x0F,0x06,0x06,0x0F,0x00}, // U+0066 (f)
    {0x00,0x00,0x6E,0x33,0x33,0x3E,0x30,0x1F}, // U+0067 (g)
    {0x07,0x06,0x36,0x6E,0x66,0x66,0x67,0x00}, // U+0068 (h)
    {0x0C,0x00,0x0E,0x0C,0x0C,0x0C,0x1E,0x00}, // U+0069 (i)
    {0x30,0x00,0x30,0x30,0x30,0x33,0x33,0x1E}, // U+006A (j)
    {0x07,0x06,0x66,0x36,0x1E,0x36,0x67,0x00}, // U+006B (k)
    {0x0E,0x0C,0x0C,0x0C,0x0C,0x0C,0x1E,0x00}, // U+006C (l)
    {0x00,0x00,0x33,0x7F,0x7F,0x6B,0x63,0x00}, // U+006D (m)
    {0x00,0x00,0x1F,0x33,0x33,0x33,0x33,0x00}, // U+006E (n)
    {0x00,0x00,0x1E,0x33,0x33,0x33,0x1E,0x00}, // U+006F (o)
    {0x00,0x00,0x3B,0x66,0x66,0x3E,0x06,0x0F}, // U+0070 (p)
    {0x00,0x00,0x6E,0x33,0x33,0x3E,0x30,0x78}, // U+0071 (q)
    {0x00,0x00,0x3B,0x6E,0x66,0x06,0x0F,0x00}, // U+0072 (r)
    {0x00,0x00,0x3E,0x03,0x1E,0x30,0x1F,0x00}, // U+0073 (s)
    {0x08,0x0C,0x3E,0x0C,0x0C,0x2C,0x18,0x00}, // U+0074 (t)
    {0x00,0x00,0x33,0x33,0x33,0x33,0x6E,0x00}, // U+0075 (u)
    {0x00,0x00,0x33,0x33,0x33,0x1E,0x0C,0x00}, // U+0076 (v)
    {0x00,0x00,0x63,0x6B,0x7F,0x7F,0x36,0x00}, // U+0077 (w)
    {0x00,0x00,0x63,0x36,0x1C,0x36,0x63,0x00}, // U+0078 (x)
    {0x00,0x00,0x33,0x33,0x33,0x3E,0x30,0x1F}, // U+0079 (y)
    {0x00,0x00,0x3F,0x19,0x0C,0x26,0x3F,0x00}, // U+007A (z)
    {0x38,0x0C,0x0C,0x07,0x0C,0x0C,0x38,0x00}, // U+007B ({)
    {0x18,0x18,0x18,0x00,0x18,0x18,0x18,0x00}, // U+007C (|)
    {0x07,0x0C,0x0C,0x38,0x0C,0x0C,0x07,0x00}, // U+007D (})
    {0x6E,0x3B,0x00,0x00,0x00,0x00,0x00,0x00}, // U+007E (~)
    {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}  // U+007F
};
165
// True after basisu_encoder_init() has run to completion; basisu_encoder_deinit() resets it.
bool g_library_initialized = false;

// Locked by basisu_encoder_init() while it checks and sets g_library_initialized.
std::mutex g_encoder_init_mutex;
168
169	// Encoder library initialization (just call once at startup)
170	void basisu_encoder_init(bool use_opencl, bool opencl_force_serialization)
171	{
172	std::lock_guard<std::mutex> lock(g_encoder_init_mutex);
173
174	if (g_library_initialized)
175	return;
176
177	detect_sse41();
178
179	basist::basisu_transcoder_init();
180	pack_etc1_solid_color_init();
181	//uastc_init();
182	bc7enc_compress_block_init(); // must be after uastc_init()
183
184	// Don't bother initializing the OpenCL module at all if it's been completely disabled.
185	if (use_opencl)
186	{
187	opencl_init(opencl_force_serialization);
188	}
189
190	interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports
191
192	g_library_initialized = true;
193	}
194
195	void basisu_encoder_deinit()
196	{
197	opencl_deinit();
198
199	g_library_initialized = false;
200	}
201
// Formats pFmt/args into a fixed 8 KB stack buffer and writes the result to stderr
// with an "ERROR: " prefix.
void error_vprintf(const char* pFmt, va_list args)
{
    char message[8192];

#ifdef _WIN32
    vsprintf_s(message, sizeof(message), pFmt, args);
#else
    vsnprintf(message, sizeof(message), pFmt, args);
#endif

    fprintf(stderr, "ERROR: %s", message);
}
214
215	void error_printf(const char *pFmt, ...)
216	{
217	va_list args;
218	va_start(args, pFmt);
219	error_vprintf(pFmt, args);
220	va_end(args);
221	}
222
223	#if defined(_WIN32)
224	inline void query_counter(timer_ticks* pTicks)
225	{
226	QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(pTicks));
227	}
228	inline void query_counter_frequency(timer_ticks* pTicks)
229	{
230	QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks));
231	}
232	#elif defined(__APPLE__) \|\| defined(__FreeBSD__) \|\| defined(__OpenBSD__) \|\| defined(__EMSCRIPTEN__)
233	#include <sys/time.h>
234	inline void query_counter(timer_ticks* pTicks)
235	{
236	struct timeval cur_time;
237	gettimeofday(&cur_time, NULL);
238	pTicks = static_cast<unsigned* long long>(cur_time.tv_sec) * `1000000ULL` + static_cast<unsigned long long>(cur_time.tv_usec);
239	}
240	inline void query_counter_frequency(timer_ticks* pTicks)
241	{
242	*pTicks = `1000000`;
243	}
244	#elif defined(__GNUC__)
245	#include <sys/timex.h>
246	inline void query_counter(timer_ticks* pTicks)
247	{
248	struct timeval cur_time;
249	gettimeofday(&cur_time, NULL);
250	pTicks = static_cast<unsigned* long long>(cur_time.tv_sec) * `1000000ULL` + static_cast<unsigned long long>(cur_time.tv_usec);
251	}
252	inline void query_counter_frequency(timer_ticks* pTicks)
253	{
254	*pTicks = `1000000`;
255	}
256	#else
257	#error TODO
258	#endif
259
260	interval_timer::interval_timer() : m_start_time(`0`), m_stop_time(`0`), m_started(false), m_stopped(false)
261	{
262	if (!g_timer_freq)
263	init();
264	}
265
266	void interval_timer::start()
267	{
268	query_counter(&m_start_time);
269	m_started = true;
270	m_stopped = false;
271	}
272
273	void interval_timer::stop()
274	{
275	assert(m_started);
276	query_counter(&m_stop_time);
277	m_stopped = true;
278	}
279
280	double interval_timer::get_elapsed_secs() const
281	{
282	assert(m_started);
283	if (!m_started)
284	return `0`;
285
286	timer_ticks stop_time = m_stop_time;
287	if (!m_stopped)
288	query_counter(&stop_time);
289
290	timer_ticks delta = stop_time - m_start_time;
291	return delta * g_timer_freq;
292	}
293
294	void interval_timer::init()
295	{
296	if (!g_timer_freq)
297	{
298	query_counter_frequency(&g_freq);
299	g_timer_freq = `1.0f` / g_freq;
300	query_counter(&g_init_ticks);
301	}
302	}
303
304	timer_ticks interval_timer::get_ticks()
305	{
306	if (!g_timer_freq)
307	init();
308	timer_ticks ticks;
309	query_counter(&ticks);
310	return ticks - g_init_ticks;
311	}
312
313	double interval_timer::ticks_to_secs(timer_ticks ticks)
314	{
315	if (!g_timer_freq)
316	init();
317	return ticks * g_timer_freq;
318	}
319
320	const uint32_t MAX_32BIT_ALLOC_SIZE = `250000000`;
321
322	bool load_tga(const char* pFilename, image& img)
323	{
324	int w = `0`, h = `0`, n_chans = `0`;
325	uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans);
326
327	if ((!pImage_data) \|\| (!w) \|\| (!h) \|\| ((n_chans != `3`) && (n_chans != `4`)))
328	{
329	error_printf("Failed loading .TGA image \"%s\"!\n", pFilename);
330
331	if (pImage_data)
332	free(pImage_data);
333
334	return false;
335	}
336
337	if (sizeof(void ) == sizeof*(uint32_t))
338	{
339	if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE)
340	{
341	error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h);
342
343	if (pImage_data)
344	free(pImage_data);
345
346	return false;
347	}
348	}
349
350	img.resize(w, h);
351
352	const uint8_t *pSrc = pImage_data;
353	for (int y = `0`; y < h; y++)
354	{
355	color_rgba *pDst = &img (`0`, y);
356
357	for (int x = `0`; x < w; x++)
358	{
359	pDst->r = pSrc[`0`];
360	pDst->g = pSrc[`1`];
361	pDst->b = pSrc[`2`];
362	pDst->a = (n_chans == `3`) ? `255` : pSrc[`3`];
363
364	pSrc += n_chans;
365	++pDst;
366	}
367	}
368
369	free(pImage_data);
370
371	return true;
372	}
373
374	bool load_png(const uint8_t pBuf, size_t buf_size, image &img, const* char *pFilename)
375	{
376	interval_timer tm;
377	tm.start();
378
379	if (!buf_size)
380	return false;
381
382	uint32_t width = `0`, height = `0`, num_chans = `0`;
383	void* pImage = pv_png::load_png(pBuf, buf_size, `4`, width, height, num_chans);
384	if (!pBuf)
385	{
386	error_printf("pv_png::load_png failed while loading image \"%s\"\n", pFilename);
387	return false;
388	}
389
390	img.grant_ownership(reinterpret_cast<color_rgba*>(pImage), width, height);
391
392	//debug_printf("Total load_png() time: %3.3f secs\n", tm.get_elapsed_secs());
393
394	return true;
395	}
396
397	bool load_png(const char* pFilename, image& img)
398	{
399	uint8_vec buffer;
400	if (!read_file_to_vec(pFilename, buffer))
401	{
402	error_printf("load_png: Failed reading file \"%s\"!\n", pFilename);
403	return false;
404	}
405
406	return load_png(buffer.data(), buffer.size(), img, pFilename);
407	}
408
409	bool load_jpg(const char *pFilename, image& img)
410	{
411	int width = `0`, height = `0`, actual_comps = `0`;
412	uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, `4`, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
413	if (!pImage_data)
414	return false;
415
416	img.init(pImage_data, width, height, `4`);
417
418	free(pImage_data);
419
420	return true;
421	}
422
423	bool load_image(const char* pFilename, image& img)
424	{
425	std::string ext(string_get_extension(std::string (pFilename)));
426
427	if (ext.length() == `0`)
428	return false;
429
430	const char *pExt = ext.c_str();
431
432	if (strcasecmp(pExt, "png") == `0`)
433	return load_png(pFilename, img);
434	if (strcasecmp(pExt, "tga") == `0`)
435	return load_tga(pFilename, img);
436	if ( (strcasecmp(pExt, "jpg") == `0`) \|\| (strcasecmp(pExt, "jfif") == `0`) \|\| (strcasecmp(pExt, "jpeg") == `0`) )
437	return load_jpg(pFilename, img);
438
439	return false;
440	}
441
442	bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp)
443	{
444	if (!img.get_total_pixels())
445	return false;
446
447	void* pPNG_data = nullptr;
448	size_t PNG_data_size = `0`;
449
450	if (image_save_flags & cImageSaveGrayscale)
451	{
452	uint8_vec g_pixels(img.get_total_pixels());
453	uint8_t* pDst = &g_pixels [`0`];
454
455	for (uint32_t y = `0`; y < img.get_height(); y++)
456	for (uint32_t x = `0`; x < img.get_width(); x++)
457	*pDst++ = img (x, y)[grayscale_comp];
458
459	pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(g_pixels.data(), img.get_width(), img.get_height(), `1`, &PNG_data_size, `1`, false);
460	}
461	else
462	{
463	bool has_alpha = false;
464
465	if ((image_save_flags & cImageSaveIgnoreAlpha) == `0`)
466	has_alpha = img.has_alpha();
467
468	if (!has_alpha)
469	{
470	uint8_vec rgb_pixels(img.get_total_pixels() * `3`);
471	uint8_t* pDst = &rgb_pixels [`0`];
472
473	for (uint32_t y = `0`; y < img.get_height(); y++)
474	{
475	const color_rgba* pSrc = &img (`0`, y);
476	for (uint32_t x = `0`; x < img.get_width(); x++)
477	{
478	pDst[`0`] = pSrc->r;
479	pDst[`1`] = pSrc->g;
480	pDst[`2`] = pSrc->b;
481
482	pSrc++;
483	pDst += `3`;
484	}
485	}
486
487	pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(rgb_pixels.data(), img.get_width(), img.get_height(), `3`, &PNG_data_size, `1`, false);
488	}
489	else
490	{
491	pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(img.get_ptr(), img.get_width(), img.get_height(), `4`, &PNG_data_size, `1`, false);
492	}
493	}
494
495	if (!pPNG_data)
496	return false;
497
498	bool status = write_data_to_file(pFilename, pPNG_data, PNG_data_size);
499	if (!status)
500	{
501	error_printf("save_png: Failed writing to filename \"%s\"!\n", pFilename);
502	}
503
504	free(pPNG_data);
505
506	return status;
507	}
508
509	bool read_file_to_vec(const char* pFilename, uint8_vec& data)
510	{
511	FILE* pFile = nullptr;
512	#ifdef _WIN32
513	fopen_s(&pFile, pFilename, "rb");
514	#else
515	pFile = fopen(pFilename, "rb");
516	#endif
517	if (!pFile)
518	return false;
519
520	fseek(pFile, `0`, SEEK_END);
521	#ifdef _WIN32
522	int64_t filesize = _ftelli64(pFile);
523	#else
524	int64_t filesize = ftello(pFile);
525	#endif
526	if (filesize < `0`)
527	{
528	fclose(pFile);
529	return false;
530	}
531	fseek(pFile, `0`, SEEK_SET);
532
533	if (sizeof(size_t) == sizeof(uint32_t))
534	{
535	if (filesize > `0x70000000`)
536	{
537	// File might be too big to load safely in one alloc
538	fclose(pFile);
539	return false;
540	}
541	}
542
543	if (!data.try_resize((size_t)filesize))
544	{
545	fclose(pFile);
546	return false;
547	}
548
549	if (filesize)
550	{
551	if (fread(&data [`0`], `1`, (size_t)filesize, pFile) != (size_t)filesize)
552	{
553	fclose(pFile);
554	return false;
555	}
556	}
557
558	fclose(pFile);
559	return true;
560	}
561
// Creates or truncates pFilename and writes len bytes from pData to it.
// A zero len produces an empty file. Returns false if the file cannot be opened,
// the write is short, or closing the file reports an error.
bool write_data_to_file(const char* pFilename, const void* pData, size_t len)
{
    FILE* pOut = nullptr;
#ifdef _WIN32
    fopen_s(&pOut, pFilename, "wb");
#else
    pOut = fopen(pFilename, "wb");
#endif
    if (pOut == nullptr)
        return false;

    const bool short_write = (len != 0) && (fwrite(pData, 1, len, pOut) != len);
    if (short_write)
    {
        fclose(pOut);
        return false;
    }

    // fclose() flushes buffered data, so its result is part of the success status.
    const int close_result = fclose(pOut);
    return close_result != EOF;
}
584
585	float linear_to_srgb(float l)
586	{
587	assert(l >= `0.0f` && l <= `1.0f`);
588	if (l < `.0031308f`)
589	return saturate(l * `12.92f`);
590	else
591	return saturate(`1.055f` * powf(l, `1.0f`/`2.4f`) - `.055f`);
592	}
593
594	float srgb_to_linear(float s)
595	{
596	assert(s >= `0.0f` && s <= `1.0f`);
597	if (s < `.04045f`)
598	return saturate(s * (`1.0f`/`12.92f`));
599	else
600	return saturate(powf((s + `.055f`) * (`1.0f`/`1.055f`), `2.4f`));
601	}
602
603	bool image_resample(const image &src, image &dst, bool srgb,
604	const char pFilter, float* filter_scale,
605	bool wrapping,
606	uint32_t first_comp, uint32_t num_comps)
607	{
608	assert((first_comp + num_comps) <= `4`);
609
610	const int cMaxComps = `4`;
611
612	const uint32_t src_w = src.get_width(), src_h = src.get_height();
613	const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();
614
615	if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)
616	{
617	printf("Image is too large!\n");
618	return false;
619	}
620
621	if (!src_w \|\| !src_h \|\| !dst_w \|\| !dst_h)
622	return false;
623
624	if ((num_comps < `1`) \|\| (num_comps > cMaxComps))
625	return false;
626
627	if ((minimum(dst_w, dst_h) < `1`) \|\| (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))
628	{
629	printf("Image is too large!\n");
630	return false;
631	}
632
633	if ((src_w == dst_w) && (src_h == dst_h))
634	{
635	dst = src;
636	return true;
637	}
638
639	float srgb_to_linear_table[`256`];
640	if (srgb)
641	{
642	for (int i = `0`; i < `256`; ++i)
643	srgb_to_linear_table[i] = srgb_to_linear((float)i * (`1.0f`/`255.0f`));
644	}
645
646	const int LINEAR_TO_SRGB_TABLE_SIZE = `8192`;
647	uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];
648
649	if (srgb)
650	{
651	for (int i = `0`; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
652	linear_to_srgb_table[i] = (uint8_t)clamp<int>((int)(`255.0f` * linear_to_srgb((float)i * (`1.0f` / (LINEAR_TO_SRGB_TABLE_SIZE - `1`))) + `.5f`), `0`, `255`);
653	}
654
655	std::vector<float> samples[cMaxComps];
656	Resampler *resamplers[cMaxComps];
657
658	resamplers[`0`] = new Resampler (src_w, src_h, dst_w, dst_h,
659	wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, `0.0f`, `1.0f`,
660	pFilter, nullptr, nullptr, filter_scale, filter_scale, `0`, `0`);
661	samples[`0`].resize(src_w);
662
663	for (uint32_t i = `1`; i < num_comps; ++i)
664	{
665	resamplers[i] = new Resampler (src_w, src_h, dst_w, dst_h,
666	wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, `0.0f`, `1.0f`,
667	pFilter, resamplers[`0`]->get_clist_x(), resamplers[`0`]->get_clist_y(), filter_scale, filter_scale, `0`, `0`);
668	samples[i].resize(src_w);
669	}
670
671	uint32_t dst_y = `0`;
672
673	for (uint32_t src_y = `0`; src_y < src_h; ++src_y)
674	{
675	const color_rgba *pSrc = &src (`0`, src_y);
676
677	// Put source lines into resampler(s)
678	for (uint32_t x = `0`; x < src_w; ++x)
679	{
680	for (uint32_t c = `0`; c < num_comps; ++c)
681	{
682	const uint32_t comp_index = first_comp + c;
683	const uint32_t v = (*pSrc)[comp_index];
684
685	if (!srgb \|\| (comp_index == `3`))
686	samples[c][x] = v * (`1.0f` / `255.0f`);
687	else
688	samples[c][x] = srgb_to_linear_table[v];
689	}
690
691	pSrc++;
692	}
693
694	for (uint32_t c = `0`; c < num_comps; ++c)
695	{
696	if (!resamplers[c]->put_line(&samples[c][`0`]))
697	{
698	for (uint32_t i = `0`; i < num_comps; i++)
699	delete resamplers[i];
700	return false;
701	}
702	}
703
704	// Now retrieve any output lines
705	for (;;)
706	{
707	uint32_t c;
708	for (c = `0`; c < num_comps; ++c)
709	{
710	const uint32_t comp_index = first_comp + c;
711
712	const float *pOutput_samples = resamplers[c]->get_line();
713	if (!pOutput_samples)
714	break;
715
716	const bool linear_flag = !srgb \|\| (comp_index == `3`);
717
718	color_rgba *pDst = &dst (`0`, dst_y);
719
720	for (uint32_t x = `0`; x < dst_w; x++)
721	{
722	// TODO: Add dithering
723	if (linear_flag)
724	{
725	int j = (int)(`255.0f` * pOutput_samples[x] + `.5f`);
726	(pDst)[comp_index] = (uint8_t)clamp<int*>(j, `0`, `255`);
727	}
728	else
729	{
730	int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - `1`) * pOutput_samples[x] + `.5f`);
731	(pDst)[comp_index] = linear_to_srgb_table[clamp<int*>(j, `0`, LINEAR_TO_SRGB_TABLE_SIZE - `1`)];
732	}
733
734	pDst++;
735	}
736	}
737	if (c < num_comps)
738	break;
739
740	++dst_y;
741	}
742	}
743
744	for (uint32_t i = `0`; i < num_comps; ++i)
745	delete resamplers[i];
746
747	return true;
748	}
749
750	void canonical_huffman_calculate_minimum_redundancy(sym_freq A, int* num_syms)
751	{
752	// See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen
753	if (!num_syms)
754	return;
755
756	if (`1` == num_syms)
757	{
758	A[`0`].m_key = `1`;
759	return;
760	}
761
762	A[`0`].m_key += A[`1`].m_key;
763
764	int s = `2`, r = `0`, next;
765	for (next = `1`; next < (num_syms - `1`); ++next)
766	{
767	if ((s >= num_syms) \|\| (A[r].m_key < A[s].m_key))
768	{
769	A[next].m_key = A[r].m_key;
770	A[r].m_key = next;
771	++r;
772	}
773	else
774	{
775	A[next].m_key = A[s].m_key;
776	++s;
777	}
778
779	if ((s >= num_syms) \|\| ((r < next) && A[r].m_key < A[s].m_key))
780	{
781	A[next].m_key = A[next].m_key + A[r].m_key;
782	A[r].m_key = next;
783	++r;
784	}
785	else
786	{
787	A[next].m_key = A[next].m_key + A[s].m_key;
788	++s;
789	}
790	}
791	A[num_syms - `2`].m_key = `0`;
792
793	for (next = num_syms - `3`; next >= `0`; --next)
794	{
795	A[next].m_key = `1` + A[A[next].m_key].m_key;
796	}
797
798	int num_avail = `1`, num_used = `0`, depth = `0`;
799	r = num_syms - `2`;
800	next = num_syms - `1`;
801	while (num_avail > `0`)
802	{
803	for ( ; (r >= `0`) && ((int)A[r].m_key == depth); ++num_used, --r )
804	;
805
806	for ( ; num_avail > num_used; --next, --num_avail)
807	A[next].m_key = depth;
808
809	num_avail = `2` * num_used;
810	num_used = `0`;
811	++depth;
812	}
813	}
814
815	void canonical_huffman_enforce_max_code_size(int pNum_codes, int* code_list_len, int max_code_size)
816	{
817	int i;
818	uint32_t total = `0`;
819	if (code_list_len <= `1`)
820	return;
821
822	for (i = max_code_size + `1`; i <= cHuffmanMaxSupportedInternalCodeSize; i++)
823	pNum_codes[max_code_size] += pNum_codes[i];
824
825	for (i = max_code_size; i > `0`; i--)
826	total += (((uint32_t)pNum_codes[i]) << (max_code_size - i));
827
828	while (total != (`1UL` << max_code_size))
829	{
830	pNum_codes[max_code_size]--;
831	for (i = max_code_size - `1`; i > `0`; i--)
832	{
833	if (pNum_codes[i])
834	{
835	pNum_codes[i]--;
836	pNum_codes[i + `1`] += `2`;
837	break;
838	}
839	}
840
841	total--;
842	}
843	}
844
845	sym_freq canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq pSyms0, sym_freq *pSyms1)
846	{
847	uint32_t total_passes = `2`, pass_shift, pass, i, hist[`256` * `2`];
848	sym_freq pCur_syms = pSyms0, pNew_syms = pSyms1;
849
850	clear_obj(hist);
851
852	for (i = `0`; i < num_syms; i++)
853	{
854	uint32_t freq = pSyms0[i].m_key;
855
856	// We scale all input frequencies to 16-bits.
857	assert(freq <= UINT16_MAX);
858
859	hist[freq & `0xFF`]++;
860	hist[`256` + ((freq >> `8`) & `0xFF`)]++;
861	}
862
863	while ((total_passes > `1`) && (num_syms == hist[(total_passes - `1`) * `256`]))
864	total_passes--;
865
866	for (pass_shift = `0`, pass = `0`; pass < total_passes; pass++, pass_shift += `8`)
867	{
868	const uint32_t *pHist = &hist[pass << `8`];
869	uint32_t offsets[`256`], cur_ofs = `0`;
870	for (i = `0`; i < `256`; i++)
871	{
872	offsets[i] = cur_ofs;
873	cur_ofs += pHist[i];
874	}
875
876	for (i = `0`; i < num_syms; i++)
877	pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & `0xFF`]++] = pCur_syms[i];
878
879	sym_freq *t = pCur_syms;
880	pCur_syms = pNew_syms;
881	pNew_syms = t;
882	}
883
884	return pCur_syms;
885	}
886
887	bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size)
888	{
889	if (max_code_size > cHuffmanMaxSupportedCodeSize)
890	return false;
891	if ((!num_syms) \|\| (num_syms > cHuffmanMaxSyms))
892	return false;
893
894	uint32_t total_used_syms = `0`;
895	for (uint32_t i = `0`; i < num_syms; i++)
896	if (pFreq[i])
897	total_used_syms++;
898
899	if (!total_used_syms)
900	return false;
901
902	std::vector<sym_freq> sym_freq0(total_used_syms), sym_freq1(total_used_syms);
903	for (uint32_t i = `0`, j = `0`; i < num_syms; i++)
904	{
905	if (pFreq[i])
906	{
907	sym_freq0 [j].m_key = pFreq[i];
908	sym_freq0 [j++].m_sym_index = static_cast<uint16_t>(i);
909	}
910	}
911
912	sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0 [`0`], &sym_freq1 [`0`]);
913
914	canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms);
915
916	int num_codes[cHuffmanMaxSupportedInternalCodeSize + `1`];
917	clear_obj(num_codes);
918
919	for (uint32_t i = `0`; i < total_used_syms; i++)
920	{
921	if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize)
922	return false;
923
924	num_codes[pSym_freq[i].m_key]++;
925	}
926
927	canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size);
928
929	m_code_sizes.resize(`0`);
930	m_code_sizes.resize(num_syms);
931
932	m_codes.resize(`0`);
933	m_codes.resize(num_syms);
934
935	for (uint32_t i = `1`, j = total_used_syms; i <= max_code_size; i++)
936	for (uint32_t l = num_codes[i]; l > `0`; l--)
937	m_code_sizes [pSym_freq[--j].m_sym_index] = static_cast<uint8_t>(i);
938
939	uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + `1`];
940
941	next_code[`1`] = `0`;
942	for (uint32_t j = `0`, i = `2`; i <= max_code_size; i++)
943	next_code[i] = j = ((j + num_codes[i - `1`]) << `1`);
944
945	for (uint32_t i = `0`; i < num_syms; i++)
946	{
947	uint32_t rev_code = `0`, code, code_size;
948	if ((code_size = m_code_sizes [i]) == `0`)
949	continue;
950	if (code_size > cHuffmanMaxSupportedInternalCodeSize)
951	return false;
952	code = next_code[code_size]++;
953	for (uint32_t l = code_size; l > `0`; l--, code >>= `1`)
954	rev_code = (rev_code << `1`) \| (code & `1`);
955	m_codes [i] = static_cast<uint16_t>(rev_code);
956	}
957
958	return true;
959	}
960
961	bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size)
962	{
963	if ((!num_syms) \|\| (num_syms > cHuffmanMaxSyms))
964	return false;
965
966	uint16_vec sym_freq(num_syms);
967
968	uint32_t max_freq = `0`;
969	for (uint32_t i = `0`; i < num_syms; i++)
970	max_freq = maximum(max_freq, pSym_freq[i]);
971
972	if (max_freq < UINT16_MAX)
973	{
974	for (uint32_t i = `0`; i < num_syms; i++)
975	sym_freq [i] = static_cast<uint16_t>(pSym_freq[i]);
976	}
977	else
978	{
979	for (uint32_t i = `0`; i < num_syms; i++)
980	{
981	if (pSym_freq[i])
982	{
983	uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * `65534U` + (max_freq >> `1`)) / max_freq);
984	sym_freq [i] = static_cast<uint16_t>(clamp<uint32_t>(f, `1`, `65534`));
985	}
986	}
987	}
988
989	return init(num_syms, &sym_freq [`0`], max_code_size);
990	}
991
992	void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len)
993	{
994	if (run_size)
995	{
996	if (run_size < cHuffmanSmallRepeatSizeMin)
997	{
998	while (run_size--)
999	syms.push_back(static_cast<uint16_t>(len));
1000	}
1001	else if (run_size <= cHuffmanSmallRepeatSizeMax)
1002	{
1003	syms.push_back(static_cast<uint16_t>(cHuffmanSmallRepeatCode \| ((run_size - cHuffmanSmallRepeatSizeMin) << `6`)));
1004	}
1005	else
1006	{
1007	assert((run_size >= cHuffmanBigRepeatSizeMin) && (run_size <= cHuffmanBigRepeatSizeMax));
1008	syms.push_back(static_cast<uint16_t>(cHuffmanBigRepeatCode \| ((run_size - cHuffmanBigRepeatSizeMin) << `6`)));
1009	}
1010	}
1011
1012	run_size = `0`;
1013	}
1014
1015	void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size)
1016	{
1017	if (run_size)
1018	{
1019	if (run_size < cHuffmanSmallZeroRunSizeMin)
1020	{
1021	while (run_size--)
1022	syms.push_back(`0`);
1023	}
1024	else if (run_size <= cHuffmanSmallZeroRunSizeMax)
1025	{
1026	syms.push_back(static_cast<uint16_t>(cHuffmanSmallZeroRunCode \| ((run_size - cHuffmanSmallZeroRunSizeMin) << `6`)));
1027	}
1028	else
1029	{
1030	assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax));
1031	syms.push_back(static_cast<uint16_t>(cHuffmanBigZeroRunCode \| ((run_size - cHuffmanBigZeroRunSizeMin) << `6`)));
1032	}
1033	}
1034
1035	run_size = `0`;
1036	}
1037
1038	uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab)
1039	{
1040	const uint64_t start_bits = m_total_bits;
1041
1042	const uint8_vec &code_sizes = tab.get_code_sizes();
1043
1044	uint32_t total_used = tab.get_total_used_codes();
1045	put_bits(total_used, cHuffmanMaxSymsLog2);
1046
1047	if (!total_used)
1048	return `0`;
1049
1050	uint16_vec syms;
1051	syms.reserve(total_used + `16`);
1052
1053	uint32_t prev_code_len = UINT_MAX, zero_run_size = `0`, nonzero_run_size = `0`;
1054
1055	for (uint32_t i = `0`; i <= total_used; ++i)
1056	{
1057	const uint32_t code_len = (i == total_used) ? `0xFF` : code_sizes [i];
1058	assert((code_len == `0xFF`) \|\| (code_len <= `16`));
1059
1060	if (code_len)
1061	{
1062	end_zero_run(syms, zero_run_size);
1063
1064	if (code_len != prev_code_len)
1065	{
1066	end_nonzero_run(syms, nonzero_run_size, prev_code_len);
1067	if (code_len != `0xFF`)
1068	syms.push_back(static_cast<uint16_t>(code_len));
1069	}
1070	else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax)
1071	end_nonzero_run(syms, nonzero_run_size, prev_code_len);
1072	}
1073	else
1074	{
1075	end_nonzero_run(syms, nonzero_run_size, prev_code_len);
1076
1077	if (++zero_run_size == cHuffmanBigZeroRunSizeMax)
1078	end_zero_run(syms, zero_run_size);
1079	}
1080
1081	prev_code_len = code_len;
1082	}
1083
1084	histogram h(cHuffmanTotalCodelengthCodes);
1085	for (uint32_t i = `0`; i < syms.size(); i++)
1086	h.inc(syms [i] & `63`);
1087
1088	huffman_encoding_table ct;
1089	if (!ct.init(h, `7`))
1090	return `0`;
1091
1092	assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes);
1093
1094	uint32_t total_codelength_codes;
1095	for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > `0`; total_codelength_codes--)
1096	if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - `1`]])
1097	break;
1098
1099	assert(total_codelength_codes);
1100
1101	put_bits(total_codelength_codes, `5`);
1102	for (uint32_t i = `0`; i < total_codelength_codes; i++)
1103	put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], `3`);
1104
1105	for (uint32_t i = `0`; i < syms.size(); ++i)
1106	{
1107	const uint32_t l = syms [i] & `63`, e = syms [i] >> `6`;
1108
1109	put_code(l, ct);
1110
1111	if (l == cHuffmanSmallZeroRunCode)
1112	put_bits(e, cHuffmanSmallZeroRunExtraBits);
1113	else if (l == cHuffmanBigZeroRunCode)
1114	put_bits(e, cHuffmanBigZeroRunExtraBits);
1115	else if (l == cHuffmanSmallRepeatCode)
1116	put_bits(e, cHuffmanSmallRepeatExtraBits);
1117	else if (l == cHuffmanBigRepeatCode)
1118	put_bits(e, cHuffmanBigRepeatExtraBits);
1119	}
1120
1121	return (uint32_t)(m_total_bits - start_bits);
1122	}
1123
1124	bool huffman_test(int rand_seed)
1125	{
1126	histogram h(`19`);
1127
1128	// Feed in a fibonacci sequence to force large codesizes
1129	h [`0`] += `1`; h [`1`] += `1`; h [`2`] += `2`; h [`3`] += `3`;
1130	h [`4`] += `5`; h [`5`] += `8`; h [`6`] += `13`; h [`7`] += `21`;
1131	h [`8`] += `34`; h [`9`] += `55`; h [`10`] += `89`; h [`11`] += `144`;
1132	h [`12`] += `233`; h [`13`] += `377`; h [`14`] += `610`; h [`15`] += `987`;
1133	h [`16`] += `1597`; h [`17`] += `2584`; h [`18`] += `4181`;
1134
1135	huffman_encoding_table etab;
1136	etab.init(h, `16`);
1137
1138	{
1139	bitwise_coder c;
1140	c.init(`1024`);
1141
1142	c.emit_huffman_table(etab);
1143	for (int i = `0`; i < `19`; i++)
1144	c.put_code(i, etab);
1145
1146	c.flush();
1147
1148	basist::bitwise_decoder d;
1149	d.init(&c.get_bytes()[`0`], static_cast<uint32_t>(c.get_bytes().size()));
1150
1151	basist::huffman_decoding_table dtab;
1152	bool success = d.read_huffman_table(dtab);
1153	if (!success)
1154	{
1155	assert(`0`);
1156	printf("Failure 5\n");
1157	return false;
1158	}
1159
1160	for (uint32_t i = `0`; i < `19`; i++)
1161	{
1162	uint32_t s = d.decode_huffman(dtab);
1163	if (s != i)
1164	{
1165	assert(`0`);
1166	printf("Failure 5\n");
1167	return false;
1168	}
1169	}
1170	}
1171
1172	basisu::rand r;
1173	r.seed(rand_seed);
1174
1175	for (int iter = `0`; iter < `500000`; iter++)
1176	{
1177	printf("%u\n", iter);
1178
1179	uint32_t max_sym = r.irand(`0`, `8193`);
1180	uint32_t num_codes = r.irand(`1`, `10000`);
1181	uint_vec syms(num_codes);
1182
1183	for (uint32_t i = `0`; i < num_codes; i++)
1184	{
1185	if (r.bit())
1186	syms [i] = r.irand(`0`, max_sym);
1187	else
1188	{
1189	int s = (int)(r.gaussian((float)max_sym / `2`, (float)maximum<int>(`1`, max_sym / `2`)) + `.5f`);
1190	s = basisu::clamp<int>(s, `0`, max_sym);
1191
1192	syms [i] = s;
1193	}
1194
1195	}
1196
1197	histogram h1(max_sym + `1`);
1198	for (uint32_t i = `0`; i < num_codes; i++)
1199	h1 [syms [i]]++;
1200
1201	huffman_encoding_table etab2;
1202	if (!etab2.init(h1, `16`))
1203	{
1204	assert(`0`);
1205	printf("Failed 0\n");
1206	return false;
1207	}
1208
1209	bitwise_coder c;
1210	c.init(`1024`);
1211
1212	c.emit_huffman_table(etab2);
1213
1214	for (uint32_t i = `0`; i < num_codes; i++)
1215	c.put_code(syms [i], etab2);
1216
1217	c.flush();
1218
1219	basist::bitwise_decoder d;
1220	d.init(&c.get_bytes()[`0`], (uint32_t)c.get_bytes().size());
1221
1222	basist::huffman_decoding_table dtab;
1223	bool success = d.read_huffman_table(dtab);
1224	if (!success)
1225	{
1226	assert(`0`);
1227	printf("Failed 2\n");
1228	return false;
1229	}
1230
1231	for (uint32_t i = `0`; i < num_codes; i++)
1232	{
1233	uint32_t s = d.decode_huffman(dtab);
1234	if (s != syms [i])
1235	{
1236	assert(`0`);
1237	printf("Failed 4\n");
1238	return false;
1239	}
1240	}
1241
1242	}
1243	return true;
1244	}
1245
1246	void palette_index_reorderer::init(uint32_t num_indices, const uint32_t pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void* pCtx, float* dist_func_weight)
1247	{
1248	assert((num_syms > `0`) && (num_indices > `0`));
1249	assert((dist_func_weight >= `0.0f`) && (dist_func_weight <= `1.0f`));
1250
1251	clear();
1252
1253	m_remap_table.resize(num_syms);
1254	m_entries_picked.reserve(num_syms);
1255	m_total_count_to_picked.resize(num_syms);
1256
1257	if (num_indices <= `1`)
1258	return;
1259
1260	prepare_hist(num_syms, num_indices, pIndices);
1261	find_initial(num_syms);
1262
1263	while (m_entries_to_do.size())
1264	{
1265	// Find the best entry to move into the picked list.
1266	uint32_t best_entry;
1267	double best_count;
1268	find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight);
1269
1270	// We now have chosen an entry to place in the picked list, now determine which side it goes on.
1271	const uint32_t entry_to_move = m_entries_to_do [best_entry];
1272
1273	float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight);
1274
1275	// Put entry_to_move either on the "left" or "right" side of the picked entries
1276	if (side <= `0`)
1277	m_entries_picked.push_back(entry_to_move);
1278	else
1279	m_entries_picked.insert(m_entries_picked.begin(), entry_to_move);
1280
1281	// Erase best_entry from the todo list
1282	m_entries_to_do.erase(m_entries_to_do.begin() + best_entry);
1283
1284	// We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry
1285	for (uint32_t i = `0`; i < m_entries_to_do.size(); i++)
1286	m_total_count_to_picked [m_entries_to_do [i]] += get_hist(m_entries_to_do [i], entry_to_move, num_syms);
1287	}
1288
1289	for (uint32_t i = `0`; i < num_syms; i++)
1290	m_remap_table [m_entries_picked [i]] = i;
1291	}
1292
1293	void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices)
1294	{
1295	m_hist.resize(`0`);
1296	m_hist.resize(num_syms * num_syms);
1297
1298	for (uint32_t i = `0`; i < num_indices; i++)
1299	{
1300	const uint32_t idx = pIndices[i];
1301	inc_hist(idx, (i < (num_indices - `1`)) ? pIndices[i + `1`] : -`1`, num_syms);
1302	inc_hist(idx, (i > `0`) ? pIndices[i - `1`] : -`1`, num_syms);
1303	}
1304	}
1305
1306	void palette_index_reorderer::find_initial(uint32_t num_syms)
1307	{
1308	uint32_t max_count = `0`, max_index = `0`;
1309	for (uint32_t i = `0`; i < num_syms * num_syms; i++)
1310	if (m_hist [i] > max_count)
1311	max_count = m_hist [i], max_index = i;
1312
1313	uint32_t a = max_index / num_syms, b = max_index % num_syms;
1314
1315	m_entries_picked.push_back(a);
1316	m_entries_picked.push_back(b);
1317
1318	for (uint32_t i = `0`; i < num_syms; i++)
1319	if ((i != b) && (i != a))
1320	m_entries_to_do.push_back(i);
1321
1322	for (uint32_t i = `0`; i < m_entries_to_do.size(); i++)
1323	for (uint32_t j = `0`; j < m_entries_picked.size(); j++)
1324	m_total_count_to_picked [m_entries_to_do [i]] += get_hist(m_entries_to_do [i], m_entries_picked [j], num_syms);
1325	}
1326
1327	void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void pCtx, float* dist_func_weight)
1328	{
1329	best_entry = `0`;
1330	best_count = `0`;
1331
1332	for (uint32_t i = `0`; i < m_entries_to_do.size(); i++)
1333	{
1334	const uint32_t u = m_entries_to_do [i];
1335	double total_count = m_total_count_to_picked [u];
1336
1337	if (pDist_func)
1338	{
1339	float w = maximum<float>((pDist_func)(u, m_entries_picked.front(), pCtx), (pDist_func)(u, m_entries_picked.back(), pCtx));
1340	assert((w >= `0.0f`) && (w <= `1.0f`));
1341	total_count = (total_count + `1.0f`) * lerp(`1.0f` - dist_func_weight, `1.0f` + dist_func_weight, w);
1342	}
1343
1344	if (total_count <= best_count)
1345	continue;
1346
1347	best_entry = i;
1348	best_count = total_count;
1349	}
1350	}
1351
1352	float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void pCtx, float* dist_func_weight)
1353	{
1354	float which_side = `0`;
1355
1356	int l_count = `0`, r_count = `0`;
1357	for (uint32_t j = `0`; j < m_entries_picked.size(); j++)
1358	{
1359	const int count = get_hist(entry_to_move, m_entries_picked [j], num_syms), r = ((int)m_entries_picked.size() + `1` - `2` * (j + `1`));
1360	which_side += static_cast<float>(r * count);
1361	if (r >= `0`)
1362	l_count += r * count;
1363	else
1364	r_count += -r * count;
1365	}
1366
1367	if (pDist_func)
1368	{
1369	float w_left = lerp(`1.0f` - dist_func_weight, `1.0f` + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx));
1370	float w_right = lerp(`1.0f` - dist_func_weight, `1.0f` + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx));
1371	which_side = w_left * l_count - w_right * r_count;
1372	}
1373	return which_side;
1374	}
1375
1376	void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)
1377	{
1378	assert((first_chan < `4U`) && (first_chan + total_chans <= `4U`));
1379
1380	const uint32_t width = basisu::minimum(a.get_width(), b.get_width());
1381	const uint32_t height = basisu::minimum(a.get_height(), b.get_height());
1382
1383	double hist[`256`];
1384	clear_obj(hist);
1385
1386	for (uint32_t y = `0`; y < height; y++)
1387	{
1388	for (uint32_t x = `0`; x < width; x++)
1389	{
1390	const color_rgba &ca = a (x, y), &cb = b (x, y);
1391
1392	if (total_chans)
1393	{
1394	for (uint32_t c = `0`; c < total_chans; c++)
1395	hist[iabs(ca [first_chan + c] - cb [first_chan + c])]++;
1396	}
1397	else
1398	{
1399	if (use_601_luma)
1400	hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++;
1401	else
1402	hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++;
1403	}
1404	}
1405	}
1406
1407	m_max = `0`;
1408	double sum = `0.0f`, sum2 = `0.0f`;
1409	for (uint32_t i = `0`; i < `256`; i++)
1410	{
1411	if (hist[i])
1412	{
1413	m_max = basisu::maximum<float>(m_max, (float)i);
1414	double v = i * hist[i];
1415	sum += v;
1416	sum2 += i * v;
1417	}
1418	}
1419
1420	double total_values = (double)width * (double)height;
1421	if (avg_comp_error)
1422	total_values = (double*)clamp<uint32_t>(total_chans, `1`, `4`);
1423
1424	m_mean = (float)clamp<double>(sum / total_values, `0.0f`, `255.0`);
1425	m_mean_squared = (float)clamp<double>(sum2 / total_values, `0.0f`, `255.0f` * `255.0f`);
1426	m_rms = (float)sqrt(m_mean_squared);
1427	m_psnr = m_rms ? (float)clamp<double>(log10(`255.0` / m_rms) * `20.0f`, `0.0f`, `100.0f`) : `100.0f`;
1428	}
1429
1430	void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed)
1431	{
1432	rand r(seed);
1433
1434	uint8_t pDst = static_cast<uint8_t >(pBuf);
1435
1436	while (size >= sizeof(uint32_t))
1437	{
1438	(uint32_t )pDst = r.urand32();
1439	pDst += sizeof(uint32_t);
1440	size -= sizeof(uint32_t);
1441	}
1442
1443	while (size)
1444	{
1445	*pDst++ = r.byte();
1446	size--;
1447	}
1448	}
1449
// Hashes `len` bytes at pBuf to a 32-bit value.
//
// The buffer is consumed four bytes at a time as two native-endian 16-bit words, then the
// remaining 1-3 bytes are mixed in, followed by a final series of shift/xor/add steps.
// Returns 0 for a null buffer or a zero length.
//
// The 16-bit words are read with memcpy, so pBuf does not have to be 2-byte aligned, and the
// sign-extended tail bytes are shifted as unsigned values; the resulting hash is the same as
// with direct pointer-cast loads.
uint32_t hash_hsieh(const uint8_t *pBuf, size_t len)
{
	if (!pBuf || !len)
		return 0;

	// Native-endian 16-bit load that is safe for any alignment.
	const auto load16 = [](const uint8_t *p) -> uint32_t
	{
		uint16_t v;
		memcpy(&v, p, sizeof(v));
		return v;
	};

	// Sign-extends one byte to 32 bits and returns it as an unsigned value.
	const auto load_s8 = [](const uint8_t *p) -> uint32_t
	{
		return static_cast<uint32_t>(static_cast<int32_t>(static_cast<signed char>(*p)));
	};

	uint32_t h = static_cast<uint32_t>(len);

	const uint32_t bytes_left = static_cast<uint32_t>(len & 3);
	len >>= 2;

	while (len--)
	{
		h += load16(pBuf);

		const uint32_t t = (load16(pBuf + sizeof(uint16_t)) << 11) ^ h;
		h = (h << 16) ^ t;

		pBuf += sizeof(uint32_t);

		h += h >> 11;
	}

	switch (bytes_left)
	{
	case 1:
		h += load_s8(pBuf);
		h ^= h << 10;
		h += h >> 1;
		break;
	case 2:
		h += load16(pBuf);
		h ^= h << 11;
		h += h >> 17;
		break;
	case 3:
		h += load16(pBuf);
		h ^= h << 16;
		h ^= load_s8(pBuf + sizeof(uint16_t)) << 18;
		h += h >> 11;
		break;
	default:
		break;
	}

	// Final mixing of the accumulated bits.
	h ^= h << 3;
	h += h >> 5;
	h ^= h << 4;
	h += h >> 17;
	h ^= h << 25;
	h += h >> 6;

	return h;
}
1505
1506	job_pool::job_pool(uint32_t num_threads) :
1507	m_num_active_jobs(`0`),
1508	m_kill_flag (false)
1509	{
1510	assert(num_threads >= `1U`);
1511
1512	debug_printf("job_pool::job_pool: %u total threads\n", num_threads);
1513
1514	if (num_threads > `1`)
1515	{
1516	m_threads.resize(num_threads - `1`);
1517
1518	for (int i = `0`; i < ((int)num_threads - `1`); i++)
1519	m_threads [i] = std::thread ([this, i] { job_thread(i); });
1520	}
1521	}
1522
1523	job_pool::~job_pool()
1524	{
1525	debug_printf("job_pool::~job_pool\n");
1526
1527	// Notify all workers that they need to die right now.
1528	m_kill_flag = true;
1529
1530	m_has_work.notify_all();
1531
1532	// Wait for all workers to die.
1533	for (uint32_t i = `0`; i < m_threads.size(); i++)
1534	m_threads [i].join();
1535	}
1536
1537	void job_pool::add_job(const std::function<void()>& job)
1538	{
1539	std::unique_lock<std::mutex> lock(m_mutex);
1540
1541	m_queue.emplace_back(job);
1542
1543	const size_t queue_size = m_queue.size();
1544
1545	lock.unlock();
1546
1547	if (queue_size > `1`)
1548	m_has_work.notify_one();
1549	}
1550
1551	void job_pool::add_job(std::function<void()>&& job)
1552	{
1553	std::unique_lock<std::mutex> lock(m_mutex);
1554
1555	m_queue.emplace_back(std::move(job));
1556
1557	const size_t queue_size = m_queue.size();
1558
1559	lock.unlock();
1560
1561	if (queue_size > `1`)
1562	{
1563	m_has_work.notify_one();
1564	}
1565	}
1566
1567	void job_pool::wait_for_all()
1568	{
1569	std::unique_lock<std::mutex> lock(m_mutex);
1570
1571	// Drain the job queue on the calling thread.
1572	while (!m_queue.empty())
1573	{
1574	std::function<void()> job(m_queue.back());
1575	m_queue.pop_back();
1576
1577	lock.unlock();
1578
1579	job ();
1580
1581	lock.lock();
1582	}
1583
1584	// The queue is empty, now wait for all active jobs to finish up.
1585	m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } );
1586	}
1587
1588	void job_pool::job_thread(uint32_t index)
1589	{
1590	BASISU_NOTE_UNUSED(index);
1591	//debug_printf("job_pool::job_thread: starting %u\n", index);
1592
1593	while (true)
1594	{
1595	std::unique_lock<std::mutex> lock(m_mutex);
1596
1597	// Wait for any jobs to be issued.
1598	m_has_work.wait(lock, [this] { return m_kill_flag \|\| m_queue.size(); } );
1599
1600	// Check to see if we're supposed to exit.
1601	if (m_kill_flag)
1602	break;
1603
1604	// Get the job and execute it.
1605	std::function<void()> job(m_queue.back());
1606	m_queue.pop_back();
1607
1608	++m_num_active_jobs;
1609
1610	lock.unlock();
1611
1612	job ();
1613
1614	lock.lock();
1615
1616	--m_num_active_jobs;
1617
1618	// Now check if there are no more jobs remaining.
1619	const bool all_done = m_queue.empty() && !m_num_active_jobs;
1620
1621	lock.unlock();
1622
1623	if (all_done)
1624	m_no_more_jobs.notify_all();
1625	}
1626
1627	//debug_printf("job_pool::job_thread: exiting\n");
1628	}
1629
1630	// .TGA image loading
1631	#pragma pack(push)
1632	#pragma pack(1)
1633	struct tga_header
1634	{
1635	uint8_t m_id_len;
1636	uint8_t m_cmap;
1637	uint8_t m_type;
1638	packed_uint<`2`> m_cmap_first;
1639	packed_uint<`2`> m_cmap_len;
1640	uint8_t m_cmap_bpp;
1641	packed_uint<`2`> m_x_org;
1642	packed_uint<`2`> m_y_org;
1643	packed_uint<`2`> m_width;
1644	packed_uint<`2`> m_height;
1645	uint8_t m_depth;
1646	uint8_t m_desc;
1647	};
1648	#pragma pack(pop)
1649
1650	const uint32_t MAX_TGA_IMAGE_SIZE = `16384`;
1651
1652	enum tga_image_type
1653	{
1654	cITPalettized = `1`,
1655	cITRGB = `2`,
1656	cITGrayscale = `3`
1657	};
1658
1659	uint8_t read_tga(const* uint8_t pBuf, uint32_t buf_size, int* &width, int &height, int &n_chans)
1660	{
1661	width = `0`;
1662	height = `0`;
1663	n_chans = `0`;
1664
1665	if (buf_size <= sizeof(tga_header))
1666	return nullptr;
1667
1668	const tga_header &hdr = *reinterpret_cast<const tga_header *>(pBuf);
1669
1670	if ((!hdr.m_width) \|\| (!hdr.m_height) \|\| (hdr.m_width > MAX_TGA_IMAGE_SIZE) \|\| (hdr.m_height > MAX_TGA_IMAGE_SIZE))
1671	return nullptr;
1672
1673	if (hdr.m_desc >> `6`)
1674	return nullptr;
1675
1676	// Simple validation
1677	if ((hdr.m_cmap != `0`) && (hdr.m_cmap != `1`))
1678	return nullptr;
1679
1680	if (hdr.m_cmap)
1681	{
1682	if ((hdr.m_cmap_bpp == `0`) \|\| (hdr.m_cmap_bpp > `32`))
1683	return nullptr;
1684
1685	// Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either.
1686	if (hdr.m_cmap_first != `0`)
1687	return nullptr;
1688	}
1689
1690	const bool x_flipped = (hdr.m_desc & `0x10`) != `0`;
1691	const bool y_flipped = (hdr.m_desc & `0x20`) == `0`;
1692
1693	bool rle_flag = false;
1694	int file_image_type = hdr.m_type;
1695	if (file_image_type > `8`)
1696	{
1697	file_image_type -= `8`;
1698	rle_flag = true;
1699	}
1700
1701	const tga_image_type image_type = static_cast<tga_image_type>(file_image_type);
1702
1703	switch (file_image_type)
1704	{
1705	case cITRGB:
1706	if (hdr.m_depth == `8`)
1707	return nullptr;
1708	break;
1709	case cITPalettized:
1710	if ((hdr.m_depth != `8`) \|\| (hdr.m_cmap != `1`) \|\| (hdr.m_cmap_len == `0`))
1711	return nullptr;
1712	break;
1713	case cITGrayscale:
1714	if ((hdr.m_cmap != `0`) \|\| (hdr.m_cmap_len != `0`))
1715	return nullptr;
1716	if ((hdr.m_depth != `8`) && (hdr.m_depth != `16`))
1717	return nullptr;
1718	break;
1719	default:
1720	return nullptr;
1721	}
1722
1723	uint32_t tga_bytes_per_pixel = `0`;
1724
1725	switch (hdr.m_depth)
1726	{
1727	case `32`:
1728	tga_bytes_per_pixel = `4`;
1729	n_chans = `4`;
1730	break;
1731	case `24`:
1732	tga_bytes_per_pixel = `3`;
1733	n_chans = `3`;
1734	break;
1735	case `16`:
1736	case `15`:
1737	tga_bytes_per_pixel = `2`;
1738	// For compatibility with stb_image_write.h
1739	n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == `16`)) ? `4` : `3`;
1740	break;
1741	case `8`:
1742	tga_bytes_per_pixel = `1`;
1743	// For palettized RGBA support, which both FreeImage and stb_image support.
1744	n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == `32`)) ? `4` : `3`;
1745	break;
1746	default:
1747	return nullptr;
1748	}
1749
1750	//const uint32_t bytes_per_line = hdr.m_width tga_bytes_per_pixel;*
1751
1752	const uint8_t pSrc = pBuf + sizeof*(tga_header);
1753	uint32_t bytes_remaining = buf_size - sizeof(tga_header);
1754
1755	if (hdr.m_id_len)
1756	{
1757	if (bytes_remaining < hdr.m_id_len)
1758	return nullptr;
1759	pSrc += hdr.m_id_len;
1760	bytes_remaining += hdr.m_id_len;
1761	}
1762
1763	color_rgba pal[`256`];
1764	for (uint32_t i = `0`; i < `256`; i++)
1765	pal[i].set(`0`, `0`, `0`, `255`);
1766
1767	if ((hdr.m_cmap) && (hdr.m_cmap_len))
1768	{
1769	if (image_type == cITPalettized)
1770	{
1771	// Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years).
1772	if ( ((hdr.m_cmap_bpp != `32`) && (hdr.m_cmap_bpp != `24`) && (hdr.m_cmap_bpp != `15`) && (hdr.m_cmap_bpp != `16`)) \|\| (hdr.m_cmap_len > `256`) )
1773	return nullptr;
1774
1775	if (hdr.m_cmap_bpp == `32`)
1776	{
1777	const uint32_t pal_size = hdr.m_cmap_len * `4`;
1778	if (bytes_remaining < pal_size)
1779	return nullptr;
1780
1781	for (uint32_t i = `0`; i < hdr.m_cmap_len; i++)
1782	{
1783	pal[i].r = pSrc[i * `4` + `2`];
1784	pal[i].g = pSrc[i * `4` + `1`];
1785	pal[i].b = pSrc[i * `4` + `0`];
1786	pal[i].a = pSrc[i * `4` + `3`];
1787	}
1788
1789	bytes_remaining -= pal_size;
1790	pSrc += pal_size;
1791	}
1792	else if (hdr.m_cmap_bpp == `24`)
1793	{
1794	const uint32_t pal_size = hdr.m_cmap_len * `3`;
1795	if (bytes_remaining < pal_size)
1796	return nullptr;
1797
1798	for (uint32_t i = `0`; i < hdr.m_cmap_len; i++)
1799	{
1800	pal[i].r = pSrc[i * `3` + `2`];
1801	pal[i].g = pSrc[i * `3` + `1`];
1802	pal[i].b = pSrc[i * `3` + `0`];
1803	pal[i].a = `255`;
1804	}
1805
1806	bytes_remaining -= pal_size;
1807	pSrc += pal_size;
1808	}
1809	else
1810	{
1811	const uint32_t pal_size = hdr.m_cmap_len * `2`;
1812	if (bytes_remaining < pal_size)
1813	return nullptr;
1814
1815	for (uint32_t i = `0`; i < hdr.m_cmap_len; i++)
1816	{
1817	const uint32_t v = pSrc[i * `2` + `0`] \| (pSrc[i * `2` + `1`] << `8`);
1818
1819	pal[i].r = (((v >> `10`) & `31`) * `255` + `15`) / `31`;
1820	pal[i].g = (((v >> `5`) & `31`) * `255` + `15`) / `31`;
1821	pal[i].b = ((v & `31`) * `255` + `15`) / `31`;
1822	pal[i].a = `255`;
1823	}
1824
1825	bytes_remaining -= pal_size;
1826	pSrc += pal_size;
1827	}
1828	}
1829	else
1830	{
1831	const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> `3`) * hdr.m_cmap_len;
1832	if (bytes_remaining < bytes_to_skip)
1833	return nullptr;
1834	pSrc += bytes_to_skip;
1835	bytes_remaining += bytes_to_skip;
1836	}
1837	}
1838
1839	width = hdr.m_width;
1840	height = hdr.m_height;
1841
1842	const uint32_t source_pitch = width * tga_bytes_per_pixel;
1843	const uint32_t dest_pitch = width * n_chans;
1844
1845	uint8_t pImage = (uint8_t )malloc(dest_pitch * height);
1846	if (!pImage)
1847	return nullptr;
1848
1849	std::vector<uint8_t> input_line_buf;
1850	if (rle_flag)
1851	input_line_buf.resize(source_pitch);
1852
1853	int run_type = `0`, run_remaining = `0`;
1854	uint8_t run_pixel[`4`];
1855	memset(run_pixel, `0`, sizeof(run_pixel));
1856
1857	for (int y = `0`; y < height; y++)
1858	{
1859	const uint8_t *pLine_data;
1860
1861	if (rle_flag)
1862	{
1863	int pixels_remaining = width;
1864	uint8_t *pDst = &input_line_buf [`0`];
1865
1866	do
1867	{
1868	if (!run_remaining)
1869	{
1870	if (bytes_remaining < `1`)
1871	{
1872	free(pImage);
1873	return nullptr;
1874	}
1875
1876	int v = *pSrc++;
1877	bytes_remaining--;
1878
1879	run_type = v & `0x80`;
1880	run_remaining = (v & `0x7F`) + `1`;
1881
1882	if (run_type)
1883	{
1884	if (bytes_remaining < tga_bytes_per_pixel)
1885	{
1886	free(pImage);
1887	return nullptr;
1888	}
1889
1890	memcpy(run_pixel, pSrc, tga_bytes_per_pixel);
1891	pSrc += tga_bytes_per_pixel;
1892	bytes_remaining -= tga_bytes_per_pixel;
1893	}
1894	}
1895
1896	const uint32_t n = basisu::minimum<uint32_t>(pixels_remaining, run_remaining);
1897	pixels_remaining -= n;
1898	run_remaining -= n;
1899
1900	if (run_type)
1901	{
1902	for (uint32_t i = `0`; i < n; i++)
1903	for (uint32_t j = `0`; j < tga_bytes_per_pixel; j++)
1904	*pDst++ = run_pixel[j];
1905	}
1906	else
1907	{
1908	const uint32_t bytes_wanted = n * tga_bytes_per_pixel;
1909
1910	if (bytes_remaining < bytes_wanted)
1911	{
1912	free(pImage);
1913	return nullptr;
1914	}
1915
1916	memcpy(pDst, pSrc, bytes_wanted);
1917	pDst += bytes_wanted;
1918
1919	pSrc += bytes_wanted;
1920	bytes_remaining -= bytes_wanted;
1921	}
1922
1923	} while (pixels_remaining);
1924
1925	assert((pDst - &input_line_buf[`0`]) == width * tga_bytes_per_pixel);
1926
1927	pLine_data = &input_line_buf [`0`];
1928	}
1929	else
1930	{
1931	if (bytes_remaining < source_pitch)
1932	{
1933	free(pImage);
1934	return nullptr;
1935	}
1936
1937	pLine_data = pSrc;
1938	bytes_remaining -= source_pitch;
1939	pSrc += source_pitch;
1940	}
1941
1942	// Convert to 24bpp RGB or 32bpp RGBA.
1943	uint8_t pDst = pImage + (y_flipped ? (height - `1` - y) : y) dest_pitch + (x_flipped ? (width - `1`) * n_chans : `0`);
1944	const int dst_stride = x_flipped ? -((int)n_chans) : n_chans;
1945
1946	switch (hdr.m_depth)
1947	{
1948	case `32`:
1949	assert(tga_bytes_per_pixel == `4` && n_chans == `4`);
1950	for (int i = `0`; i < width; i++, pLine_data += `4`, pDst += dst_stride)
1951	{
1952	pDst[`0`] = pLine_data[`2`];
1953	pDst[`1`] = pLine_data[`1`];
1954	pDst[`2`] = pLine_data[`0`];
1955	pDst[`3`] = pLine_data[`3`];
1956	}
1957	break;
1958	case `24`:
1959	assert(tga_bytes_per_pixel == `3` && n_chans == `3`);
1960	for (int i = `0`; i < width; i++, pLine_data += `3`, pDst += dst_stride)
1961	{
1962	pDst[`0`] = pLine_data[`2`];
1963	pDst[`1`] = pLine_data[`1`];
1964	pDst[`2`] = pLine_data[`0`];
1965	}
1966	break;
1967	case `16`:
1968	case `15`:
1969	if (image_type == cITRGB)
1970	{
1971	assert(tga_bytes_per_pixel == `2` && n_chans == `3`);
1972	for (int i = `0`; i < width; i++, pLine_data += `2`, pDst += dst_stride)
1973	{
1974	const uint32_t v = pLine_data[`0`] \| (pLine_data[`1`] << `8`);
1975	pDst[`0`] = (((v >> `10`) & `31`) * `255` + `15`) / `31`;
1976	pDst[`1`] = (((v >> `5`) & `31`) * `255` + `15`) / `31`;
1977	pDst[`2`] = ((v & `31`) * `255` + `15`) / `31`;
1978	}
1979	}
1980	else
1981	{
1982	assert(image_type == cITGrayscale && tga_bytes_per_pixel == `2` && n_chans == `4`);
1983	for (int i = `0`; i < width; i++, pLine_data += `2`, pDst += dst_stride)
1984	{
1985	pDst[`0`] = pLine_data[`0`];
1986	pDst[`1`] = pLine_data[`0`];
1987	pDst[`2`] = pLine_data[`0`];
1988	pDst[`3`] = pLine_data[`1`];
1989	}
1990	}
1991	break;
1992	case `8`:
1993	assert(tga_bytes_per_pixel == `1`);
1994	if (image_type == cITPalettized)
1995	{
1996	if (hdr.m_cmap_bpp == `32`)
1997	{
1998	assert(n_chans == `4`);
1999	for (int i = `0`; i < width; i++, pLine_data++, pDst += dst_stride)
2000	{
2001	const uint32_t c = *pLine_data;
2002	pDst[`0`] = pal[c].r;
2003	pDst[`1`] = pal[c].g;
2004	pDst[`2`] = pal[c].b;
2005	pDst[`3`] = pal[c].a;
2006	}
2007	}
2008	else
2009	{
2010	assert(n_chans == `3`);
2011	for (int i = `0`; i < width; i++, pLine_data++, pDst += dst_stride)
2012	{
2013	const uint32_t c = *pLine_data;
2014	pDst[`0`] = pal[c].r;
2015	pDst[`1`] = pal[c].g;
2016	pDst[`2`] = pal[c].b;
2017	}
2018	}
2019	}
2020	else
2021	{
2022	assert(n_chans == `3`);
2023	for (int i = `0`; i < width; i++, pLine_data++, pDst += dst_stride)
2024	{
2025	const uint8_t c = *pLine_data;
2026	pDst[`0`] = c;
2027	pDst[`1`] = c;
2028	pDst[`2`] = c;
2029	}
2030	}
2031	break;
2032	default:
2033	assert(`0`);
2034	break;
2035	}
2036	} // y
2037
2038	return pImage;
2039	}
2040
2041	uint8_t read_tga(const* char pFilename, int* &width, int &height, int &n_chans)
2042	{
2043	width = height = n_chans = `0`;
2044
2045	uint8_vec filedata;
2046	if (!read_file_to_vec(pFilename, filedata))
2047	return nullptr;
2048
2049	if (!filedata.size() \|\| (filedata.size() > UINT32_MAX))
2050	return nullptr;
2051
2052	return read_tga(&filedata [`0`], (uint32_t)filedata.size(), width, height, n_chans);
2053	}
2054
2055	void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...)
2056	{
2057	char buf[`2048`];
2058
2059	va_list args;
2060	va_start(args, pFmt);
2061	#ifdef _WIN32
2062	vsprintf_s(buf, sizeof(buf), pFmt, args);
2063	#else
2064	vsnprintf(buf, sizeof(buf), pFmt, args);
2065	#endif
2066	va_end(args);
2067
2068	const char* p = buf;
2069
2070	const uint32_t orig_x_ofs = x_ofs;
2071
2072	while (*p)
2073	{
2074	uint8_t c = *p++;
2075	if ((c < `32`) \|\| (c > `127`))
2076	c = `'.'`;
2077
2078	const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - `32`][`0`];
2079
2080	for (uint32_t y = `0`; y < `8`; y++)
2081	{
2082	uint32_t row_bits = pGlpyh[y];
2083	for (uint32_t x = `0`; x < `8`; x++)
2084	{
2085	const uint32_t q = row_bits & (`1` << x);
2086
2087	const color_rgba* pColor = q ? &fg : pBG;
2088	if (!pColor)
2089	continue;
2090
2091	if (alpha_only)
2092	fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);
2093	else
2094	fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);
2095	}
2096	}
2097
2098	x_ofs += `8` * scale_x;
2099	if ((x_ofs + `8` * scale_x) > m_width)
2100	{
2101	x_ofs = orig_x_ofs;
2102	y_ofs += `8` * scale_y;
2103	}
2104	}
2105	}
2106
2107	} // namespace basisu
2108

Browse the source code of Godot/thirdparty/basis_universal/encoder/basisu_enc.cpp