1 | // basisu_enc.cpp |
2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // http://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | #include "basisu_enc.h" |
16 | #include "basisu_resampler.h" |
17 | #include "basisu_resampler_filters.h" |
18 | #include "basisu_etc.h" |
19 | #include "../transcoder/basisu_transcoder.h" |
20 | #include "basisu_bc7enc.h" |
21 | #include "jpgd.h" |
22 | #include "pvpngreader.h" |
23 | #include "basisu_opencl.h" |
24 | #include <vector> |
25 | |
26 | #define |
27 | #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES |
28 | #include "basisu_miniz.h" |
29 | |
30 | #if defined(_WIN32) |
31 | // For QueryPerformanceCounter/QueryPerformanceFrequency |
32 | #define WIN32_LEAN_AND_MEAN |
33 | #include <windows.h> |
34 | #endif |
35 | |
36 | namespace basisu |
37 | { |
// Storage for interval_timer's static members. interval_timer::init() fills all three:
// g_freq receives the counter frequency, g_init_ticks the counter value at init time, and
// g_timer_freq the reciprocal of g_freq. g_timer_freq also serves as the "initialized" flag:
// the timer code calls init() whenever it is still zero.
uint64_t interval_timer::g_init_ticks, interval_timer::g_freq;
double interval_timer::g_timer_freq;
#if BASISU_SUPPORT_SSE
// Only present in SSE-enabled builds. NOTE(review): presumably written by detect_sse41() - confirm.
bool g_cpu_supports_sse41;
#endif
43 | |
// Bit-count lookup table: g_hamming_dist[i] is the number of set bits in the 8-bit value i
// (entry 0 is 0, entry 255 is 8).
// NOTE(review): presumably indexed with (a ^ b) to get the Hamming distance between two bytes - confirm at call sites.
uint8_t g_hamming_dist[256] =
{
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
63 | |
// This is a Public Domain 8x8 font from here:
// https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h
//
// The table holds one 8-byte glyph per code point from U+0020 through U+007F (96 glyphs);
// the glyph for character c is therefore at index (c - 32).
// NOTE(review): per the font8x8 project each byte is presumably one pixel row (top to bottom)
// with the least significant bit as the leftmost pixel - confirm against the text-drawing code.
const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] =
{
	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( )
	{ 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!)
	{ 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0022 (")
	{ 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00}, // U+0023 (#)
	{ 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00}, // U+0024 ($)
	{ 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00}, // U+0025 (%)
	{ 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00}, // U+0026 (&)
	{ 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0027 (')
	{ 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00}, // U+0028 (()
	{ 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00}, // U+0029 ())
	{ 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00}, // U+002A (*)
	{ 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00}, // U+002B (+)
	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+002C (,)
	{ 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00}, // U+002D (-)
	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+002E (.)
	{ 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00}, // U+002F (/)
	{ 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00}, // U+0030 (0)
	{ 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00}, // U+0031 (1)
	{ 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00}, // U+0032 (2)
	{ 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00}, // U+0033 (3)
	{ 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00}, // U+0034 (4)
	{ 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00}, // U+0035 (5)
	{ 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00}, // U+0036 (6)
	{ 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00}, // U+0037 (7)
	{ 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00}, // U+0038 (8)
	{ 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00}, // U+0039 (9)
	{ 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+003A (:)
	{ 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+003B (;)
	{ 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00}, // U+003C (<)
	{ 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00}, // U+003D (=)
	{ 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00}, // U+003E (>)
	{ 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00}, // U+003F (?)
	{ 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00}, // U+0040 (@)
	{ 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00}, // U+0041 (A)
	{ 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00}, // U+0042 (B)
	{ 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00}, // U+0043 (C)
	{ 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00}, // U+0044 (D)
	{ 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00}, // U+0045 (E)
	{ 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00}, // U+0046 (F)
	{ 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00}, // U+0047 (G)
	{ 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00}, // U+0048 (H)
	{ 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0049 (I)
	{ 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00}, // U+004A (J)
	{ 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00}, // U+004B (K)
	{ 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00}, // U+004C (L)
	{ 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00}, // U+004D (M)
	{ 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00}, // U+004E (N)
	{ 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00}, // U+004F (O)
	{ 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00}, // U+0050 (P)
	{ 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00}, // U+0051 (Q)
	{ 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00}, // U+0052 (R)
	{ 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00}, // U+0053 (S)
	{ 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0054 (T)
	{ 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00}, // U+0055 (U)
	{ 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0056 (V)
	{ 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00}, // U+0057 (W)
	{ 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00}, // U+0058 (X)
	{ 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00}, // U+0059 (Y)
	{ 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00}, // U+005A (Z)
	{ 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00}, // U+005B ([)
	{ 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00}, // U+005C (\)
	{ 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00}, // U+005D (])
	{ 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00}, // U+005E (^)
	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF}, // U+005F (_)
	{ 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0060 (`)
	{ 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00}, // U+0061 (a)
	{ 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00}, // U+0062 (b)
	{ 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00}, // U+0063 (c)
	{ 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00}, // U+0064 (d)
	{ 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00}, // U+0065 (e)
	{ 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00}, // U+0066 (f)
	{ 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0067 (g)
	{ 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00}, // U+0068 (h)
	{ 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0069 (i)
	{ 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E}, // U+006A (j)
	{ 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00}, // U+006B (k)
	{ 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+006C (l)
	{ 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00}, // U+006D (m)
	{ 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00}, // U+006E (n)
	{ 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00}, // U+006F (o)
	{ 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F}, // U+0070 (p)
	{ 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78}, // U+0071 (q)
	{ 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00}, // U+0072 (r)
	{ 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00}, // U+0073 (s)
	{ 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00}, // U+0074 (t)
	{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00}, // U+0075 (u)
	{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0076 (v)
	{ 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00}, // U+0077 (w)
	{ 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00}, // U+0078 (x)
	{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0079 (y)
	{ 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00}, // U+007A (z)
	{ 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 0x00}, // U+007B ({)
	{ 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00}, // U+007C (|)
	{ 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00}, // U+007D (})
	{ 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~)
	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F
};
165 | |
// True once basisu_encoder_init() has completed; cleared again by basisu_encoder_deinit().
bool g_library_initialized;
// Locked by basisu_encoder_init() for the whole of its body.
std::mutex g_encoder_init_mutex;
168 | |
169 | // Encoder library initialization (just call once at startup) |
170 | void basisu_encoder_init(bool use_opencl, bool opencl_force_serialization) |
171 | { |
172 | std::lock_guard<std::mutex> lock(g_encoder_init_mutex); |
173 | |
174 | if (g_library_initialized) |
175 | return; |
176 | |
177 | detect_sse41(); |
178 | |
179 | basist::basisu_transcoder_init(); |
180 | pack_etc1_solid_color_init(); |
181 | //uastc_init(); |
182 | bc7enc_compress_block_init(); // must be after uastc_init() |
183 | |
184 | // Don't bother initializing the OpenCL module at all if it's been completely disabled. |
185 | if (use_opencl) |
186 | { |
187 | opencl_init(opencl_force_serialization); |
188 | } |
189 | |
190 | interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports |
191 | |
192 | g_library_initialized = true; |
193 | } |
194 | |
195 | void basisu_encoder_deinit() |
196 | { |
197 | opencl_deinit(); |
198 | |
199 | g_library_initialized = false; |
200 | } |
201 | |
// Formats a message printf-style and writes it to stderr with an "ERROR: " prefix.
//   pFmt - printf-style format string.
//   args - argument list matching pFmt.
// Messages longer than the internal 8192-byte buffer are truncated.
void error_vprintf(const char* pFmt, va_list args)
{
	char buf[8192];
	buf[0] = '\0'; // keep the buffer a valid string even if formatting fails outright

	// vsnprintf() truncates over-long output on every platform. The previous Windows-only
	// vsprintf_s() call instead invokes the CRT invalid-parameter handler (normally terminating
	// the process) when the message does not fit, turning a long error message into a crash.
	vsnprintf(buf, sizeof(buf), pFmt, args);
	buf[sizeof(buf) - 1] = '\0';

	fprintf(stderr, "ERROR: %s", buf);
}
214 | |
215 | void error_printf(const char *pFmt, ...) |
216 | { |
217 | va_list args; |
218 | va_start(args, pFmt); |
219 | error_vprintf(pFmt, args); |
220 | va_end(args); |
221 | } |
222 | |
223 | #if defined(_WIN32) |
224 | inline void query_counter(timer_ticks* pTicks) |
225 | { |
226 | QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(pTicks)); |
227 | } |
228 | inline void query_counter_frequency(timer_ticks* pTicks) |
229 | { |
230 | QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks)); |
231 | } |
232 | #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__EMSCRIPTEN__) |
233 | #include <sys/time.h> |
234 | inline void query_counter(timer_ticks* pTicks) |
235 | { |
236 | struct timeval cur_time; |
237 | gettimeofday(&cur_time, NULL); |
238 | *pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec); |
239 | } |
240 | inline void query_counter_frequency(timer_ticks* pTicks) |
241 | { |
242 | *pTicks = 1000000; |
243 | } |
244 | #elif defined(__GNUC__) |
245 | #include <sys/timex.h> |
246 | inline void query_counter(timer_ticks* pTicks) |
247 | { |
248 | struct timeval cur_time; |
249 | gettimeofday(&cur_time, NULL); |
250 | *pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec); |
251 | } |
252 | inline void query_counter_frequency(timer_ticks* pTicks) |
253 | { |
254 | *pTicks = 1000000; |
255 | } |
256 | #else |
257 | #error TODO |
258 | #endif |
259 | |
260 | interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) |
261 | { |
262 | if (!g_timer_freq) |
263 | init(); |
264 | } |
265 | |
266 | void interval_timer::start() |
267 | { |
268 | query_counter(&m_start_time); |
269 | m_started = true; |
270 | m_stopped = false; |
271 | } |
272 | |
273 | void interval_timer::stop() |
274 | { |
275 | assert(m_started); |
276 | query_counter(&m_stop_time); |
277 | m_stopped = true; |
278 | } |
279 | |
280 | double interval_timer::get_elapsed_secs() const |
281 | { |
282 | assert(m_started); |
283 | if (!m_started) |
284 | return 0; |
285 | |
286 | timer_ticks stop_time = m_stop_time; |
287 | if (!m_stopped) |
288 | query_counter(&stop_time); |
289 | |
290 | timer_ticks delta = stop_time - m_start_time; |
291 | return delta * g_timer_freq; |
292 | } |
293 | |
294 | void interval_timer::init() |
295 | { |
296 | if (!g_timer_freq) |
297 | { |
298 | query_counter_frequency(&g_freq); |
299 | g_timer_freq = 1.0f / g_freq; |
300 | query_counter(&g_init_ticks); |
301 | } |
302 | } |
303 | |
304 | timer_ticks interval_timer::get_ticks() |
305 | { |
306 | if (!g_timer_freq) |
307 | init(); |
308 | timer_ticks ticks; |
309 | query_counter(&ticks); |
310 | return ticks - g_init_ticks; |
311 | } |
312 | |
313 | double interval_timer::ticks_to_secs(timer_ticks ticks) |
314 | { |
315 | if (!g_timer_freq) |
316 | init(); |
317 | return ticks * g_timer_freq; |
318 | } |
319 | |
320 | const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; |
321 | |
322 | bool load_tga(const char* pFilename, image& img) |
323 | { |
324 | int w = 0, h = 0, n_chans = 0; |
325 | uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); |
326 | |
327 | if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) |
328 | { |
329 | error_printf("Failed loading .TGA image \"%s\"!\n" , pFilename); |
330 | |
331 | if (pImage_data) |
332 | free(pImage_data); |
333 | |
334 | return false; |
335 | } |
336 | |
337 | if (sizeof(void *) == sizeof(uint32_t)) |
338 | { |
339 | if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) |
340 | { |
341 | error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n" , pFilename, w, h); |
342 | |
343 | if (pImage_data) |
344 | free(pImage_data); |
345 | |
346 | return false; |
347 | } |
348 | } |
349 | |
350 | img.resize(w, h); |
351 | |
352 | const uint8_t *pSrc = pImage_data; |
353 | for (int y = 0; y < h; y++) |
354 | { |
355 | color_rgba *pDst = &img(0, y); |
356 | |
357 | for (int x = 0; x < w; x++) |
358 | { |
359 | pDst->r = pSrc[0]; |
360 | pDst->g = pSrc[1]; |
361 | pDst->b = pSrc[2]; |
362 | pDst->a = (n_chans == 3) ? 255 : pSrc[3]; |
363 | |
364 | pSrc += n_chans; |
365 | ++pDst; |
366 | } |
367 | } |
368 | |
369 | free(pImage_data); |
370 | |
371 | return true; |
372 | } |
373 | |
374 | bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename) |
375 | { |
376 | interval_timer tm; |
377 | tm.start(); |
378 | |
379 | if (!buf_size) |
380 | return false; |
381 | |
382 | uint32_t width = 0, height = 0, num_chans = 0; |
383 | void* pImage = pv_png::load_png(pBuf, buf_size, 4, width, height, num_chans); |
384 | if (!pBuf) |
385 | { |
386 | error_printf("pv_png::load_png failed while loading image \"%s\"\n" , pFilename); |
387 | return false; |
388 | } |
389 | |
390 | img.grant_ownership(reinterpret_cast<color_rgba*>(pImage), width, height); |
391 | |
392 | //debug_printf("Total load_png() time: %3.3f secs\n", tm.get_elapsed_secs()); |
393 | |
394 | return true; |
395 | } |
396 | |
397 | bool load_png(const char* pFilename, image& img) |
398 | { |
399 | uint8_vec buffer; |
400 | if (!read_file_to_vec(pFilename, buffer)) |
401 | { |
402 | error_printf("load_png: Failed reading file \"%s\"!\n" , pFilename); |
403 | return false; |
404 | } |
405 | |
406 | return load_png(buffer.data(), buffer.size(), img, pFilename); |
407 | } |
408 | |
409 | bool load_jpg(const char *pFilename, image& img) |
410 | { |
411 | int width = 0, height = 0, actual_comps = 0; |
412 | uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); |
413 | if (!pImage_data) |
414 | return false; |
415 | |
416 | img.init(pImage_data, width, height, 4); |
417 | |
418 | free(pImage_data); |
419 | |
420 | return true; |
421 | } |
422 | |
423 | bool load_image(const char* pFilename, image& img) |
424 | { |
425 | std::string ext(string_get_extension(std::string(pFilename))); |
426 | |
427 | if (ext.length() == 0) |
428 | return false; |
429 | |
430 | const char *pExt = ext.c_str(); |
431 | |
432 | if (strcasecmp(pExt, "png" ) == 0) |
433 | return load_png(pFilename, img); |
434 | if (strcasecmp(pExt, "tga" ) == 0) |
435 | return load_tga(pFilename, img); |
436 | if ( (strcasecmp(pExt, "jpg" ) == 0) || (strcasecmp(pExt, "jfif" ) == 0) || (strcasecmp(pExt, "jpeg" ) == 0) ) |
437 | return load_jpg(pFilename, img); |
438 | |
439 | return false; |
440 | } |
441 | |
442 | bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) |
443 | { |
444 | if (!img.get_total_pixels()) |
445 | return false; |
446 | |
447 | void* pPNG_data = nullptr; |
448 | size_t PNG_data_size = 0; |
449 | |
450 | if (image_save_flags & cImageSaveGrayscale) |
451 | { |
452 | uint8_vec g_pixels(img.get_total_pixels()); |
453 | uint8_t* pDst = &g_pixels[0]; |
454 | |
455 | for (uint32_t y = 0; y < img.get_height(); y++) |
456 | for (uint32_t x = 0; x < img.get_width(); x++) |
457 | *pDst++ = img(x, y)[grayscale_comp]; |
458 | |
459 | pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(g_pixels.data(), img.get_width(), img.get_height(), 1, &PNG_data_size, 1, false); |
460 | } |
461 | else |
462 | { |
463 | bool has_alpha = false; |
464 | |
465 | if ((image_save_flags & cImageSaveIgnoreAlpha) == 0) |
466 | has_alpha = img.has_alpha(); |
467 | |
468 | if (!has_alpha) |
469 | { |
470 | uint8_vec rgb_pixels(img.get_total_pixels() * 3); |
471 | uint8_t* pDst = &rgb_pixels[0]; |
472 | |
473 | for (uint32_t y = 0; y < img.get_height(); y++) |
474 | { |
475 | const color_rgba* pSrc = &img(0, y); |
476 | for (uint32_t x = 0; x < img.get_width(); x++) |
477 | { |
478 | pDst[0] = pSrc->r; |
479 | pDst[1] = pSrc->g; |
480 | pDst[2] = pSrc->b; |
481 | |
482 | pSrc++; |
483 | pDst += 3; |
484 | } |
485 | } |
486 | |
487 | pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(rgb_pixels.data(), img.get_width(), img.get_height(), 3, &PNG_data_size, 1, false); |
488 | } |
489 | else |
490 | { |
491 | pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(img.get_ptr(), img.get_width(), img.get_height(), 4, &PNG_data_size, 1, false); |
492 | } |
493 | } |
494 | |
495 | if (!pPNG_data) |
496 | return false; |
497 | |
498 | bool status = write_data_to_file(pFilename, pPNG_data, PNG_data_size); |
499 | if (!status) |
500 | { |
501 | error_printf("save_png: Failed writing to filename \"%s\"!\n" , pFilename); |
502 | } |
503 | |
504 | free(pPNG_data); |
505 | |
506 | return status; |
507 | } |
508 | |
509 | bool read_file_to_vec(const char* pFilename, uint8_vec& data) |
510 | { |
511 | FILE* pFile = nullptr; |
512 | #ifdef _WIN32 |
513 | fopen_s(&pFile, pFilename, "rb" ); |
514 | #else |
515 | pFile = fopen(pFilename, "rb" ); |
516 | #endif |
517 | if (!pFile) |
518 | return false; |
519 | |
520 | fseek(pFile, 0, SEEK_END); |
521 | #ifdef _WIN32 |
522 | int64_t filesize = _ftelli64(pFile); |
523 | #else |
524 | int64_t filesize = ftello(pFile); |
525 | #endif |
526 | if (filesize < 0) |
527 | { |
528 | fclose(pFile); |
529 | return false; |
530 | } |
531 | fseek(pFile, 0, SEEK_SET); |
532 | |
533 | if (sizeof(size_t) == sizeof(uint32_t)) |
534 | { |
535 | if (filesize > 0x70000000) |
536 | { |
537 | // File might be too big to load safely in one alloc |
538 | fclose(pFile); |
539 | return false; |
540 | } |
541 | } |
542 | |
543 | if (!data.try_resize((size_t)filesize)) |
544 | { |
545 | fclose(pFile); |
546 | return false; |
547 | } |
548 | |
549 | if (filesize) |
550 | { |
551 | if (fread(&data[0], 1, (size_t)filesize, pFile) != (size_t)filesize) |
552 | { |
553 | fclose(pFile); |
554 | return false; |
555 | } |
556 | } |
557 | |
558 | fclose(pFile); |
559 | return true; |
560 | } |
561 | |
// Writes len bytes starting at pData to pFilename, creating or truncating the file.
// A zero len produces an empty file. Returns false if the file can't be opened, if not every
// byte could be written, or if closing the file fails.
bool write_data_to_file(const char* pFilename, const void* pData, size_t len)
{
	FILE* pOut = nullptr;
#ifdef _WIN32
	fopen_s(&pOut, pFilename, "wb");
#else
	pOut = fopen(pFilename, "wb");
#endif
	if (pOut == nullptr)
		return false;

	// Nothing is written for an empty payload; otherwise every byte must make it out.
	const bool wrote_all = (len == 0) || (fwrite(pData, 1, len, pOut) == len);

	// The file is closed on both paths; a failed close only matters if the write succeeded.
	const bool closed_ok = (fclose(pOut) != EOF);

	return wrote_all && closed_ok;
}
584 | |
585 | float linear_to_srgb(float l) |
586 | { |
587 | assert(l >= 0.0f && l <= 1.0f); |
588 | if (l < .0031308f) |
589 | return saturate(l * 12.92f); |
590 | else |
591 | return saturate(1.055f * powf(l, 1.0f/2.4f) - .055f); |
592 | } |
593 | |
594 | float srgb_to_linear(float s) |
595 | { |
596 | assert(s >= 0.0f && s <= 1.0f); |
597 | if (s < .04045f) |
598 | return saturate(s * (1.0f/12.92f)); |
599 | else |
600 | return saturate(powf((s + .055f) * (1.0f/1.055f), 2.4f)); |
601 | } |
602 | |
603 | bool image_resample(const image &src, image &dst, bool srgb, |
604 | const char *pFilter, float filter_scale, |
605 | bool wrapping, |
606 | uint32_t first_comp, uint32_t num_comps) |
607 | { |
608 | assert((first_comp + num_comps) <= 4); |
609 | |
610 | const int cMaxComps = 4; |
611 | |
612 | const uint32_t src_w = src.get_width(), src_h = src.get_height(); |
613 | const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); |
614 | |
615 | if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) |
616 | { |
617 | printf("Image is too large!\n" ); |
618 | return false; |
619 | } |
620 | |
621 | if (!src_w || !src_h || !dst_w || !dst_h) |
622 | return false; |
623 | |
624 | if ((num_comps < 1) || (num_comps > cMaxComps)) |
625 | return false; |
626 | |
627 | if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) |
628 | { |
629 | printf("Image is too large!\n" ); |
630 | return false; |
631 | } |
632 | |
633 | if ((src_w == dst_w) && (src_h == dst_h)) |
634 | { |
635 | dst = src; |
636 | return true; |
637 | } |
638 | |
639 | float srgb_to_linear_table[256]; |
640 | if (srgb) |
641 | { |
642 | for (int i = 0; i < 256; ++i) |
643 | srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f)); |
644 | } |
645 | |
646 | const int LINEAR_TO_SRGB_TABLE_SIZE = 8192; |
647 | uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE]; |
648 | |
649 | if (srgb) |
650 | { |
651 | for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i) |
652 | linear_to_srgb_table[i] = (uint8_t)clamp<int>((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255); |
653 | } |
654 | |
655 | std::vector<float> samples[cMaxComps]; |
656 | Resampler *resamplers[cMaxComps]; |
657 | |
658 | resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, |
659 | wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, |
660 | pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); |
661 | samples[0].resize(src_w); |
662 | |
663 | for (uint32_t i = 1; i < num_comps; ++i) |
664 | { |
665 | resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h, |
666 | wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, |
667 | pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); |
668 | samples[i].resize(src_w); |
669 | } |
670 | |
671 | uint32_t dst_y = 0; |
672 | |
673 | for (uint32_t src_y = 0; src_y < src_h; ++src_y) |
674 | { |
675 | const color_rgba *pSrc = &src(0, src_y); |
676 | |
677 | // Put source lines into resampler(s) |
678 | for (uint32_t x = 0; x < src_w; ++x) |
679 | { |
680 | for (uint32_t c = 0; c < num_comps; ++c) |
681 | { |
682 | const uint32_t comp_index = first_comp + c; |
683 | const uint32_t v = (*pSrc)[comp_index]; |
684 | |
685 | if (!srgb || (comp_index == 3)) |
686 | samples[c][x] = v * (1.0f / 255.0f); |
687 | else |
688 | samples[c][x] = srgb_to_linear_table[v]; |
689 | } |
690 | |
691 | pSrc++; |
692 | } |
693 | |
694 | for (uint32_t c = 0; c < num_comps; ++c) |
695 | { |
696 | if (!resamplers[c]->put_line(&samples[c][0])) |
697 | { |
698 | for (uint32_t i = 0; i < num_comps; i++) |
699 | delete resamplers[i]; |
700 | return false; |
701 | } |
702 | } |
703 | |
704 | // Now retrieve any output lines |
705 | for (;;) |
706 | { |
707 | uint32_t c; |
708 | for (c = 0; c < num_comps; ++c) |
709 | { |
710 | const uint32_t comp_index = first_comp + c; |
711 | |
712 | const float *pOutput_samples = resamplers[c]->get_line(); |
713 | if (!pOutput_samples) |
714 | break; |
715 | |
716 | const bool linear_flag = !srgb || (comp_index == 3); |
717 | |
718 | color_rgba *pDst = &dst(0, dst_y); |
719 | |
720 | for (uint32_t x = 0; x < dst_w; x++) |
721 | { |
722 | // TODO: Add dithering |
723 | if (linear_flag) |
724 | { |
725 | int j = (int)(255.0f * pOutput_samples[x] + .5f); |
726 | (*pDst)[comp_index] = (uint8_t)clamp<int>(j, 0, 255); |
727 | } |
728 | else |
729 | { |
730 | int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f); |
731 | (*pDst)[comp_index] = linear_to_srgb_table[clamp<int>(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 1)]; |
732 | } |
733 | |
734 | pDst++; |
735 | } |
736 | } |
737 | if (c < num_comps) |
738 | break; |
739 | |
740 | ++dst_y; |
741 | } |
742 | } |
743 | |
744 | for (uint32_t i = 0; i < num_comps; ++i) |
745 | delete resamplers[i]; |
746 | |
747 | return true; |
748 | } |
749 | |
750 | void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms) |
751 | { |
752 | // See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen |
753 | if (!num_syms) |
754 | return; |
755 | |
756 | if (1 == num_syms) |
757 | { |
758 | A[0].m_key = 1; |
759 | return; |
760 | } |
761 | |
762 | A[0].m_key += A[1].m_key; |
763 | |
764 | int s = 2, r = 0, next; |
765 | for (next = 1; next < (num_syms - 1); ++next) |
766 | { |
767 | if ((s >= num_syms) || (A[r].m_key < A[s].m_key)) |
768 | { |
769 | A[next].m_key = A[r].m_key; |
770 | A[r].m_key = next; |
771 | ++r; |
772 | } |
773 | else |
774 | { |
775 | A[next].m_key = A[s].m_key; |
776 | ++s; |
777 | } |
778 | |
779 | if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key)) |
780 | { |
781 | A[next].m_key = A[next].m_key + A[r].m_key; |
782 | A[r].m_key = next; |
783 | ++r; |
784 | } |
785 | else |
786 | { |
787 | A[next].m_key = A[next].m_key + A[s].m_key; |
788 | ++s; |
789 | } |
790 | } |
791 | A[num_syms - 2].m_key = 0; |
792 | |
793 | for (next = num_syms - 3; next >= 0; --next) |
794 | { |
795 | A[next].m_key = 1 + A[A[next].m_key].m_key; |
796 | } |
797 | |
798 | int num_avail = 1, num_used = 0, depth = 0; |
799 | r = num_syms - 2; |
800 | next = num_syms - 1; |
801 | while (num_avail > 0) |
802 | { |
803 | for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r ) |
804 | ; |
805 | |
806 | for ( ; num_avail > num_used; --next, --num_avail) |
807 | A[next].m_key = depth; |
808 | |
809 | num_avail = 2 * num_used; |
810 | num_used = 0; |
811 | ++depth; |
812 | } |
813 | } |
814 | |
815 | void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) |
816 | { |
817 | int i; |
818 | uint32_t total = 0; |
819 | if (code_list_len <= 1) |
820 | return; |
821 | |
822 | for (i = max_code_size + 1; i <= cHuffmanMaxSupportedInternalCodeSize; i++) |
823 | pNum_codes[max_code_size] += pNum_codes[i]; |
824 | |
825 | for (i = max_code_size; i > 0; i--) |
826 | total += (((uint32_t)pNum_codes[i]) << (max_code_size - i)); |
827 | |
828 | while (total != (1UL << max_code_size)) |
829 | { |
830 | pNum_codes[max_code_size]--; |
831 | for (i = max_code_size - 1; i > 0; i--) |
832 | { |
833 | if (pNum_codes[i]) |
834 | { |
835 | pNum_codes[i]--; |
836 | pNum_codes[i + 1] += 2; |
837 | break; |
838 | } |
839 | } |
840 | |
841 | total--; |
842 | } |
843 | } |
844 | |
845 | sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1) |
846 | { |
847 | uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2]; |
848 | sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; |
849 | |
850 | clear_obj(hist); |
851 | |
852 | for (i = 0; i < num_syms; i++) |
853 | { |
854 | uint32_t freq = pSyms0[i].m_key; |
855 | |
856 | // We scale all input frequencies to 16-bits. |
857 | assert(freq <= UINT16_MAX); |
858 | |
859 | hist[freq & 0xFF]++; |
860 | hist[256 + ((freq >> 8) & 0xFF)]++; |
861 | } |
862 | |
863 | while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) |
864 | total_passes--; |
865 | |
866 | for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) |
867 | { |
868 | const uint32_t *pHist = &hist[pass << 8]; |
869 | uint32_t offsets[256], cur_ofs = 0; |
870 | for (i = 0; i < 256; i++) |
871 | { |
872 | offsets[i] = cur_ofs; |
873 | cur_ofs += pHist[i]; |
874 | } |
875 | |
876 | for (i = 0; i < num_syms; i++) |
877 | pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; |
878 | |
879 | sym_freq *t = pCur_syms; |
880 | pCur_syms = pNew_syms; |
881 | pNew_syms = t; |
882 | } |
883 | |
884 | return pCur_syms; |
885 | } |
886 | |
887 | bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size) |
888 | { |
889 | if (max_code_size > cHuffmanMaxSupportedCodeSize) |
890 | return false; |
891 | if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) |
892 | return false; |
893 | |
894 | uint32_t total_used_syms = 0; |
895 | for (uint32_t i = 0; i < num_syms; i++) |
896 | if (pFreq[i]) |
897 | total_used_syms++; |
898 | |
899 | if (!total_used_syms) |
900 | return false; |
901 | |
902 | std::vector<sym_freq> sym_freq0(total_used_syms), sym_freq1(total_used_syms); |
903 | for (uint32_t i = 0, j = 0; i < num_syms; i++) |
904 | { |
905 | if (pFreq[i]) |
906 | { |
907 | sym_freq0[j].m_key = pFreq[i]; |
908 | sym_freq0[j++].m_sym_index = static_cast<uint16_t>(i); |
909 | } |
910 | } |
911 | |
912 | sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]); |
913 | |
914 | canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms); |
915 | |
916 | int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1]; |
917 | clear_obj(num_codes); |
918 | |
919 | for (uint32_t i = 0; i < total_used_syms; i++) |
920 | { |
921 | if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize) |
922 | return false; |
923 | |
924 | num_codes[pSym_freq[i].m_key]++; |
925 | } |
926 | |
927 | canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size); |
928 | |
929 | m_code_sizes.resize(0); |
930 | m_code_sizes.resize(num_syms); |
931 | |
932 | m_codes.resize(0); |
933 | m_codes.resize(num_syms); |
934 | |
935 | for (uint32_t i = 1, j = total_used_syms; i <= max_code_size; i++) |
936 | for (uint32_t l = num_codes[i]; l > 0; l--) |
937 | m_code_sizes[pSym_freq[--j].m_sym_index] = static_cast<uint8_t>(i); |
938 | |
939 | uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1]; |
940 | |
941 | next_code[1] = 0; |
942 | for (uint32_t j = 0, i = 2; i <= max_code_size; i++) |
943 | next_code[i] = j = ((j + num_codes[i - 1]) << 1); |
944 | |
945 | for (uint32_t i = 0; i < num_syms; i++) |
946 | { |
947 | uint32_t rev_code = 0, code, code_size; |
948 | if ((code_size = m_code_sizes[i]) == 0) |
949 | continue; |
950 | if (code_size > cHuffmanMaxSupportedInternalCodeSize) |
951 | return false; |
952 | code = next_code[code_size]++; |
953 | for (uint32_t l = code_size; l > 0; l--, code >>= 1) |
954 | rev_code = (rev_code << 1) | (code & 1); |
955 | m_codes[i] = static_cast<uint16_t>(rev_code); |
956 | } |
957 | |
958 | return true; |
959 | } |
960 | |
961 | bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size) |
962 | { |
963 | if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) |
964 | return false; |
965 | |
966 | uint16_vec sym_freq(num_syms); |
967 | |
968 | uint32_t max_freq = 0; |
969 | for (uint32_t i = 0; i < num_syms; i++) |
970 | max_freq = maximum(max_freq, pSym_freq[i]); |
971 | |
972 | if (max_freq < UINT16_MAX) |
973 | { |
974 | for (uint32_t i = 0; i < num_syms; i++) |
975 | sym_freq[i] = static_cast<uint16_t>(pSym_freq[i]); |
976 | } |
977 | else |
978 | { |
979 | for (uint32_t i = 0; i < num_syms; i++) |
980 | { |
981 | if (pSym_freq[i]) |
982 | { |
983 | uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq); |
984 | sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534)); |
985 | } |
986 | } |
987 | } |
988 | |
989 | return init(num_syms, &sym_freq[0], max_code_size); |
990 | } |
991 | |
992 | void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len) |
993 | { |
994 | if (run_size) |
995 | { |
996 | if (run_size < cHuffmanSmallRepeatSizeMin) |
997 | { |
998 | while (run_size--) |
999 | syms.push_back(static_cast<uint16_t>(len)); |
1000 | } |
1001 | else if (run_size <= cHuffmanSmallRepeatSizeMax) |
1002 | { |
1003 | syms.push_back(static_cast<uint16_t>(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6))); |
1004 | } |
1005 | else |
1006 | { |
1007 | assert((run_size >= cHuffmanBigRepeatSizeMin) && (run_size <= cHuffmanBigRepeatSizeMax)); |
1008 | syms.push_back(static_cast<uint16_t>(cHuffmanBigRepeatCode | ((run_size - cHuffmanBigRepeatSizeMin) << 6))); |
1009 | } |
1010 | } |
1011 | |
1012 | run_size = 0; |
1013 | } |
1014 | |
1015 | void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size) |
1016 | { |
1017 | if (run_size) |
1018 | { |
1019 | if (run_size < cHuffmanSmallZeroRunSizeMin) |
1020 | { |
1021 | while (run_size--) |
1022 | syms.push_back(0); |
1023 | } |
1024 | else if (run_size <= cHuffmanSmallZeroRunSizeMax) |
1025 | { |
1026 | syms.push_back(static_cast<uint16_t>(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6))); |
1027 | } |
1028 | else |
1029 | { |
1030 | assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax)); |
1031 | syms.push_back(static_cast<uint16_t>(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6))); |
1032 | } |
1033 | } |
1034 | |
1035 | run_size = 0; |
1036 | } |
1037 | |
1038 | uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab) |
1039 | { |
1040 | const uint64_t start_bits = m_total_bits; |
1041 | |
1042 | const uint8_vec &code_sizes = tab.get_code_sizes(); |
1043 | |
1044 | uint32_t total_used = tab.get_total_used_codes(); |
1045 | put_bits(total_used, cHuffmanMaxSymsLog2); |
1046 | |
1047 | if (!total_used) |
1048 | return 0; |
1049 | |
1050 | uint16_vec syms; |
1051 | syms.reserve(total_used + 16); |
1052 | |
1053 | uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0; |
1054 | |
1055 | for (uint32_t i = 0; i <= total_used; ++i) |
1056 | { |
1057 | const uint32_t code_len = (i == total_used) ? 0xFF : code_sizes[i]; |
1058 | assert((code_len == 0xFF) || (code_len <= 16)); |
1059 | |
1060 | if (code_len) |
1061 | { |
1062 | end_zero_run(syms, zero_run_size); |
1063 | |
1064 | if (code_len != prev_code_len) |
1065 | { |
1066 | end_nonzero_run(syms, nonzero_run_size, prev_code_len); |
1067 | if (code_len != 0xFF) |
1068 | syms.push_back(static_cast<uint16_t>(code_len)); |
1069 | } |
1070 | else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax) |
1071 | end_nonzero_run(syms, nonzero_run_size, prev_code_len); |
1072 | } |
1073 | else |
1074 | { |
1075 | end_nonzero_run(syms, nonzero_run_size, prev_code_len); |
1076 | |
1077 | if (++zero_run_size == cHuffmanBigZeroRunSizeMax) |
1078 | end_zero_run(syms, zero_run_size); |
1079 | } |
1080 | |
1081 | prev_code_len = code_len; |
1082 | } |
1083 | |
1084 | histogram h(cHuffmanTotalCodelengthCodes); |
1085 | for (uint32_t i = 0; i < syms.size(); i++) |
1086 | h.inc(syms[i] & 63); |
1087 | |
1088 | huffman_encoding_table ct; |
1089 | if (!ct.init(h, 7)) |
1090 | return 0; |
1091 | |
1092 | assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes); |
1093 | |
1094 | uint32_t total_codelength_codes; |
1095 | for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--) |
1096 | if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]]) |
1097 | break; |
1098 | |
1099 | assert(total_codelength_codes); |
1100 | |
1101 | put_bits(total_codelength_codes, 5); |
1102 | for (uint32_t i = 0; i < total_codelength_codes; i++) |
1103 | put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3); |
1104 | |
1105 | for (uint32_t i = 0; i < syms.size(); ++i) |
1106 | { |
1107 | const uint32_t l = syms[i] & 63, e = syms[i] >> 6; |
1108 | |
1109 | put_code(l, ct); |
1110 | |
1111 | if (l == cHuffmanSmallZeroRunCode) |
1112 | put_bits(e, cHuffmanSmallZeroRunExtraBits); |
1113 | else if (l == cHuffmanBigZeroRunCode) |
1114 | put_bits(e, cHuffmanBigZeroRunExtraBits); |
1115 | else if (l == cHuffmanSmallRepeatCode) |
1116 | put_bits(e, cHuffmanSmallRepeatExtraBits); |
1117 | else if (l == cHuffmanBigRepeatCode) |
1118 | put_bits(e, cHuffmanBigRepeatExtraBits); |
1119 | } |
1120 | |
1121 | return (uint32_t)(m_total_bits - start_bits); |
1122 | } |
1123 | |
1124 | bool huffman_test(int rand_seed) |
1125 | { |
1126 | histogram h(19); |
1127 | |
1128 | // Feed in a fibonacci sequence to force large codesizes |
1129 | h[0] += 1; h[1] += 1; h[2] += 2; h[3] += 3; |
1130 | h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21; |
1131 | h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144; |
1132 | h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987; |
1133 | h[16] += 1597; h[17] += 2584; h[18] += 4181; |
1134 | |
1135 | huffman_encoding_table etab; |
1136 | etab.init(h, 16); |
1137 | |
1138 | { |
1139 | bitwise_coder c; |
1140 | c.init(1024); |
1141 | |
1142 | c.emit_huffman_table(etab); |
1143 | for (int i = 0; i < 19; i++) |
1144 | c.put_code(i, etab); |
1145 | |
1146 | c.flush(); |
1147 | |
1148 | basist::bitwise_decoder d; |
1149 | d.init(&c.get_bytes()[0], static_cast<uint32_t>(c.get_bytes().size())); |
1150 | |
1151 | basist::huffman_decoding_table dtab; |
1152 | bool success = d.read_huffman_table(dtab); |
1153 | if (!success) |
1154 | { |
1155 | assert(0); |
1156 | printf("Failure 5\n" ); |
1157 | return false; |
1158 | } |
1159 | |
1160 | for (uint32_t i = 0; i < 19; i++) |
1161 | { |
1162 | uint32_t s = d.decode_huffman(dtab); |
1163 | if (s != i) |
1164 | { |
1165 | assert(0); |
1166 | printf("Failure 5\n" ); |
1167 | return false; |
1168 | } |
1169 | } |
1170 | } |
1171 | |
1172 | basisu::rand r; |
1173 | r.seed(rand_seed); |
1174 | |
1175 | for (int iter = 0; iter < 500000; iter++) |
1176 | { |
1177 | printf("%u\n" , iter); |
1178 | |
1179 | uint32_t max_sym = r.irand(0, 8193); |
1180 | uint32_t num_codes = r.irand(1, 10000); |
1181 | uint_vec syms(num_codes); |
1182 | |
1183 | for (uint32_t i = 0; i < num_codes; i++) |
1184 | { |
1185 | if (r.bit()) |
1186 | syms[i] = r.irand(0, max_sym); |
1187 | else |
1188 | { |
1189 | int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum<int>(1, max_sym / 2)) + .5f); |
1190 | s = basisu::clamp<int>(s, 0, max_sym); |
1191 | |
1192 | syms[i] = s; |
1193 | } |
1194 | |
1195 | } |
1196 | |
1197 | histogram h1(max_sym + 1); |
1198 | for (uint32_t i = 0; i < num_codes; i++) |
1199 | h1[syms[i]]++; |
1200 | |
1201 | huffman_encoding_table etab2; |
1202 | if (!etab2.init(h1, 16)) |
1203 | { |
1204 | assert(0); |
1205 | printf("Failed 0\n" ); |
1206 | return false; |
1207 | } |
1208 | |
1209 | bitwise_coder c; |
1210 | c.init(1024); |
1211 | |
1212 | c.emit_huffman_table(etab2); |
1213 | |
1214 | for (uint32_t i = 0; i < num_codes; i++) |
1215 | c.put_code(syms[i], etab2); |
1216 | |
1217 | c.flush(); |
1218 | |
1219 | basist::bitwise_decoder d; |
1220 | d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size()); |
1221 | |
1222 | basist::huffman_decoding_table dtab; |
1223 | bool success = d.read_huffman_table(dtab); |
1224 | if (!success) |
1225 | { |
1226 | assert(0); |
1227 | printf("Failed 2\n" ); |
1228 | return false; |
1229 | } |
1230 | |
1231 | for (uint32_t i = 0; i < num_codes; i++) |
1232 | { |
1233 | uint32_t s = d.decode_huffman(dtab); |
1234 | if (s != syms[i]) |
1235 | { |
1236 | assert(0); |
1237 | printf("Failed 4\n" ); |
1238 | return false; |
1239 | } |
1240 | } |
1241 | |
1242 | } |
1243 | return true; |
1244 | } |
1245 | |
1246 | void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) |
1247 | { |
1248 | assert((num_syms > 0) && (num_indices > 0)); |
1249 | assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f)); |
1250 | |
1251 | clear(); |
1252 | |
1253 | m_remap_table.resize(num_syms); |
1254 | m_entries_picked.reserve(num_syms); |
1255 | m_total_count_to_picked.resize(num_syms); |
1256 | |
1257 | if (num_indices <= 1) |
1258 | return; |
1259 | |
1260 | prepare_hist(num_syms, num_indices, pIndices); |
1261 | find_initial(num_syms); |
1262 | |
1263 | while (m_entries_to_do.size()) |
1264 | { |
1265 | // Find the best entry to move into the picked list. |
1266 | uint32_t best_entry; |
1267 | double best_count; |
1268 | find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight); |
1269 | |
1270 | // We now have chosen an entry to place in the picked list, now determine which side it goes on. |
1271 | const uint32_t entry_to_move = m_entries_to_do[best_entry]; |
1272 | |
1273 | float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight); |
1274 | |
1275 | // Put entry_to_move either on the "left" or "right" side of the picked entries |
1276 | if (side <= 0) |
1277 | m_entries_picked.push_back(entry_to_move); |
1278 | else |
1279 | m_entries_picked.insert(m_entries_picked.begin(), entry_to_move); |
1280 | |
1281 | // Erase best_entry from the todo list |
1282 | m_entries_to_do.erase(m_entries_to_do.begin() + best_entry); |
1283 | |
1284 | // We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry |
1285 | for (uint32_t i = 0; i < m_entries_to_do.size(); i++) |
1286 | m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], entry_to_move, num_syms); |
1287 | } |
1288 | |
1289 | for (uint32_t i = 0; i < num_syms; i++) |
1290 | m_remap_table[m_entries_picked[i]] = i; |
1291 | } |
1292 | |
1293 | void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices) |
1294 | { |
1295 | m_hist.resize(0); |
1296 | m_hist.resize(num_syms * num_syms); |
1297 | |
1298 | for (uint32_t i = 0; i < num_indices; i++) |
1299 | { |
1300 | const uint32_t idx = pIndices[i]; |
1301 | inc_hist(idx, (i < (num_indices - 1)) ? pIndices[i + 1] : -1, num_syms); |
1302 | inc_hist(idx, (i > 0) ? pIndices[i - 1] : -1, num_syms); |
1303 | } |
1304 | } |
1305 | |
1306 | void palette_index_reorderer::find_initial(uint32_t num_syms) |
1307 | { |
1308 | uint32_t max_count = 0, max_index = 0; |
1309 | for (uint32_t i = 0; i < num_syms * num_syms; i++) |
1310 | if (m_hist[i] > max_count) |
1311 | max_count = m_hist[i], max_index = i; |
1312 | |
1313 | uint32_t a = max_index / num_syms, b = max_index % num_syms; |
1314 | |
1315 | m_entries_picked.push_back(a); |
1316 | m_entries_picked.push_back(b); |
1317 | |
1318 | for (uint32_t i = 0; i < num_syms; i++) |
1319 | if ((i != b) && (i != a)) |
1320 | m_entries_to_do.push_back(i); |
1321 | |
1322 | for (uint32_t i = 0; i < m_entries_to_do.size(); i++) |
1323 | for (uint32_t j = 0; j < m_entries_picked.size(); j++) |
1324 | m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms); |
1325 | } |
1326 | |
1327 | void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) |
1328 | { |
1329 | best_entry = 0; |
1330 | best_count = 0; |
1331 | |
1332 | for (uint32_t i = 0; i < m_entries_to_do.size(); i++) |
1333 | { |
1334 | const uint32_t u = m_entries_to_do[i]; |
1335 | double total_count = m_total_count_to_picked[u]; |
1336 | |
1337 | if (pDist_func) |
1338 | { |
1339 | float w = maximum<float>((*pDist_func)(u, m_entries_picked.front(), pCtx), (*pDist_func)(u, m_entries_picked.back(), pCtx)); |
1340 | assert((w >= 0.0f) && (w <= 1.0f)); |
1341 | total_count = (total_count + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w); |
1342 | } |
1343 | |
1344 | if (total_count <= best_count) |
1345 | continue; |
1346 | |
1347 | best_entry = i; |
1348 | best_count = total_count; |
1349 | } |
1350 | } |
1351 | |
1352 | float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) |
1353 | { |
1354 | float which_side = 0; |
1355 | |
1356 | int l_count = 0, r_count = 0; |
1357 | for (uint32_t j = 0; j < m_entries_picked.size(); j++) |
1358 | { |
1359 | const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1)); |
1360 | which_side += static_cast<float>(r * count); |
1361 | if (r >= 0) |
1362 | l_count += r * count; |
1363 | else |
1364 | r_count += -r * count; |
1365 | } |
1366 | |
1367 | if (pDist_func) |
1368 | { |
1369 | float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx)); |
1370 | float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx)); |
1371 | which_side = w_left * l_count - w_right * r_count; |
1372 | } |
1373 | return which_side; |
1374 | } |
1375 | |
1376 | void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma) |
1377 | { |
1378 | assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); |
1379 | |
1380 | const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); |
1381 | const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); |
1382 | |
1383 | double hist[256]; |
1384 | clear_obj(hist); |
1385 | |
1386 | for (uint32_t y = 0; y < height; y++) |
1387 | { |
1388 | for (uint32_t x = 0; x < width; x++) |
1389 | { |
1390 | const color_rgba &ca = a(x, y), &cb = b(x, y); |
1391 | |
1392 | if (total_chans) |
1393 | { |
1394 | for (uint32_t c = 0; c < total_chans; c++) |
1395 | hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++; |
1396 | } |
1397 | else |
1398 | { |
1399 | if (use_601_luma) |
1400 | hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++; |
1401 | else |
1402 | hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++; |
1403 | } |
1404 | } |
1405 | } |
1406 | |
1407 | m_max = 0; |
1408 | double sum = 0.0f, sum2 = 0.0f; |
1409 | for (uint32_t i = 0; i < 256; i++) |
1410 | { |
1411 | if (hist[i]) |
1412 | { |
1413 | m_max = basisu::maximum<float>(m_max, (float)i); |
1414 | double v = i * hist[i]; |
1415 | sum += v; |
1416 | sum2 += i * v; |
1417 | } |
1418 | } |
1419 | |
1420 | double total_values = (double)width * (double)height; |
1421 | if (avg_comp_error) |
1422 | total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); |
1423 | |
1424 | m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0); |
1425 | m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0f * 255.0f); |
1426 | m_rms = (float)sqrt(m_mean_squared); |
1427 | m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0f, 0.0f, 100.0f) : 100.0f; |
1428 | } |
1429 | |
1430 | void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed) |
1431 | { |
1432 | rand r(seed); |
1433 | |
1434 | uint8_t *pDst = static_cast<uint8_t *>(pBuf); |
1435 | |
1436 | while (size >= sizeof(uint32_t)) |
1437 | { |
1438 | *(uint32_t *)pDst = r.urand32(); |
1439 | pDst += sizeof(uint32_t); |
1440 | size -= sizeof(uint32_t); |
1441 | } |
1442 | |
1443 | while (size) |
1444 | { |
1445 | *pDst++ = r.byte(); |
1446 | size--; |
1447 | } |
1448 | } |
1449 | |
// Hashes len bytes at pBuf to a 32-bit value. Returns 0 when pBuf is null or len is 0.
//
// The input is consumed four bytes at a time as two 16-bit words in host byte order, followed by a
// 1-3 byte tail and a final mixing sequence. In the tail, single bytes are treated as signed.
//
// The words are loaded with memcpy so pBuf may have any alignment, and the signed tail byte is
// shifted as an unsigned value to avoid left-shifting a negative int. Results are unchanged.
uint32_t hash_hsieh(const uint8_t *pBuf, size_t len)
{
	if (!pBuf || !len)
		return 0;

	uint32_t h = static_cast<uint32_t>(len);

	const uint32_t bytes_left = len & 3;
	len >>= 2;

	while (len--)
	{
		uint16_t words[2];
		memcpy(words, pBuf, sizeof(words));

		h += words[0];

		const uint32_t t = (words[1] << 11) ^ h;
		h = (h << 16) ^ t;

		pBuf += sizeof(uint32_t);

		h += h >> 11;
	}

	switch (bytes_left)
	{
	case 1:
		h += static_cast<signed char>(*pBuf);
		h ^= h << 10;
		h += h >> 1;
		break;
	case 2:
	{
		uint16_t w;
		memcpy(&w, pBuf, sizeof(w));
		h += w;
		h ^= h << 11;
		h += h >> 17;
		break;
	}
	case 3:
	{
		uint16_t w;
		memcpy(&w, pBuf, sizeof(w));
		h += w;
		h ^= h << 16;
		// Sign-extend the third byte to 32 bits first, then shift as unsigned.
		h ^= static_cast<uint32_t>(static_cast<int32_t>(static_cast<signed char>(pBuf[sizeof(uint16_t)]))) << 18;
		h += h >> 11;
		break;
	}
	default:
		break;
	}

	// Final mixing of the accumulated bits.
	h ^= h << 3;
	h += h >> 5;
	h ^= h << 4;
	h += h >> 17;
	h ^= h << 25;
	h += h >> 6;

	return h;
}
1505 | |
1506 | job_pool::job_pool(uint32_t num_threads) : |
1507 | m_num_active_jobs(0), |
1508 | m_kill_flag(false) |
1509 | { |
1510 | assert(num_threads >= 1U); |
1511 | |
1512 | debug_printf("job_pool::job_pool: %u total threads\n" , num_threads); |
1513 | |
1514 | if (num_threads > 1) |
1515 | { |
1516 | m_threads.resize(num_threads - 1); |
1517 | |
1518 | for (int i = 0; i < ((int)num_threads - 1); i++) |
1519 | m_threads[i] = std::thread([this, i] { job_thread(i); }); |
1520 | } |
1521 | } |
1522 | |
1523 | job_pool::~job_pool() |
1524 | { |
1525 | debug_printf("job_pool::~job_pool\n" ); |
1526 | |
1527 | // Notify all workers that they need to die right now. |
1528 | m_kill_flag = true; |
1529 | |
1530 | m_has_work.notify_all(); |
1531 | |
1532 | // Wait for all workers to die. |
1533 | for (uint32_t i = 0; i < m_threads.size(); i++) |
1534 | m_threads[i].join(); |
1535 | } |
1536 | |
1537 | void job_pool::add_job(const std::function<void()>& job) |
1538 | { |
1539 | std::unique_lock<std::mutex> lock(m_mutex); |
1540 | |
1541 | m_queue.emplace_back(job); |
1542 | |
1543 | const size_t queue_size = m_queue.size(); |
1544 | |
1545 | lock.unlock(); |
1546 | |
1547 | if (queue_size > 1) |
1548 | m_has_work.notify_one(); |
1549 | } |
1550 | |
1551 | void job_pool::add_job(std::function<void()>&& job) |
1552 | { |
1553 | std::unique_lock<std::mutex> lock(m_mutex); |
1554 | |
1555 | m_queue.emplace_back(std::move(job)); |
1556 | |
1557 | const size_t queue_size = m_queue.size(); |
1558 | |
1559 | lock.unlock(); |
1560 | |
1561 | if (queue_size > 1) |
1562 | { |
1563 | m_has_work.notify_one(); |
1564 | } |
1565 | } |
1566 | |
1567 | void job_pool::wait_for_all() |
1568 | { |
1569 | std::unique_lock<std::mutex> lock(m_mutex); |
1570 | |
1571 | // Drain the job queue on the calling thread. |
1572 | while (!m_queue.empty()) |
1573 | { |
1574 | std::function<void()> job(m_queue.back()); |
1575 | m_queue.pop_back(); |
1576 | |
1577 | lock.unlock(); |
1578 | |
1579 | job(); |
1580 | |
1581 | lock.lock(); |
1582 | } |
1583 | |
1584 | // The queue is empty, now wait for all active jobs to finish up. |
1585 | m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } ); |
1586 | } |
1587 | |
1588 | void job_pool::job_thread(uint32_t index) |
1589 | { |
1590 | BASISU_NOTE_UNUSED(index); |
1591 | //debug_printf("job_pool::job_thread: starting %u\n", index); |
1592 | |
1593 | while (true) |
1594 | { |
1595 | std::unique_lock<std::mutex> lock(m_mutex); |
1596 | |
1597 | // Wait for any jobs to be issued. |
1598 | m_has_work.wait(lock, [this] { return m_kill_flag || m_queue.size(); } ); |
1599 | |
1600 | // Check to see if we're supposed to exit. |
1601 | if (m_kill_flag) |
1602 | break; |
1603 | |
1604 | // Get the job and execute it. |
1605 | std::function<void()> job(m_queue.back()); |
1606 | m_queue.pop_back(); |
1607 | |
1608 | ++m_num_active_jobs; |
1609 | |
1610 | lock.unlock(); |
1611 | |
1612 | job(); |
1613 | |
1614 | lock.lock(); |
1615 | |
1616 | --m_num_active_jobs; |
1617 | |
1618 | // Now check if there are no more jobs remaining. |
1619 | const bool all_done = m_queue.empty() && !m_num_active_jobs; |
1620 | |
1621 | lock.unlock(); |
1622 | |
1623 | if (all_done) |
1624 | m_no_more_jobs.notify_all(); |
1625 | } |
1626 | |
1627 | //debug_printf("job_pool::job_thread: exiting\n"); |
1628 | } |
1629 | |
1630 | // .TGA image loading |
1631 | #pragma pack(push) |
1632 | #pragma pack(1) |
1633 | struct |
1634 | { |
1635 | uint8_t ; |
1636 | uint8_t ; |
1637 | uint8_t ; |
1638 | packed_uint<2> ; |
1639 | packed_uint<2> ; |
1640 | uint8_t ; |
1641 | packed_uint<2> ; |
1642 | packed_uint<2> ; |
1643 | packed_uint<2> ; |
1644 | packed_uint<2> ; |
1645 | uint8_t ; |
1646 | uint8_t ; |
1647 | }; |
1648 | #pragma pack(pop) |
1649 | |
// Largest TGA width or height, in pixels, that read_tga() accepts.
const uint32_t MAX_TGA_IMAGE_SIZE = 16384;

// Base image type codes from the TGA header's type byte. read_tga() subtracts 8 from type values
// above 8 and treats the result as the run-length encoded form of one of these.
enum tga_image_type
{
	cITPalettized = 1, // color-mapped; read_tga() requires 8-bit indices and a color map
	cITRGB = 2, // direct color; read_tga() rejects 8-bit depth for this type
	cITGrayscale = 3 // no color map; read_tga() accepts 8 or 16 bits per pixel
};
1658 | |
1659 | uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans) |
1660 | { |
1661 | width = 0; |
1662 | height = 0; |
1663 | n_chans = 0; |
1664 | |
1665 | if (buf_size <= sizeof(tga_header)) |
1666 | return nullptr; |
1667 | |
1668 | const tga_header &hdr = *reinterpret_cast<const tga_header *>(pBuf); |
1669 | |
1670 | if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE)) |
1671 | return nullptr; |
1672 | |
1673 | if (hdr.m_desc >> 6) |
1674 | return nullptr; |
1675 | |
1676 | // Simple validation |
1677 | if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) |
1678 | return nullptr; |
1679 | |
1680 | if (hdr.m_cmap) |
1681 | { |
1682 | if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) |
1683 | return nullptr; |
1684 | |
1685 | // Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either. |
1686 | if (hdr.m_cmap_first != 0) |
1687 | return nullptr; |
1688 | } |
1689 | |
1690 | const bool x_flipped = (hdr.m_desc & 0x10) != 0; |
1691 | const bool y_flipped = (hdr.m_desc & 0x20) == 0; |
1692 | |
1693 | bool rle_flag = false; |
1694 | int file_image_type = hdr.m_type; |
1695 | if (file_image_type > 8) |
1696 | { |
1697 | file_image_type -= 8; |
1698 | rle_flag = true; |
1699 | } |
1700 | |
1701 | const tga_image_type image_type = static_cast<tga_image_type>(file_image_type); |
1702 | |
1703 | switch (file_image_type) |
1704 | { |
1705 | case cITRGB: |
1706 | if (hdr.m_depth == 8) |
1707 | return nullptr; |
1708 | break; |
1709 | case cITPalettized: |
1710 | if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0)) |
1711 | return nullptr; |
1712 | break; |
1713 | case cITGrayscale: |
1714 | if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0)) |
1715 | return nullptr; |
1716 | if ((hdr.m_depth != 8) && (hdr.m_depth != 16)) |
1717 | return nullptr; |
1718 | break; |
1719 | default: |
1720 | return nullptr; |
1721 | } |
1722 | |
1723 | uint32_t tga_bytes_per_pixel = 0; |
1724 | |
1725 | switch (hdr.m_depth) |
1726 | { |
1727 | case 32: |
1728 | tga_bytes_per_pixel = 4; |
1729 | n_chans = 4; |
1730 | break; |
1731 | case 24: |
1732 | tga_bytes_per_pixel = 3; |
1733 | n_chans = 3; |
1734 | break; |
1735 | case 16: |
1736 | case 15: |
1737 | tga_bytes_per_pixel = 2; |
1738 | // For compatibility with stb_image_write.h |
1739 | n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 4 : 3; |
1740 | break; |
1741 | case 8: |
1742 | tga_bytes_per_pixel = 1; |
1743 | // For palettized RGBA support, which both FreeImage and stb_image support. |
1744 | n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 4 : 3; |
1745 | break; |
1746 | default: |
1747 | return nullptr; |
1748 | } |
1749 | |
1750 | //const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel; |
1751 | |
1752 | const uint8_t *pSrc = pBuf + sizeof(tga_header); |
1753 | uint32_t bytes_remaining = buf_size - sizeof(tga_header); |
1754 | |
1755 | if (hdr.m_id_len) |
1756 | { |
1757 | if (bytes_remaining < hdr.m_id_len) |
1758 | return nullptr; |
1759 | pSrc += hdr.m_id_len; |
1760 | bytes_remaining += hdr.m_id_len; |
1761 | } |
1762 | |
1763 | color_rgba pal[256]; |
1764 | for (uint32_t i = 0; i < 256; i++) |
1765 | pal[i].set(0, 0, 0, 255); |
1766 | |
1767 | if ((hdr.m_cmap) && (hdr.m_cmap_len)) |
1768 | { |
1769 | if (image_type == cITPalettized) |
1770 | { |
1771 | // Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years). |
1772 | if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) ) |
1773 | return nullptr; |
1774 | |
1775 | if (hdr.m_cmap_bpp == 32) |
1776 | { |
1777 | const uint32_t pal_size = hdr.m_cmap_len * 4; |
1778 | if (bytes_remaining < pal_size) |
1779 | return nullptr; |
1780 | |
1781 | for (uint32_t i = 0; i < hdr.m_cmap_len; i++) |
1782 | { |
1783 | pal[i].r = pSrc[i * 4 + 2]; |
1784 | pal[i].g = pSrc[i * 4 + 1]; |
1785 | pal[i].b = pSrc[i * 4 + 0]; |
1786 | pal[i].a = pSrc[i * 4 + 3]; |
1787 | } |
1788 | |
1789 | bytes_remaining -= pal_size; |
1790 | pSrc += pal_size; |
1791 | } |
1792 | else if (hdr.m_cmap_bpp == 24) |
1793 | { |
1794 | const uint32_t pal_size = hdr.m_cmap_len * 3; |
1795 | if (bytes_remaining < pal_size) |
1796 | return nullptr; |
1797 | |
1798 | for (uint32_t i = 0; i < hdr.m_cmap_len; i++) |
1799 | { |
1800 | pal[i].r = pSrc[i * 3 + 2]; |
1801 | pal[i].g = pSrc[i * 3 + 1]; |
1802 | pal[i].b = pSrc[i * 3 + 0]; |
1803 | pal[i].a = 255; |
1804 | } |
1805 | |
1806 | bytes_remaining -= pal_size; |
1807 | pSrc += pal_size; |
1808 | } |
1809 | else |
1810 | { |
1811 | const uint32_t pal_size = hdr.m_cmap_len * 2; |
1812 | if (bytes_remaining < pal_size) |
1813 | return nullptr; |
1814 | |
1815 | for (uint32_t i = 0; i < hdr.m_cmap_len; i++) |
1816 | { |
1817 | const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8); |
1818 | |
1819 | pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31; |
1820 | pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31; |
1821 | pal[i].b = ((v & 31) * 255 + 15) / 31; |
1822 | pal[i].a = 255; |
1823 | } |
1824 | |
1825 | bytes_remaining -= pal_size; |
1826 | pSrc += pal_size; |
1827 | } |
1828 | } |
1829 | else |
1830 | { |
1831 | const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len; |
1832 | if (bytes_remaining < bytes_to_skip) |
1833 | return nullptr; |
1834 | pSrc += bytes_to_skip; |
1835 | bytes_remaining += bytes_to_skip; |
1836 | } |
1837 | } |
1838 | |
1839 | width = hdr.m_width; |
1840 | height = hdr.m_height; |
1841 | |
1842 | const uint32_t source_pitch = width * tga_bytes_per_pixel; |
1843 | const uint32_t dest_pitch = width * n_chans; |
1844 | |
1845 | uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); |
1846 | if (!pImage) |
1847 | return nullptr; |
1848 | |
1849 | std::vector<uint8_t> input_line_buf; |
1850 | if (rle_flag) |
1851 | input_line_buf.resize(source_pitch); |
1852 | |
1853 | int run_type = 0, run_remaining = 0; |
1854 | uint8_t run_pixel[4]; |
1855 | memset(run_pixel, 0, sizeof(run_pixel)); |
1856 | |
1857 | for (int y = 0; y < height; y++) |
1858 | { |
1859 | const uint8_t *pLine_data; |
1860 | |
1861 | if (rle_flag) |
1862 | { |
1863 | int pixels_remaining = width; |
1864 | uint8_t *pDst = &input_line_buf[0]; |
1865 | |
1866 | do |
1867 | { |
1868 | if (!run_remaining) |
1869 | { |
1870 | if (bytes_remaining < 1) |
1871 | { |
1872 | free(pImage); |
1873 | return nullptr; |
1874 | } |
1875 | |
1876 | int v = *pSrc++; |
1877 | bytes_remaining--; |
1878 | |
1879 | run_type = v & 0x80; |
1880 | run_remaining = (v & 0x7F) + 1; |
1881 | |
1882 | if (run_type) |
1883 | { |
1884 | if (bytes_remaining < tga_bytes_per_pixel) |
1885 | { |
1886 | free(pImage); |
1887 | return nullptr; |
1888 | } |
1889 | |
1890 | memcpy(run_pixel, pSrc, tga_bytes_per_pixel); |
1891 | pSrc += tga_bytes_per_pixel; |
1892 | bytes_remaining -= tga_bytes_per_pixel; |
1893 | } |
1894 | } |
1895 | |
1896 | const uint32_t n = basisu::minimum<uint32_t>(pixels_remaining, run_remaining); |
1897 | pixels_remaining -= n; |
1898 | run_remaining -= n; |
1899 | |
1900 | if (run_type) |
1901 | { |
1902 | for (uint32_t i = 0; i < n; i++) |
1903 | for (uint32_t j = 0; j < tga_bytes_per_pixel; j++) |
1904 | *pDst++ = run_pixel[j]; |
1905 | } |
1906 | else |
1907 | { |
1908 | const uint32_t bytes_wanted = n * tga_bytes_per_pixel; |
1909 | |
1910 | if (bytes_remaining < bytes_wanted) |
1911 | { |
1912 | free(pImage); |
1913 | return nullptr; |
1914 | } |
1915 | |
1916 | memcpy(pDst, pSrc, bytes_wanted); |
1917 | pDst += bytes_wanted; |
1918 | |
1919 | pSrc += bytes_wanted; |
1920 | bytes_remaining -= bytes_wanted; |
1921 | } |
1922 | |
1923 | } while (pixels_remaining); |
1924 | |
1925 | assert((pDst - &input_line_buf[0]) == width * tga_bytes_per_pixel); |
1926 | |
1927 | pLine_data = &input_line_buf[0]; |
1928 | } |
1929 | else |
1930 | { |
1931 | if (bytes_remaining < source_pitch) |
1932 | { |
1933 | free(pImage); |
1934 | return nullptr; |
1935 | } |
1936 | |
1937 | pLine_data = pSrc; |
1938 | bytes_remaining -= source_pitch; |
1939 | pSrc += source_pitch; |
1940 | } |
1941 | |
1942 | // Convert to 24bpp RGB or 32bpp RGBA. |
1943 | uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0); |
1944 | const int dst_stride = x_flipped ? -((int)n_chans) : n_chans; |
1945 | |
1946 | switch (hdr.m_depth) |
1947 | { |
1948 | case 32: |
1949 | assert(tga_bytes_per_pixel == 4 && n_chans == 4); |
1950 | for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride) |
1951 | { |
1952 | pDst[0] = pLine_data[2]; |
1953 | pDst[1] = pLine_data[1]; |
1954 | pDst[2] = pLine_data[0]; |
1955 | pDst[3] = pLine_data[3]; |
1956 | } |
1957 | break; |
1958 | case 24: |
1959 | assert(tga_bytes_per_pixel == 3 && n_chans == 3); |
1960 | for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride) |
1961 | { |
1962 | pDst[0] = pLine_data[2]; |
1963 | pDst[1] = pLine_data[1]; |
1964 | pDst[2] = pLine_data[0]; |
1965 | } |
1966 | break; |
1967 | case 16: |
1968 | case 15: |
1969 | if (image_type == cITRGB) |
1970 | { |
1971 | assert(tga_bytes_per_pixel == 2 && n_chans == 3); |
1972 | for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) |
1973 | { |
1974 | const uint32_t v = pLine_data[0] | (pLine_data[1] << 8); |
1975 | pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31; |
1976 | pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31; |
1977 | pDst[2] = ((v & 31) * 255 + 15) / 31; |
1978 | } |
1979 | } |
1980 | else |
1981 | { |
1982 | assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4); |
1983 | for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) |
1984 | { |
1985 | pDst[0] = pLine_data[0]; |
1986 | pDst[1] = pLine_data[0]; |
1987 | pDst[2] = pLine_data[0]; |
1988 | pDst[3] = pLine_data[1]; |
1989 | } |
1990 | } |
1991 | break; |
1992 | case 8: |
1993 | assert(tga_bytes_per_pixel == 1); |
1994 | if (image_type == cITPalettized) |
1995 | { |
1996 | if (hdr.m_cmap_bpp == 32) |
1997 | { |
1998 | assert(n_chans == 4); |
1999 | for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) |
2000 | { |
2001 | const uint32_t c = *pLine_data; |
2002 | pDst[0] = pal[c].r; |
2003 | pDst[1] = pal[c].g; |
2004 | pDst[2] = pal[c].b; |
2005 | pDst[3] = pal[c].a; |
2006 | } |
2007 | } |
2008 | else |
2009 | { |
2010 | assert(n_chans == 3); |
2011 | for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) |
2012 | { |
2013 | const uint32_t c = *pLine_data; |
2014 | pDst[0] = pal[c].r; |
2015 | pDst[1] = pal[c].g; |
2016 | pDst[2] = pal[c].b; |
2017 | } |
2018 | } |
2019 | } |
2020 | else |
2021 | { |
2022 | assert(n_chans == 3); |
2023 | for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) |
2024 | { |
2025 | const uint8_t c = *pLine_data; |
2026 | pDst[0] = c; |
2027 | pDst[1] = c; |
2028 | pDst[2] = c; |
2029 | } |
2030 | } |
2031 | break; |
2032 | default: |
2033 | assert(0); |
2034 | break; |
2035 | } |
2036 | } // y |
2037 | |
2038 | return pImage; |
2039 | } |
2040 | |
2041 | uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans) |
2042 | { |
2043 | width = height = n_chans = 0; |
2044 | |
2045 | uint8_vec filedata; |
2046 | if (!read_file_to_vec(pFilename, filedata)) |
2047 | return nullptr; |
2048 | |
2049 | if (!filedata.size() || (filedata.size() > UINT32_MAX)) |
2050 | return nullptr; |
2051 | |
2052 | return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); |
2053 | } |
2054 | |
2055 | void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...) |
2056 | { |
2057 | char buf[2048]; |
2058 | |
2059 | va_list args; |
2060 | va_start(args, pFmt); |
2061 | #ifdef _WIN32 |
2062 | vsprintf_s(buf, sizeof(buf), pFmt, args); |
2063 | #else |
2064 | vsnprintf(buf, sizeof(buf), pFmt, args); |
2065 | #endif |
2066 | va_end(args); |
2067 | |
2068 | const char* p = buf; |
2069 | |
2070 | const uint32_t orig_x_ofs = x_ofs; |
2071 | |
2072 | while (*p) |
2073 | { |
2074 | uint8_t c = *p++; |
2075 | if ((c < 32) || (c > 127)) |
2076 | c = '.'; |
2077 | |
2078 | const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0]; |
2079 | |
2080 | for (uint32_t y = 0; y < 8; y++) |
2081 | { |
2082 | uint32_t row_bits = pGlpyh[y]; |
2083 | for (uint32_t x = 0; x < 8; x++) |
2084 | { |
2085 | const uint32_t q = row_bits & (1 << x); |
2086 | |
2087 | const color_rgba* pColor = q ? &fg : pBG; |
2088 | if (!pColor) |
2089 | continue; |
2090 | |
2091 | if (alpha_only) |
2092 | fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); |
2093 | else |
2094 | fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); |
2095 | } |
2096 | } |
2097 | |
2098 | x_ofs += 8 * scale_x; |
2099 | if ((x_ofs + 8 * scale_x) > m_width) |
2100 | { |
2101 | x_ofs = orig_x_ofs; |
2102 | y_ofs += 8 * scale_y; |
2103 | } |
2104 | } |
2105 | } |
2106 | |
2107 | } // namespace basisu |
2108 | |