1 | // basisu_pvrtc1_4.h |
2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // http://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | #pragma once |
16 | #include "basisu_gpu_texture.h" |
17 | |
18 | namespace basisu |
19 | { |
// Dimension constants for the two PVRTC variants.
// NOTE(review): presumably the minimum texture width/height (in texels) for the
// 2bpp (PVRTC2_*) and 4bpp (PVRTC4_*) formats; nothing in this header reads
// them, so confirm against the callers.
enum
{
    // 2bpp variant
    PVRTC2_MIN_WIDTH  = 16,
    PVRTC2_MIN_HEIGHT = 8,

    // 4bpp variant
    PVRTC4_MIN_WIDTH  = 8,
    PVRTC4_MIN_HEIGHT = 8
};
27 | |
28 | struct pvrtc4_block |
29 | { |
30 | uint32_t m_modulation; |
31 | uint32_t m_endpoints; |
32 | |
33 | pvrtc4_block() : m_modulation(0), m_endpoints(0) { } |
34 | |
35 | inline bool operator== (const pvrtc4_block& rhs) const |
36 | { |
37 | return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints); |
38 | } |
39 | |
40 | inline void clear() |
41 | { |
42 | m_modulation = 0; |
43 | m_endpoints = 0; |
44 | } |
45 | |
46 | inline bool get_block_uses_transparent_modulation() const |
47 | { |
48 | return (m_endpoints & 1) != 0; |
49 | } |
50 | |
51 | inline bool is_endpoint_opaque(uint32_t endpoint_index) const |
52 | { |
53 | static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U }; |
54 | return (m_endpoints & s_bitmasks[open_range_check(endpoint_index, 2U)]) != 0; |
55 | } |
56 | |
57 | // Returns raw endpoint or 8888 |
58 | color_rgba get_endpoint(uint32_t endpoint_index, bool unpack) const; |
59 | |
60 | color_rgba get_endpoint_5554(uint32_t endpoint_index) const; |
61 | |
62 | static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint) |
63 | { |
64 | static const uint32_t s_comp_prec[4][4] = |
65 | { |
66 | // R0 G0 B0 A0 R1 G1 B1 A1 |
67 | { 4, 4, 3, 3 }, { 4, 4, 4, 3 }, // transparent endpoint |
68 | |
69 | { 5, 5, 4, 0 }, { 5, 5, 5, 0 } // opaque endpoint |
70 | }; |
71 | return s_comp_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][open_range_check(c, 4U)]; |
72 | } |
73 | |
74 | static color_rgba get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint) |
75 | { |
76 | static const color_rgba s_color_prec[4] = |
77 | { |
78 | color_rgba(4, 4, 3, 3), color_rgba(4, 4, 4, 3), // transparent endpoint |
79 | color_rgba(5, 5, 4, 0), color_rgba(5, 5, 5, 0) // opaque endpoint |
80 | }; |
81 | return s_color_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)]; |
82 | } |
83 | |
84 | inline uint32_t get_modulation(uint32_t x, uint32_t y) const |
85 | { |
86 | assert((x < 4) && (y < 4)); |
87 | return (m_modulation >> ((y * 4 + x) * 2)) & 3; |
88 | } |
89 | |
90 | inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) |
91 | { |
92 | assert((x < 4) && (y < 4) && (s < 4)); |
93 | uint32_t n = (y * 4 + x) * 2; |
94 | m_modulation = (m_modulation & (~(3 << n))) | (s << n); |
95 | assert(get_modulation(x, y) == s); |
96 | } |
97 | |
98 | // Scaled by 8 |
99 | inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const |
100 | { |
101 | static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 }, { 0, 4, 4, 8 } }; |
102 | return s_block_scales[block_uses_transparent_modulation]; |
103 | } |
104 | |
105 | // Scaled by 8 |
106 | inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const |
107 | { |
108 | return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)]; |
109 | } |
110 | |
111 | inline void byte_swap() |
112 | { |
113 | m_modulation = byteswap32(m_modulation); |
114 | m_endpoints = byteswap32(m_endpoints); |
115 | } |
116 | |
117 | // opaque endpoints: 554, 555 |
118 | // transparent endpoints: 3443, 3444 |
119 | inline void set_endpoint_raw(uint32_t endpoint_index, const color_rgba& c, bool opaque_endpoint) |
120 | { |
121 | assert(endpoint_index < 2); |
122 | const uint32_t m = m_endpoints & 1; |
123 | uint32_t r = c[0], g = c[1], b = c[2], a = c[3]; |
124 | |
125 | uint32_t packed; |
126 | |
127 | if (opaque_endpoint) |
128 | { |
129 | if (!endpoint_index) |
130 | { |
131 | // 554 |
132 | // 1RRRRRGGGGGBBBBM |
133 | assert((r < 32) && (g < 32) && (b < 16)); |
134 | packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m; |
135 | } |
136 | else |
137 | { |
138 | // 555 |
139 | // 1RRRRRGGGGGBBBBB |
140 | assert((r < 32) && (g < 32) && (b < 32)); |
141 | packed = 0x8000 | (r << 10) | (g << 5) | b; |
142 | } |
143 | } |
144 | else |
145 | { |
146 | if (!endpoint_index) |
147 | { |
148 | // 3443 |
149 | // 0AAA RRRR GGGG BBBM |
150 | assert((r < 16) && (g < 16) && (b < 8) && (a < 8)); |
151 | packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m; |
152 | } |
153 | else |
154 | { |
155 | // 3444 |
156 | // 0AAA RRRR GGGG BBBB |
157 | assert((r < 16) && (g < 16) && (b < 16) && (a < 8)); |
158 | packed = (a << 12) | (r << 8) | (g << 4) | b; |
159 | } |
160 | } |
161 | |
162 | assert(packed <= 0xFFFF); |
163 | |
164 | if (endpoint_index) |
165 | m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16); |
166 | else |
167 | m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; |
168 | } |
169 | }; |
170 | |
// 2D container of pvrtc4_block elements. In this header it is accessed both as
// (block_x, block_y) and through a single linear index.
171 | typedef vector2D<pvrtc4_block> pvrtc4_block_vector2D; |
172 | |
// Declared here, defined elsewhere. pvrtc4_image::swizzle()/deswizzle() pass the
// block-grid dimensions and a block position, and use the returned value as a
// linear index into the block array.
173 | uint32_t pvrtc4_swizzle_uv(uint32_t XSize, uint32_t YSize, uint32_t XPos, uint32_t YPos); |
174 | |
175 | class pvrtc4_image |
176 | { |
177 | public: |
178 | inline pvrtc4_image() : |
179 | m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) |
180 | { |
181 | } |
182 | |
183 | inline pvrtc4_image(uint32_t width, uint32_t height) : |
184 | m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) |
185 | { |
186 | resize(width, height); |
187 | } |
188 | |
189 | inline void clear() |
190 | { |
191 | m_width = 0; |
192 | m_height = 0; |
193 | m_block_width = 0; |
194 | m_block_height = 0; |
195 | m_blocks.clear(); |
196 | m_uses_alpha = false; |
197 | } |
198 | |
199 | inline void resize(uint32_t width, uint32_t height) |
200 | { |
201 | if ((width == m_width) && (height == m_height)) |
202 | return; |
203 | |
204 | m_width = width; |
205 | m_height = height; |
206 | |
207 | m_block_width = (width + 3) >> 2; |
208 | m_block_height = (height + 3) >> 2; |
209 | |
210 | m_blocks.resize(m_block_width, m_block_height); |
211 | } |
212 | |
213 | inline uint32_t get_width() const { return m_width; } |
214 | inline uint32_t get_height() const { return m_height; } |
215 | |
216 | inline uint32_t get_block_width() const { return m_block_width; } |
217 | inline uint32_t get_block_height() const { return m_block_height; } |
218 | |
219 | inline const pvrtc4_block_vector2D &get_blocks() const { return m_blocks; } |
220 | inline pvrtc4_block_vector2D &get_blocks() { return m_blocks; } |
221 | |
222 | inline uint32_t get_total_blocks() const { return m_block_width * m_block_height; } |
223 | |
224 | inline bool get_uses_alpha() const { return m_uses_alpha; } |
225 | inline void set_uses_alpha(bool uses_alpha) { m_uses_alpha = uses_alpha; } |
226 | |
227 | inline bool are_blocks_equal(const pvrtc4_image& rhs) const |
228 | { |
229 | return m_blocks == rhs.m_blocks; |
230 | } |
231 | |
232 | inline void set_to_black() |
233 | { |
234 | memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes()); |
235 | } |
236 | |
237 | inline bool get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const |
238 | { |
239 | return m_blocks(bx, by).get_block_uses_transparent_modulation(); |
240 | } |
241 | |
242 | inline bool is_endpoint_opaque(uint32_t bx, uint32_t by, uint32_t endpoint_index) const |
243 | { |
244 | return m_blocks(bx, by).is_endpoint_opaque(endpoint_index); |
245 | } |
246 | |
247 | color_rgba get_endpoint(uint32_t bx, uint32_t by, uint32_t endpoint_index, bool unpack) const |
248 | { |
249 | assert((bx < m_block_width) && (by < m_block_height)); |
250 | return m_blocks(bx, by).get_endpoint(endpoint_index, unpack); |
251 | } |
252 | |
253 | inline uint32_t get_modulation(uint32_t x, uint32_t y) const |
254 | { |
255 | assert((x < m_width) && (y < m_height)); |
256 | return m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3); |
257 | } |
258 | |
259 | // Returns true if the block uses transparent modulation. |
260 | bool get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const; |
261 | |
262 | color_rgba get_pixel(uint32_t x, uint32_t y, uint32_t m) const; |
263 | |
264 | inline color_rgba get_pixel(uint32_t x, uint32_t y) const |
265 | { |
266 | assert((x < m_width) && (y < m_height)); |
267 | return get_pixel(x, y, m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3)); |
268 | } |
269 | |
270 | void deswizzle() |
271 | { |
272 | pvrtc4_block_vector2D temp(m_blocks); |
273 | |
274 | for (uint32_t y = 0; y < m_block_height; y++) |
275 | for (uint32_t x = 0; x < m_block_width; x++) |
276 | m_blocks(x, y) = temp[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)]; |
277 | } |
278 | |
279 | void swizzle() |
280 | { |
281 | pvrtc4_block_vector2D temp(m_blocks); |
282 | |
283 | for (uint32_t y = 0; y < m_block_height; y++) |
284 | for (uint32_t x = 0; x < m_block_width; x++) |
285 | m_blocks[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)] = temp(x, y); |
286 | } |
287 | |
288 | void unpack_all_pixels(image& img) const |
289 | { |
290 | img.crop(m_width, m_height); |
291 | |
292 | for (uint32_t y = 0; y < m_height; y++) |
293 | for (uint32_t x = 0; x < m_width; x++) |
294 | img(x, y) = get_pixel(x, y); |
295 | } |
296 | |
297 | void unpack_block(image &dst, uint32_t block_x, uint32_t block_y) |
298 | { |
299 | for (uint32_t y = 0; y < 4; y++) |
300 | for (uint32_t x = 0; x < 4; x++) |
301 | dst(x, y) = get_pixel(block_x * 4 + x, block_y * 4 + y); |
302 | } |
303 | |
304 | inline int wrap_x(int x) const |
305 | { |
306 | return posmod(x, m_width); |
307 | } |
308 | |
309 | inline int wrap_y(int y) const |
310 | { |
311 | return posmod(y, m_height); |
312 | } |
313 | |
314 | inline int wrap_block_x(int bx) const |
315 | { |
316 | return posmod(bx, m_block_width); |
317 | } |
318 | |
319 | inline int wrap_block_y(int by) const |
320 | { |
321 | return posmod(by, m_block_height); |
322 | } |
323 | |
324 | inline vec2F get_interpolation_factors(uint32_t x, uint32_t y) const |
325 | { |
326 | // 0 1 2 3 |
327 | // 2 3 0 1 |
328 | // .5 .75 0 .25 |
329 | static const float s_interp[4] = { 2, 3, 0, 1 }; |
330 | return vec2F(s_interp[x & 3], s_interp[y & 3]); |
331 | } |
332 | |
333 | inline color_rgba interpolate(int x, int y, |
334 | const color_rgba& p, const color_rgba& q, |
335 | const color_rgba& r, const color_rgba& s) const |
336 | { |
337 | static const int s_interp[4] = { 2, 3, 0, 1 }; |
338 | const int u_interp = s_interp[x & 3]; |
339 | const int v_interp = s_interp[y & 3]; |
340 | |
341 | color_rgba result; |
342 | |
343 | for (uint32_t c = 0; c < 4; c++) |
344 | { |
345 | int t = p[c] * 4 + u_interp * ((int)q[c] - (int)p[c]); |
346 | int b = r[c] * 4 + u_interp * ((int)s[c] - (int)r[c]); |
347 | int v = t * 4 + v_interp * (b - t); |
348 | if (c < 3) |
349 | { |
350 | v >>= 1; |
351 | v += (v >> 5); |
352 | } |
353 | else |
354 | { |
355 | v += (v >> 4); |
356 | } |
357 | assert((v >= 0) && (v < 256)); |
358 | result[c] = static_cast<uint8_t>(v); |
359 | } |
360 | |
361 | return result; |
362 | } |
363 | |
364 | inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) |
365 | { |
366 | assert((x < m_width) && (y < m_height)); |
367 | return m_blocks(x >> 2, y >> 2).set_modulation(x & 3, y & 3, s); |
368 | } |
369 | |
370 | inline uint64_t map_pixel(uint32_t x, uint32_t y, const color_rgba& c, bool perceptual, bool alpha_is_significant, bool record = true) |
371 | { |
372 | color_rgba v[4]; |
373 | get_interpolated_colors(x, y, v); |
374 | |
375 | uint64_t best_dist = color_distance(perceptual, c, v[0], alpha_is_significant); |
376 | uint32_t best_v = 0; |
377 | for (uint32_t i = 1; i < 4; i++) |
378 | { |
379 | uint64_t dist = color_distance(perceptual, c, v[i], alpha_is_significant); |
380 | if (dist < best_dist) |
381 | { |
382 | best_dist = dist; |
383 | best_v = i; |
384 | } |
385 | } |
386 | |
387 | if (record) |
388 | set_modulation(x, y, best_v); |
389 | |
390 | return best_dist; |
391 | } |
392 | |
393 | inline uint64_t remap_pixels_influenced_by_endpoint(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant) |
394 | { |
395 | uint64_t total_error = 0; |
396 | |
397 | for (int yd = -3; yd <= 3; yd++) |
398 | { |
399 | const int y = wrap_y((int)by * 4 + 2 + yd); |
400 | |
401 | for (int xd = -3; xd <= 3; xd++) |
402 | { |
403 | const int x = wrap_x((int)bx * 4 + 2 + xd); |
404 | |
405 | total_error += map_pixel(x, y, orig_img(x, y), perceptual, alpha_is_significant); |
406 | } |
407 | } |
408 | |
409 | return total_error; |
410 | } |
411 | |
412 | inline uint64_t evaluate_1x1_endpoint_error(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant, uint64_t threshold_error = 0) const |
413 | { |
414 | uint64_t total_error = 0; |
415 | |
416 | for (int yd = -3; yd <= 3; yd++) |
417 | { |
418 | const int y = wrap_y((int)by * 4 + 2 + yd); |
419 | |
420 | for (int xd = -3; xd <= 3; xd++) |
421 | { |
422 | const int x = wrap_x((int)bx * 4 + 2 + xd); |
423 | |
424 | total_error += color_distance(perceptual, get_pixel(x, y), orig_img(x, y), alpha_is_significant); |
425 | |
426 | if ((threshold_error) && (total_error >= threshold_error)) |
427 | return total_error; |
428 | } |
429 | } |
430 | |
431 | return total_error; |
432 | } |
433 | |
434 | uint64_t local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual); |
435 | |
436 | inline uint64_t map_all_pixels(const image& img, bool perceptual, bool alpha_is_significant) |
437 | { |
438 | assert(m_width == img.get_width()); |
439 | assert(m_height == img.get_height()); |
440 | |
441 | uint64_t total_error = 0; |
442 | for (uint32_t y = 0; y < img.get_height(); y++) |
443 | for (uint32_t x = 0; x < img.get_width(); x++) |
444 | total_error += map_pixel(x, y, img(x, y), perceptual, alpha_is_significant); |
445 | |
446 | return total_error; |
447 | } |
448 | |
449 | public: |
450 | uint32_t m_width, m_height; |
451 | pvrtc4_block_vector2D m_blocks; |
452 | uint32_t m_block_width, m_block_height; |
453 | |
454 | bool m_uses_alpha; |
455 | }; |
456 | |
457 | } // namespace basisu |
458 | |