| 1 | // basisu_pvrtc1_4.h |
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | #pragma once |
| 16 | #include "basisu_gpu_texture.h" |
| 17 | |
| 18 | namespace basisu |
| 19 | { |
// Minimum texture dimensions for the two PVRTC variants named by the prefixes.
// NOTE(review): presumably measured in pixels, with PVRTC2 / PVRTC4 denoting the
// 2bpp and 4bpp modes -- nothing in this header references these constants, so
// confirm against the callers.
enum
{
    // PVRTC2 limits
    PVRTC2_MIN_WIDTH = 16,
    PVRTC2_MIN_HEIGHT = 8,

    // PVRTC4 limits
    PVRTC4_MIN_WIDTH = 8,
    PVRTC4_MIN_HEIGHT = 8
};
| 27 | |
| 28 | struct pvrtc4_block |
| 29 | { |
| 30 | uint32_t m_modulation; |
| 31 | uint32_t m_endpoints; |
| 32 | |
| 33 | pvrtc4_block() : m_modulation(0), m_endpoints(0) { } |
| 34 | |
| 35 | inline bool operator== (const pvrtc4_block& rhs) const |
| 36 | { |
| 37 | return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints); |
| 38 | } |
| 39 | |
| 40 | inline void clear() |
| 41 | { |
| 42 | m_modulation = 0; |
| 43 | m_endpoints = 0; |
| 44 | } |
| 45 | |
| 46 | inline bool get_block_uses_transparent_modulation() const |
| 47 | { |
| 48 | return (m_endpoints & 1) != 0; |
| 49 | } |
| 50 | |
| 51 | inline bool is_endpoint_opaque(uint32_t endpoint_index) const |
| 52 | { |
| 53 | static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U }; |
| 54 | return (m_endpoints & s_bitmasks[open_range_check(endpoint_index, 2U)]) != 0; |
| 55 | } |
| 56 | |
| 57 | // Returns raw endpoint or 8888 |
| 58 | color_rgba get_endpoint(uint32_t endpoint_index, bool unpack) const; |
| 59 | |
| 60 | color_rgba get_endpoint_5554(uint32_t endpoint_index) const; |
| 61 | |
| 62 | static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint) |
| 63 | { |
| 64 | static const uint32_t s_comp_prec[4][4] = |
| 65 | { |
| 66 | // R0 G0 B0 A0 R1 G1 B1 A1 |
| 67 | { 4, 4, 3, 3 }, { 4, 4, 4, 3 }, // transparent endpoint |
| 68 | |
| 69 | { 5, 5, 4, 0 }, { 5, 5, 5, 0 } // opaque endpoint |
| 70 | }; |
| 71 | return s_comp_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][open_range_check(c, 4U)]; |
| 72 | } |
| 73 | |
| 74 | static color_rgba get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint) |
| 75 | { |
| 76 | static const color_rgba s_color_prec[4] = |
| 77 | { |
| 78 | color_rgba(4, 4, 3, 3), color_rgba(4, 4, 4, 3), // transparent endpoint |
| 79 | color_rgba(5, 5, 4, 0), color_rgba(5, 5, 5, 0) // opaque endpoint |
| 80 | }; |
| 81 | return s_color_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)]; |
| 82 | } |
| 83 | |
| 84 | inline uint32_t get_modulation(uint32_t x, uint32_t y) const |
| 85 | { |
| 86 | assert((x < 4) && (y < 4)); |
| 87 | return (m_modulation >> ((y * 4 + x) * 2)) & 3; |
| 88 | } |
| 89 | |
| 90 | inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) |
| 91 | { |
| 92 | assert((x < 4) && (y < 4) && (s < 4)); |
| 93 | uint32_t n = (y * 4 + x) * 2; |
| 94 | m_modulation = (m_modulation & (~(3 << n))) | (s << n); |
| 95 | assert(get_modulation(x, y) == s); |
| 96 | } |
| 97 | |
| 98 | // Scaled by 8 |
| 99 | inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const |
| 100 | { |
| 101 | static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 }, { 0, 4, 4, 8 } }; |
| 102 | return s_block_scales[block_uses_transparent_modulation]; |
| 103 | } |
| 104 | |
| 105 | // Scaled by 8 |
| 106 | inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const |
| 107 | { |
| 108 | return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)]; |
| 109 | } |
| 110 | |
| 111 | inline void byte_swap() |
| 112 | { |
| 113 | m_modulation = byteswap32(m_modulation); |
| 114 | m_endpoints = byteswap32(m_endpoints); |
| 115 | } |
| 116 | |
| 117 | // opaque endpoints: 554, 555 |
| 118 | // transparent endpoints: 3443, 3444 |
| 119 | inline void set_endpoint_raw(uint32_t endpoint_index, const color_rgba& c, bool opaque_endpoint) |
| 120 | { |
| 121 | assert(endpoint_index < 2); |
| 122 | const uint32_t m = m_endpoints & 1; |
| 123 | uint32_t r = c[0], g = c[1], b = c[2], a = c[3]; |
| 124 | |
| 125 | uint32_t packed; |
| 126 | |
| 127 | if (opaque_endpoint) |
| 128 | { |
| 129 | if (!endpoint_index) |
| 130 | { |
| 131 | // 554 |
| 132 | // 1RRRRRGGGGGBBBBM |
| 133 | assert((r < 32) && (g < 32) && (b < 16)); |
| 134 | packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m; |
| 135 | } |
| 136 | else |
| 137 | { |
| 138 | // 555 |
| 139 | // 1RRRRRGGGGGBBBBB |
| 140 | assert((r < 32) && (g < 32) && (b < 32)); |
| 141 | packed = 0x8000 | (r << 10) | (g << 5) | b; |
| 142 | } |
| 143 | } |
| 144 | else |
| 145 | { |
| 146 | if (!endpoint_index) |
| 147 | { |
| 148 | // 3443 |
| 149 | // 0AAA RRRR GGGG BBBM |
| 150 | assert((r < 16) && (g < 16) && (b < 8) && (a < 8)); |
| 151 | packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m; |
| 152 | } |
| 153 | else |
| 154 | { |
| 155 | // 3444 |
| 156 | // 0AAA RRRR GGGG BBBB |
| 157 | assert((r < 16) && (g < 16) && (b < 16) && (a < 8)); |
| 158 | packed = (a << 12) | (r << 8) | (g << 4) | b; |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | assert(packed <= 0xFFFF); |
| 163 | |
| 164 | if (endpoint_index) |
| 165 | m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16); |
| 166 | else |
| 167 | m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; |
| 168 | } |
| 169 | }; |
| 170 | |
| 171 | typedef vector2D<pvrtc4_block> pvrtc4_block_vector2D; |
| 172 | |
| 173 | uint32_t pvrtc4_swizzle_uv(uint32_t XSize, uint32_t YSize, uint32_t XPos, uint32_t YPos); |
| 174 | |
| 175 | class pvrtc4_image |
| 176 | { |
| 177 | public: |
| 178 | inline pvrtc4_image() : |
| 179 | m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) |
| 180 | { |
| 181 | } |
| 182 | |
| 183 | inline pvrtc4_image(uint32_t width, uint32_t height) : |
| 184 | m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_uses_alpha(false) |
| 185 | { |
| 186 | resize(width, height); |
| 187 | } |
| 188 | |
| 189 | inline void clear() |
| 190 | { |
| 191 | m_width = 0; |
| 192 | m_height = 0; |
| 193 | m_block_width = 0; |
| 194 | m_block_height = 0; |
| 195 | m_blocks.clear(); |
| 196 | m_uses_alpha = false; |
| 197 | } |
| 198 | |
| 199 | inline void resize(uint32_t width, uint32_t height) |
| 200 | { |
| 201 | if ((width == m_width) && (height == m_height)) |
| 202 | return; |
| 203 | |
| 204 | m_width = width; |
| 205 | m_height = height; |
| 206 | |
| 207 | m_block_width = (width + 3) >> 2; |
| 208 | m_block_height = (height + 3) >> 2; |
| 209 | |
| 210 | m_blocks.resize(m_block_width, m_block_height); |
| 211 | } |
| 212 | |
| 213 | inline uint32_t get_width() const { return m_width; } |
| 214 | inline uint32_t get_height() const { return m_height; } |
| 215 | |
| 216 | inline uint32_t get_block_width() const { return m_block_width; } |
| 217 | inline uint32_t get_block_height() const { return m_block_height; } |
| 218 | |
| 219 | inline const pvrtc4_block_vector2D &get_blocks() const { return m_blocks; } |
| 220 | inline pvrtc4_block_vector2D &get_blocks() { return m_blocks; } |
| 221 | |
| 222 | inline uint32_t get_total_blocks() const { return m_block_width * m_block_height; } |
| 223 | |
| 224 | inline bool get_uses_alpha() const { return m_uses_alpha; } |
| 225 | inline void set_uses_alpha(bool uses_alpha) { m_uses_alpha = uses_alpha; } |
| 226 | |
| 227 | inline bool are_blocks_equal(const pvrtc4_image& rhs) const |
| 228 | { |
| 229 | return m_blocks == rhs.m_blocks; |
| 230 | } |
| 231 | |
| 232 | inline void set_to_black() |
| 233 | { |
| 234 | memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes()); |
| 235 | } |
| 236 | |
| 237 | inline bool get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const |
| 238 | { |
| 239 | return m_blocks(bx, by).get_block_uses_transparent_modulation(); |
| 240 | } |
| 241 | |
| 242 | inline bool is_endpoint_opaque(uint32_t bx, uint32_t by, uint32_t endpoint_index) const |
| 243 | { |
| 244 | return m_blocks(bx, by).is_endpoint_opaque(endpoint_index); |
| 245 | } |
| 246 | |
| 247 | color_rgba get_endpoint(uint32_t bx, uint32_t by, uint32_t endpoint_index, bool unpack) const |
| 248 | { |
| 249 | assert((bx < m_block_width) && (by < m_block_height)); |
| 250 | return m_blocks(bx, by).get_endpoint(endpoint_index, unpack); |
| 251 | } |
| 252 | |
| 253 | inline uint32_t get_modulation(uint32_t x, uint32_t y) const |
| 254 | { |
| 255 | assert((x < m_width) && (y < m_height)); |
| 256 | return m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3); |
| 257 | } |
| 258 | |
| 259 | // Returns true if the block uses transparent modulation. |
| 260 | bool get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const; |
| 261 | |
| 262 | color_rgba get_pixel(uint32_t x, uint32_t y, uint32_t m) const; |
| 263 | |
| 264 | inline color_rgba get_pixel(uint32_t x, uint32_t y) const |
| 265 | { |
| 266 | assert((x < m_width) && (y < m_height)); |
| 267 | return get_pixel(x, y, m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3)); |
| 268 | } |
| 269 | |
| 270 | void deswizzle() |
| 271 | { |
| 272 | pvrtc4_block_vector2D temp(m_blocks); |
| 273 | |
| 274 | for (uint32_t y = 0; y < m_block_height; y++) |
| 275 | for (uint32_t x = 0; x < m_block_width; x++) |
| 276 | m_blocks(x, y) = temp[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)]; |
| 277 | } |
| 278 | |
| 279 | void swizzle() |
| 280 | { |
| 281 | pvrtc4_block_vector2D temp(m_blocks); |
| 282 | |
| 283 | for (uint32_t y = 0; y < m_block_height; y++) |
| 284 | for (uint32_t x = 0; x < m_block_width; x++) |
| 285 | m_blocks[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)] = temp(x, y); |
| 286 | } |
| 287 | |
| 288 | void unpack_all_pixels(image& img) const |
| 289 | { |
| 290 | img.crop(m_width, m_height); |
| 291 | |
| 292 | for (uint32_t y = 0; y < m_height; y++) |
| 293 | for (uint32_t x = 0; x < m_width; x++) |
| 294 | img(x, y) = get_pixel(x, y); |
| 295 | } |
| 296 | |
| 297 | void unpack_block(image &dst, uint32_t block_x, uint32_t block_y) |
| 298 | { |
| 299 | for (uint32_t y = 0; y < 4; y++) |
| 300 | for (uint32_t x = 0; x < 4; x++) |
| 301 | dst(x, y) = get_pixel(block_x * 4 + x, block_y * 4 + y); |
| 302 | } |
| 303 | |
| 304 | inline int wrap_x(int x) const |
| 305 | { |
| 306 | return posmod(x, m_width); |
| 307 | } |
| 308 | |
| 309 | inline int wrap_y(int y) const |
| 310 | { |
| 311 | return posmod(y, m_height); |
| 312 | } |
| 313 | |
| 314 | inline int wrap_block_x(int bx) const |
| 315 | { |
| 316 | return posmod(bx, m_block_width); |
| 317 | } |
| 318 | |
| 319 | inline int wrap_block_y(int by) const |
| 320 | { |
| 321 | return posmod(by, m_block_height); |
| 322 | } |
| 323 | |
| 324 | inline vec2F get_interpolation_factors(uint32_t x, uint32_t y) const |
| 325 | { |
| 326 | // 0 1 2 3 |
| 327 | // 2 3 0 1 |
| 328 | // .5 .75 0 .25 |
| 329 | static const float s_interp[4] = { 2, 3, 0, 1 }; |
| 330 | return vec2F(s_interp[x & 3], s_interp[y & 3]); |
| 331 | } |
| 332 | |
| 333 | inline color_rgba interpolate(int x, int y, |
| 334 | const color_rgba& p, const color_rgba& q, |
| 335 | const color_rgba& r, const color_rgba& s) const |
| 336 | { |
| 337 | static const int s_interp[4] = { 2, 3, 0, 1 }; |
| 338 | const int u_interp = s_interp[x & 3]; |
| 339 | const int v_interp = s_interp[y & 3]; |
| 340 | |
| 341 | color_rgba result; |
| 342 | |
| 343 | for (uint32_t c = 0; c < 4; c++) |
| 344 | { |
| 345 | int t = p[c] * 4 + u_interp * ((int)q[c] - (int)p[c]); |
| 346 | int b = r[c] * 4 + u_interp * ((int)s[c] - (int)r[c]); |
| 347 | int v = t * 4 + v_interp * (b - t); |
| 348 | if (c < 3) |
| 349 | { |
| 350 | v >>= 1; |
| 351 | v += (v >> 5); |
| 352 | } |
| 353 | else |
| 354 | { |
| 355 | v += (v >> 4); |
| 356 | } |
| 357 | assert((v >= 0) && (v < 256)); |
| 358 | result[c] = static_cast<uint8_t>(v); |
| 359 | } |
| 360 | |
| 361 | return result; |
| 362 | } |
| 363 | |
| 364 | inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) |
| 365 | { |
| 366 | assert((x < m_width) && (y < m_height)); |
| 367 | return m_blocks(x >> 2, y >> 2).set_modulation(x & 3, y & 3, s); |
| 368 | } |
| 369 | |
| 370 | inline uint64_t map_pixel(uint32_t x, uint32_t y, const color_rgba& c, bool perceptual, bool alpha_is_significant, bool record = true) |
| 371 | { |
| 372 | color_rgba v[4]; |
| 373 | get_interpolated_colors(x, y, v); |
| 374 | |
| 375 | uint64_t best_dist = color_distance(perceptual, c, v[0], alpha_is_significant); |
| 376 | uint32_t best_v = 0; |
| 377 | for (uint32_t i = 1; i < 4; i++) |
| 378 | { |
| 379 | uint64_t dist = color_distance(perceptual, c, v[i], alpha_is_significant); |
| 380 | if (dist < best_dist) |
| 381 | { |
| 382 | best_dist = dist; |
| 383 | best_v = i; |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | if (record) |
| 388 | set_modulation(x, y, best_v); |
| 389 | |
| 390 | return best_dist; |
| 391 | } |
| 392 | |
| 393 | inline uint64_t remap_pixels_influenced_by_endpoint(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant) |
| 394 | { |
| 395 | uint64_t total_error = 0; |
| 396 | |
| 397 | for (int yd = -3; yd <= 3; yd++) |
| 398 | { |
| 399 | const int y = wrap_y((int)by * 4 + 2 + yd); |
| 400 | |
| 401 | for (int xd = -3; xd <= 3; xd++) |
| 402 | { |
| 403 | const int x = wrap_x((int)bx * 4 + 2 + xd); |
| 404 | |
| 405 | total_error += map_pixel(x, y, orig_img(x, y), perceptual, alpha_is_significant); |
| 406 | } |
| 407 | } |
| 408 | |
| 409 | return total_error; |
| 410 | } |
| 411 | |
| 412 | inline uint64_t evaluate_1x1_endpoint_error(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant, uint64_t threshold_error = 0) const |
| 413 | { |
| 414 | uint64_t total_error = 0; |
| 415 | |
| 416 | for (int yd = -3; yd <= 3; yd++) |
| 417 | { |
| 418 | const int y = wrap_y((int)by * 4 + 2 + yd); |
| 419 | |
| 420 | for (int xd = -3; xd <= 3; xd++) |
| 421 | { |
| 422 | const int x = wrap_x((int)bx * 4 + 2 + xd); |
| 423 | |
| 424 | total_error += color_distance(perceptual, get_pixel(x, y), orig_img(x, y), alpha_is_significant); |
| 425 | |
| 426 | if ((threshold_error) && (total_error >= threshold_error)) |
| 427 | return total_error; |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | return total_error; |
| 432 | } |
| 433 | |
| 434 | uint64_t local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual); |
| 435 | |
| 436 | inline uint64_t map_all_pixels(const image& img, bool perceptual, bool alpha_is_significant) |
| 437 | { |
| 438 | assert(m_width == img.get_width()); |
| 439 | assert(m_height == img.get_height()); |
| 440 | |
| 441 | uint64_t total_error = 0; |
| 442 | for (uint32_t y = 0; y < img.get_height(); y++) |
| 443 | for (uint32_t x = 0; x < img.get_width(); x++) |
| 444 | total_error += map_pixel(x, y, img(x, y), perceptual, alpha_is_significant); |
| 445 | |
| 446 | return total_error; |
| 447 | } |
| 448 | |
| 449 | public: |
| 450 | uint32_t m_width, m_height; |
| 451 | pvrtc4_block_vector2D m_blocks; |
| 452 | uint32_t m_block_width, m_block_height; |
| 453 | |
| 454 | bool m_uses_alpha; |
| 455 | }; |
| 456 | |
| 457 | } // namespace basisu |
| 458 | |