| 1 | // basisu_gpu_texture.cpp |
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | #include "basisu_gpu_texture.h" |
| 16 | #include "basisu_enc.h" |
| 17 | #include "basisu_pvrtc1_4.h" |
| 18 | #if BASISU_USE_ASTC_DECOMPRESS |
| 19 | #include "basisu_astc_decomp.h" |
| 20 | #endif |
| 21 | #include "basisu_bc7enc.h" |
| 22 | |
| 23 | namespace basisu |
| 24 | { |
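| | // Unpacks the alpha channel of a 64-bit ETC2 EAC A8 block into the .a components of a 4x4 block of pixels. |
| | // Each texel reconstructs to clamp255(base + table[selector] * multiplier), which the unrolled statements below spell out texel by texel. |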
| 25 | void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels) |
| 26 | { |
| 27 | static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8" ); |
| 28 | |
| 29 | const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits); |
| 30 | |
| 31 | const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table]; |
| 32 | |
| 33 | const uint64_t selector_bits = pBlock->get_selector_bits(); |
| 34 | |
| 35 | const int32_t base = pBlock->m_base; |
| 36 | const int32_t mul = pBlock->m_multiplier; |
| 37 | |
| 38 | pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul); |
| 39 | pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul); |
| 40 | pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul); |
| 41 | pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul); |
| 42 | |
| 43 | pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul); |
| 44 | pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul); |
| 45 | pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul); |
| 46 | pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul); |
| 47 | |
| 48 | pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul); |
| 49 | pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul); |
| 50 | pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul); |
| 51 | pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul); |
| 52 | |
| 53 | pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul); |
| 54 | pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul); |
| 55 | pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul); |
| 56 | pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul); |
| 57 | } |
| 58 | |
| 59 | struct bc1_block |
| 60 | { |
| 61 | enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; |
| 62 | |
| 63 | uint8_t m_low_color[cTotalEndpointBytes]; |
| 64 | uint8_t m_high_color[cTotalEndpointBytes]; |
| 65 | uint8_t m_selectors[cTotalSelectorBytes]; |
| 66 | |
| 67 | inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } |
| 68 | inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } |
| 69 | |
| 70 | static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) |
| 71 | { |
| 72 | r = (c >> 11) & 31; |
| 73 | g = (c >> 5) & 63; |
| 74 | b = c & 31; |
| 75 | |
| 76 | r = (r << 3) | (r >> 2); |
| 77 | g = (g << 2) | (g >> 4); |
| 78 | b = (b << 3) | (b >> 2); |
| 79 | } |
| 80 | |
| 81 | inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * 2)) & 3; } |
| 82 | }; |
| 83 | |
| 84 | // Returns true if the block uses 3-color punchthrough alpha mode. |
| 85 | bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) |
| 86 | { |
| 87 | static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8" ); |
| 88 | |
| 89 | const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits); |
| 90 | |
| 91 | const uint32_t l = pBlock->get_low_color(); |
| 92 | const uint32_t h = pBlock->get_high_color(); |
| 93 | |
| 94 | color_rgba c[4]; |
| 95 | |
| 96 | uint32_t r0, g0, b0, r1, g1, b1; |
| 97 | bc1_block::unpack_color(l, r0, g0, b0); |
| 98 | bc1_block::unpack_color(h, r1, g1, b1); |
| 99 | |
| 100 | c[0].set_noclamp_rgba(r0, g0, b0, 255); |
| 101 | c[1].set_noclamp_rgba(r1, g1, b1, 255); |
| 102 | |
| 103 | bool used_punchthrough = false; |
| 104 | |
| 105 | if (l > h) |
| 106 | { |
| 107 | c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); |
| 108 | c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); |
| 109 | } |
| 110 | else |
| 111 | { |
| 112 | c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); |
| 113 | c[3].set_noclamp_rgba(0, 0, 0, 0); |
| 114 | used_punchthrough = true; |
| 115 | } |
| 116 | |
| 117 | if (set_alpha) |
| 118 | { |
| 119 | for (uint32_t y = 0; y < 4; y++, pPixels += 4) |
| 120 | { |
| 121 | pPixels[0] = c[pBlock->get_selector(0, y)]; |
| 122 | pPixels[1] = c[pBlock->get_selector(1, y)]; |
| 123 | pPixels[2] = c[pBlock->get_selector(2, y)]; |
| 124 | pPixels[3] = c[pBlock->get_selector(3, y)]; |
| 125 | } |
| 126 | } |
| 127 | else |
| 128 | { |
| 129 | for (uint32_t y = 0; y < 4; y++, pPixels += 4) |
| 130 | { |
| 131 | pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); |
| 132 | pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); |
| 133 | pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); |
| 134 | pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | return used_punchthrough; |
| 139 | } |
| 140 | |
| 141 | bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) |
| 142 | { |
| 143 | static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8" ); |
| 144 | |
| 145 | const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits); |
| 146 | |
| 147 | const uint32_t l = pBlock->get_low_color(); |
| 148 | const uint32_t h = pBlock->get_high_color(); |
| 149 | |
| 150 | color_rgba c[4]; |
| 151 | |
| 152 | int r0 = (l >> 11) & 31; |
| 153 | int g0 = (l >> 5) & 63; |
| 154 | int b0 = l & 31; |
| 155 | int r1 = (h >> 11) & 31; |
| 156 | int g1 = (h >> 5) & 63; |
| 157 | int b1 = h & 31; |
| 158 | |
| 159 | c[0].b = (uint8_t)((3 * b0 * 22) / 8); |
| 160 | c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4)); |
| 161 | c[0].r = (uint8_t)((3 * r0 * 22) / 8); |
| 162 | c[0].a = 0xFF; |
| 163 | |
| 164 | c[1].r = (uint8_t)((3 * r1 * 22) / 8); |
| 165 | c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4)); |
| 166 | c[1].b = (uint8_t)((3 * b1 * 22) / 8); |
| 167 | c[1].a = 0xFF; |
| 168 | |
| 169 | int gdiff = c[1].g - c[0].g; |
| 170 | |
| 171 | bool used_punchthrough = false; |
| 172 | |
| 173 | if (l > h) |
| 174 | { |
| 175 | c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8); |
| 176 | c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256)); |
| 177 | c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8); |
| 178 | c[2].a = 0xFF; |
| 179 | |
| 180 | c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8); |
| 181 | c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256); |
| 182 | c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8); |
| 183 | c[3].a = 0xFF; |
| 184 | } |
| 185 | else |
| 186 | { |
| 187 | c[2].r = (uint8_t)(((r0 + r1) * 33) / 8); |
| 188 | c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256); |
| 189 | c[2].b = (uint8_t)(((b0 + b1) * 33) / 8); |
| 190 | c[2].a = 0xFF; |
| 191 | |
| 192 | c[3].set_noclamp_rgba(0, 0, 0, 0); |
| 193 | used_punchthrough = true; |
| 194 | } |
| 195 | |
| 196 | if (set_alpha) |
| 197 | { |
| 198 | for (uint32_t y = 0; y < 4; y++, pPixels += 4) |
| 199 | { |
| 200 | pPixels[0] = c[pBlock->get_selector(0, y)]; |
| 201 | pPixels[1] = c[pBlock->get_selector(1, y)]; |
| 202 | pPixels[2] = c[pBlock->get_selector(2, y)]; |
| 203 | pPixels[3] = c[pBlock->get_selector(3, y)]; |
| 204 | } |
| 205 | } |
| 206 | else |
| 207 | { |
| 208 | for (uint32_t y = 0; y < 4; y++, pPixels += 4) |
| 209 | { |
| 210 | pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); |
| 211 | pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); |
| 212 | pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); |
| 213 | pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | return used_punchthrough; |
| 218 | } |
| 219 | |
| 220 | static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } |
| 221 | static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } |
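| | // The 43/64 and 21/64 weights closely approximate the ideal 2/3 and 1/3 BC1 interpolation factors; these helpers model the AMD-style rounding used by unpack_bc1_amd() below. |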
| 222 | |
| 223 | bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) |
| 224 | { |
| 225 | const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits); |
| 226 | |
| 227 | const uint32_t l = pBlock->get_low_color(); |
| 228 | const uint32_t h = pBlock->get_high_color(); |
| 229 | |
| 230 | color_rgba c[4]; |
| 231 | |
| 232 | uint32_t r0, g0, b0, r1, g1, b1; |
| 233 | bc1_block::unpack_color(l, r0, g0, b0); |
| 234 | bc1_block::unpack_color(h, r1, g1, b1); |
| 235 | |
| 236 | c[0].set_noclamp_rgba(r0, g0, b0, 255); |
| 237 | c[1].set_noclamp_rgba(r1, g1, b1, 255); |
| 238 | |
| 239 | bool used_punchthrough = false; |
| 240 | |
| 241 | if (l > h) |
| 242 | { |
| 243 | c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); |
| 244 | c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); |
| 245 | } |
| 246 | else |
| 247 | { |
| 248 | c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); |
| 249 | c[3].set_noclamp_rgba(0, 0, 0, 0); |
| 250 | used_punchthrough = true; |
| 251 | } |
| 252 | |
| 253 | if (set_alpha) |
| 254 | { |
| 255 | for (uint32_t y = 0; y < 4; y++, pPixels += 4) |
| 256 | { |
| 257 | pPixels[0] = c[pBlock->get_selector(0, y)]; |
| 258 | pPixels[1] = c[pBlock->get_selector(1, y)]; |
| 259 | pPixels[2] = c[pBlock->get_selector(2, y)]; |
| 260 | pPixels[3] = c[pBlock->get_selector(3, y)]; |
| 261 | } |
| 262 | } |
| 263 | else |
| 264 | { |
| 265 | for (uint32_t y = 0; y < 4; y++, pPixels += 4) |
| 266 | { |
| 267 | pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); |
| 268 | pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); |
| 269 | pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); |
| 270 | pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | return used_punchthrough; |
| 275 | } |
| 276 | |
| 277 | struct bc4_block |
| 278 | { |
| 279 | enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; |
| 280 | uint8_t m_endpoints[2]; |
| 281 | |
| 282 | uint8_t m_selectors[cTotalSelectorBytes]; |
| 283 | |
| 284 | inline uint32_t get_low_alpha() const { return m_endpoints[0]; } |
| 285 | inline uint32_t get_high_alpha() const { return m_endpoints[1]; } |
| 286 | inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } |
| 287 | |
| 288 | inline uint64_t get_selector_bits() const |
| 289 | { |
| 290 | return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | |
| 291 | (((uint64_t)m_selectors[4]) << 32U) | |
| 292 | (((uint64_t)m_selectors[5]) << 40U); |
| 293 | } |
| 294 | |
| 295 | inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const |
| 296 | { |
| 297 | assert((x < 4U) && (y < 4U)); |
| 298 | return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); |
| 299 | } |
| 300 | |
| 301 | static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) |
| 302 | { |
| 303 | pDst[0] = static_cast<uint8_t>(l); |
| 304 | pDst[1] = static_cast<uint8_t>(h); |
| 305 | pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5); |
| 306 | pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5); |
| 307 | pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5); |
| 308 | pDst[5] = static_cast<uint8_t>((l + h * 4) / 5); |
| 309 | pDst[6] = 0; |
| 310 | pDst[7] = 255; |
| 311 | return 6; |
| 312 | } |
| 313 | |
| 314 | static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) |
| 315 | { |
| 316 | pDst[0] = static_cast<uint8_t>(l); |
| 317 | pDst[1] = static_cast<uint8_t>(h); |
| 318 | pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7); |
| 319 | pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7); |
| 320 | pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7); |
| 321 | pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7); |
| 322 | pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7); |
| 323 | pDst[7] = static_cast<uint8_t>((l + h * 6) / 7); |
| 324 | return 8; |
| 325 | } |
| 326 | |
| 327 | static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) |
| 328 | { |
| 329 | if (l > h) |
| 330 | return get_block_values8(pDst, l, h); |
| 331 | else |
| 332 | return get_block_values6(pDst, l, h); |
| 333 | } |
| 334 | }; |
| 335 | |
| 336 | void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride) |
| 337 | { |
| 338 | static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8" ); |
| 339 | |
| 340 | const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits); |
| 341 | |
| 342 | uint8_t sel_values[8]; |
| 343 | bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); |
| 344 | |
| 345 | const uint64_t selector_bits = pBlock->get_selector_bits(); |
| 346 | |
| 347 | for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) |
| 348 | { |
| 349 | pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)]; |
| 350 | pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)]; |
| 351 | pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)]; |
| 352 | pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; |
| 353 | } |
| 354 | } |
| 355 | |
| 356 | // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPUs for BC3. |
| 357 | bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels) |
| 358 | { |
| 359 | bool success = true; |
| 360 | |
| 361 | if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true)) |
| 362 | success = false; |
| 363 | |
| 364 | unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba)); |
| 365 | |
| 366 | return success; |
| 367 | } |
| 368 | |
| 369 | // writes RG |
| 370 | void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels) |
| 371 | { |
| 372 | unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba)); |
| 373 | unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba)); |
| 374 | } |
| 375 | |
| 376 | // ATC isn't officially documented, so I'm relying on these references: |
| 377 | // http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf |
| 378 | // https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c |
| 379 | // The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8. |
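| | // As decoded here, an ATC_RGB block is 64 bits: a 16-bit color0 (top bit = mode flag, then RGB 5:5:5), a 16-bit color1 (RGB 5:6:5), and 32 bits of 2-bit selectors. |
| | // In the normal (mode=0) case the two intermediate colors are the 5/8 and 3/8 blends of the endpoints; in the mode=1 case selector 0 decodes to black, selector 1 to color0 minus a quarter of color1 (clamped), selector 2 to color0, and selector 3 to color1. |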
| 380 | void unpack_atc(const void* pBlock_bits, color_rgba* pPixels) |
| 381 | { |
| 382 | const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits); |
| 383 | |
| 384 | const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U); |
| 385 | const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U); |
| 386 | uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U); |
| 387 | |
| 388 | const bool mode = (color0 & 0x8000) != 0; |
| 389 | |
| 390 | color_rgba c[4]; |
| 391 | |
| 392 | c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255); |
| 393 | c[0].r = (c[0].r << 3) | (c[0].r >> 2); |
| 394 | c[0].g = (c[0].g << 3) | (c[0].g >> 2); |
| 395 | c[0].b = (c[0].b << 3) | (c[0].b >> 2); |
| 396 | |
| 397 | c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255); |
| 398 | c[3].r = (c[3].r << 3) | (c[3].r >> 2); |
| 399 | c[3].g = (c[3].g << 2) | (c[3].g >> 4); |
| 400 | c[3].b = (c[3].b << 3) | (c[3].b >> 2); |
| 401 | |
| 402 | if (mode) |
| 403 | { |
| 404 | c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255); |
| 405 | c[2] = c[0]; |
| 406 | c[0].set(0, 0, 0, 255); |
| 407 | } |
| 408 | else |
| 409 | { |
| 410 | c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3; |
| 411 | c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3; |
| 412 | c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3; |
| 413 | |
| 414 | c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3; |
| 415 | c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3; |
| 416 | c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3; |
| 417 | } |
| 418 | |
| 419 | for (uint32_t i = 0; i < 16; i++) |
| 420 | { |
| 421 | const uint32_t s = sels & 3; |
| 422 | |
| 423 | pPixels[i] = c[s]; |
| 424 | |
| 425 | sels >>= 2; |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | // BC7 mode 0-7 decompression. |
| 430 | // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3-subset, 2-subset, 1-subset, and dual-plane modes together into simple shared routines. |
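| | // For reference, the parameters the routines below assume for each mode (subsets / partition bits / per-channel endpoint bits / p-bits / index bits): |
| | // mode 0: 3 subsets, 4-bit partition, RGB 4.4.4, 6 p-bits, 3-bit indices |
| | // mode 1: 2 subsets, 6-bit partition, RGB 6.6.6, 2 shared p-bits, 3-bit indices |
| | // mode 2: 3 subsets, 6-bit partition, RGB 5.5.5, no p-bits, 2-bit indices |
| | // mode 3: 2 subsets, 6-bit partition, RGB 7.7.7, 4 p-bits, 2-bit indices |
| | // mode 4: 1 subset, 2-bit rotation + 1-bit index mode, RGBA 5.5.5.6, 2-bit color / 3-bit alpha indices |
| | // mode 5: 1 subset, 2-bit rotation, RGBA 7.7.7.8, 2-bit color / 2-bit alpha indices |
| | // mode 6: 1 subset, RGBA 7.7.7.7, 2 p-bits, 4-bit indices |
| | // mode 7: 2 subsets, 6-bit partition, RGBA 5.5.5.5, 4 p-bits, 2-bit indices |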
| 431 | |
| 432 | static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } |
| 433 | static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } |
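| | // Worked example: bc7_dequant(0x15, 1, 5) forms the 6-bit value 0x2B, shifts it into the top of a byte (0xAC), then ORs in the top bits shifted down to get 0xAE, i.e. 174. |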
| 434 | |
| 435 | static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; } |
| 436 | static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; } |
| 437 | static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; } |
| 438 | static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) |
| 439 | { |
| 440 | assert(l <= 255 && h <= 255); |
| 441 | switch (bits) |
| 442 | { |
| 443 | case 2: return bc7_interp2(l, h, w); |
| 444 | case 3: return bc7_interp3(l, h, w); |
| 445 | case 4: return bc7_interp4(l, h, w); |
| 446 | default: |
| 447 | break; |
| 448 | } |
| 449 | return 0; |
| 450 | } |
| 451 | |
| 452 | bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) |
| 453 | { |
| 454 | //const uint32_t SUBSETS = 3; |
| 455 | const uint32_t ENDPOINTS = 6; |
| 456 | const uint32_t COMPS = 3; |
| 457 | const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; |
| 458 | const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; |
| 459 | const uint32_t PBITS = (mode == 0) ? 6 : 0; |
| 460 | const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; |
| 461 | |
| 462 | uint32_t bit_offset = 0; |
| 463 | const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); |
| 464 | |
| 465 | if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; |
| 466 | |
| 467 | const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6); |
| 468 | |
| 469 | color_rgba endpoints[ENDPOINTS]; |
| 470 | for (uint32_t c = 0; c < COMPS; c++) |
| 471 | for (uint32_t e = 0; e < ENDPOINTS; e++) |
| 472 | endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); |
| 473 | |
| 474 | uint32_t pbits[6]; |
| 475 | for (uint32_t p = 0; p < PBITS; p++) |
| 476 | pbits[p] = read_bits32(pBuf, bit_offset, 1); |
| 477 | |
| 478 | uint32_t weights[16]; |
| 479 | for (uint32_t i = 0; i < 16; i++) |
| 480 | weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); |
| 481 | |
| 482 | assert(bit_offset == 128); |
| 483 | |
| 484 | for (uint32_t e = 0; e < ENDPOINTS; e++) |
| 485 | for (uint32_t c = 0; c < 4; c++) |
| 486 | endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); |
| 487 | |
| 488 | color_rgba block_colors[3][8]; |
| 489 | for (uint32_t s = 0; s < 3; s++) |
| 490 | for (uint32_t i = 0; i < WEIGHT_VALS; i++) |
| 491 | { |
| 492 | for (uint32_t c = 0; c < 3; c++) |
| 493 | block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); |
| 494 | block_colors[s][i][3] = 255; |
| 495 | } |
| 496 | |
| 497 | for (uint32_t i = 0; i < 16; i++) |
| 498 | pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]]; |
| 499 | |
| 500 | return true; |
| 501 | } |
| 502 | |
| 503 | bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) |
| 504 | { |
| 505 | //const uint32_t SUBSETS = 2; |
| 506 | const uint32_t ENDPOINTS = 4; |
| 507 | const uint32_t COMPS = (mode == 7) ? 4 : 3; |
| 508 | const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; |
| 509 | const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); |
| 510 | const uint32_t PBITS = (mode == 1) ? 2 : 4; |
| 511 | const uint32_t SHARED_PBITS = (mode == 1) ? true : false; |
| 512 | const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; |
| 513 | |
| 514 | uint32_t bit_offset = 0; |
| 515 | const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); |
| 516 | |
| 517 | if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; |
| 518 | |
| 519 | const uint32_t part = read_bits32(pBuf, bit_offset, 6); |
| 520 | |
| 521 | color_rgba endpoints[ENDPOINTS]; |
| 522 | for (uint32_t c = 0; c < COMPS; c++) |
| 523 | for (uint32_t e = 0; e < ENDPOINTS; e++) |
| 524 | endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); |
| 525 | |
| 526 | uint32_t pbits[4]; |
| 527 | for (uint32_t p = 0; p < PBITS; p++) |
| 528 | pbits[p] = read_bits32(pBuf, bit_offset, 1); |
| 529 | |
| 530 | uint32_t weights[16]; |
| 531 | for (uint32_t i = 0; i < 16; i++) |
| 532 | weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); |
| 533 | |
| 534 | assert(bit_offset == 128); |
| 535 | |
| 536 | for (uint32_t e = 0; e < ENDPOINTS; e++) |
| 537 | for (uint32_t c = 0; c < 4; c++) |
| 538 | endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); |
| 539 | |
| 540 | color_rgba block_colors[2][8]; |
| 541 | for (uint32_t s = 0; s < 2; s++) |
| 542 | for (uint32_t i = 0; i < WEIGHT_VALS; i++) |
| 543 | { |
| 544 | for (uint32_t c = 0; c < COMPS; c++) |
| 545 | block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); |
| 546 | block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; |
| 547 | } |
| 548 | |
| 549 | for (uint32_t i = 0; i < 16; i++) |
| 550 | pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]]; |
| 551 | |
| 552 | return true; |
| 553 | } |
| 554 | |
| 555 | bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) |
| 556 | { |
| 557 | const uint32_t ENDPOINTS = 2; |
| 558 | const uint32_t COMPS = 4; |
| 559 | const uint32_t WEIGHT_BITS = 2; |
| 560 | const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; |
| 561 | const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; |
| 562 | const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8; |
| 563 | //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; |
| 564 | //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; |
| 565 | |
| 566 | uint32_t bit_offset = 0; |
| 567 | const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); |
| 568 | |
| 569 | if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; |
| 570 | |
| 571 | const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); |
| 572 | const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; |
| 573 | |
| 574 | color_rgba endpoints[ENDPOINTS]; |
| 575 | for (uint32_t c = 0; c < COMPS; c++) |
| 576 | for (uint32_t e = 0; e < ENDPOINTS; e++) |
| 577 | endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); |
| 578 | |
| 579 | const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; |
| 580 | |
| 581 | uint32_t weights[16], a_weights[16]; |
| 582 | |
| 583 | for (uint32_t i = 0; i < 16; i++) |
| 584 | (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); |
| 585 | |
| 586 | for (uint32_t i = 0; i < 16; i++) |
| 587 | (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0)); |
| 588 | |
| 589 | assert(bit_offset == 128); |
| 590 | |
| 591 | for (uint32_t e = 0; e < ENDPOINTS; e++) |
| 592 | for (uint32_t c = 0; c < 4; c++) |
| 593 | endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); |
| 594 | |
| 595 | color_rgba block_colors[8]; |
| 596 | for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) |
| 597 | for (uint32_t c = 0; c < 3; c++) |
| 598 | block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); |
| 599 | |
| 600 | for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) |
| 601 | block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); |
| 602 | |
| 603 | for (uint32_t i = 0; i < 16; i++) |
| 604 | { |
| 605 | pPixels[i] = block_colors[weights[i]]; |
| 606 | pPixels[i].a = block_colors[a_weights[i]].a; |
| 607 | if (comp_rot >= 1) |
| 608 | std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); |
| 609 | } |
| 610 | |
| 611 | return true; |
| 612 | } |
| 613 | |
| 614 | struct bc7_mode_6 |
| 615 | { |
| 616 | struct |
| 617 | { |
| 618 | uint64_t m_mode : 7; |
| 619 | uint64_t m_r0 : 7; |
| 620 | uint64_t m_r1 : 7; |
| 621 | uint64_t m_g0 : 7; |
| 622 | uint64_t m_g1 : 7; |
| 623 | uint64_t m_b0 : 7; |
| 624 | uint64_t m_b1 : 7; |
| 625 | uint64_t m_a0 : 7; |
| 626 | uint64_t m_a1 : 7; |
| 627 | uint64_t m_p0 : 1; |
| 628 | } m_lo; |
| 629 | |
| 630 | union |
| 631 | { |
| 632 | struct |
| 633 | { |
| 634 | uint64_t m_p1 : 1; |
| 635 | uint64_t m_s00 : 3; |
| 636 | uint64_t m_s10 : 4; |
| 637 | uint64_t m_s20 : 4; |
| 638 | uint64_t m_s30 : 4; |
| 639 | |
| 640 | uint64_t m_s01 : 4; |
| 641 | uint64_t m_s11 : 4; |
| 642 | uint64_t m_s21 : 4; |
| 643 | uint64_t m_s31 : 4; |
| 644 | |
| 645 | uint64_t m_s02 : 4; |
| 646 | uint64_t m_s12 : 4; |
| 647 | uint64_t m_s22 : 4; |
| 648 | uint64_t m_s32 : 4; |
| 649 | |
| 650 | uint64_t m_s03 : 4; |
| 651 | uint64_t m_s13 : 4; |
| 652 | uint64_t m_s23 : 4; |
| 653 | uint64_t m_s33 : 4; |
| 654 | |
| 655 | } m_hi; |
| 656 | |
| 657 | uint64_t m_hi_bits; |
| 658 | }; |
| 659 | }; |
| 660 | |
| 661 | bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) |
| 662 | { |
| 663 | static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16" ); |
| 664 | |
| 665 | const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits); |
| 666 | |
| 667 | if (block.m_lo.m_mode != (1 << 6)) |
| 668 | return false; |
| 669 | |
| 670 | const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); |
| 671 | const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); |
| 672 | const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); |
| 673 | const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); |
| 674 | const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); |
| 675 | const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); |
| 676 | const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); |
| 677 | const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); |
| 678 | |
| 679 | color_rgba vals[16]; |
| 680 | for (uint32_t i = 0; i < 16; i++) |
| 681 | { |
| 682 | const uint32_t w = basist::g_bc7_weights4[i]; |
| 683 | const uint32_t iw = 64 - w; |
| 684 | vals[i].set_noclamp_rgba( |
| 685 | (r0 * iw + r1 * w + 32) >> 6, |
| 686 | (g0 * iw + g1 * w + 32) >> 6, |
| 687 | (b0 * iw + b1 * w + 32) >> 6, |
| 688 | (a0 * iw + a1 * w + 32) >> 6); |
| 689 | } |
| 690 | |
| 691 | pPixels[0] = vals[block.m_hi.m_s00]; |
| 692 | pPixels[1] = vals[block.m_hi.m_s10]; |
| 693 | pPixels[2] = vals[block.m_hi.m_s20]; |
| 694 | pPixels[3] = vals[block.m_hi.m_s30]; |
| 695 | |
| 696 | pPixels[4] = vals[block.m_hi.m_s01]; |
| 697 | pPixels[5] = vals[block.m_hi.m_s11]; |
| 698 | pPixels[6] = vals[block.m_hi.m_s21]; |
| 699 | pPixels[7] = vals[block.m_hi.m_s31]; |
| 700 | |
| 701 | pPixels[8] = vals[block.m_hi.m_s02]; |
| 702 | pPixels[9] = vals[block.m_hi.m_s12]; |
| 703 | pPixels[10] = vals[block.m_hi.m_s22]; |
| 704 | pPixels[11] = vals[block.m_hi.m_s32]; |
| 705 | |
| 706 | pPixels[12] = vals[block.m_hi.m_s03]; |
| 707 | pPixels[13] = vals[block.m_hi.m_s13]; |
| 708 | pPixels[14] = vals[block.m_hi.m_s23]; |
| 709 | pPixels[15] = vals[block.m_hi.m_s33]; |
| 710 | |
| 711 | return true; |
| 712 | } |
| 713 | |
| 714 | bool unpack_bc7(const void *pBlock, color_rgba *pPixels) |
| 715 | { |
| 716 | const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0]; |
| 717 | |
| 718 | for (uint32_t mode = 0; mode <= 7; mode++) |
| 719 | { |
| 720 | if (first_byte & (1U << mode)) |
| 721 | { |
| 722 | switch (mode) |
| 723 | { |
| 724 | case 0: |
| 725 | case 2: |
| 726 | return unpack_bc7_mode0_2(mode, pBlock, pPixels); |
| 727 | case 1: |
| 728 | case 3: |
| 729 | case 7: |
| 730 | return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); |
| 731 | case 4: |
| 732 | case 5: |
| 733 | return unpack_bc7_mode4_5(mode, pBlock, pPixels); |
| 734 | case 6: |
| 735 | return unpack_bc7_mode6(pBlock, pPixels); |
| 736 | default: |
| 737 | break; |
| 738 | } |
| 739 | } |
| 740 | } |
| 741 | |
| 742 | return false; |
| 743 | } |
| 744 | |
| 745 | struct fxt1_block |
| 746 | { |
| 747 | union |
| 748 | { |
| 749 | struct |
| 750 | { |
| 751 | uint64_t m_t00 : 2; |
| 752 | uint64_t m_t01 : 2; |
| 753 | uint64_t m_t02 : 2; |
| 754 | uint64_t m_t03 : 2; |
| 755 | uint64_t m_t04 : 2; |
| 756 | uint64_t m_t05 : 2; |
| 757 | uint64_t m_t06 : 2; |
| 758 | uint64_t m_t07 : 2; |
| 759 | uint64_t m_t08 : 2; |
| 760 | uint64_t m_t09 : 2; |
| 761 | uint64_t m_t10 : 2; |
| 762 | uint64_t m_t11 : 2; |
| 763 | uint64_t m_t12 : 2; |
| 764 | uint64_t m_t13 : 2; |
| 765 | uint64_t m_t14 : 2; |
| 766 | uint64_t m_t15 : 2; |
| 767 | uint64_t m_t16 : 2; |
| 768 | uint64_t m_t17 : 2; |
| 769 | uint64_t m_t18 : 2; |
| 770 | uint64_t m_t19 : 2; |
| 771 | uint64_t m_t20 : 2; |
| 772 | uint64_t m_t21 : 2; |
| 773 | uint64_t m_t22 : 2; |
| 774 | uint64_t m_t23 : 2; |
| 775 | uint64_t m_t24 : 2; |
| 776 | uint64_t m_t25 : 2; |
| 777 | uint64_t m_t26 : 2; |
| 778 | uint64_t m_t27 : 2; |
| 779 | uint64_t m_t28 : 2; |
| 780 | uint64_t m_t29 : 2; |
| 781 | uint64_t m_t30 : 2; |
| 782 | uint64_t m_t31 : 2; |
| 783 | } m_lo; |
| 784 | uint64_t m_lo_bits; |
| 785 | uint8_t m_sels[8]; |
| 786 | }; |
| 787 | |
| 788 | union |
| 789 | { |
| 790 | struct |
| 791 | { |
| 792 | #ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING |
| 793 | // This is the format that 3DFX's DECOMP.EXE tool expects, which I'm assuming is what the actual 3DFX hardware wanted. |
| 794 | // Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation! |
| 795 | uint64_t m_b1 : 5; |
| 796 | uint64_t m_g1 : 5; |
| 797 | uint64_t m_r1 : 5; |
| 798 | uint64_t m_b0 : 5; |
| 799 | uint64_t m_g0 : 5; |
| 800 | uint64_t m_r0 : 5; |
| 801 | uint64_t m_b3 : 5; |
| 802 | uint64_t m_g3 : 5; |
| 803 | uint64_t m_r3 : 5; |
| 804 | uint64_t m_b2 : 5; |
| 805 | uint64_t m_g2 : 5; |
| 806 | uint64_t m_r2 : 5; |
| 807 | #else |
| 808 | // Intel's encoding, and the encoding in the OpenGL FXT1 spec. |
| 809 | uint64_t m_b0 : 5; |
| 810 | uint64_t m_g0 : 5; |
| 811 | uint64_t m_r0 : 5; |
| 812 | uint64_t m_b1 : 5; |
| 813 | uint64_t m_g1 : 5; |
| 814 | uint64_t m_r1 : 5; |
| 815 | uint64_t m_b2 : 5; |
| 816 | uint64_t m_g2 : 5; |
| 817 | uint64_t m_r2 : 5; |
| 818 | uint64_t m_b3 : 5; |
| 819 | uint64_t m_g3 : 5; |
| 820 | uint64_t m_r3 : 5; |
| 821 | #endif |
| 822 | uint64_t m_alpha : 1; |
| 823 | uint64_t m_glsb : 2; |
| 824 | uint64_t m_mode : 1; |
| 825 | } m_hi; |
| 826 | |
| 827 | uint64_t m_hi_bits; |
| 828 | }; |
| 829 | }; |
| 830 | |
| 831 | static color_rgba expand_565(const color_rgba& c) |
| 832 | { |
| 833 | return color_rgba((c.r << 3) | (c.r >> 2), (c.g << 2) | (c.g >> 4), (c.b << 3) | (c.b >> 2), 255); |
| 834 | } |
| 835 | |
| 836 | // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment. |
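| | // A CC_MIXED block is 128 bits covering an 8x4 texel footprint: 32 two-bit selectors in the low 64 bits, then four 5:5:5 colors plus the green LSB bits, the alpha flag, and the mode flag in the high 64 bits. |
| | // The left 4x4 half interpolates colors 0/1 and the right half colors 2/3, using the same 1/3-2/3 blends as BC1's four-color mode (see block0_colors/block1_colors below). |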
| 837 | bool unpack_fxt1(const void *p, color_rgba *pPixels) |
| 838 | { |
| 839 | const fxt1_block* pBlock = static_cast<const fxt1_block*>(p); |
| 840 | |
| 841 | if (pBlock->m_hi.m_mode == 0) |
| 842 | return false; |
| 843 | if (pBlock->m_hi.m_alpha == 1) |
| 844 | return false; |
| 845 | |
| 846 | color_rgba colors[4]; |
| 847 | |
| 848 | colors[0].r = pBlock->m_hi.m_r0; |
| 849 | colors[0].g = (uint8_t)((pBlock->m_hi.m_g0 << 1) | ((pBlock->m_lo.m_t00 >> 1) ^ (pBlock->m_hi.m_glsb & 1))); |
| 850 | colors[0].b = pBlock->m_hi.m_b0; |
| 851 | colors[0].a = 255; |
| 852 | |
| 853 | colors[1].r = pBlock->m_hi.m_r1; |
| 854 | colors[1].g = (uint8_t)((pBlock->m_hi.m_g1 << 1) | (pBlock->m_hi.m_glsb & 1)); |
| 855 | colors[1].b = pBlock->m_hi.m_b1; |
| 856 | colors[1].a = 255; |
| 857 | |
| 858 | colors[2].r = pBlock->m_hi.m_r2; |
| 859 | colors[2].g = (uint8_t)((pBlock->m_hi.m_g2 << 1) | ((pBlock->m_lo.m_t16 >> 1) ^ (pBlock->m_hi.m_glsb >> 1))); |
| 860 | colors[2].b = pBlock->m_hi.m_b2; |
| 861 | colors[2].a = 255; |
| 862 | |
| 863 | colors[3].r = pBlock->m_hi.m_r3; |
| 864 | colors[3].g = (uint8_t)((pBlock->m_hi.m_g3 << 1) | (pBlock->m_hi.m_glsb >> 1)); |
| 865 | colors[3].b = pBlock->m_hi.m_b3; |
| 866 | colors[3].a = 255; |
| 867 | |
| 868 | for (uint32_t i = 0; i < 4; i++) |
| 869 | colors[i] = expand_565(colors[i]); |
| 870 | |
| 871 | color_rgba block0_colors[4]; |
| 872 | block0_colors[0] = colors[0]; |
| 873 | block0_colors[1] = color_rgba((colors[0].r * 2 + colors[1].r + 1) / 3, (colors[0].g * 2 + colors[1].g + 1) / 3, (colors[0].b * 2 + colors[1].b + 1) / 3, 255); |
| 874 | block0_colors[2] = color_rgba((colors[1].r * 2 + colors[0].r + 1) / 3, (colors[1].g * 2 + colors[0].g + 1) / 3, (colors[1].b * 2 + colors[0].b + 1) / 3, 255); |
| 875 | block0_colors[3] = colors[1]; |
| 876 | |
| 877 | for (uint32_t i = 0; i < 16; i++) |
| 878 | { |
| 879 | const uint32_t sel = (pBlock->m_sels[i >> 2] >> ((i & 3) * 2)) & 3; |
| 880 | |
| 881 | const uint32_t x = i & 3; |
| 882 | const uint32_t y = i >> 2; |
| 883 | pPixels[x + y * 8] = block0_colors[sel]; |
| 884 | } |
| 885 | |
| 886 | color_rgba block1_colors[4]; |
| 887 | block1_colors[0] = colors[2]; |
| 888 | block1_colors[1] = color_rgba((colors[2].r * 2 + colors[3].r + 1) / 3, (colors[2].g * 2 + colors[3].g + 1) / 3, (colors[2].b * 2 + colors[3].b + 1) / 3, 255); |
| 889 | block1_colors[2] = color_rgba((colors[3].r * 2 + colors[2].r + 1) / 3, (colors[3].g * 2 + colors[2].g + 1) / 3, (colors[3].b * 2 + colors[2].b + 1) / 3, 255); |
| 890 | block1_colors[3] = colors[3]; |
| 891 | |
| 892 | for (uint32_t i = 0; i < 16; i++) |
| 893 | { |
| 894 | const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3; |
| 895 | |
| 896 | const uint32_t x = i & 3; |
| 897 | const uint32_t y = i >> 2; |
| 898 | pPixels[4 + x + y * 8] = block1_colors[sel]; |
| 899 | } |
| 900 | |
| 901 | return true; |
| 902 | } |
| 903 | |
| 904 | struct pvrtc2_block |
| 905 | { |
| 906 | uint8_t m_modulation[4]; |
| 907 | |
| 908 | union |
| 909 | { |
| 910 | union |
| 911 | { |
| 912 | // Opaque mode: RGB colora=554 and colorb=555 |
| 913 | struct |
| 914 | { |
| 915 | uint32_t m_mod_flag : 1; |
| 916 | uint32_t m_blue_a : 4; |
| 917 | uint32_t m_green_a : 5; |
| 918 | uint32_t m_red_a : 5; |
| 919 | uint32_t m_hard_flag : 1; |
| 920 | uint32_t m_blue_b : 5; |
| 921 | uint32_t m_green_b : 5; |
| 922 | uint32_t m_red_b : 5; |
| 923 | uint32_t m_opaque_flag : 1; |
| 924 | |
| 925 | } m_opaque_color_data; |
| 926 | |
| 927 | // Transparent mode: RGBA colora=4433 and colorb=4443 |
| 928 | struct |
| 929 | { |
| 930 | uint32_t m_mod_flag : 1; |
| 931 | uint32_t m_blue_a : 3; |
| 932 | uint32_t m_green_a : 4; |
| 933 | uint32_t m_red_a : 4; |
| 934 | uint32_t m_alpha_a : 3; |
| 935 | uint32_t m_hard_flag : 1; |
| 936 | uint32_t m_blue_b : 4; |
| 937 | uint32_t m_green_b : 4; |
| 938 | uint32_t m_red_b : 4; |
| 939 | uint32_t m_alpha_b : 3; |
| 940 | uint32_t m_opaque_flag : 1; |
| 941 | |
| 942 | } m_trans_color_data; |
| 943 | }; |
| 944 | |
| 945 | uint32_t m_color_data_bits; |
| 946 | }; |
| 947 | }; |
| 948 | |
| 949 | static color_rgba convert_rgb_555_to_888(const color_rgba& col) |
| 950 | { |
| 951 | return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255); |
| 952 | } |
| 953 | |
| 954 | static color_rgba convert_rgba_5554_to_8888(const color_rgba& col) |
| 955 | { |
| 956 | return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]); |
| 957 | } |
| 958 | |
| 959 | // PVRTC2 support is currently limited to what our transcoder outputs (non-interpolated, hard_flag=1, modulation=0). In this mode, PVRTC2 behaves much like BC1/ATC. |
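| | // In this restricted mode the block decodes like BC1/ATC: selector 0 is endpoint A, selector 3 is endpoint B, and selectors 1 and 2 are the 5/8-3/8 blends of the two, with one 2-bit selector per texel. |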
| 960 | bool unpack_pvrtc2(const void *p, color_rgba *pPixels) |
| 961 | { |
| 962 | const pvrtc2_block* pBlock = static_cast<const pvrtc2_block*>(p); |
| 963 | |
| 964 | if ((!pBlock->m_opaque_color_data.m_hard_flag) || (pBlock->m_opaque_color_data.m_mod_flag)) |
| 965 | { |
| 966 | // This mode isn't supported by the transcoder, so we aren't bothering with it here. |
| 967 | return false; |
| 968 | } |
| 969 | |
| 970 | color_rgba colors[4]; |
| 971 | |
| 972 | if (pBlock->m_opaque_color_data.m_opaque_flag) |
| 973 | { |
| 974 | // colora=554 |
| 975 | color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255); |
| 976 | |
| 977 | // colora=555 |
| 978 | color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255); |
| 979 | |
| 980 | colors[0] = convert_rgb_555_to_888(color_a); |
| 981 | colors[3] = convert_rgb_555_to_888(color_b); |
| 982 | |
| 983 | colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, 255); |
| 984 | colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, 255); |
| 985 | } |
| 986 | else |
| 987 | { |
| 988 | // colora=4433 |
| 989 | color_rgba color_a( |
| 990 | (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), |
| 991 | (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3), |
| 992 | (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), |
| 993 | pBlock->m_trans_color_data.m_alpha_a << 1); |
| 994 | |
| 995 | //colorb=4443 |
| 996 | color_rgba color_b( |
| 997 | (pBlock->m_trans_color_data.m_red_b << 1) | (pBlock->m_trans_color_data.m_red_b >> 3), |
| 998 | (pBlock->m_trans_color_data.m_green_b << 1) | (pBlock->m_trans_color_data.m_green_b >> 3), |
| 999 | (pBlock->m_trans_color_data.m_blue_b << 1) | (pBlock->m_trans_color_data.m_blue_b >> 3), |
| 1000 | (pBlock->m_trans_color_data.m_alpha_b << 1) | 1); |
| 1001 | |
| 1002 | colors[0] = convert_rgba_5554_to_8888(color_a); |
| 1003 | colors[3] = convert_rgba_5554_to_8888(color_b); |
| 1004 | } |
| 1005 | |
| 1006 | colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, (colors[0].a * 5 + colors[3].a * 3) / 8); |
| 1007 | colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, (colors[0].a * 3 + colors[3].a * 5) / 8); |
| 1008 | |
| 1009 | for (uint32_t i = 0; i < 16; i++) |
| 1010 | { |
| 1011 | const uint32_t sel = (pBlock->m_modulation[i >> 2] >> ((i & 3) * 2)) & 3; |
| 1012 | pPixels[i] = colors[sel]; |
| 1013 | } |
| 1014 | |
| 1015 | return true; |
| 1016 | } |
| 1017 | |
| 1018 | struct etc2_eac_r11 |
| 1019 | { |
| 1020 | uint64_t m_base : 8; |
| 1021 | uint64_t m_table : 4; |
| 1022 | uint64_t m_mul : 4; |
| 1023 | uint64_t m_sels_0 : 8; |
| 1024 | uint64_t m_sels_1 : 8; |
| 1025 | uint64_t m_sels_2 : 8; |
| 1026 | uint64_t m_sels_3 : 8; |
| 1027 | uint64_t m_sels_4 : 8; |
| 1028 | uint64_t m_sels_5 : 8; |
| 1029 | |
| 1030 | uint64_t get_sels() const |
| 1031 | { |
| 1032 | return ((uint64_t)m_sels_0 << 40U) | ((uint64_t)m_sels_1 << 32U) | ((uint64_t)m_sels_2 << 24U) | ((uint64_t)m_sels_3 << 16U) | ((uint64_t)m_sels_4 << 8U) | m_sels_5; |
| 1033 | } |
| 1034 | |
| 1035 | void set_sels(uint64_t v) |
| 1036 | { |
| 1037 | m_sels_0 = (v >> 40U) & 0xFF; |
| 1038 | m_sels_1 = (v >> 32U) & 0xFF; |
| 1039 | m_sels_2 = (v >> 24U) & 0xFF; |
| 1040 | m_sels_3 = (v >> 16U) & 0xFF; |
| 1041 | m_sels_4 = (v >> 8U) & 0xFF; |
| 1042 | m_sels_5 = v & 0xFF; |
| 1043 | } |
| 1044 | }; |
| 1045 | |
| 1046 | struct etc2_eac_rg11 |
| 1047 | { |
| 1048 | etc2_eac_r11 m_c[2]; |
| 1049 | }; |
| 1050 | |
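| | // EAC R11: each texel reconstructs an 11-bit value as (base * 8 + 4) + table[selector] * (multiplier * 8) (or * 1 when the multiplier is zero), clamps it to [0, 2047], then rescales it to 8 bits for the output image. |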
| 1051 | void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c) |
| 1052 | { |
| 1053 | const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p); |
| 1054 | const uint64_t sels = pBlock->get_sels(); |
| 1055 | |
| 1056 | const int base = (int)pBlock->m_base * 8 + 4; |
| 1057 | const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * 8) : 1; |
| 1058 | const int table = (int)pBlock->m_table; |
| 1059 | |
| 1060 | for (uint32_t y = 0; y < 4; y++) |
| 1061 | { |
| 1062 | for (uint32_t x = 0; x < 4; x++) |
| 1063 | { |
| 1064 | const uint32_t shift = 45 - ((y + x * 4) * 3); |
| 1065 | |
| 1066 | const uint32_t sel = (uint32_t)((sels >> shift) & 7); |
| 1067 | |
| 1068 | int val = base + g_etc2_eac_tables[table][sel] * mul; |
| 1069 | val = clamp<int>(val, 0, 2047); |
| 1070 | |
| 1071 | // Convert to 8-bits with rounding |
| 1072 | //pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047); |
| 1073 | pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1023) / 2047); |
| 1074 | |
| 1075 | } // x |
| 1076 | } // y |
| 1077 | } |
| 1078 | |
| 1079 | void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels) |
| 1080 | { |
| 1081 | for (uint32_t c = 0; c < 2; c++) |
| 1082 | { |
| 1083 | const etc2_eac_r11* pBlock = &static_cast<const etc2_eac_rg11*>(p)->m_c[c]; |
| 1084 | |
| 1085 | unpack_etc2_eac_r(pBlock, pPixels, c); |
| 1086 | } |
| 1087 | } |
| 1088 | |
| 1089 | void unpack_uastc(const void* p, color_rgba* pPixels) |
| 1090 | { |
| 1091 | basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false); |
| 1092 | } |
| 1093 | |
| 1094 | // Unpacks to RGBA, R, RG, or A |
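| | // Note that single- and dual-channel formats (BC4, BC5, EAC R11/RG11, ETC2 alpha) only write the channels they carry, so callers should pre-initialize the destination pixels (see gpu_image::unpack(), which clears them to black first). |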
| 1095 | bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) |
| 1096 | { |
| 1097 | switch (fmt) |
| 1098 | { |
| 1099 | case texture_format::cBC1: |
| 1100 | { |
| 1101 | unpack_bc1(pBlock, pPixels, true); |
| 1102 | break; |
| 1103 | } |
| 1104 | case texture_format::cBC1_NV: |
| 1105 | { |
| 1106 | unpack_bc1_nv(pBlock, pPixels, true); |
| 1107 | break; |
| 1108 | } |
| 1109 | case texture_format::cBC1_AMD: |
| 1110 | { |
| 1111 | unpack_bc1_amd(pBlock, pPixels, true); |
| 1112 | break; |
| 1113 | } |
| 1114 | case texture_format::cBC3: |
| 1115 | { |
| 1116 | return unpack_bc3(pBlock, pPixels); |
| 1117 | } |
| 1118 | case texture_format::cBC4: |
| 1119 | { |
| 1120 | // Unpack to R |
| 1121 | unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba)); |
| 1122 | break; |
| 1123 | } |
| 1124 | case texture_format::cBC5: |
| 1125 | { |
| 1126 | unpack_bc5(pBlock, pPixels); |
| 1127 | break; |
| 1128 | } |
| 1129 | case texture_format::cBC7: |
| 1130 | { |
| 1131 | return unpack_bc7(pBlock, pPixels); |
| 1132 | } |
| 1133 | // Full ETC2 color blocks (planar/T/H modes) are currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color) |
| 1134 | case texture_format::cETC2_RGB: |
| 1135 | case texture_format::cETC1: |
| 1136 | case texture_format::cETC1S: |
| 1137 | { |
| 1138 | return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels); |
| 1139 | } |
| 1140 | case texture_format::cETC2_RGBA: |
| 1141 | { |
| 1142 | if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels)) |
| 1143 | return false; |
| 1144 | unpack_etc2_eac(pBlock, pPixels); |
| 1145 | break; |
| 1146 | } |
| 1147 | case texture_format::cETC2_ALPHA: |
| 1148 | { |
| 1149 | // Unpack to A |
| 1150 | unpack_etc2_eac(pBlock, pPixels); |
| 1151 | break; |
| 1152 | } |
| 1153 | case texture_format::cASTC4x4: |
| 1154 | { |
| 1155 | #if BASISU_USE_ASTC_DECOMPRESS |
| 1156 | const bool astc_srgb = false; |
| 1157 | basisu_astc::astc::decompress(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4); |
| 1158 | #else |
| 1159 | memset(pPixels, 255, 16 * sizeof(color_rgba)); |
| 1160 | #endif |
| 1161 | break; |
| 1162 | } |
| 1163 | case texture_format::cATC_RGB: |
| 1164 | { |
| 1165 | unpack_atc(pBlock, pPixels); |
| 1166 | break; |
| 1167 | } |
| 1168 | case texture_format::cATC_RGBA_INTERPOLATED_ALPHA: |
| 1169 | { |
| 1170 | unpack_atc(static_cast<const uint8_t*>(pBlock) + 8, pPixels); |
| 1171 | unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba)); |
| 1172 | break; |
| 1173 | } |
| 1174 | case texture_format::cFXT1_RGB: |
| 1175 | { |
| 1176 | unpack_fxt1(pBlock, pPixels); |
| 1177 | break; |
| 1178 | } |
| 1179 | case texture_format::cPVRTC2_4_RGBA: |
| 1180 | { |
| 1181 | unpack_pvrtc2(pBlock, pPixels); |
| 1182 | break; |
| 1183 | } |
| 1184 | case texture_format::cETC2_R11_EAC: |
| 1185 | { |
| 1186 | unpack_etc2_eac_r(static_cast<const etc2_eac_r11 *>(pBlock), pPixels, 0); |
| 1187 | break; |
| 1188 | } |
| 1189 | case texture_format::cETC2_RG11_EAC: |
| 1190 | { |
| 1191 | unpack_etc2_eac_rg(pBlock, pPixels); |
| 1192 | break; |
| 1193 | } |
| 1194 | case texture_format::cUASTC4x4: |
| 1195 | { |
| 1196 | unpack_uastc(pBlock, pPixels); |
| 1197 | break; |
| 1198 | } |
| 1199 | default: |
| 1200 | { |
| 1201 | assert(0); |
| 1202 | // TODO |
| 1203 | return false; |
| 1204 | } |
| 1205 | } |
| 1206 | return true; |
| 1207 | } |
| 1208 | |
| 1209 | bool gpu_image::unpack(image& img) const |
| 1210 | { |
| 1211 | img.resize(get_pixel_width(), get_pixel_height()); |
| 1212 | img.set_all(g_black_color); |
| 1213 | |
| 1214 | if (!img.get_width() || !img.get_height()) |
| 1215 | return true; |
| 1216 | |
| 1217 | if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA)) |
| 1218 | { |
| 1219 | pvrtc4_image pi(m_width, m_height); |
| 1220 | |
| 1221 | if (get_total_blocks() != pi.get_total_blocks()) |
| 1222 | return false; |
| 1223 | |
| 1224 | memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes()); |
| 1225 | |
| 1226 | pi.deswizzle(); |
| 1227 | |
| 1228 | pi.unpack_all_pixels(img); |
| 1229 | |
| 1230 | return true; |
| 1231 | } |
| 1232 | |
| 1233 | assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize)); |
| 1234 | color_rgba pixels[cMaxBlockSize * cMaxBlockSize]; |
| 1235 | for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++) |
| 1236 | pixels[i] = g_black_color; |
| 1237 | |
| 1238 | bool success = true; |
| 1239 | |
| 1240 | for (uint32_t by = 0; by < m_blocks_y; by++) |
| 1241 | { |
| 1242 | for (uint32_t bx = 0; bx < m_blocks_x; bx++) |
| 1243 | { |
| 1244 | const void* pBlock = get_block_ptr(bx, by); |
| 1245 | |
| 1246 | if (!unpack_block(m_fmt, pBlock, pixels)) |
| 1247 | success = false; |
| 1248 | |
| 1249 | img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height); |
| 1250 | } // bx |
| 1251 | } // by |
| 1252 | |
| 1253 | return success; |
| 1254 | } |
| 1255 | |
| 1256 | static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; |
| 1257 | |
| 1258 | // KTX/GL enums |
| 1259 | enum |
| 1260 | { |
| 1261 | KTX_ENDIAN = 0x04030201, |
| 1262 | KTX_OPPOSITE_ENDIAN = 0x01020304, |
| 1263 | KTX_ETC1_RGB8_OES = 0x8D64, |
| 1264 | KTX_RED = 0x1903, |
| 1265 | KTX_RG = 0x8227, |
| 1266 | KTX_RGB = 0x1907, |
| 1267 | KTX_RGBA = 0x1908, |
| 1268 | KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0, |
| 1269 | KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3, |
| 1270 | KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB, |
| 1271 | KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD, |
| 1272 | KTX_COMPRESSED_RGB8_ETC2 = 0x9274, |
| 1273 | KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278, |
| 1274 | KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C, |
| 1275 | KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D, |
| 1276 | KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00, |
| 1277 | KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02, |
| 1278 | KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0, |
| 1279 | KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0, |
| 1280 | KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value! |
| 1281 | KTX_ATC_RGB_AMD = 0x8C92, |
| 1282 | KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE, |
| 1283 | KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0, |
| 1284 | KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1, |
| 1285 | KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138, |
| 1286 | KTX_COMPRESSED_R11_EAC = 0x9270, |
| 1287 | KTX_COMPRESSED_RG11_EAC = 0x9272 |
| 1288 | }; |
| 1289 | |
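| | // KTX v1 header: a 12-byte file identifier followed by thirteen 32-bit fields (64 bytes total); m_endianness is set to KTX_ENDIAN so readers can detect the writer's byte order. |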
| 1290 | struct ktx_header |
| 1291 | { |
| 1292 | uint8_t m_identifier[12]; |
| 1293 | packed_uint<4> m_endianness; |
| 1294 | packed_uint<4> m_glType; |
| 1295 | packed_uint<4> m_glTypeSize; |
| 1296 | packed_uint<4> m_glFormat; |
| 1297 | packed_uint<4> m_glInternalFormat; |
| 1298 | packed_uint<4> m_glBaseInternalFormat; |
| 1299 | packed_uint<4> m_pixelWidth; |
| 1300 | packed_uint<4> m_pixelHeight; |
| 1301 | packed_uint<4> m_pixelDepth; |
| 1302 | packed_uint<4> m_numberOfArrayElements; |
| 1303 | packed_uint<4> m_numberOfFaces; |
| 1304 | packed_uint<4> m_numberOfMipmapLevels; |
| 1305 | packed_uint<4> m_bytesOfKeyValueData; |
| 1306 | |
| 1307 | void clear() { clear_obj(*this); } |
| 1308 | }; |
| 1309 | |
| 1310 | // Input is a texture array of mipmapped gpu_image objects: gpu_images[array_index][level_index] |
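| | // Minimal usage sketch (assuming a single, already-encoded gpu_image named "gi"; this mirrors what write_compressed_texture_file() does below): |
| | //   basisu::vector<gpu_image_vec> images; |
| | //   enlarge_vector(images, 1)->push_back(gi); |
| | //   uint8_vec ktx_data; |
| | //   if (create_ktx_texture_file(ktx_data, images, false)) { /* write ktx_data to disk, e.g. with write_vec_to_file() */ } |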
| 1311 | bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag) |
| 1312 | { |
| 1313 | if (!gpu_images.size()) |
| 1314 | { |
| 1315 | assert(0); |
| 1316 | return false; |
| 1317 | } |
| 1318 | |
| 1319 | uint32_t width = 0, height = 0, total_levels = 0; |
| 1320 | basisu::texture_format fmt = texture_format::cInvalidTextureFormat; |
| 1321 | |
| 1322 | if (cubemap_flag) |
| 1323 | { |
| 1324 | if ((gpu_images.size() % 6) != 0) |
| 1325 | { |
| 1326 | assert(0); |
| 1327 | return false; |
| 1328 | } |
| 1329 | } |
| 1330 | |
| 1331 | for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) |
| 1332 | { |
| 1333 | const gpu_image_vec &levels = gpu_images[array_index]; |
| 1334 | |
| 1335 | if (!levels.size()) |
| 1336 | { |
| 1337 | // Empty mip chain |
| 1338 | assert(0); |
| 1339 | return false; |
| 1340 | } |
| 1341 | |
| 1342 | if (!array_index) |
| 1343 | { |
| 1344 | width = levels[0].get_pixel_width(); |
| 1345 | height = levels[0].get_pixel_height(); |
| 1346 | total_levels = (uint32_t)levels.size(); |
| 1347 | fmt = levels[0].get_format(); |
| 1348 | } |
| 1349 | else |
| 1350 | { |
| 1351 | if ((width != levels[0].get_pixel_width()) || |
| 1352 | (height != levels[0].get_pixel_height()) || |
| 1353 | (total_levels != levels.size())) |
| 1354 | { |
| 1355 | // All cubemap/texture array faces must be the same dimension |
| 1356 | assert(0); |
| 1357 | return false; |
| 1358 | } |
| 1359 | } |
| 1360 | |
| 1361 | for (uint32_t level_index = 0; level_index < levels.size(); level_index++) |
| 1362 | { |
| 1363 | if (level_index) |
| 1364 | { |
| 1365 | if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) || |
| 1366 | (levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) ) |
| 1367 | { |
| 1368 | // Malformed mipmap chain |
| 1369 | assert(0); |
| 1370 | return false; |
| 1371 | } |
| 1372 | } |
| 1373 | |
| 1374 | if (fmt != levels[level_index].get_format()) |
| 1375 | { |
| 1376 | // All input textures must use the same GPU format |
| 1377 | assert(0); |
| 1378 | return false; |
| 1379 | } |
| 1380 | } |
| 1381 | } |
| 1382 | |
| 1383 | uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB; |
| 1384 | |
| 1385 | switch (fmt) |
| 1386 | { |
| 1387 | case texture_format::cBC1: |
| 1388 | case texture_format::cBC1_NV: |
| 1389 | case texture_format::cBC1_AMD: |
| 1390 | { |
| 1391 | internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT; |
| 1392 | break; |
| 1393 | } |
| 1394 | case texture_format::cBC3: |
| 1395 | { |
| 1396 | internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT; |
| 1397 | base_internal_fmt = KTX_RGBA; |
| 1398 | break; |
| 1399 | } |
| 1400 | case texture_format::cBC4: |
| 1401 | { |
| 1402 | internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT; |
| 1403 | base_internal_fmt = KTX_RED; |
| 1404 | break; |
| 1405 | } |
| 1406 | case texture_format::cBC5: |
| 1407 | { |
| 1408 | internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT; |
| 1409 | base_internal_fmt = KTX_RG; |
| 1410 | break; |
| 1411 | } |
| 1412 | case texture_format::cETC1: |
| 1413 | case texture_format::cETC1S: |
| 1414 | { |
| 1415 | internal_fmt = KTX_ETC1_RGB8_OES; |
| 1416 | break; |
| 1417 | } |
| 1418 | case texture_format::cETC2_RGB: |
| 1419 | { |
| 1420 | internal_fmt = KTX_COMPRESSED_RGB8_ETC2; |
| 1421 | break; |
| 1422 | } |
| 1423 | case texture_format::cETC2_RGBA: |
| 1424 | { |
| 1425 | internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC; |
| 1426 | base_internal_fmt = KTX_RGBA; |
| 1427 | break; |
| 1428 | } |
| 1429 | case texture_format::cBC7: |
| 1430 | { |
| 1431 | internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM; |
| 1432 | base_internal_fmt = KTX_RGBA; |
| 1433 | break; |
| 1434 | } |
| 1435 | case texture_format::cPVRTC1_4_RGB: |
| 1436 | { |
| 1437 | internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG; |
| 1438 | break; |
| 1439 | } |
| 1440 | case texture_format::cPVRTC1_4_RGBA: |
| 1441 | { |
| 1442 | internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG; |
| 1443 | base_internal_fmt = KTX_RGBA; |
| 1444 | break; |
| 1445 | } |
| 1446 | case texture_format::cASTC4x4: |
| 1447 | { |
| 1448 | internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR; |
| 1449 | base_internal_fmt = KTX_RGBA; |
| 1450 | break; |
| 1451 | } |
| 1452 | case texture_format::cATC_RGB: |
| 1453 | { |
| 1454 | internal_fmt = KTX_ATC_RGB_AMD; |
| 1455 | break; |
| 1456 | } |
| 1457 | case texture_format::cATC_RGBA_INTERPOLATED_ALPHA: |
| 1458 | { |
| 1459 | internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD; |
| 1460 | base_internal_fmt = KTX_RGBA; |
| 1461 | break; |
| 1462 | } |
| 1463 | case texture_format::cETC2_R11_EAC: |
| 1464 | { |
| 1465 | internal_fmt = KTX_COMPRESSED_R11_EAC; |
| 1466 | base_internal_fmt = KTX_RED; |
| 1467 | break; |
| 1468 | } |
| 1469 | case texture_format::cETC2_RG11_EAC: |
| 1470 | { |
| 1471 | internal_fmt = KTX_COMPRESSED_RG11_EAC; |
| 1472 | base_internal_fmt = KTX_RG; |
| 1473 | break; |
| 1474 | } |
| 1475 | case texture_format::cUASTC4x4: |
| 1476 | { |
| 1477 | internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR; |
| 1478 | base_internal_fmt = KTX_RGBA; |
| 1479 | break; |
| 1480 | } |
| 1481 | case texture_format::cFXT1_RGB: |
| 1482 | { |
| 1483 | internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX; |
| 1484 | break; |
| 1485 | } |
| 1486 | case texture_format::cPVRTC2_4_RGBA: |
| 1487 | { |
| 1488 | internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG; |
| 1489 | base_internal_fmt = KTX_RGBA; |
| 1490 | break; |
| 1491 | } |
| 1492 | default: |
| 1493 | { |
| 1494 | // TODO |
| 1495 | assert(0); |
| 1496 | return false; |
| 1497 | } |
| 1498 | } |
| 1499 | |
| 1500 | ktx_header header; |
| 1501 | header.clear(); |
| 1502 | memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id)); |
| 1503 | header.m_endianness = KTX_ENDIAN; |
| 1504 | |
| 1505 | header.m_pixelWidth = width; |
| 1506 | header.m_pixelHeight = height; |
| 1507 | |
| 1508 | header.m_glTypeSize = 1; |
| 1509 | |
| 1510 | header.m_glInternalFormat = internal_fmt; |
| 1511 | header.m_glBaseInternalFormat = base_internal_fmt; |
| 1512 | |
| 1513 | header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size()); |
| 1514 | if (header.m_numberOfArrayElements == 1) |
| 1515 | header.m_numberOfArrayElements = 0; |
| 1516 | |
| 1517 | header.m_numberOfMipmapLevels = total_levels; |
| 1518 | header.m_numberOfFaces = cubemap_flag ? 6 : 1; |
| 1519 | |
| 1520 | append_vector(ktx_data, (uint8_t *)&header, sizeof(header)); |
| 1521 | |
| 1522 | for (uint32_t level_index = 0; level_index < total_levels; level_index++) |
| 1523 | { |
| 1524 | uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes(); |
| 1525 | |
| 1526 | if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1)) |
| 1527 | { |
| 1528 | img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements); |
| 1529 | } |
| 1530 | |
| 1531 | assert(img_size && ((img_size & 3) == 0)); |
| 1532 | |
| 1533 | packed_uint<4> packed_img_size(img_size); |
| 1534 | append_vector(ktx_data, (uint8_t *)&packed_img_size, sizeof(packed_img_size)); |
| 1535 | |
| 1536 | uint32_t bytes_written = 0; |
| 1537 | |
| 1538 | for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++) |
| 1539 | { |
| 1540 | for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++) |
| 1541 | { |
| 1542 | const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index]; |
| 1543 | |
| 1544 | append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes()); |
| 1545 | |
| 1546 | bytes_written += img.get_size_in_bytes(); |
| 1547 | } |
| 1548 | |
| 1549 | } // array_index |
| 1550 | |
| 1551 | } // level_index |
| 1552 | |
| 1553 | return true; |
| 1554 | } |
| 1555 | |
| 1556 | bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag) |
| 1557 | { |
| 1558 | std::string extension(string_tolower(string_get_extension(pFilename))); |
| 1559 | |
| 1560 | uint8_vec filedata; |
| 1561 | if (extension == "ktx" ) |
| 1562 | { |
| 1563 | if (!create_ktx_texture_file(filedata, g, cubemap_flag)) |
| 1564 | return false; |
| 1565 | } |
| 1566 | else if (extension == "pvr" ) |
| 1567 | { |
| 1568 | // TODO |
| 1569 | return false; |
| 1570 | } |
| 1571 | else if (extension == "dds" ) |
| 1572 | { |
| 1573 | // TODO |
| 1574 | return false; |
| 1575 | } |
| 1576 | else |
| 1577 | { |
| 1578 | // unsupported texture format |
| 1579 | assert(0); |
| 1580 | return false; |
| 1581 | } |
| 1582 | |
| 1583 | return basisu::write_vec_to_file(pFilename, filedata); |
| 1584 | } |
| 1585 | |
| 1586 | bool write_compressed_texture_file(const char* pFilename, const gpu_image& g) |
| 1587 | { |
| 1588 | basisu::vector<gpu_image_vec> v; |
| 1589 | enlarge_vector(v, 1)->push_back(g); |
| 1590 | return write_compressed_texture_file(pFilename, v, false); |
| 1591 | } |
| 1592 | |
| 1593 | //const uint32_t OUT_FILE_MAGIC = 'TEXC'; |
| 1594 | struct out_file_header |
| 1595 | { |
| 1596 | packed_uint<4> m_magic; |
| 1597 | packed_uint<4> m_pad; |
| 1598 | packed_uint<4> m_width; |
| 1599 | packed_uint<4> m_height; |
| 1600 | }; |
| 1601 | |
| 1602 | // As no modern tool supports FXT1-format .KTX files, we write .OUT files instead, so that 3DFX's original tools (shipped in 1999) can decode our encoded output. |
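| | // The .OUT header written below is 16 bytes: a magic word (bytes 'C','X','E','T', i.e. the commented-out 'TEXC' magic stored little-endian), a pad word, and the block-padded width and height in pixels; the raw FXT1 block data follows immediately. |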
| 1603 | bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi) |
| 1604 | { |
| 1605 | out_file_header hdr; |
| 1606 | //hdr.m_magic = OUT_FILE_MAGIC; |
| 1607 | hdr.m_magic.m_bytes[0] = 67; |
| 1608 | hdr.m_magic.m_bytes[1] = 88; |
| 1609 | hdr.m_magic.m_bytes[2] = 69; |
| 1610 | hdr.m_magic.m_bytes[3] = 84; |
| 1611 | hdr.m_pad = 0; |
| 1612 | hdr.m_width = gi.get_blocks_x() * 8; |
| 1613 | hdr.m_height = gi.get_blocks_y() * 4; |
| 1614 | |
| 1615 | FILE* pFile = nullptr; |
| 1616 | #ifdef _WIN32 |
| 1617 | fopen_s(&pFile, pFilename, "wb" ); |
| 1618 | #else |
| 1619 | pFile = fopen(pFilename, "wb" ); |
| 1620 | #endif |
| 1621 | if (!pFile) |
| 1622 | return false; |
| 1623 | |
| 1624 | fwrite(&hdr, sizeof(hdr), 1, pFile); |
| 1625 | fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile); |
| 1626 | |
| 1627 | return fclose(pFile) != EOF; |
| 1628 | } |
| 1629 | } // basisu |
| 1630 | |
| 1631 | |