1 | /* ----------------------------------------------------------------------------- |
2 | |
3 | Copyright (c) 2006 Simon Brown si@sjbrown.co.uk |
4 | |
5 | Permission is hereby granted, free of charge, to any person obtaining |
6 | a copy of this software and associated documentation files (the |
7 | "Software"), to deal in the Software without restriction, including |
8 | without limitation the rights to use, copy, modify, merge, publish, |
9 | distribute, sublicense, and/or sell copies of the Software, and to |
10 | permit persons to whom the Software is furnished to do so, subject to |
11 | the following conditions: |
12 | |
13 | The above copyright notice and this permission notice shall be included |
14 | in all copies or substantial portions of the Software. |
15 | |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | |
24 | -------------------------------------------------------------------------- */ |
25 | |
26 | #include <string.h> |
27 | #include "squish.h" |
28 | #include "colourset.h" |
29 | #include "maths.h" |
30 | #include "rangefit.h" |
31 | #include "clusterfit.h" |
32 | #include "colourblock.h" |
33 | #include "alpha.h" |
34 | #include "singlecolourfit.h" |
35 | |
36 | namespace squish { |
37 | |
38 | static int FixFlags( int flags ) |
39 | { |
40 | // grab the flag bits |
41 | int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 ); |
42 | int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit ); |
43 | int = flags & kWeightColourByAlpha; |
44 | |
45 | // set defaults |
46 | if ( method != kDxt3 |
47 | && method != kDxt5 |
48 | && method != kBc4 |
49 | && method != kBc5 ) |
50 | { |
51 | method = kDxt1; |
52 | } |
53 | if( fit != kColourRangeFit && fit != kColourIterativeClusterFit ) |
54 | fit = kColourClusterFit; |
55 | |
56 | // done |
57 | return method | fit | extra; |
58 | } |
59 | |
60 | void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric ) |
61 | { |
62 | // fix any bad flags |
63 | flags = FixFlags( flags ); |
64 | |
65 | if ( ( flags & ( kBc4 | kBc5 ) ) != 0 ) |
66 | { |
67 | u8 alpha[16*4]; |
68 | for( int i = 0; i < 16; ++i ) |
69 | { |
70 | alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A |
71 | } |
72 | |
73 | u8* rBlock = reinterpret_cast< u8* >( block ); |
74 | CompressAlphaDxt5( alpha, mask, rBlock ); |
75 | |
76 | if ( ( flags & ( kBc5 ) ) != 0 ) |
77 | { |
78 | for( int i = 0; i < 16; ++i ) |
79 | { |
80 | alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A |
81 | } |
82 | |
83 | u8* gBlock = reinterpret_cast< u8* >( block ) + 8; |
84 | CompressAlphaDxt5( alpha, mask, gBlock ); |
85 | } |
86 | |
87 | return; |
88 | } |
89 | |
90 | // get the block locations |
91 | void* colourBlock = block; |
92 | void* alphaBlock = block; |
93 | if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 ) |
94 | colourBlock = reinterpret_cast< u8* >( block ) + 8; |
95 | |
96 | // create the minimal point set |
97 | ColourSet colours( rgba, mask, flags ); |
98 | |
99 | // check the compression type and compress colour |
100 | if( colours.GetCount() == 1 ) |
101 | { |
102 | // always do a single colour fit |
103 | SingleColourFit fit( &colours, flags ); |
104 | fit.Compress( colourBlock ); |
105 | } |
106 | else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 ) |
107 | { |
108 | // do a range fit |
109 | RangeFit fit( &colours, flags, metric ); |
110 | fit.Compress( colourBlock ); |
111 | } |
112 | else |
113 | { |
114 | // default to a cluster fit (could be iterative or not) |
115 | ClusterFit fit( &colours, flags, metric ); |
116 | fit.Compress( colourBlock ); |
117 | } |
118 | |
119 | // compress alpha separately if necessary |
120 | if( ( flags & kDxt3 ) != 0 ) |
121 | CompressAlphaDxt3( rgba, mask, alphaBlock ); |
122 | else if( ( flags & kDxt5 ) != 0 ) |
123 | CompressAlphaDxt5( rgba, mask, alphaBlock ); |
124 | } |
125 | |
126 | void Decompress( u8* rgba, void const* block, int flags ) |
127 | { |
128 | // fix any bad flags |
129 | flags = FixFlags( flags ); |
130 | |
131 | // get the block locations |
132 | void const* colourBlock = block; |
133 | void const* alphaBlock = block; |
134 | if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 ) |
135 | colourBlock = reinterpret_cast< u8 const* >( block ) + 8; |
136 | |
137 | // decompress colour |
138 | // -- GODOT start -- |
139 | //DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 ); |
140 | if(( flags & ( kBc5 ) ) != 0) |
141 | DecompressColourBc5( rgba, colourBlock); |
142 | else |
143 | DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 ); |
144 | // -- GODOT end -- |
145 | |
146 | // decompress alpha separately if necessary |
147 | if( ( flags & kDxt3 ) != 0 ) |
148 | DecompressAlphaDxt3( rgba, alphaBlock ); |
149 | else if( ( flags & kDxt5 ) != 0 ) |
150 | DecompressAlphaDxt5( rgba, alphaBlock ); |
151 | } |
152 | |
153 | int GetStorageRequirements( int width, int height, int flags ) |
154 | { |
155 | // fix any bad flags |
156 | flags = FixFlags( flags ); |
157 | |
158 | // compute the storage requirements |
159 | int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 ); |
160 | int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; |
161 | return blockcount*blocksize; |
162 | } |
163 | |
164 | void CopyRGBA( u8 const* source, u8* dest, int flags ) |
165 | { |
166 | if (flags & kSourceBGRA) |
167 | { |
168 | // convert from bgra to rgba |
169 | dest[0] = source[2]; |
170 | dest[1] = source[1]; |
171 | dest[2] = source[0]; |
172 | dest[3] = source[3]; |
173 | } |
174 | else |
175 | { |
176 | for( int i = 0; i < 4; ++i ) |
177 | *dest++ = *source++; |
178 | } |
179 | } |
180 | |
181 | void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric ) |
182 | { |
183 | // fix any bad flags |
184 | flags = FixFlags( flags ); |
185 | |
186 | // loop over blocks |
187 | #ifdef SQUISH_USE_OPENMP |
188 | # pragma omp parallel for |
189 | #endif |
190 | for( int y = 0; y < height; y += 4 ) |
191 | { |
192 | // initialise the block output |
193 | u8* targetBlock = reinterpret_cast< u8* >( blocks ); |
194 | int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; |
195 | targetBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock; |
196 | |
197 | for( int x = 0; x < width; x += 4 ) |
198 | { |
199 | // build the 4x4 block of pixels |
200 | u8 sourceRgba[16*4]; |
201 | u8* targetPixel = sourceRgba; |
202 | int mask = 0; |
203 | for( int py = 0; py < 4; ++py ) |
204 | { |
205 | for( int px = 0; px < 4; ++px ) |
206 | { |
207 | // get the source pixel in the image |
208 | int sx = x + px; |
209 | int sy = y + py; |
210 | |
211 | // enable if we're in the image |
212 | if( sx < width && sy < height ) |
213 | { |
214 | // copy the rgba value |
215 | u8 const* sourcePixel = rgba + pitch*sy + 4*sx; |
216 | CopyRGBA(sourcePixel, targetPixel, flags); |
217 | // enable this pixel |
218 | mask |= ( 1 << ( 4*py + px ) ); |
219 | } |
220 | |
221 | // advance to the next pixel |
222 | targetPixel += 4; |
223 | } |
224 | } |
225 | |
226 | // compress it into the output |
227 | CompressMasked( sourceRgba, mask, targetBlock, flags, metric ); |
228 | |
229 | // advance |
230 | targetBlock += bytesPerBlock; |
231 | } |
232 | } |
233 | } |
234 | |
235 | void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric ) |
236 | { |
237 | CompressImage(rgba, width, height, width*4, blocks, flags, metric); |
238 | } |
239 | |
240 | void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags ) |
241 | { |
242 | // fix any bad flags |
243 | flags = FixFlags( flags ); |
244 | |
245 | // loop over blocks |
246 | #ifdef SQUISH_USE_OPENMP |
247 | # pragma omp parallel for |
248 | #endif |
249 | for( int y = 0; y < height; y += 4 ) |
250 | { |
251 | // initialise the block input |
252 | u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks ); |
253 | int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16; |
254 | sourceBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock; |
255 | |
256 | for( int x = 0; x < width; x += 4 ) |
257 | { |
258 | // decompress the block |
259 | u8 targetRgba[4*16]; |
260 | Decompress( targetRgba, sourceBlock, flags ); |
261 | |
262 | // write the decompressed pixels to the correct image locations |
263 | u8 const* sourcePixel = targetRgba; |
264 | for( int py = 0; py < 4; ++py ) |
265 | { |
266 | for( int px = 0; px < 4; ++px ) |
267 | { |
268 | // get the target location |
269 | int sx = x + px; |
270 | int sy = y + py; |
271 | |
272 | // write if we're in the image |
273 | if( sx < width && sy < height ) |
274 | { |
275 | // copy the rgba value |
276 | u8* targetPixel = rgba + pitch*sy + 4*sx; |
277 | CopyRGBA(sourcePixel, targetPixel, flags); |
278 | } |
279 | |
280 | // advance to the next pixel |
281 | sourcePixel += 4; |
282 | } |
283 | } |
284 | |
285 | // advance |
286 | sourceBlock += bytesPerBlock; |
287 | } |
288 | } |
289 | } |
290 | |
291 | void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags ) |
292 | { |
293 | DecompressImage( rgba, width, height, width*4, blocks, flags ); |
294 | } |
295 | |
// Returns the square of the difference between x and y.
static double ErrorSq(double x, double y)
{
    double const delta = x - y;
    return delta * delta;
}
300 | |
301 | static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse) |
302 | { |
303 | // Computes the MSE for the block and weights it by the variance of the original block. |
304 | // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel) |
305 | // then the block is close to being a single colour. Quantisation errors in single colour blocks |
306 | // are easier to see than similar errors in blocks that contain more colours, particularly when there |
307 | // are many such blocks in a large area (eg a blue sky background) as they cause banding. Given that |
308 | // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor |
309 | // of 5. This implies that images with large, single colour areas will have a higher potential WMSE |
310 | // than images with lots of detail. |
311 | |
312 | cmse = amse = 0; |
313 | unsigned int sum_p[4]; // per channel sum of pixels |
314 | unsigned int sum_p2[4]; // per channel sum of pixels squared |
315 | memset(sum_p, 0, sizeof(sum_p)); |
316 | memset(sum_p2, 0, sizeof(sum_p2)); |
317 | for( unsigned int py = 0; py < 4; ++py ) |
318 | { |
319 | for( unsigned int px = 0; px < 4; ++px ) |
320 | { |
321 | if( px < w && py < h ) |
322 | { |
323 | double pixelCMSE = 0; |
324 | for( int i = 0; i < 3; ++i ) |
325 | { |
326 | pixelCMSE += ErrorSq(original[i], compressed[i]); |
327 | sum_p[i] += original[i]; |
328 | sum_p2[i] += (unsigned int)original[i]*original[i]; |
329 | } |
330 | if( original[3] == 0 && compressed[3] == 0 ) |
331 | pixelCMSE = 0; // transparent in both, so colour is inconsequential |
332 | amse += ErrorSq(original[3], compressed[3]); |
333 | cmse += pixelCMSE; |
334 | sum_p[3] += original[3]; |
335 | sum_p2[3] += (unsigned int)original[3]*original[3]; |
336 | } |
337 | original += 4; |
338 | compressed += 4; |
339 | } |
340 | } |
341 | unsigned int variance = 0; |
342 | for( int i = 0; i < 4; ++i ) |
343 | variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i]; |
344 | if( variance < 4 * w * w * h * h ) |
345 | { |
346 | amse *= 5; |
347 | cmse *= 5; |
348 | } |
349 | } |
350 | |
351 | void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE ) |
352 | { |
353 | // fix any bad flags |
354 | flags = FixFlags( flags ); |
355 | colourMSE = alphaMSE = 0; |
356 | |
357 | // initialise the block input |
358 | squish::u8 const* sourceBlock = dxt; |
359 | int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16; |
360 | |
361 | // loop over blocks |
362 | for( int y = 0; y < height; y += 4 ) |
363 | { |
364 | for( int x = 0; x < width; x += 4 ) |
365 | { |
366 | // decompress the block |
367 | u8 targetRgba[4*16]; |
368 | Decompress( targetRgba, sourceBlock, flags ); |
369 | u8 const* sourcePixel = targetRgba; |
370 | |
371 | // copy across to a similar pixel block |
372 | u8 originalRgba[4*16]; |
373 | u8* originalPixel = originalRgba; |
374 | |
375 | for( int py = 0; py < 4; ++py ) |
376 | { |
377 | for( int px = 0; px < 4; ++px ) |
378 | { |
379 | int sx = x + px; |
380 | int sy = y + py; |
381 | if( sx < width && sy < height ) |
382 | { |
383 | u8 const* targetPixel = rgba + pitch*sy + 4*sx; |
384 | CopyRGBA(targetPixel, originalPixel, flags); |
385 | } |
386 | sourcePixel += 4; |
387 | originalPixel += 4; |
388 | } |
389 | } |
390 | |
391 | // compute the weighted MSE of the block |
392 | double blockCMSE, blockAMSE; |
393 | ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE); |
394 | colourMSE += blockCMSE; |
395 | alphaMSE += blockAMSE; |
396 | // advance |
397 | sourceBlock += bytesPerBlock; |
398 | } |
399 | } |
400 | colourMSE /= (width * height * 3); |
401 | alphaMSE /= (width * height); |
402 | } |
403 | |
404 | void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE ) |
405 | { |
406 | ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE); |
407 | } |
408 | |
409 | } // namespace squish |
410 | |