1/* -----------------------------------------------------------------------------
2
3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be included
14 in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 -------------------------------------------------------------------------- */
25
26#include <string.h>
27#include "squish.h"
28#include "colourset.h"
29#include "maths.h"
30#include "rangefit.h"
31#include "clusterfit.h"
32#include "colourblock.h"
33#include "alpha.h"
34#include "singlecolourfit.h"
35
36namespace squish {
37
38static int FixFlags( int flags )
39{
40 // grab the flag bits
41 int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
42 int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
43 int extra = flags & kWeightColourByAlpha;
44
45 // set defaults
46 if ( method != kDxt3
47 && method != kDxt5
48 && method != kBc4
49 && method != kBc5 )
50 {
51 method = kDxt1;
52 }
53 if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
54 fit = kColourClusterFit;
55
56 // done
57 return method | fit | extra;
58}
59
60void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
61{
62 // fix any bad flags
63 flags = FixFlags( flags );
64
65 if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
66 {
67 u8 alpha[16*4];
68 for( int i = 0; i < 16; ++i )
69 {
70 alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
71 }
72
73 u8* rBlock = reinterpret_cast< u8* >( block );
74 CompressAlphaDxt5( alpha, mask, rBlock );
75
76 if ( ( flags & ( kBc5 ) ) != 0 )
77 {
78 for( int i = 0; i < 16; ++i )
79 {
80 alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
81 }
82
83 u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
84 CompressAlphaDxt5( alpha, mask, gBlock );
85 }
86
87 return;
88 }
89
90 // get the block locations
91 void* colourBlock = block;
92 void* alphaBlock = block;
93 if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
94 colourBlock = reinterpret_cast< u8* >( block ) + 8;
95
96 // create the minimal point set
97 ColourSet colours( rgba, mask, flags );
98
99 // check the compression type and compress colour
100 if( colours.GetCount() == 1 )
101 {
102 // always do a single colour fit
103 SingleColourFit fit( &colours, flags );
104 fit.Compress( colourBlock );
105 }
106 else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
107 {
108 // do a range fit
109 RangeFit fit( &colours, flags, metric );
110 fit.Compress( colourBlock );
111 }
112 else
113 {
114 // default to a cluster fit (could be iterative or not)
115 ClusterFit fit( &colours, flags, metric );
116 fit.Compress( colourBlock );
117 }
118
119 // compress alpha separately if necessary
120 if( ( flags & kDxt3 ) != 0 )
121 CompressAlphaDxt3( rgba, mask, alphaBlock );
122 else if( ( flags & kDxt5 ) != 0 )
123 CompressAlphaDxt5( rgba, mask, alphaBlock );
124}
125
126void Decompress( u8* rgba, void const* block, int flags )
127{
128 // fix any bad flags
129 flags = FixFlags( flags );
130
131 // get the block locations
132 void const* colourBlock = block;
133 void const* alphaBlock = block;
134 if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
135 colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
136
137 // decompress colour
138 // -- GODOT start --
139 //DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
140 if(( flags & ( kBc5 ) ) != 0)
141 DecompressColourBc5( rgba, colourBlock);
142 else
143 DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
144 // -- GODOT end --
145
146 // decompress alpha separately if necessary
147 if( ( flags & kDxt3 ) != 0 )
148 DecompressAlphaDxt3( rgba, alphaBlock );
149 else if( ( flags & kDxt5 ) != 0 )
150 DecompressAlphaDxt5( rgba, alphaBlock );
151}
152
153int GetStorageRequirements( int width, int height, int flags )
154{
155 // fix any bad flags
156 flags = FixFlags( flags );
157
158 // compute the storage requirements
159 int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
160 int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
161 return blockcount*blocksize;
162}
163
164void CopyRGBA( u8 const* source, u8* dest, int flags )
165{
166 if (flags & kSourceBGRA)
167 {
168 // convert from bgra to rgba
169 dest[0] = source[2];
170 dest[1] = source[1];
171 dest[2] = source[0];
172 dest[3] = source[3];
173 }
174 else
175 {
176 for( int i = 0; i < 4; ++i )
177 *dest++ = *source++;
178 }
179}
180
181void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
182{
183 // fix any bad flags
184 flags = FixFlags( flags );
185
186 // loop over blocks
187#ifdef SQUISH_USE_OPENMP
188# pragma omp parallel for
189#endif
190 for( int y = 0; y < height; y += 4 )
191 {
192 // initialise the block output
193 u8* targetBlock = reinterpret_cast< u8* >( blocks );
194 int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
195 targetBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
196
197 for( int x = 0; x < width; x += 4 )
198 {
199 // build the 4x4 block of pixels
200 u8 sourceRgba[16*4];
201 u8* targetPixel = sourceRgba;
202 int mask = 0;
203 for( int py = 0; py < 4; ++py )
204 {
205 for( int px = 0; px < 4; ++px )
206 {
207 // get the source pixel in the image
208 int sx = x + px;
209 int sy = y + py;
210
211 // enable if we're in the image
212 if( sx < width && sy < height )
213 {
214 // copy the rgba value
215 u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
216 CopyRGBA(sourcePixel, targetPixel, flags);
217 // enable this pixel
218 mask |= ( 1 << ( 4*py + px ) );
219 }
220
221 // advance to the next pixel
222 targetPixel += 4;
223 }
224 }
225
226 // compress it into the output
227 CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
228
229 // advance
230 targetBlock += bytesPerBlock;
231 }
232 }
233}
234
235void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
236{
237 CompressImage(rgba, width, height, width*4, blocks, flags, metric);
238}
239
240void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
241{
242 // fix any bad flags
243 flags = FixFlags( flags );
244
245 // loop over blocks
246#ifdef SQUISH_USE_OPENMP
247# pragma omp parallel for
248#endif
249 for( int y = 0; y < height; y += 4 )
250 {
251 // initialise the block input
252 u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
253 int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
254 sourceBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
255
256 for( int x = 0; x < width; x += 4 )
257 {
258 // decompress the block
259 u8 targetRgba[4*16];
260 Decompress( targetRgba, sourceBlock, flags );
261
262 // write the decompressed pixels to the correct image locations
263 u8 const* sourcePixel = targetRgba;
264 for( int py = 0; py < 4; ++py )
265 {
266 for( int px = 0; px < 4; ++px )
267 {
268 // get the target location
269 int sx = x + px;
270 int sy = y + py;
271
272 // write if we're in the image
273 if( sx < width && sy < height )
274 {
275 // copy the rgba value
276 u8* targetPixel = rgba + pitch*sy + 4*sx;
277 CopyRGBA(sourcePixel, targetPixel, flags);
278 }
279
280 // advance to the next pixel
281 sourcePixel += 4;
282 }
283 }
284
285 // advance
286 sourceBlock += bytesPerBlock;
287 }
288 }
289}
290
291void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
292{
293 DecompressImage( rgba, width, height, width*4, blocks, flags );
294}
295
// Returns the squared difference between x and y.
static double ErrorSq(double x, double y)
{
    double const delta = x - y;
    return delta * delta;
}
300
301static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
302{
303 // Computes the MSE for the block and weights it by the variance of the original block.
304 // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
305 // then the block is close to being a single colour. Quantisation errors in single colour blocks
306 // are easier to see than similar errors in blocks that contain more colours, particularly when there
307 // are many such blocks in a large area (eg a blue sky background) as they cause banding. Given that
308 // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
309 // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
310 // than images with lots of detail.
311
312 cmse = amse = 0;
313 unsigned int sum_p[4]; // per channel sum of pixels
314 unsigned int sum_p2[4]; // per channel sum of pixels squared
315 memset(sum_p, 0, sizeof(sum_p));
316 memset(sum_p2, 0, sizeof(sum_p2));
317 for( unsigned int py = 0; py < 4; ++py )
318 {
319 for( unsigned int px = 0; px < 4; ++px )
320 {
321 if( px < w && py < h )
322 {
323 double pixelCMSE = 0;
324 for( int i = 0; i < 3; ++i )
325 {
326 pixelCMSE += ErrorSq(original[i], compressed[i]);
327 sum_p[i] += original[i];
328 sum_p2[i] += (unsigned int)original[i]*original[i];
329 }
330 if( original[3] == 0 && compressed[3] == 0 )
331 pixelCMSE = 0; // transparent in both, so colour is inconsequential
332 amse += ErrorSq(original[3], compressed[3]);
333 cmse += pixelCMSE;
334 sum_p[3] += original[3];
335 sum_p2[3] += (unsigned int)original[3]*original[3];
336 }
337 original += 4;
338 compressed += 4;
339 }
340 }
341 unsigned int variance = 0;
342 for( int i = 0; i < 4; ++i )
343 variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
344 if( variance < 4 * w * w * h * h )
345 {
346 amse *= 5;
347 cmse *= 5;
348 }
349}
350
351void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
352{
353 // fix any bad flags
354 flags = FixFlags( flags );
355 colourMSE = alphaMSE = 0;
356
357 // initialise the block input
358 squish::u8 const* sourceBlock = dxt;
359 int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
360
361 // loop over blocks
362 for( int y = 0; y < height; y += 4 )
363 {
364 for( int x = 0; x < width; x += 4 )
365 {
366 // decompress the block
367 u8 targetRgba[4*16];
368 Decompress( targetRgba, sourceBlock, flags );
369 u8 const* sourcePixel = targetRgba;
370
371 // copy across to a similar pixel block
372 u8 originalRgba[4*16];
373 u8* originalPixel = originalRgba;
374
375 for( int py = 0; py < 4; ++py )
376 {
377 for( int px = 0; px < 4; ++px )
378 {
379 int sx = x + px;
380 int sy = y + py;
381 if( sx < width && sy < height )
382 {
383 u8 const* targetPixel = rgba + pitch*sy + 4*sx;
384 CopyRGBA(targetPixel, originalPixel, flags);
385 }
386 sourcePixel += 4;
387 originalPixel += 4;
388 }
389 }
390
391 // compute the weighted MSE of the block
392 double blockCMSE, blockAMSE;
393 ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
394 colourMSE += blockCMSE;
395 alphaMSE += blockAMSE;
396 // advance
397 sourceBlock += bytesPerBlock;
398 }
399 }
400 colourMSE /= (width * height * 3);
401 alphaMSE /= (width * height);
402}
403
404void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
405{
406 ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
407}
408
409} // namespace squish
410