squish.cpp source code [Godot/thirdparty/squish/squish.cpp]

/* -----------------------------------------------------------------------------

	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk

	Permission is hereby granted, free of charge, to any person obtaining
	a copy of this software and associated documentation files (the
	"Software"), to deal in the Software without restriction, including
	without limitation the rights to use, copy, modify, merge, publish,
	distribute, sublicense, and/or sell copies of the Software, and to
	permit persons to whom the Software is furnished to do so, subject to
	the following conditions:

	The above copyright notice and this permission notice shall be included
	in all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

   -------------------------------------------------------------------------- */
25
26	#include <string.h>
27	#include "squish.h"
28	#include "colourset.h"
29	#include "maths.h"
30	#include "rangefit.h"
31	#include "clusterfit.h"
32	#include "colourblock.h"
33	#include "alpha.h"
34	#include "singlecolourfit.h"
35
36	namespace squish {
37
38	static int FixFlags( int flags )
39	{
40	// grab the flag bits
41	int method = flags & ( kDxt1 \| kDxt3 \| kDxt5 \| kBc4 \| kBc5 );
42	int fit = flags & ( kColourIterativeClusterFit \| kColourClusterFit \| kColourRangeFit );
43	int extra = flags & kWeightColourByAlpha;
44
45	// set defaults
46	if ( method != kDxt3
47	&& method != kDxt5
48	&& method != kBc4
49	&& method != kBc5 )
50	{
51	method = kDxt1;
52	}
53	if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
54	fit = kColourClusterFit;
55
56	// done
57	return method \| fit \| extra;
58	}
59
60	void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
61	{
62	// fix any bad flags
63	flags = FixFlags( flags );
64
65	if ( ( flags & ( kBc4 \| kBc5 ) ) != `0` )
66	{
67	u8 alpha[`16`*`4`];
68	for( int i = `0`; i < `16`; ++i )
69	{
70	alpha[i`4` + `3`] = rgba[i`4` + `0`]; // copy R to A
71	}
72
73	u8* rBlock = reinterpret_cast< u8* >( block );
74	CompressAlphaDxt5( alpha, mask, rBlock );
75
76	if ( ( flags & ( kBc5 ) ) != `0` )
77	{
78	for( int i = `0`; i < `16`; ++i )
79	{
80	alpha[i`4` + `3`] = rgba[i`4` + `1`]; // copy G to A
81	}
82
83	u8* gBlock = reinterpret_cast< u8* >( block ) + `8`;
84	CompressAlphaDxt5( alpha, mask, gBlock );
85	}
86
87	return;
88	}
89
90	// get the block locations
91	void* colourBlock = block;
92	void* alphaBlock = block;
93	if( ( flags & ( kDxt3 \| kDxt5 ) ) != `0` )
94	colourBlock = reinterpret_cast< u8* >( block ) + `8`;
95
96	// create the minimal point set
97	ColourSet colours( rgba, mask, flags );
98
99	// check the compression type and compress colour
100	if( colours.GetCount() == `1` )
101	{
102	// always do a single colour fit
103	SingleColourFit fit( &colours, flags );
104	fit.Compress( colourBlock );
105	}
106	else if( ( flags & kColourRangeFit ) != `0` \|\| colours.GetCount() == `0` )
107	{
108	// do a range fit
109	RangeFit fit( &colours, flags, metric );
110	fit.Compress( colourBlock );
111	}
112	else
113	{
114	// default to a cluster fit (could be iterative or not)
115	ClusterFit fit( &colours, flags, metric );
116	fit.Compress( colourBlock );
117	}
118
119	// compress alpha separately if necessary
120	if( ( flags & kDxt3 ) != `0` )
121	CompressAlphaDxt3( rgba, mask, alphaBlock );
122	else if( ( flags & kDxt5 ) != `0` )
123	CompressAlphaDxt5( rgba, mask, alphaBlock );
124	}
125
126	void Decompress( u8* rgba, void const* block, int flags )
127	{
128	// fix any bad flags
129	flags = FixFlags( flags );
130
131	// get the block locations
132	void const* colourBlock = block;
133	void const* alphaBlock = block;
134	if( ( flags & ( kDxt3 \| kDxt5 ) ) != `0` )
135	colourBlock = reinterpret_cast< u8 const* >( block ) + `8`;
136
137	// decompress colour
138	// -- GODOT start --
139	//DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
140	if(( flags & ( kBc5 ) ) != `0`)
141	DecompressColourBc5( rgba, colourBlock);
142	else
143	DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != `0` );
144	// -- GODOT end --
145
146	// decompress alpha separately if necessary
147	if( ( flags & kDxt3 ) != `0` )
148	DecompressAlphaDxt3( rgba, alphaBlock );
149	else if( ( flags & kDxt5 ) != `0` )
150	DecompressAlphaDxt5( rgba, alphaBlock );
151	}
152
153	int GetStorageRequirements( int width, int height, int flags )
154	{
155	// fix any bad flags
156	flags = FixFlags( flags );
157
158	// compute the storage requirements
159	int blockcount = ( ( width + `3` )/`4` ) * ( ( height + `3` )/`4` );
160	int blocksize = ( ( flags & ( kDxt1 \| kBc4 ) ) != `0` ) ? `8` : `16`;
161	return blockcount*blocksize;
162	}
163
164	void CopyRGBA( u8 const* source, u8* dest, int flags )
165	{
166	if (flags & kSourceBGRA)
167	{
168	// convert from bgra to rgba
169	dest[`0`] = source[`2`];
170	dest[`1`] = source[`1`];
171	dest[`2`] = source[`0`];
172	dest[`3`] = source[`3`];
173	}
174	else
175	{
176	for( int i = `0`; i < `4`; ++i )
177	dest++ = source++;
178	}
179	}
180
181	void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
182	{
183	// fix any bad flags
184	flags = FixFlags( flags );
185
186	// loop over blocks
187	#ifdef SQUISH_USE_OPENMP
188	# pragma omp parallel for
189	#endif
190	for( int y = `0`; y < height; y += `4` )
191	{
192	// initialise the block output
193	u8* targetBlock = reinterpret_cast< u8* >( blocks );
194	int bytesPerBlock = ( ( flags & ( kDxt1 \| kBc4 ) ) != `0` ) ? `8` : `16`;
195	targetBlock += ( (y / `4`) * ( (width + `3`) / `4`) ) * bytesPerBlock;
196
197	for( int x = `0`; x < width; x += `4` )
198	{
199	// build the 4x4 block of pixels
200	u8 sourceRgba[`16`*`4`];
201	u8* targetPixel = sourceRgba;
202	int mask = `0`;
203	for( int py = `0`; py < `4`; ++py )
204	{
205	for( int px = `0`; px < `4`; ++px )
206	{
207	// get the source pixel in the image
208	int sx = x + px;
209	int sy = y + py;
210
211	// enable if we're in the image
212	if( sx < width && sy < height )
213	{
214	// copy the rgba value
215	u8 const* sourcePixel = rgba + pitchsy + `4`sx;
216	CopyRGBA(sourcePixel, targetPixel, flags);
217	// enable this pixel
218	mask \|= ( `1` << ( `4`*py + px ) );
219	}
220
221	// advance to the next pixel
222	targetPixel += `4`;
223	}
224	}
225
226	// compress it into the output
227	CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
228
229	// advance
230	targetBlock += bytesPerBlock;
231	}
232	}
233	}
234
235	void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
236	{
237	CompressImage(rgba, width, height, width*`4`, blocks, flags, metric);
238	}
239
240	void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
241	{
242	// fix any bad flags
243	flags = FixFlags( flags );
244
245	// loop over blocks
246	#ifdef SQUISH_USE_OPENMP
247	# pragma omp parallel for
248	#endif
249	for( int y = `0`; y < height; y += `4` )
250	{
251	// initialise the block input
252	u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
253	int bytesPerBlock = ( ( flags & ( kDxt1 \| kBc4 ) ) != `0` ) ? `8` : `16`;
254	sourceBlock += ( (y / `4`) * ( (width + `3`) / `4`) ) * bytesPerBlock;
255
256	for( int x = `0`; x < width; x += `4` )
257	{
258	// decompress the block
259	u8 targetRgba[`4`*`16`];
260	Decompress( targetRgba, sourceBlock, flags );
261
262	// write the decompressed pixels to the correct image locations
263	u8 const* sourcePixel = targetRgba;
264	for( int py = `0`; py < `4`; ++py )
265	{
266	for( int px = `0`; px < `4`; ++px )
267	{
268	// get the target location
269	int sx = x + px;
270	int sy = y + py;
271
272	// write if we're in the image
273	if( sx < width && sy < height )
274	{
275	// copy the rgba value
276	u8* targetPixel = rgba + pitchsy + `4`sx;
277	CopyRGBA(sourcePixel, targetPixel, flags);
278	}
279
280	// advance to the next pixel
281	sourcePixel += `4`;
282	}
283	}
284
285	// advance
286	sourceBlock += bytesPerBlock;
287	}
288	}
289	}
290
291	void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
292	{
293	DecompressImage( rgba, width, height, width*`4`, blocks, flags );
294	}
295
// Returns the squared difference between x and y.
static double ErrorSq( double x, double y )
{
	double const diff = x - y;
	return diff*diff;
}
300
301	static void ComputeBlockWMSE(u8 const original, u8 const* compressed, unsigned* int w, unsigned int h, double &cmse, double &amse)
302	{
303	// Computes the MSE for the block and weights it by the variance of the original block.
304	// If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
305	// then the block is close to being a single colour. Quantisation errors in single colour blocks
306	// are easier to see than similar errors in blocks that contain more colours, particularly when there
307	// are many such blocks in a large area (eg a blue sky background) as they cause banding. Given that
308	// banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
309	// of 5. This implies that images with large, single colour areas will have a higher potential WMSE
310	// than images with lots of detail.
311
312	cmse = amse = `0`;
313	unsigned int sum_p[`4`]; // per channel sum of pixels
314	unsigned int sum_p2[`4`]; // per channel sum of pixels squared
315	memset(sum_p, `0`, sizeof(sum_p));
316	memset(sum_p2, `0`, sizeof(sum_p2));
317	for( unsigned int py = `0`; py < `4`; ++py )
318	{
319	for( unsigned int px = `0`; px < `4`; ++px )
320	{
321	if( px < w && py < h )
322	{
323	double pixelCMSE = `0`;
324	for( int i = `0`; i < `3`; ++i )
325	{
326	pixelCMSE += ErrorSq(original[i], compressed[i]);
327	sum_p[i] += original[i];
328	sum_p2[i] += (unsigned int)original[i]*original[i];
329	}
330	if( original[`3`] == `0` && compressed[`3`] == `0` )
331	pixelCMSE = `0`; // transparent in both, so colour is inconsequential
332	amse += ErrorSq(original[`3`], compressed[`3`]);
333	cmse += pixelCMSE;
334	sum_p[`3`] += original[`3`];
335	sum_p2[`3`] += (unsigned int)original[`3`]*original[`3`];
336	}
337	original += `4`;
338	compressed += `4`;
339	}
340	}
341	unsigned int variance = `0`;
342	for( int i = `0`; i < `4`; ++i )
343	variance += whsum_p2[i] - sum_p[i]*sum_p[i];
344	if( variance < `4` * w * w * h * h )
345	{
346	amse *= `5`;
347	cmse *= `5`;
348	}
349	}
350
351	void ComputeMSE( u8 const rgba, int* width, int height, int pitch, u8 const dxt, int* flags, double &colourMSE, double &alphaMSE )
352	{
353	// fix any bad flags
354	flags = FixFlags( flags );
355	colourMSE = alphaMSE = `0`;
356
357	// initialise the block input
358	squish::u8 const* sourceBlock = dxt;
359	int bytesPerBlock = ( ( flags & squish::kDxt1 ) != `0` ) ? `8` : `16`;
360
361	// loop over blocks
362	for( int y = `0`; y < height; y += `4` )
363	{
364	for( int x = `0`; x < width; x += `4` )
365	{
366	// decompress the block
367	u8 targetRgba[`4`*`16`];
368	Decompress( targetRgba, sourceBlock, flags );
369	u8 const* sourcePixel = targetRgba;
370
371	// copy across to a similar pixel block
372	u8 originalRgba[`4`*`16`];
373	u8* originalPixel = originalRgba;
374
375	for( int py = `0`; py < `4`; ++py )
376	{
377	for( int px = `0`; px < `4`; ++px )
378	{
379	int sx = x + px;
380	int sy = y + py;
381	if( sx < width && sy < height )
382	{
383	u8 const* targetPixel = rgba + pitchsy + `4`sx;
384	CopyRGBA(targetPixel, originalPixel, flags);
385	}
386	sourcePixel += `4`;
387	originalPixel += `4`;
388	}
389	}
390
391	// compute the weighted MSE of the block
392	double blockCMSE, blockAMSE;
393	ComputeBlockWMSE(originalRgba, targetRgba, std::min(`4`, width - x), std::min(`4`, height - y), blockCMSE, blockAMSE);
394	colourMSE += blockCMSE;
395	alphaMSE += blockAMSE;
396	// advance
397	sourceBlock += bytesPerBlock;
398	}
399	}
400	colourMSE /= (width * height * `3`);
401	alphaMSE /= (width * height);
402	}
403
404	void ComputeMSE( u8 const rgba, int* width, int height, u8 const dxt, int* flags, double &colourMSE, double &alphaMSE )
405	{
406	ComputeMSE(rgba, width, height, width*`4`, dxt, flags, colourMSE, alphaMSE);
407	}
408
409	} // namespace squish
410

Browse the source code of Godot/thirdparty/squish/squish.cpp