clusterfit.cpp source code [Godot/thirdparty/squish/clusterfit.cpp]

1	/ -----------------------------------------------------------------------------*
2
3	Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4	Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
5
6	Permission is hereby granted, free of charge, to any person obtaining
7	a copy of this software and associated documentation files (the
8	"Software"), to deal in the Software without restriction, including
9	without limitation the rights to use, copy, modify, merge, publish,
10	distribute, sublicense, and/or sell copies of the Software, and to
11	permit persons to whom the Software is furnished to do so, subject to
12	the following conditions:
13
14	The above copyright notice and this permission notice shall be included
15	in all copies or substantial portions of the Software.
16
17	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25	-------------------------------------------------------------------------- /*
26
27	#include "clusterfit.h"
28	#include "colourset.h"
29	#include "colourblock.h"
30	#include <cfloat>
31
32	namespace squish {
33
34	ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
35	: ColourFit ( colours, flags )
36	{
37	// set the iteration count
38	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : `1`;
39
40	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
41	if( metric )
42	m_metric = Vec4 ( metric[`0`], metric[`1`], metric[`2`], `1.0f` );
43	else
44	m_metric = VEC4_CONST( `1.0f` );
45
46	// initialise the best error
47	m_besterror = VEC4_CONST( FLT_MAX );
48
49	// cache some values
50	int const count = m_colours->GetCount();
51	Vec3 const* values = m_colours->GetPoints();
52
53	// get the covariance matrix
54	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
55
56	// compute the principle component
57	m_principle = ComputePrincipleComponent( covariance );
58	}
59
60	bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
61	{
62	// cache some values
63	int const count = m_colours->GetCount();
64	Vec3 const* values = m_colours->GetPoints();
65
66	// build the list of dot products
67	float dps[`16`];
68	u8* order = ( u8* )m_order + `16`*iteration;
69	for( int i = `0`; i < count; ++i )
70	{
71	dps[i] = Dot( values[i], axis );
72	order[i] = ( u8 )i;
73	}
74
75	// stable sort using them
76	for( int i = `0`; i < count; ++i )
77	{
78	for( int j = i; j > `0` && dps[j] < dps[j - `1`]; --j )
79	{
80	std::swap( dps[j], dps[j - `1`] );
81	std::swap( order[j], order[j - `1`] );
82	}
83	}
84
85	// check this ordering is unique
86	for( int it = `0`; it < iteration; ++it )
87	{
88	u8 const* prev = ( u8* )m_order + `16`*it;
89	bool same = true;
90	for( int i = `0`; i < count; ++i )
91	{
92	if( order[i] != prev[i] )
93	{
94	same = false;
95	break;
96	}
97	}
98	if( same )
99	return false;
100	}
101
102	// copy the ordering and weight all the points
103	Vec3 const* unweighted = m_colours->GetPoints();
104	float const* weights = m_colours->GetWeights();
105	m_xsum_wsum = VEC4_CONST( `0.0f` );
106	for( int i = `0`; i < count; ++i )
107	{
108	int j = order[i];
109	Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), `1.0f` );
110	Vec4 w( weights[j] );
111	Vec4 x = p *w;
112	m_points_weights[i] = x;
113	m_xsum_wsum += x;
114	}
115	return true;
116	}
117
118	void ClusterFit::Compress3( void* block )
119	{
120	// declare variables
121	int const count = m_colours->GetCount();
122	Vec4 const two = VEC4_CONST( `2.0` );
123	Vec4 const one = VEC4_CONST( `1.0f` );
124	Vec4 const half_half2( `0.5f`, `0.5f`, `0.5f`, `0.25f` );
125	Vec4 const zero = VEC4_CONST( `0.0f` );
126	Vec4 const half = VEC4_CONST( `0.5f` );
127	Vec4 const grid( `31.0f`, `63.0f`, `31.0f`, `0.0f` );
128	Vec4 const gridrcp( `1.0f`/`31.0f`, `1.0f`/`63.0f`, `1.0f`/`31.0f`, `0.0f` );
129
130	// prepare an ordering using the principle axis
131	ConstructOrdering( m_principle, `0` );
132
133	// check all possible clusters and iterate on the total order
134	Vec4 beststart = VEC4_CONST( `0.0f` );
135	Vec4 bestend = VEC4_CONST( `0.0f` );
136	Vec4 besterror = m_besterror;
137	u8 bestindices[`16`];
138	int bestiteration = `0`;
139	int besti = `0`, bestj = `0`;
140
141	// loop over iterations (we avoid the case that all points in first or last cluster)
142	for( int iterationIndex = `0`;; )
143	{
144	// first cluster [0,i) is at the start
145	Vec4 part0 = VEC4_CONST( `0.0f` );
146	for( int i = `0`; i < count; ++i )
147	{
148	// second cluster [i,j) is half along
149	Vec4 part1 = ( i == `0` ) ? m_points_weights[`0`] : VEC4_CONST( `0.0f` );
150	int jmin = ( i == `0` ) ? `1` : i;
151	for( int j = jmin;; )
152	{
153	// last cluster [j,count) is at the end
154	Vec4 part2 = m_xsum_wsum - part1 - part0;
155
156	// compute least squares terms directly
157	Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
158	Vec4 alpha2_sum = alphax_sum.SplatW();
159
160	Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
161	Vec4 beta2_sum = betax_sum.SplatW();
162
163	Vec4 alphabeta_sum = ( part1 *half_half2 ).SplatW();
164
165	// compute the least-squares optimal points
166	Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum *beta2_sum ) );
167	Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum beta2_sum )factor;
168	Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum alpha2_sum )factor;
169
170	// clamp to the grid
171	a = Min( one, Max( zero, a ) );
172	b = Min( one, Max( zero, b ) );
173	a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
174	b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
175
176	// compute the error (we skip the constant xxsum)
177	Vec4 e1 = MultiplyAdd( a a, alpha2_sum, b b *beta2_sum );
178	Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a b alphabeta_sum );
179	Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
180	Vec4 e4 = MultiplyAdd( two, e3, e1 );
181
182	// apply the metric to the error term
183	Vec4 e5 = e4 *m_metric;
184	Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
185
186	// keep the solution if it wins
187	if( CompareAnyLessThan( error, besterror ) )
188	{
189	beststart = a;
190	bestend = b;
191	besti = i;
192	bestj = j;
193	besterror = error;
194	bestiteration = iterationIndex;
195	}
196
197	// advance
198	if( j == count )
199	break;
200	part1 += m_points_weights[j];
201	++j;
202	}
203
204	// advance
205	part0 += m_points_weights[i];
206	}
207
208	// stop if we didn't improve in this iteration
209	if( bestiteration != iterationIndex )
210	break;
211
212	// advance if possible
213	++iterationIndex;
214	if( iterationIndex == m_iterationCount )
215	break;
216
217	// stop if a new iteration is an ordering that has already been tried
218	Vec3 axis = ( bestend - beststart ).GetVec3();
219	if( !ConstructOrdering( axis, iterationIndex ) )
220	break;
221	}
222
223	// save the block if necessary
224	if( CompareAnyLessThan( besterror, m_besterror ) )
225	{
226	// remap the indices
227	u8 const* order = ( u8* )m_order + `16`*bestiteration;
228
229	u8 unordered[`16`];
230	for( int m = `0`; m < besti; ++m )
231	unordered[order[m]] = `0`;
232	for( int m = besti; m < bestj; ++m )
233	unordered[order[m]] = `2`;
234	for( int m = bestj; m < count; ++m )
235	unordered[order[m]] = `1`;
236
237	m_colours->RemapIndices( unordered, bestindices );
238
239	// save the block
240	WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
241
242	// save the error
243	m_besterror = besterror;
244	}
245	}
246
247	void ClusterFit::Compress4( void* block )
248	{
249	// declare variables
250	int const count = m_colours->GetCount();
251	Vec4 const two = VEC4_CONST( `2.0f` );
252	Vec4 const one = VEC4_CONST( `1.0f` );
253	Vec4 const onethird_onethird2( `1.0f`/`3.0f`, `1.0f`/`3.0f`, `1.0f`/`3.0f`, `1.0f`/`9.0f` );
254	Vec4 const twothirds_twothirds2( `2.0f`/`3.0f`, `2.0f`/`3.0f`, `2.0f`/`3.0f`, `4.0f`/`9.0f` );
255	Vec4 const twonineths = VEC4_CONST( `2.0f`/`9.0f` );
256	Vec4 const zero = VEC4_CONST( `0.0f` );
257	Vec4 const half = VEC4_CONST( `0.5f` );
258	Vec4 const grid( `31.0f`, `63.0f`, `31.0f`, `0.0f` );
259	Vec4 const gridrcp( `1.0f`/`31.0f`, `1.0f`/`63.0f`, `1.0f`/`31.0f`, `0.0f` );
260
261	// prepare an ordering using the principle axis
262	ConstructOrdering( m_principle, `0` );
263
264	// check all possible clusters and iterate on the total order
265	Vec4 beststart = VEC4_CONST( `0.0f` );
266	Vec4 bestend = VEC4_CONST( `0.0f` );
267	Vec4 besterror = m_besterror;
268	u8 bestindices[`16`];
269	int bestiteration = `0`;
270	int besti = `0`, bestj = `0`, bestk = `0`;
271
272	// loop over iterations (we avoid the case that all points in first or last cluster)
273	for( int iterationIndex = `0`;; )
274	{
275	// first cluster [0,i) is at the start
276	Vec4 part0 = VEC4_CONST( `0.0f` );
277	for( int i = `0`; i < count; ++i )
278	{
279	// second cluster [i,j) is one third along
280	Vec4 part1 = VEC4_CONST( `0.0f` );
281	for( int j = i;; )
282	{
283	// third cluster [j,k) is two thirds along
284	Vec4 part2 = ( j == `0` ) ? m_points_weights[`0`] : VEC4_CONST( `0.0f` );
285	int kmin = ( j == `0` ) ? `1` : j;
286	for( int k = kmin;; )
287	{
288	// last cluster [k,count) is at the end
289	Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
290
291	// compute least squares terms directly
292	Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
293	Vec4 const alpha2_sum = alphax_sum.SplatW();
294
295	Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
296	Vec4 const beta2_sum = betax_sum.SplatW();
297
298	Vec4 const alphabeta_sum = twonineths *( part1 + part2 ).SplatW();
299
300	// compute the least-squares optimal points
301	Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum *beta2_sum ) );
302	Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum beta2_sum )factor;
303	Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum alpha2_sum )factor;
304
305	// clamp to the grid
306	a = Min( one, Max( zero, a ) );
307	b = Min( one, Max( zero, b ) );
308	a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
309	b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
310
311	// compute the error (we skip the constant xxsum)
312	Vec4 e1 = MultiplyAdd( a a, alpha2_sum, b b *beta2_sum );
313	Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a b alphabeta_sum );
314	Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
315	Vec4 e4 = MultiplyAdd( two, e3, e1 );
316
317	// apply the metric to the error term
318	Vec4 e5 = e4 *m_metric;
319	Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
320
321	// keep the solution if it wins
322	if( CompareAnyLessThan( error, besterror ) )
323	{
324	beststart = a;
325	bestend = b;
326	besterror = error;
327	besti = i;
328	bestj = j;
329	bestk = k;
330	bestiteration = iterationIndex;
331	}
332
333	// advance
334	if( k == count )
335	break;
336	part2 += m_points_weights[k];
337	++k;
338	}
339
340	// advance
341	if( j == count )
342	break;
343	part1 += m_points_weights[j];
344	++j;
345	}
346
347	// advance
348	part0 += m_points_weights[i];
349	}
350
351	// stop if we didn't improve in this iteration
352	if( bestiteration != iterationIndex )
353	break;
354
355	// advance if possible
356	++iterationIndex;
357	if( iterationIndex == m_iterationCount )
358	break;
359
360	// stop if a new iteration is an ordering that has already been tried
361	Vec3 axis = ( bestend - beststart ).GetVec3();
362	if( !ConstructOrdering( axis, iterationIndex ) )
363	break;
364	}
365
366	// save the block if necessary
367	if( CompareAnyLessThan( besterror, m_besterror ) )
368	{
369	// remap the indices
370	u8 const* order = ( u8* )m_order + `16`*bestiteration;
371
372	u8 unordered[`16`];
373	for( int m = `0`; m < besti; ++m )
374	unordered[order[m]] = `0`;
375	for( int m = besti; m < bestj; ++m )
376	unordered[order[m]] = `2`;
377	for( int m = bestj; m < bestk; ++m )
378	unordered[order[m]] = `3`;
379	for( int m = bestk; m < count; ++m )
380	unordered[order[m]] = `1`;
381
382	m_colours->RemapIndices( unordered, bestindices );
383
384	// save the block
385	WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
386
387	// save the error
388	m_besterror = besterror;
389	}
390	}
391
392	} // namespace squish
393

Browse the source code of Godot/thirdparty/squish/clusterfit.cpp