1/* -----------------------------------------------------------------------------
2
3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4 Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice shall be included
15 in all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25 -------------------------------------------------------------------------- */
26
27#include "clusterfit.h"
28#include "colourset.h"
29#include "colourblock.h"
30#include <cfloat>
31
32namespace squish {
33
34ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
35 : ColourFit( colours, flags )
36{
37 // set the iteration count
38 m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
39
40 // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
41 if( metric )
42 m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
43 else
44 m_metric = VEC4_CONST( 1.0f );
45
46 // initialise the best error
47 m_besterror = VEC4_CONST( FLT_MAX );
48
49 // cache some values
50 int const count = m_colours->GetCount();
51 Vec3 const* values = m_colours->GetPoints();
52
53 // get the covariance matrix
54 Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
55
56 // compute the principle component
57 m_principle = ComputePrincipleComponent( covariance );
58}
59
60bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
61{
62 // cache some values
63 int const count = m_colours->GetCount();
64 Vec3 const* values = m_colours->GetPoints();
65
66 // build the list of dot products
67 float dps[16];
68 u8* order = ( u8* )m_order + 16*iteration;
69 for( int i = 0; i < count; ++i )
70 {
71 dps[i] = Dot( values[i], axis );
72 order[i] = ( u8 )i;
73 }
74
75 // stable sort using them
76 for( int i = 0; i < count; ++i )
77 {
78 for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
79 {
80 std::swap( dps[j], dps[j - 1] );
81 std::swap( order[j], order[j - 1] );
82 }
83 }
84
85 // check this ordering is unique
86 for( int it = 0; it < iteration; ++it )
87 {
88 u8 const* prev = ( u8* )m_order + 16*it;
89 bool same = true;
90 for( int i = 0; i < count; ++i )
91 {
92 if( order[i] != prev[i] )
93 {
94 same = false;
95 break;
96 }
97 }
98 if( same )
99 return false;
100 }
101
102 // copy the ordering and weight all the points
103 Vec3 const* unweighted = m_colours->GetPoints();
104 float const* weights = m_colours->GetWeights();
105 m_xsum_wsum = VEC4_CONST( 0.0f );
106 for( int i = 0; i < count; ++i )
107 {
108 int j = order[i];
109 Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
110 Vec4 w( weights[j] );
111 Vec4 x = p*w;
112 m_points_weights[i] = x;
113 m_xsum_wsum += x;
114 }
115 return true;
116}
117
118void ClusterFit::Compress3( void* block )
119{
120 // declare variables
121 int const count = m_colours->GetCount();
122 Vec4 const two = VEC4_CONST( 2.0 );
123 Vec4 const one = VEC4_CONST( 1.0f );
124 Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
125 Vec4 const zero = VEC4_CONST( 0.0f );
126 Vec4 const half = VEC4_CONST( 0.5f );
127 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
128 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
129
130 // prepare an ordering using the principle axis
131 ConstructOrdering( m_principle, 0 );
132
133 // check all possible clusters and iterate on the total order
134 Vec4 beststart = VEC4_CONST( 0.0f );
135 Vec4 bestend = VEC4_CONST( 0.0f );
136 Vec4 besterror = m_besterror;
137 u8 bestindices[16];
138 int bestiteration = 0;
139 int besti = 0, bestj = 0;
140
141 // loop over iterations (we avoid the case that all points in first or last cluster)
142 for( int iterationIndex = 0;; )
143 {
144 // first cluster [0,i) is at the start
145 Vec4 part0 = VEC4_CONST( 0.0f );
146 for( int i = 0; i < count; ++i )
147 {
148 // second cluster [i,j) is half along
149 Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
150 int jmin = ( i == 0 ) ? 1 : i;
151 for( int j = jmin;; )
152 {
153 // last cluster [j,count) is at the end
154 Vec4 part2 = m_xsum_wsum - part1 - part0;
155
156 // compute least squares terms directly
157 Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
158 Vec4 alpha2_sum = alphax_sum.SplatW();
159
160 Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
161 Vec4 beta2_sum = betax_sum.SplatW();
162
163 Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
164
165 // compute the least-squares optimal points
166 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
167 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
168 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
169
170 // clamp to the grid
171 a = Min( one, Max( zero, a ) );
172 b = Min( one, Max( zero, b ) );
173 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
174 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
175
176 // compute the error (we skip the constant xxsum)
177 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
178 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
179 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
180 Vec4 e4 = MultiplyAdd( two, e3, e1 );
181
182 // apply the metric to the error term
183 Vec4 e5 = e4*m_metric;
184 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
185
186 // keep the solution if it wins
187 if( CompareAnyLessThan( error, besterror ) )
188 {
189 beststart = a;
190 bestend = b;
191 besti = i;
192 bestj = j;
193 besterror = error;
194 bestiteration = iterationIndex;
195 }
196
197 // advance
198 if( j == count )
199 break;
200 part1 += m_points_weights[j];
201 ++j;
202 }
203
204 // advance
205 part0 += m_points_weights[i];
206 }
207
208 // stop if we didn't improve in this iteration
209 if( bestiteration != iterationIndex )
210 break;
211
212 // advance if possible
213 ++iterationIndex;
214 if( iterationIndex == m_iterationCount )
215 break;
216
217 // stop if a new iteration is an ordering that has already been tried
218 Vec3 axis = ( bestend - beststart ).GetVec3();
219 if( !ConstructOrdering( axis, iterationIndex ) )
220 break;
221 }
222
223 // save the block if necessary
224 if( CompareAnyLessThan( besterror, m_besterror ) )
225 {
226 // remap the indices
227 u8 const* order = ( u8* )m_order + 16*bestiteration;
228
229 u8 unordered[16];
230 for( int m = 0; m < besti; ++m )
231 unordered[order[m]] = 0;
232 for( int m = besti; m < bestj; ++m )
233 unordered[order[m]] = 2;
234 for( int m = bestj; m < count; ++m )
235 unordered[order[m]] = 1;
236
237 m_colours->RemapIndices( unordered, bestindices );
238
239 // save the block
240 WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
241
242 // save the error
243 m_besterror = besterror;
244 }
245}
246
247void ClusterFit::Compress4( void* block )
248{
249 // declare variables
250 int const count = m_colours->GetCount();
251 Vec4 const two = VEC4_CONST( 2.0f );
252 Vec4 const one = VEC4_CONST( 1.0f );
253 Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
254 Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
255 Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
256 Vec4 const zero = VEC4_CONST( 0.0f );
257 Vec4 const half = VEC4_CONST( 0.5f );
258 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
259 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
260
261 // prepare an ordering using the principle axis
262 ConstructOrdering( m_principle, 0 );
263
264 // check all possible clusters and iterate on the total order
265 Vec4 beststart = VEC4_CONST( 0.0f );
266 Vec4 bestend = VEC4_CONST( 0.0f );
267 Vec4 besterror = m_besterror;
268 u8 bestindices[16];
269 int bestiteration = 0;
270 int besti = 0, bestj = 0, bestk = 0;
271
272 // loop over iterations (we avoid the case that all points in first or last cluster)
273 for( int iterationIndex = 0;; )
274 {
275 // first cluster [0,i) is at the start
276 Vec4 part0 = VEC4_CONST( 0.0f );
277 for( int i = 0; i < count; ++i )
278 {
279 // second cluster [i,j) is one third along
280 Vec4 part1 = VEC4_CONST( 0.0f );
281 for( int j = i;; )
282 {
283 // third cluster [j,k) is two thirds along
284 Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
285 int kmin = ( j == 0 ) ? 1 : j;
286 for( int k = kmin;; )
287 {
288 // last cluster [k,count) is at the end
289 Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
290
291 // compute least squares terms directly
292 Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
293 Vec4 const alpha2_sum = alphax_sum.SplatW();
294
295 Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
296 Vec4 const beta2_sum = betax_sum.SplatW();
297
298 Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
299
300 // compute the least-squares optimal points
301 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
302 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
303 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
304
305 // clamp to the grid
306 a = Min( one, Max( zero, a ) );
307 b = Min( one, Max( zero, b ) );
308 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
309 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
310
311 // compute the error (we skip the constant xxsum)
312 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
313 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
314 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
315 Vec4 e4 = MultiplyAdd( two, e3, e1 );
316
317 // apply the metric to the error term
318 Vec4 e5 = e4*m_metric;
319 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
320
321 // keep the solution if it wins
322 if( CompareAnyLessThan( error, besterror ) )
323 {
324 beststart = a;
325 bestend = b;
326 besterror = error;
327 besti = i;
328 bestj = j;
329 bestk = k;
330 bestiteration = iterationIndex;
331 }
332
333 // advance
334 if( k == count )
335 break;
336 part2 += m_points_weights[k];
337 ++k;
338 }
339
340 // advance
341 if( j == count )
342 break;
343 part1 += m_points_weights[j];
344 ++j;
345 }
346
347 // advance
348 part0 += m_points_weights[i];
349 }
350
351 // stop if we didn't improve in this iteration
352 if( bestiteration != iterationIndex )
353 break;
354
355 // advance if possible
356 ++iterationIndex;
357 if( iterationIndex == m_iterationCount )
358 break;
359
360 // stop if a new iteration is an ordering that has already been tried
361 Vec3 axis = ( bestend - beststart ).GetVec3();
362 if( !ConstructOrdering( axis, iterationIndex ) )
363 break;
364 }
365
366 // save the block if necessary
367 if( CompareAnyLessThan( besterror, m_besterror ) )
368 {
369 // remap the indices
370 u8 const* order = ( u8* )m_order + 16*bestiteration;
371
372 u8 unordered[16];
373 for( int m = 0; m < besti; ++m )
374 unordered[order[m]] = 0;
375 for( int m = besti; m < bestj; ++m )
376 unordered[order[m]] = 2;
377 for( int m = bestj; m < bestk; ++m )
378 unordered[order[m]] = 3;
379 for( int m = bestk; m < count; ++m )
380 unordered[order[m]] = 1;
381
382 m_colours->RemapIndices( unordered, bestindices );
383
384 // save the block
385 WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
386
387 // save the error
388 m_besterror = besterror;
389 }
390}
391
392} // namespace squish
393