1/*
2 Jonathan Dummer
3 2007-07-31-10.32
4
5 simple DXT compression / decompression code
6
7 public domain
8*/
9
#include "image_DXT.h"

#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
15
/* set this =1 if you want to use the covariance matrix method...
   which is better than my method of using standard deviations
   overall, except on the infinitesimal chance that the power
   method fails for finding the largest eigenvector */
20#define USE_COV_MAT 1
21
22/********* Function Prototypes *********/
/*
    DXT1 color encoder for a single 4x4 pixel block.
    Reads 16 pixels of `channels` bytes each from `uncompressed`
    and writes 8 bytes to `compressed`: two 565 colors followed by
    sixteen 2 bit indices.  Color only, no alpha.  Speed is favored
    over output quality, at least for now.
*/
void compress_DDS_color_block( int channels,
                               const unsigned char *const uncompressed,
                               unsigned char compressed[8] );
/*
    Alpha encoder for a single 4x4 pixel block, for use in DXT5
    DDS files.  Reads the 4th byte of each of the 16 four-byte
    pixels in `uncompressed` and writes 8 bytes to `compressed`.
    Speed is favored over output quality, at least for now.
*/
void compress_DDS_alpha_block( const unsigned char *const uncompressed,
                               unsigned char compressed[8] );
40
41/********* Actual Exposed Functions *********/
42int
43 save_image_as_DDS
44 (
45 const char *filename,
46 int width, int height, int channels,
47 const unsigned char *const data
48 )
49{
50 /* variables */
51 FILE *fout;
52 unsigned char *DDS_data;
53 DDS_header header;
54 int DDS_size;
55 /* error check */
56 if( (NULL == filename) ||
57 (width < 1) || (height < 1) ||
58 (channels < 1) || (channels > 4) ||
59 (data == NULL ) )
60 {
61 return 0;
62 }
63 /* Convert the image */
64 if( (channels & 1) == 1 )
65 {
66 /* no alpha, just use DXT1 */
67 DDS_data = convert_image_to_DXT1( data, width, height, channels, &DDS_size );
68 } else
69 {
70 /* has alpha, so use DXT5 */
71 DDS_data = convert_image_to_DXT5( data, width, height, channels, &DDS_size );
72 }
73 /* save it */
74 memset( &header, 0, sizeof( DDS_header ) );
75 header.dwMagic = ('D' << 0) | ('D' << 8) | ('S' << 16) | (' ' << 24);
76 header.dwSize = 124;
77 header.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | DDSD_LINEARSIZE;
78 header.dwWidth = width;
79 header.dwHeight = height;
80 header.dwPitchOrLinearSize = DDS_size;
81 header.sPixelFormat.dwSize = 32;
82 header.sPixelFormat.dwFlags = DDPF_FOURCC;
83 if( (channels & 1) == 1 )
84 {
85 header.sPixelFormat.dwFourCC = ('D' << 0) | ('X' << 8) | ('T' << 16) | ('1' << 24);
86 } else
87 {
88 header.sPixelFormat.dwFourCC = ('D' << 0) | ('X' << 8) | ('T' << 16) | ('5' << 24);
89 }
90 header.sCaps.dwCaps1 = DDSCAPS_TEXTURE;
91 /* write it out */
92 fout = fopen( filename, "wb");
93 fwrite( &header, sizeof( DDS_header ), 1, fout );
94 fwrite( DDS_data, 1, DDS_size, fout );
95 fclose( fout );
96 /* done */
97 free( DDS_data );
98 return 1;
99}
100
/*
    Compresses an image into DXT1 blocks (8 bytes per 4x4 pixel
    block, blocks stored left to right, top to bottom).

    uncompressed    width*height*channels bytes, stored row after row
    width, height   image size in pixels, each must be >= 1
    channels        bytes per pixel, 1..4.  With 1 or 2 channels the
                    first byte of a pixel is used for R, G and B;
                    with 3 or 4 channels the first three bytes are.
                    Any alpha byte is ignored.
    out_size        out: number of bytes in the returned buffer,
                    0 on failure

    Returns a malloc'ed buffer the caller must free(), or NULL if an
    argument is invalid, the compressed size does not fit in an int,
    or the allocation fails.

    Blocks that hang over the right or bottom edge of the image are
    padded with the first pixel of that block.
*/
unsigned char* convert_image_to_DXT1(
        const unsigned char *const uncompressed,
        int width, int height, int channels,
        int *out_size )
{
    unsigned char *compressed;
    int i, j, x, y;
    int blocks_x, blocks_y;
    unsigned char ublock[16*3];
    unsigned char cblock[8];
    size_t index = 0;
    int chan_step = 1;
    /* error check */
    if( NULL == out_size )
    {
        return NULL;
    }
    *out_size = 0;
    if( (width < 1) || (height < 1) ||
        (NULL == uncompressed) ||
        (channels < 1) || (channels > 4) )
    {
        return NULL;
    }
    /* for channels == 1 or 2, I do not step forward for R,G,B values */
    if( channels < 3 )
    {
        chan_step = 0;
    }
    /* number of 4x4 blocks in each direction, rounded up
        (written so that it cannot overflow, unlike width+3) */
    blocks_x = (width - 1) / 4 + 1;
    blocks_y = (height - 1) / 4 + 1;
    /* refuse images whose compressed size does not fit in an int */
    if( blocks_x > (INT_MAX / 8) / blocks_y )
    {
        return NULL;
    }
    /* get the RAM for the compressed image
        (8 bytes per 4x4 pixel block) */
    *out_size = blocks_x * blocks_y * 8;
    compressed = (unsigned char*)malloc( (size_t)*out_size );
    if( NULL == compressed )
    {
        *out_size = 0;
        return NULL;
    }
    /* go through each block */
    for( j = 0; j < height; j += 4 )
    {
        for( i = 0; i < width; i += 4 )
        {
            /* copy this block into a new one */
            int idx = 0;
            /* how many columns / rows of this block lie inside the image */
            int mx = 4, my = 4;
            if( j+4 >= height )
            {
                my = height - j;
            }
            if( i+4 >= width )
            {
                mx = width - i;
            }
            for( y = 0; y < my; ++y )
            {
                /* first pixel of this block row; size_t math so large
                    images cannot overflow the offset */
                const unsigned char *src = uncompressed +
                    ((size_t)(j+y) * (size_t)width + (size_t)i) * (size_t)channels;
                for( x = 0; x < mx; ++x )
                {
                    ublock[idx++] = src[0];
                    ublock[idx++] = src[chan_step];
                    ublock[idx++] = src[chan_step+chan_step];
                    src += channels;
                }
                /* pad missing columns with the block's first pixel */
                for( x = mx; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                }
            }
            /* pad missing rows with the block's first pixel */
            for( y = my; y < 4; ++y )
            {
                for( x = 0; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                }
            }
            /* compress the block */
            compress_DDS_color_block( 3, ublock, cblock );
            /* copy the data from the block into the main block */
            memcpy( compressed + index, cblock, 8 );
            index += 8;
        }
    }
    return compressed;
}
181
/*
    Compresses an image into DXT5 blocks (16 bytes per 4x4 pixel
    block: 8 bytes of alpha followed by 8 bytes of color; blocks
    stored left to right, top to bottom).

    uncompressed    width*height*channels bytes, stored row after row
    width, height   image size in pixels, each must be >= 1
    channels        bytes per pixel, 1..4.  With 1 or 2 channels the
                    first byte of a pixel is used for R, G and B;
                    with 3 or 4 channels the first three bytes are.
                    With 2 or 4 channels the last byte of a pixel is
                    its alpha; with 1 or 3 channels alpha is 255.
    out_size        out: number of bytes in the returned buffer,
                    0 on failure

    Returns a malloc'ed buffer the caller must free(), or NULL if an
    argument is invalid, the compressed size does not fit in an int,
    or the allocation fails.

    Blocks that hang over the right or bottom edge of the image are
    padded with the first pixel of that block.
*/
unsigned char* convert_image_to_DXT5(
        const unsigned char *const uncompressed,
        int width, int height, int channels,
        int *out_size )
{
    unsigned char *compressed;
    int i, j, x, y;
    int blocks_x, blocks_y;
    unsigned char ublock[16*4];
    unsigned char cblock[8];
    size_t index = 0;
    int chan_step = 1;
    int has_alpha;
    /* error check */
    if( NULL == out_size )
    {
        return NULL;
    }
    *out_size = 0;
    if( (width < 1) || (height < 1) ||
        (NULL == uncompressed) ||
        (channels < 1) || ( channels > 4) )
    {
        return NULL;
    }
    /* for channels == 1 or 2, I do not step forward for R,G,B values */
    if( channels < 3 )
    {
        chan_step = 0;
    }
    /* # channels = 1 or 3 have no alpha, 2 & 4 do have alpha */
    has_alpha = 1 - (channels & 1);
    /* number of 4x4 blocks in each direction, rounded up
        (written so that it cannot overflow, unlike width+3) */
    blocks_x = (width - 1) / 4 + 1;
    blocks_y = (height - 1) / 4 + 1;
    /* refuse images whose compressed size does not fit in an int */
    if( blocks_x > (INT_MAX / 16) / blocks_y )
    {
        return NULL;
    }
    /* get the RAM for the compressed image
        (16 bytes per 4x4 pixel block) */
    *out_size = blocks_x * blocks_y * 16;
    compressed = (unsigned char*)malloc( (size_t)*out_size );
    if( NULL == compressed )
    {
        *out_size = 0;
        return NULL;
    }
    /* go through each block */
    for( j = 0; j < height; j += 4 )
    {
        for( i = 0; i < width; i += 4 )
        {
            int idx = 0;
            /* how many columns / rows of this block lie inside the image */
            int mx = 4, my = 4;
            if( j+4 >= height )
            {
                my = height - j;
            }
            if( i+4 >= width )
            {
                mx = width - i;
            }
            for( y = 0; y < my; ++y )
            {
                /* first pixel of this block row; size_t math so large
                    images cannot overflow the offset */
                const unsigned char *src = uncompressed +
                    ((size_t)(j+y) * (size_t)width + (size_t)i) * (size_t)channels;
                for( x = 0; x < mx; ++x )
                {
                    ublock[idx++] = src[0];
                    ublock[idx++] = src[chan_step];
                    ublock[idx++] = src[chan_step+chan_step];
                    /* opaque when the source has no alpha channel */
                    ublock[idx++] = has_alpha ? src[channels-1] : 255;
                    src += channels;
                }
                /* pad missing columns with the block's first pixel */
                for( x = mx; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                    ublock[idx++] = ublock[3];
                }
            }
            /* pad missing rows with the block's first pixel */
            for( y = my; y < 4; ++y )
            {
                for( x = 0; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                    ublock[idx++] = ublock[3];
                }
            }
            /* compress the alpha block and append it to the main buffer */
            compress_DDS_alpha_block( ublock, cblock );
            memcpy( compressed + index, cblock, 8 );
            index += 8;
            /* then compress the color block and append it as well */
            compress_DDS_color_block( 4, ublock, cblock );
            memcpy( compressed + index, cblock, 8 );
            index += 8;
        }
    }
    return compressed;
}
276
277/********* Helper Functions *********/
/*
    Rescales `c`, a value that is `from_bits` wide, to a value that
    is `to_bits` wide, using rounded integer arithmetic only
    (e.g. 8 bit 255 -> 5 bit 31, 5 bit 16 -> 8 bit 132).
*/
int convert_bit_range( int c, int from_bits, int to_bits )
{
    const int to_max = (1 << to_bits) - 1;
    const int half = 1 << (from_bits - 1);
    int scaled = half + c * to_max;
    /* adding scaled >> from_bits turns the final shift into a
        rounded division by (2^from_bits - 1) */
    scaled += scaled >> from_bits;
    return scaled >> from_bits;
}
283
/*
    Packs 8 bit r, g, b into one 16 bit 565 value:
    red in bits 11..15, green in bits 5..10, blue in bits 0..4.
*/
int rgb_to_565( int r, int g, int b )
{
    const int r5 = convert_bit_range( r, 8, 5 );
    const int g6 = convert_bit_range( g, 8, 6 );
    const int b5 = convert_bit_range( b, 8, 5 );
    return (r5 << 11) | (g6 << 5) | b5;
}
291
/*
    Unpacks the 565 color `c` (red in bits 11..15, green in bits
    5..10, blue in bits 0..4) and stores each field, widened to
    8 bits, in *r, *g and *b.
*/
void rgb_888_from_565( unsigned int c, int *r, int *g, int *b )
{
    const unsigned int red5 = (c >> 11) & 31;
    const unsigned int green6 = (c >> 5) & 63;
    const unsigned int blue5 = c & 31;
    *r = convert_bit_range( red5, 5, 8 );
    *g = convert_bit_range( green6, 6, 8 );
    *b = convert_bit_range( blue5, 5, 8 );
}
298
299void compute_color_line_STDEV(
300 const unsigned char *const uncompressed,
301 int channels,
302 float point[3], float direction[3] )
303{
304 const float inv_16 = 1.0f / 16.0f;
305 int i;
306 float sum_r = 0.0f, sum_g = 0.0f, sum_b = 0.0f;
307 float sum_rr = 0.0f, sum_gg = 0.0f, sum_bb = 0.0f;
308 float sum_rg = 0.0f, sum_rb = 0.0f, sum_gb = 0.0f;
309 /* calculate all data needed for the covariance matrix
310 ( to compare with _rygdxt code) */
311 for( i = 0; i < 16*channels; i += channels )
312 {
313 sum_r += uncompressed[i+0];
314 sum_rr += uncompressed[i+0] * uncompressed[i+0];
315 sum_g += uncompressed[i+1];
316 sum_gg += uncompressed[i+1] * uncompressed[i+1];
317 sum_b += uncompressed[i+2];
318 sum_bb += uncompressed[i+2] * uncompressed[i+2];
319 sum_rg += uncompressed[i+0] * uncompressed[i+1];
320 sum_rb += uncompressed[i+0] * uncompressed[i+2];
321 sum_gb += uncompressed[i+1] * uncompressed[i+2];
322 }
323 /* convert the sums to averages */
324 sum_r *= inv_16;
325 sum_g *= inv_16;
326 sum_b *= inv_16;
327 /* and convert the squares to the squares of the value - avg_value */
328 sum_rr -= 16.0f * sum_r * sum_r;
329 sum_gg -= 16.0f * sum_g * sum_g;
330 sum_bb -= 16.0f * sum_b * sum_b;
331 sum_rg -= 16.0f * sum_r * sum_g;
332 sum_rb -= 16.0f * sum_r * sum_b;
333 sum_gb -= 16.0f * sum_g * sum_b;
334 /* the point on the color line is the average */
335 point[0] = sum_r;
336 point[1] = sum_g;
337 point[2] = sum_b;
338 #if USE_COV_MAT
339 /*
340 The following idea was from ryg.
341 (https://mollyrocket.com/forums/viewtopic.php?t=392)
342 The method worked great (less RMSE than mine) most of
343 the time, but had some issues handling some simple
344 boundary cases, like full green next to full red,
345 which would generate a covariance matrix like this:
346
347 | 1 -1 0 |
348 | -1 1 0 |
349 | 0 0 0 |
350
351 For a given starting vector, the power method can
352 generate all zeros! So no starting with {1,1,1}
353 as I was doing! This kind of error is still a
354 slight posibillity, but will be very rare.
355 */
356 /* use the covariance matrix directly
357 (1st iteration, don't use all 1.0 values!) */
358 sum_r = 1.0f;
359 sum_g = 2.718281828f;
360 sum_b = 3.141592654f;
361 direction[0] = sum_r*sum_rr + sum_g*sum_rg + sum_b*sum_rb;
362 direction[1] = sum_r*sum_rg + sum_g*sum_gg + sum_b*sum_gb;
363 direction[2] = sum_r*sum_rb + sum_g*sum_gb + sum_b*sum_bb;
364 /* 2nd iteration, use results from the 1st guy */
365 sum_r = direction[0];
366 sum_g = direction[1];
367 sum_b = direction[2];
368 direction[0] = sum_r*sum_rr + sum_g*sum_rg + sum_b*sum_rb;
369 direction[1] = sum_r*sum_rg + sum_g*sum_gg + sum_b*sum_gb;
370 direction[2] = sum_r*sum_rb + sum_g*sum_gb + sum_b*sum_bb;
371 /* 3rd iteration, use results from the 2nd guy */
372 sum_r = direction[0];
373 sum_g = direction[1];
374 sum_b = direction[2];
375 direction[0] = sum_r*sum_rr + sum_g*sum_rg + sum_b*sum_rb;
376 direction[1] = sum_r*sum_rg + sum_g*sum_gg + sum_b*sum_gb;
377 direction[2] = sum_r*sum_rb + sum_g*sum_gb + sum_b*sum_bb;
378 #else
379 /* use my standard deviation method
380 (very robust, a tiny bit slower and less accurate) */
381 direction[0] = sqrt( sum_rr );
382 direction[1] = sqrt( sum_gg );
383 direction[2] = sqrt( sum_bb );
384 /* which has a greater component */
385 if( sum_gg > sum_rr )
386 {
387 /* green has greater component, so base the other signs off of green */
388 if( sum_rg < 0.0f )
389 {
390 direction[0] = -direction[0];
391 }
392 if( sum_gb < 0.0f )
393 {
394 direction[2] = -direction[2];
395 }
396 } else
397 {
398 /* red has a greater component */
399 if( sum_rg < 0.0f )
400 {
401 direction[1] = -direction[1];
402 }
403 if( sum_rb < 0.0f )
404 {
405 direction[2] = -direction[2];
406 }
407 }
408 #endif
409}
410
/*
    Picks the two 565 "master" colors for one 4x4 pixel block.

    The block's colors are fitted with a line (mean color plus a
    direction, see compute_color_line_STDEV).  Every pixel is
    projected onto that line; the two extreme projections become the
    end-point colors, which are clamped to 0..255 and converted to
    565.

    cmax, cmin      out: the two 565 colors, *cmax >= *cmin as
                    integers
    channels        bytes per pixel in `uncompressed`, must be 3 or 4
    uncompressed    16 pixels; bytes 0, 1, 2 of each are R, G, B

    If `channels` is not 3 or 4, both outputs are set to 0 so that
    the caller never reads an indeterminate value.
*/
void LSE_master_colors_max_min(
        int *cmax, int *cmin,
        int channels,
        const unsigned char *const uncompressed )
{
    int i, j;
    /* the master colors */
    int c0[3], c1[3];
    /* the fitted line: sum_x is a point on it (the average color),
        sum_x2 is its (un-normalized) direction */
    float sum_x[] = { 0.0f, 0.0f, 0.0f };
    float sum_x2[] = { 0.0f, 0.0f, 0.0f };
    float dot_max, dot_min;
    float vec_len2;
    float dot;
    /* error check */
    if( (channels < 3) || (channels > 4) )
    {
        *cmax = 0;
        *cmin = 0;
        return;
    }
    compute_color_line_STDEV( uncompressed, channels, sum_x, sum_x2 );
    /* 1 / |direction|^2; the small constant keeps a zero direction
        from dividing by zero */
    vec_len2 = 1.0f / ( 0.00001f +
            sum_x2[0]*sum_x2[0] + sum_x2[1]*sum_x2[1] + sum_x2[2]*sum_x2[2] );
    /* finding the max and min vector values */
    dot_max =
            (
                sum_x2[0] * uncompressed[0] +
                sum_x2[1] * uncompressed[1] +
                sum_x2[2] * uncompressed[2]
            );
    dot_min = dot_max;
    for( i = 1; i < 16; ++i )
    {
        dot =
            (
                sum_x2[0] * uncompressed[i*channels+0] +
                sum_x2[1] * uncompressed[i*channels+1] +
                sum_x2[2] * uncompressed[i*channels+2]
            );
        if( dot < dot_min )
        {
            dot_min = dot;
        } else if( dot > dot_max )
        {
            dot_max = dot;
        }
    }
    /* and the offset (from the average location) */
    dot = sum_x2[0]*sum_x[0] + sum_x2[1]*sum_x[1] + sum_x2[2]*sum_x[2];
    dot_min -= dot;
    dot_max -= dot;
    /* post multiply by the scaling factor */
    dot_min *= vec_len2;
    dot_max *= vec_len2;
    /* OK, build the master colors: average + extreme * direction,
        rounded and clamped to a byte */
    for( i = 0; i < 3; ++i )
    {
        /* color 0 */
        c0[i] = (int)(0.5f + sum_x[i] + dot_max * sum_x2[i]);
        if( c0[i] < 0 )
        {
            c0[i] = 0;
        } else if( c0[i] > 255 )
        {
            c0[i] = 255;
        }
        /* color 1 */
        c1[i] = (int)(0.5f + sum_x[i] + dot_min * sum_x2[i]);
        if( c1[i] < 0 )
        {
            c1[i] = 0;
        } else if( c1[i] > 255 )
        {
            c1[i] = 255;
        }
    }
    /* down-sample to 565 and hand back the larger value as cmax */
    i = rgb_to_565( c0[0], c0[1], c0[2] );
    j = rgb_to_565( c1[0], c1[1], c1[2] );
    if( i > j )
    {
        *cmax = i;
        *cmin = j;
    } else
    {
        *cmax = j;
        *cmin = i;
    }
}
499
/*
    Compresses the color of one 4x4 pixel block into 8 bytes of
    DXT1 data.

    channels        bytes per pixel in `uncompressed`
    uncompressed    16 pixels; bytes 0, 1, 2 of each are R, G, B
    compressed      out: bytes 0..1 = 565 color 0, bytes 2..3 = 565
                    color 1 (both low byte first), bytes 4..7 =
                    sixteen 2 bit indices, first pixel in the lowest
                    bits
*/
void
compress_DDS_color_block
(
    int channels,
    const unsigned char *const uncompressed,
    unsigned char compressed[8]
)
{
    /* position along the line (0 = at color 0 ... 3 = at color 1)
        -> the 2 bit index that gets stored */
    static const int index_for_step[4] = { 0, 2, 3, 1 };
    int color0_565, color1_565;
    int rgb0[3], rgb1[3];
    float axis[3];
    float inv_len2 = 0.0f, base = 0.0f;
    int k, pixel;
    /* pick the two end-point colors */
    LSE_master_colors_max_min( &color0_565, &color1_565, channels, uncompressed );
    /* store them, low byte first */
    compressed[0] = color0_565 & 255;
    compressed[1] = (color0_565 >> 8) & 255;
    compressed[2] = color1_565 & 255;
    compressed[3] = (color1_565 >> 8) & 255;
    /* the index bits get OR-ed in below, so start from zero */
    for( k = 4; k < 8; ++k )
    {
        compressed[k] = 0;
    }
    /* work with the colors as the decoder will see them */
    rgb_888_from_565( color0_565, &rgb0[0], &rgb0[1], &rgb0[2] );
    rgb_888_from_565( color1_565, &rgb1[0], &rgb1[1], &rgb1[2] );
    /* vector from color 0 to color 1, and its squared length */
    for( k = 0; k < 3; ++k )
    {
        axis[k] = (float)(rgb1[k] - rgb0[k]);
        inv_len2 += axis[k] * axis[k];
    }
    if( inv_len2 > 0.0f )
    {
        inv_len2 = 1.0f / inv_len2;
    }
    /* pre-scale, so that a dot product with the axis yields 0 at
        color 0 and 1 at color 1 */
    for( k = 0; k < 3; ++k )
    {
        axis[k] *= inv_len2;
    }
    /* constant part of the dot product */
    base = axis[0]*rgb0[0] + axis[1]*rgb0[1] + axis[2]*rgb0[2];
    /* emit one 2 bit index per pixel, starting at byte 4 */
    for( pixel = 0; pixel < 16; ++pixel )
    {
        const unsigned char *const px = uncompressed + pixel*channels;
        const int bit = 8*4 + 2*pixel;
        /* where this pixel falls along the line */
        float t =
            axis[0] * px[0] +
            axis[1] * px[1] +
            axis[2] * px[2] -
            base;
        /* map to [0,3] */
        int step = (int)( t * 3.0f + 0.5f );
        if( step < 0 )
        {
            step = 0;
        } else if( step > 3 )
        {
            step = 3;
        }
        compressed[bit >> 3] |= index_for_step[ step ] << (bit & 7);
    }
}
576
/*
    Compresses the alpha channel of one 4x4 RGBA block into the
    8 byte alpha part of a DXT5 block.

    uncompressed    16 pixels of 4 bytes each; only byte 3 (alpha)
                    of every pixel is read
    compressed      out: byte 0 = largest alpha (a0), byte 1 =
                    smallest alpha (a1), bytes 2..7 = sixteen 3 bit
                    indices, first pixel in the lowest bits
*/
void
compress_DDS_alpha_block
(
    const unsigned char *const uncompressed,
    unsigned char compressed[8]
)
{
    /* variables */
    int i;
    int next_bit;
    int a0, a1;
    float scale_me;
    /* bucket (0 = at a1 ... 7 = at a0) -> stored index.  In the
        stored form index 0 means a0, index 1 means a1 and indices
        2..7 step from a0 toward a1, hence the odd order */
    int swizzle8[] = { 1, 7, 6, 5, 4, 3, 2, 0 };
    /* get the alpha limits (a0 >= a1) */
    a0 = a1 = uncompressed[3];
    for( i = 4+3; i < 16*4; i += 4 )
    {
        if( uncompressed[i] > a0 )
        {
            a0 = uncompressed[i];
        } else if( uncompressed[i] < a1 )
        {
            a1 = uncompressed[i];
        }
    }
    /* store those limits */
    compressed[0] = a0;
    compressed[1] = a1;
    /* zero the index bytes, the indices get OR-ed in below */
    compressed[2] = 0;
    compressed[3] = 0;
    compressed[4] = 0;
    compressed[5] = 0;
    compressed[6] = 0;
    compressed[7] = 0;
    /* A block with uniform alpha has a0 == a1.  Dividing by that
        zero range gives infinity, 0 * infinity gives NaN, and
        converting NaN to int is undefined.  A zero scale puts every
        pixel in bucket 0 instead, which stores index 1 (= a1, the
        uniform value). */
    if( a0 > a1 )
    {
        scale_me = 7.9999f / (a0 - a1);
    } else
    {
        scale_me = 0.0f;
    }
    /* store all of the alpha values */
    next_bit = 8*2;
    for( i = 3; i < 16*4; i += 4 )
    {
        /* convert this alpha value to a 3 bit number */
        int svalue;
        int value = (int)((uncompressed[i] - a1) * scale_me);
        svalue = swizzle8[ value&7 ];
        /* OK, store this value, start with the 1st byte */
        compressed[next_bit >> 3] |= svalue << (next_bit & 7);
        if( (next_bit & 7) > 5 )
        {
            /* spans 2 bytes, fill in the start of the 2nd byte */
            compressed[1 + (next_bit >> 3)] |= svalue >> (8 - (next_bit & 7) );
        }
        next_bit += 3;
    }
    /* done compressing the DXT5 alpha block */
}
633