1/* stbi-1.16 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
2 when you control the images you're loading
3
4 QUICK NOTES:
5 Primarily of interest to game developers and other people who can
6 avoid problematic images and only need the trivial interface
7
8 JPEG baseline (no JPEG progressive, no oddball channel decimations)
9 PNG non-interlaced
10 BMP non-1bpp, non-RLE
11 TGA (not sure what subset, if a subset)
12 PSD (composited view only, no extra channels)
13 HDR (radiance rgbE format)
14 writes BMP,TGA (define STBI_NO_WRITE to remove code)
15 decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
16 supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
17
18 TODO:
19 stbi_info_*
20
21 history:
22 1.16 major bugfix - convert_format converted one too many pixels
23 1.15 initialize some fields for thread safety
24 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
25 1.13 threadsafe
26 1.12 const qualifiers in the API
27 1.11 Support installable IDCT, colorspace conversion routines
28 1.10 Fixes for 64-bit (don't use "unsigned long")
29 optimized upsampling by Fabian "ryg" Giesen
30 1.09 Fix format-conversion for PSD code (bad global variables!)
31 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
32 1.07 attempt to fix C++ warning/errors again
33 1.06 attempt to fix C++ warning/errors again
34 1.05 fix TGA loading to return correct *comp and use good luminance calc
35 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
36 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
37 1.02 support for (subset of) HDR files, float interface for preferred access to them
38 1.01 fix bug: possible bug in handling right-side up bmps... not sure
39 fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
40 1.00 interface to zlib that skips zlib header
41 0.99 correct handling of alpha in palette
42 0.98 TGA loader by lonesock; dynamically add loaders (untested)
43 0.97 jpeg errors on too large a file; also catch another malloc failure
44 0.96 fix detection of invalid v value - particleman@mollyrocket forum
45 0.95 during header scan, seek to markers in case of padding
46 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
47 0.93 handle jpegtran output; verbose errors
48 0.92 read 4,8,16,24,32-bit BMP files of several formats
49 0.91 output 24-bit Windows 3.0 BMP files
50 0.90 fix a few more warnings; bump version number to approach 1.0
51 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
52 0.60 fix compiling as c++
53 0.59 fix warnings: merge Dave Moore's -Wall fixes
54 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
55 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less
56 than 16 available
57 0.56 fix bug: zlib uncompressed mode len vs. nlen
58 0.55 fix bug: restart_interval not initialized to 0
59 0.54 allow NULL for 'int *comp'
60 0.53 fix bug in png 3->4; speedup png decoding
61 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
62 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
63 on 'test' only check type, not whether we support this variant
64*/
65
66#include "stb_image_aug.h"
67
68#ifndef STBI_NO_HDR
69#include <math.h> // ldexp
70#include <string.h> // strcmp
71#endif
72
73#ifndef STBI_NO_STDIO
74#include <stdio.h>
75#endif
76#include <stdlib.h>
77#include <memory.h>
78#include <assert.h>
79#include <stdarg.h>
80
// Compilers other than MSVC have no __forceinline keyword: map it to plain
// 'inline' when compiling as C++, and to nothing at all when compiling as C.
#if !defined(_MSC_VER)
   #if defined(__cplusplus)
      #define __forceinline inline
   #else
      #define __forceinline
   #endif
#endif
88
89
90// implementation:
// short aliases for the integer types used throughout the implementation
typedef unsigned char  uint8;
typedef unsigned short uint16;
typedef   signed short int16;
typedef unsigned int   uint32;
typedef   signed int   int32;
typedef unsigned int   uint;

// Compile-time check that uint32 is exactly 4 bytes. A false condition gives
// a negative array size, which every conforming compiler must reject; using
// the bare boolean would give a zero-length array on failure, which several
// compilers silently accept as an extension.
typedef unsigned char validate_uint32[sizeof(uint32)==4 ? 1 : -1];
100
// disabling stdio also disables the image writers (unless the user already
// disabled them explicitly)
#ifdef STBI_NO_STDIO
   #ifndef STBI_NO_WRITE
      #define STBI_NO_WRITE
   #endif
#endif
104
105#ifndef STBI_NO_DDS
106#include "stbi_DDS_aug.h"
107#endif
108
// I (JLD) want full messages for SOIL
#define STBI_FAILURE_USERMSG 1

//////////////////////////////////////////////////////////////////////////////
//
// Generic API that works on all image types
//

// Message recorded by the most recent failure. It is a single file-scope
// variable shared by every caller, so this is not threadsafe.
static const char *failure_reason;

// Return the message recorded by the most recent failure (NULL if nothing has
// been recorded yet). The returned string must not be modified or freed: the
// non-const return type is kept only for compatibility with the public header.
char *stbi_failure_reason(void)
{
   return (char *) failure_reason;
}

// Record 'str' as the current failure reason. Always returns 0 so that a
// caller can write 'return e(...)' to fail in a single statement.
static int e(const char *str)
{
   failure_reason = str;
   return 0;
}

// e(short_msg, user_msg) selects which of the two messages is recorded:
//   STBI_NO_FAILURE_STRINGS - neither; the call collapses to the constant 0
//   STBI_FAILURE_USERMSG    - the longer, user-oriented message
//   otherwise               - the short developer message
#ifdef STBI_NO_FAILURE_STRINGS
   #define e(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define e(x,y)  e(y)
#else
   #define e(x,y)  e(x)
#endif

// same as e(), but the expression has type 'float *' / 'unsigned char *' and
// value NULL, for use in functions that return pixel buffers
#define epf(x,y)   ((float *) (e(x,y)?NULL:NULL))
#define epuc(x,y)  ((unsigned char *) (e(x,y)?NULL:NULL))
141
// Release a pixel buffer previously handed out by one of the load functions.
// The pointer is passed straight to free().
void stbi_image_free(void *retval_from_stbi_load)
{
   void *pixels = retval_from_stbi_load;
   free(pixels);
}
146
147#define MAX_LOADERS 32
148stbi_loader *loaders[MAX_LOADERS];
149static int max_loaders = 0;
150
151int stbi_register_loader(stbi_loader *loader)
152{
153 int i;
154 for (i=0; i < MAX_LOADERS; ++i) {
155 // already present?
156 if (loaders[i] == loader)
157 return 1;
158 // end of the list?
159 if (loaders[i] == NULL) {
160 loaders[i] = loader;
161 max_loaders = i+1;
162 return 1;
163 }
164 }
165 // no room for it
166 return 0;
167}
168
169#ifndef STBI_NO_HDR
170static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
171static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp);
172#endif
173
174#ifndef STBI_NO_STDIO
// Open 'filename' in binary mode and decode it with stbi_load_from_file().
// The file is closed again before returning. Returns the decoded pixels, or
// NULL (with a failure reason recorded) if the file cannot be opened or decoded.
unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return epuc("can't fopen", "Unable to open file");
   pixels = stbi_load_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
184
185unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
186{
187 int i;
188 if (stbi_jpeg_test_file(f))
189 return stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
190 if (stbi_png_test_file(f))
191 return stbi_png_load_from_file(f,x,y,comp,req_comp);
192 if (stbi_bmp_test_file(f))
193 return stbi_bmp_load_from_file(f,x,y,comp,req_comp);
194 if (stbi_psd_test_file(f))
195 return stbi_psd_load_from_file(f,x,y,comp,req_comp);
196 #ifndef STBI_NO_DDS
197 if (stbi_dds_test_file(f))
198 return stbi_dds_load_from_file(f,x,y,comp,req_comp);
199 #endif
200 #ifndef STBI_NO_HDR
201 if (stbi_hdr_test_file(f)) {
202 float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp);
203 return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
204 }
205 #endif
206 for (i=0; i < max_loaders; ++i)
207 if (loaders[i]->test_file(f))
208 return loaders[i]->load_from_file(f,x,y,comp,req_comp);
209 // test tga last because it's a crappy test!
210 if (stbi_tga_test_file(f))
211 return stbi_tga_load_from_file(f,x,y,comp,req_comp);
212 return epuc("unknown image type", "Image not of any known type, or corrupt");
213}
214#endif
215
216unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
217{
218 int i;
219 if (stbi_jpeg_test_memory(buffer,len))
220 return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp);
221 if (stbi_png_test_memory(buffer,len))
222 return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp);
223 if (stbi_bmp_test_memory(buffer,len))
224 return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp);
225 if (stbi_psd_test_memory(buffer,len))
226 return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp);
227 #ifndef STBI_NO_DDS
228 if (stbi_dds_test_memory(buffer,len))
229 return stbi_dds_load_from_memory(buffer,len,x,y,comp,req_comp);
230 #endif
231 #ifndef STBI_NO_HDR
232 if (stbi_hdr_test_memory(buffer, len)) {
233 float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
234 return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
235 }
236 #endif
237 for (i=0; i < max_loaders; ++i)
238 if (loaders[i]->test_memory(buffer,len))
239 return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp);
240 // test tga last because it's a crappy test!
241 if (stbi_tga_test_memory(buffer,len))
242 return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp);
243 return epuc("unknown image type", "Image not of any known type, or corrupt");
244}
245
246#ifndef STBI_NO_HDR
247
248#ifndef STBI_NO_STDIO
// Float-returning counterpart of stbi_load(): open 'filename' in binary mode,
// decode it with stbi_loadf_from_file(), and close the file again.
// Returns NULL (with a failure reason recorded) if the file cannot be opened.
float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   float *pixels;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return epf("can't fopen", "Unable to open file");
   pixels = stbi_loadf_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
258
// Decode an image from an open stdio stream as float pixels. HDR files are
// loaded directly; anything else is decoded to 8 bits per channel first and
// then converted with ldr_to_hdr(). Returns NULL with a failure reason
// recorded when nothing could be decoded.
float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   #ifndef STBI_NO_HDR
   if (stbi_hdr_test_file(f))
      return stbi_hdr_load_from_file(f,x,y,comp,req_comp);
   #endif
   ldr = stbi_load_from_file(f, x, y, comp, req_comp);
   if (ldr == NULL)
      return epf("unknown image type", "Image not of any known type, or corrupt");
   // the converter works on the component count actually present in 'ldr'
   return ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
271#endif
272
273float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
274{
275 stbi_uc *data;
276 #ifndef STBI_NO_HDR
277 if (stbi_hdr_test_memory(buffer, len))
278 return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
279 #endif
280 data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
281 if (data)
282 return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
283 return epf("unknown image type", "Image not of any known type, or corrupt");
284}
285#endif
286
// the is-hdr-or-not queries are defined regardless of whether STBI_NO_HDR
// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
290
291int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
292{
293 #ifndef STBI_NO_HDR
294 return stbi_hdr_test_memory(buffer, len);
295 #else
296 return 0;
297 #endif
298}
299
300#ifndef STBI_NO_STDIO
// Report whether the named file passes the HDR format test.
// A file that cannot be opened is reported as "not HDR" (0).
extern int stbi_is_hdr (char const *filename)
{
   int is_hdr;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return 0;
   is_hdr = stbi_is_hdr_from_file(fp);
   fclose(fp);
   return is_hdr;
}
311
// Report whether the open stream passes the HDR format test.
// Always 0 when HDR support is compiled out.
extern int stbi_is_hdr_from_file(FILE *f)
{
   #ifdef STBI_NO_HDR
   return 0;
   #else
   return stbi_hdr_test_file(f);
   #endif
}
320
321#endif
322
323// @TODO: get image dimensions & components without fully decoding
324#ifndef STBI_NO_STDIO
325extern int stbi_info (char const *filename, int *x, int *y, int *comp);
326extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
327#endif
328extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
329
#ifndef STBI_NO_HDR
// parameters of the HDR -> LDR conversion, stored as reciprocals (the "_i")
static float h2l_gamma_i = 1.0f/2.2f;
static float h2l_scale_i = 1.0f;
// parameters of the LDR -> HDR conversion, stored as given
static float l2h_gamma = 2.2f;
static float l2h_scale = 1.0f;

// set the gamma used when converting HDR pixels to LDR (stored as 1/gamma)
void stbi_hdr_to_ldr_gamma(float gamma)
{
   h2l_gamma_i = 1.0f / gamma;
}

// set the scale used when converting HDR pixels to LDR (stored as 1/scale)
void stbi_hdr_to_ldr_scale(float scale)
{
   h2l_scale_i = 1.0f / scale;
}

// set the gamma used when converting LDR pixels to HDR
void stbi_ldr_to_hdr_gamma(float gamma)
{
   l2h_gamma = gamma;
}

// set the scale used when converting LDR pixels to HDR
void stbi_ldr_to_hdr_scale(float scale)
{
   l2h_scale = scale;
}
#endif
340
341
342//////////////////////////////////////////////////////////////////////////////
343//
344// Common code used by all image loaders
345//
346
// scan-mode selectors; how each one is interpreted is up to the individual
// loaders that receive it
enum
{
   SCAN_load   = 0,
   SCAN_type   = 1,
   SCAN_header = 2
};
353
354typedef struct
355{
356 uint32 img_x, img_y;
357 int img_n, img_out_n;
358
359 #ifndef STBI_NO_STDIO
360 FILE *img_file;
361 #endif
362 uint8 *img_buffer, *img_buffer_end;
363} stbi;
364
365#ifndef STBI_NO_STDIO
366static void start_file(stbi *s, FILE *f)
367{
368 s->img_file = f;
369}
370#endif
371
372static void start_mem(stbi *s, uint8 const *buffer, int len)
373{
374#ifndef STBI_NO_STDIO
375 s->img_file = NULL;
376#endif
377 s->img_buffer = (uint8 *) buffer;
378 s->img_buffer_end = (uint8 *) buffer+len;
379}
380
381__forceinline static int get8(stbi *s)
382{
383#ifndef STBI_NO_STDIO
384 if (s->img_file) {
385 int c = fgetc(s->img_file);
386 return c == EOF ? 0 : c;
387 }
388#endif
389 if (s->img_buffer < s->img_buffer_end)
390 return *s->img_buffer++;
391 return 0;
392}
393
394__forceinline static int at_eof(stbi *s)
395{
396#ifndef STBI_NO_STDIO
397 if (s->img_file)
398 return feof(s->img_file);
399#endif
400 return s->img_buffer >= s->img_buffer_end;
401}
402
403__forceinline static uint8 get8u(stbi *s)
404{
405 return (uint8) get8(s);
406}
407
408static void skip(stbi *s, int n)
409{
410#ifndef STBI_NO_STDIO
411 if (s->img_file)
412 fseek(s->img_file, n, SEEK_CUR);
413 else
414#endif
415 s->img_buffer += n;
416}
417
418static int get16(stbi *s)
419{
420 int z = get8(s);
421 return (z << 8) + get8(s);
422}
423
424static uint32 get32(stbi *s)
425{
426 uint32 z = get16(s);
427 return (z << 16) + get16(s);
428}
429
430static int get16le(stbi *s)
431{
432 int z = get8(s);
433 return z + (get8(s) << 8);
434}
435
436static uint32 get32le(stbi *s)
437{
438 uint32 z = get16le(s);
439 return z + (get16le(s) << 16);
440}
441
442static void getn(stbi *s, stbi_uc *buffer, int n)
443{
444#ifndef STBI_NO_STDIO
445 if (s->img_file) {
446 fread(buffer, 1, n, s->img_file);
447 return;
448 }
449#endif
450 memcpy(buffer, s->img_buffer, n);
451 s->img_buffer += n;
452}
453
454//////////////////////////////////////////////////////////////////////////////
455//
456// generic converter from built-in img_n to req_comp
457// individual types do this automatically as much as possible (e.g. jpeg
458// does all cases internally since it needs to colorspace convert anyway,
459// and it never has alpha, so very few cases ). png can automatically
460// interleave an alpha=255 channel, but falls back to this for other cases
461//
462// assume data buffer is malloced, so malloc a new one and free that one
463// only failure mode is malloc failing
464
465static uint8 compute_y(int r, int g, int b)
466{
467 return (uint8) (((r*77) + (g*150) + (29*b)) >> 8);
468}
469
470static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
471{
472 int i,j;
473 unsigned char *good;
474
475 if (req_comp == img_n) return data;
476 assert(req_comp >= 1 && req_comp <= 4);
477
478 good = (unsigned char *) malloc(req_comp * x * y);
479 if (good == NULL) {
480 free(data);
481 return epuc("outofmem", "Out of memory");
482 }
483
484 for (j=0; j < (int) y; ++j) {
485 unsigned char *src = data + j * x * img_n ;
486 unsigned char *dest = good + j * x * req_comp;
487
488 #define COMBO(a,b) ((a)*8+(b))
489 #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
490 // convert source image with img_n components to one with req_comp components;
491 // avoid switch per pixel, so use switch per scanline and massive macros
492 switch(COMBO(img_n, req_comp)) {
493 CASE(1,2) dest[0]=src[0], dest[1]=255; break;
494 CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
495 CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
496 CASE(2,1) dest[0]=src[0]; break;
497 CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
498 CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
499 CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
500 CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
501 CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
502 CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
503 CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
504 CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
505 default: assert(0);
506 }
507 #undef CASE
508 }
509
510 free(data);
511 return good;
512}
513
514#ifndef STBI_NO_HDR
515static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
516{
517 int i,k,n;
518 float *output = (float *) malloc(x * y * comp * sizeof(float));
519 if (output == NULL) { free(data); return epf("outofmem", "Out of memory"); }
520 // compute number of non-alpha components
521 if (comp & 1) n = comp; else n = comp-1;
522 for (i=0; i < x*y; ++i) {
523 for (k=0; k < n; ++k) {
524 output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale;
525 }
526 if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
527 }
528 free(data);
529 return output;
530}
531
532#define float2int(x) ((int) (x))
533static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp)
534{
535 int i,k,n;
536 stbi_uc *output = (stbi_uc *) malloc(x * y * comp);
537 if (output == NULL) { free(data); return epuc("outofmem", "Out of memory"); }
538 // compute number of non-alpha components
539 if (comp & 1) n = comp; else n = comp-1;
540 for (i=0; i < x*y; ++i) {
541 for (k=0; k < n; ++k) {
542 float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
543 if (z < 0) z = 0;
544 if (z > 255) z = 255;
545 output[i*comp + k] = float2int(z);
546 }
547 if (k < comp) {
548 float z = data[i*comp+k] * 255 + 0.5f;
549 if (z < 0) z = 0;
550 if (z > 255) z = 255;
551 output[i*comp + k] = float2int(z);
552 }
553 }
554 free(data);
555 return output;
556}
557#endif
558
559//////////////////////////////////////////////////////////////////////////////
560//
561// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
562//
563// simple implementation
564// - channel subsampling of at most 2 in each dimension
565// - doesn't support delayed output of y-dimension
566// - simple interface (only one output format: 8-bit interleaved RGB)
567// - doesn't try to recover corrupt jpegs
568// - doesn't allow partial loading, loading multiple at once
569// - still fast on x86 (copying globals into locals doesn't help x86)
570// - allocates lots of intermediate memory (full size of all components)
571// - non-interleaved case requires this anyway
572// - allows good upsampling (see next)
573// high-quality
574// - upsampled channels are bilinearly interpolated, even across blocks
575// - quality integer IDCT derived from IJG's 'slow'
576// performance
577// - fast huffman; reasonable integer IDCT
578// - uses a lot of intermediate memory, could cache poorly
579// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
580// stb_jpeg: 1.34 seconds (MSVC6, default release build)
581// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro)
582// IJL11.dll: 1.08 seconds (compiled by intel)
583// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG)
584// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro)
585
586// huffman decoding acceleration
587#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
588
589typedef struct
590{
591 uint8 fast[1 << FAST_BITS];
592 // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
593 uint16 code[256];
594 uint8 values[256];
595 uint8 size[257];
596 unsigned int maxcode[18];
597 int delta[17]; // old 'firstsymbol' - old 'firstcode'
598} huffman;
599
600typedef struct
601{
602 #if STBI_SIMD
603 unsigned short dequant2[4][64];
604 #endif
605 stbi s;
606 huffman huff_dc[4];
607 huffman huff_ac[4];
608 uint8 dequant[4][64];
609
610// sizes for components, interleaved MCUs
611 int img_h_max, img_v_max;
612 int img_mcu_x, img_mcu_y;
613 int img_mcu_w, img_mcu_h;
614
615// definition of jpeg image component
616 struct
617 {
618 int id;
619 int h,v;
620 int tq;
621 int hd,ha;
622 int dc_pred;
623
624 int x,y,w2,h2;
625 uint8 *data;
626 void *raw_data;
627 uint8 *linebuf;
628 } img_comp[4];
629
630 uint32 code_buffer; // jpeg entropy-coded buffer
631 int code_bits; // number of valid bits
632 unsigned char marker; // marker seen while filling entropy buffer
633 int nomore; // flag if we saw a marker so must stop
634
635 int scan_n, order[4];
636 int restart_interval, todo;
637} jpeg;
638
639static int build_huffman(huffman *h, int *count)
640{
641 int i,j,k=0,code;
642 // build size list for each symbol (from JPEG spec)
643 for (i=0; i < 16; ++i)
644 for (j=0; j < count[i]; ++j)
645 h->size[k++] = (uint8) (i+1);
646 h->size[k] = 0;
647
648 // compute actual symbols (from jpeg spec)
649 code = 0;
650 k = 0;
651 for(j=1; j <= 16; ++j) {
652 // compute delta to add to code to compute symbol id
653 h->delta[j] = k - code;
654 if (h->size[k] == j) {
655 while (h->size[k] == j)
656 h->code[k++] = (uint16) (code++);
657 if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG");
658 }
659 // compute largest code + 1 for this size, preshifted as needed later
660 h->maxcode[j] = code << (16-j);
661 code <<= 1;
662 }
663 h->maxcode[j] = 0xffffffff;
664
665 // build non-spec acceleration table; 255 is flag for not-accelerated
666 memset(h->fast, 255, 1 << FAST_BITS);
667 for (i=0; i < k; ++i) {
668 int s = h->size[i];
669 if (s <= FAST_BITS) {
670 int c = h->code[i] << (FAST_BITS-s);
671 int m = 1 << (FAST_BITS-s);
672 for (j=0; j < m; ++j) {
673 h->fast[c+j] = (uint8) i;
674 }
675 }
676 }
677 return 1;
678}
679
680static void grow_buffer_unsafe(jpeg *j)
681{
682 do {
683 int b = j->nomore ? 0 : get8(&j->s);
684 if (b == 0xff) {
685 int c = get8(&j->s);
686 if (c != 0) {
687 j->marker = (unsigned char) c;
688 j->nomore = 1;
689 return;
690 }
691 }
692 j->code_buffer = (j->code_buffer << 8) | b;
693 j->code_bits += 8;
694 } while (j->code_bits <= 24);
695}
696
697// (1 << n) - 1
698static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
699
700// decode a jpeg huffman value from the bitstream
701__forceinline static int decode(jpeg *j, huffman *h)
702{
703 unsigned int temp;
704 int c,k;
705
706 if (j->code_bits < 16) grow_buffer_unsafe(j);
707
708 // look at the top FAST_BITS and determine what symbol ID it is,
709 // if the code is <= FAST_BITS
710 c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1);
711 k = h->fast[c];
712 if (k < 255) {
713 if (h->size[k] > j->code_bits)
714 return -1;
715 j->code_bits -= h->size[k];
716 return h->values[k];
717 }
718
719 // naive test is to shift the code_buffer down so k bits are
720 // valid, then test against maxcode. To speed this up, we've
721 // preshifted maxcode left so that it has (16-k) 0s at the
722 // end; in other words, regardless of the number of bits, it
723 // wants to be compared against something shifted to have 16;
724 // that way we don't need to shift inside the loop.
725 if (j->code_bits < 16)
726 temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff;
727 else
728 temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff;
729 for (k=FAST_BITS+1 ; ; ++k)
730 if (temp < h->maxcode[k])
731 break;
732 if (k == 17) {
733 // error! code not found
734 j->code_bits -= 16;
735 return -1;
736 }
737
738 if (k > j->code_bits)
739 return -1;
740
741 // convert the huffman code to the symbol id
742 c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k];
743 assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
744
745 // convert the id to a symbol
746 j->code_bits -= k;
747 return h->values[c];
748}
749
750// combined JPEG 'receive' and JPEG 'extend', since baseline
751// always extends everything it receives.
752__forceinline static int extend_receive(jpeg *j, int n)
753{
754 unsigned int m = 1 << (n-1);
755 unsigned int k;
756 if (j->code_bits < n) grow_buffer_unsafe(j);
757 k = (j->code_buffer >> (j->code_bits - n)) & bmask[n];
758 j->code_bits -= n;
759 // the following test is probably a random branch that won't
760 // predict well. I tried to table accelerate it but failed.
761 // maybe it's compiling as a conditional move?
762 if (k < m)
763 return (-1 << n) + k + 1;
764 else
765 return k;
766}
767
768// given a value that's at position X in the zigzag stream,
769// where does it appear in the 8x8 matrix coded as row-major?
770static uint8 dezigzag[64+15] =
771{
772 0, 1, 8, 16, 9, 2, 3, 10,
773 17, 24, 32, 25, 18, 11, 4, 5,
774 12, 19, 26, 33, 40, 48, 41, 34,
775 27, 20, 13, 6, 7, 14, 21, 28,
776 35, 42, 49, 56, 57, 50, 43, 36,
777 29, 22, 15, 23, 30, 37, 44, 51,
778 58, 59, 52, 45, 38, 31, 39, 46,
779 53, 60, 61, 54, 47, 55, 62, 63,
780 // let corrupt input sample past end
781 63, 63, 63, 63, 63, 63, 63, 63,
782 63, 63, 63, 63, 63, 63, 63
783};
784
785// decode one 64-entry block--
786static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
787{
788 int diff,dc,k;
789 int t = decode(j, hdc);
790 if (t < 0) return e("bad huffman code","Corrupt JPEG");
791
792 // 0 all the ac values now so we can do it 32-bits at a time
793 memset(data,0,64*sizeof(data[0]));
794
795 diff = t ? extend_receive(j, t) : 0;
796 dc = j->img_comp[b].dc_pred + diff;
797 j->img_comp[b].dc_pred = dc;
798 data[0] = (short) dc;
799
800 // decode AC components, see JPEG spec
801 k = 1;
802 do {
803 int r,s;
804 int rs = decode(j, hac);
805 if (rs < 0) return e("bad huffman code","Corrupt JPEG");
806 s = rs & 15;
807 r = rs >> 4;
808 if (s == 0) {
809 if (rs != 0xf0) break; // end block
810 k += 16;
811 } else {
812 k += r;
813 // decode into unzigzag'd location
814 data[dezigzag[k++]] = (short) extend_receive(j,s);
815 }
816 } while (k < 64);
817 return 1;
818}
819
820// take a -128..127 value and clamp it and convert to 0..255
821__forceinline static uint8 clamp(int x)
822{
823 x += 128;
824 // trick to use a single test to catch both cases
825 if ((unsigned int) x > 255) {
826 if (x < 0) return 0;
827 if (x > 255) return 255;
828 }
829 return (uint8) x;
830}
831
// fixed-point helpers for the IDCT, 12 fractional bits:
//   f2f(x)  a floating-point constant as fixed point (adds 0.5, then truncates)
//   fsh(x)  an integer scaled up by 1<<12; written as a multiplication because
//           left-shifting a negative value is undefined behaviour
// Both expansions are fully parenthesized so they are safe in any expression.
#define f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define fsh(x)  ((x) * 4096)
834
// derived from jidctint -- DCT_ISLOW
//
// One 8-point pass over the inputs s0..s7. The macro declares its own
// temporaries in the enclosing scope, so it may be used only once per block.
// It leaves the results in x0..x3 and t0..t3, scaled up by 1<<12; the caller
// forms the eight outputs as x0+t3, x1+t2, x2+t1, x3+t0, x3-t0, x2-t1, x1-t2
// and x0-t3.
#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7)          \
   int t0,t1,t2,t3;                               \
   int p1,p2,p3,p4,p5;                            \
   int x0,x1,x2,x3;                               \
   /* even-numbered inputs: s2,s6 then s0,s4 */   \
   p2 = (s2);                                     \
   p3 = (s6);                                     \
   p1 = (p2+p3) * f2f(0.5411961f);                \
   t2 = p1 + p3*f2f(-1.847759065f);               \
   t3 = p1 + p2*f2f( 0.765366865f);               \
   p2 = (s0);                                     \
   p3 = (s4);                                     \
   t0 = fsh(p2+p3);                               \
   t1 = fsh(p2-p3);                               \
   x0 = t0+t3;                                    \
   x3 = t0-t3;                                    \
   x1 = t1+t2;                                    \
   x2 = t1-t2;                                    \
   /* odd-numbered inputs: s7,s5,s3,s1 */         \
   t0 = (s7);                                     \
   t1 = (s5);                                     \
   t2 = (s3);                                     \
   t3 = (s1);                                     \
   p3 = t0+t2;                                    \
   p4 = t1+t3;                                    \
   p1 = t0+t3;                                    \
   p2 = t1+t2;                                    \
   p5 = (p3+p4)*f2f( 1.175875602f);               \
   t0 = t0*f2f( 0.298631336f);                    \
   t1 = t1*f2f( 2.053119869f);                    \
   t2 = t2*f2f( 3.072711026f);                    \
   t3 = t3*f2f( 1.501321110f);                    \
   p1 = p5 + p1*f2f(-0.899976223f);               \
   p2 = p5 + p2*f2f(-2.562915447f);               \
   p3 = p3*f2f(-1.961570560f);                    \
   p4 = p4*f2f(-0.390180644f);                    \
   t3 += p1+p4;                                   \
   t2 += p2+p3;                                   \
   t1 += p2+p4;                                   \
   t0 += p1+p3;
872
873#if !STBI_SIMD
874// .344 seconds on 3*anemones.jpg
875static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize)
876{
877 int i,val[64],*v=val;
878 uint8 *o,*dq = dequantize;
879 short *d = data;
880
881 // columns
882 for (i=0; i < 8; ++i,++d,++dq, ++v) {
883 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
884 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
885 && d[40]==0 && d[48]==0 && d[56]==0) {
886 // no shortcut 0 seconds
887 // (1|2|3|4|5|6|7)==0 0 seconds
888 // all separate -0.047 seconds
889 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
890 int dcterm = d[0] * dq[0] << 2;
891 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
892 } else {
893 IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
894 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
895 // constants scaled things up by 1<<12; let's bring them back
896 // down, but keep 2 extra bits of precision
897 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
898 v[ 0] = (x0+t3) >> 10;
899 v[56] = (x0-t3) >> 10;
900 v[ 8] = (x1+t2) >> 10;
901 v[48] = (x1-t2) >> 10;
902 v[16] = (x2+t1) >> 10;
903 v[40] = (x2-t1) >> 10;
904 v[24] = (x3+t0) >> 10;
905 v[32] = (x3-t0) >> 10;
906 }
907 }
908
909 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
910 // no fast case since the first 1D IDCT spread components out
911 IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
912 // constants scaled things up by 1<<12, plus we had 1<<2 from first
913 // loop, plus horizontal and vertical each scale by sqrt(8) so together
914 // we've got an extra 1<<3, so 1<<17 total we need to remove.
915 x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
916 o[0] = clamp((x0+t3) >> 17);
917 o[7] = clamp((x0-t3) >> 17);
918 o[1] = clamp((x1+t2) >> 17);
919 o[6] = clamp((x1-t2) >> 17);
920 o[2] = clamp((x2+t1) >> 17);
921 o[5] = clamp((x2-t1) >> 17);
922 o[3] = clamp((x3+t0) >> 17);
923 o[4] = clamp((x3-t0) >> 17);
924 }
925}
926#else
927static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize)
928{
929 int i,val[64],*v=val;
930 uint8 *o;
931 unsigned short *dq = dequantize;
932 short *d = data;
933
934 // columns
935 for (i=0; i < 8; ++i,++d,++dq, ++v) {
936 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
937 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
938 && d[40]==0 && d[48]==0 && d[56]==0) {
939 // no shortcut 0 seconds
940 // (1|2|3|4|5|6|7)==0 0 seconds
941 // all separate -0.047 seconds
942 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
943 int dcterm = d[0] * dq[0] << 2;
944 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
945 } else {
946 IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
947 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
948 // constants scaled things up by 1<<12; let's bring them back
949 // down, but keep 2 extra bits of precision
950 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
951 v[ 0] = (x0+t3) >> 10;
952 v[56] = (x0-t3) >> 10;
953 v[ 8] = (x1+t2) >> 10;
954 v[48] = (x1-t2) >> 10;
955 v[16] = (x2+t1) >> 10;
956 v[40] = (x2-t1) >> 10;
957 v[24] = (x3+t0) >> 10;
958 v[32] = (x3-t0) >> 10;
959 }
960 }
961
962 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
963 // no fast case since the first 1D IDCT spread components out
964 IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
965 // constants scaled things up by 1<<12, plus we had 1<<2 from first
966 // loop, plus horizontal and vertical each scale by sqrt(8) so together
967 // we've got an extra 1<<3, so 1<<17 total we need to remove.
968 x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
969 o[0] = clamp((x0+t3) >> 17);
970 o[7] = clamp((x0-t3) >> 17);
971 o[1] = clamp((x1+t2) >> 17);
972 o[6] = clamp((x1-t2) >> 17);
973 o[2] = clamp((x2+t1) >> 17);
974 o[5] = clamp((x2-t1) >> 17);
975 o[3] = clamp((x3+t0) >> 17);
976 o[4] = clamp((x3-t0) >> 17);
977 }
978}
979static stbi_idct_8x8 stbi_idct_installed = idct_block;
980
981extern void stbi_install_idct(stbi_idct_8x8 func)
982{
983 stbi_idct_installed = func;
984}
985#endif
986
987#define MARKER_none 0xff
988// if there's a pending marker from the entropy stream, return that
989// otherwise, fetch from the stream and get a marker. if there's no
990// marker, return 0xff, which is never a valid marker value
991static uint8 get_marker(jpeg *j)
992{
993 uint8 x;
994 if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; }
995 x = get8u(&j->s);
996 if (x != 0xff) return MARKER_none;
997 while (x == 0xff)
998 x = get8u(&j->s);
999 return x;
1000}
1001
// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]

// true for the restart marker codes 0xd0..0xd7 (the argument may be
// evaluated twice)
#define RESTART(x)     (!((x) < 0xd0 || (x) > 0xd7))
1005
1006// after a restart interval, reset the entropy decoder and
1007// the dc prediction
1008static void reset(jpeg *j)
1009{
1010 j->code_bits = 0;
1011 j->code_buffer = 0;
1012 j->nomore = 0;
1013 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1014 j->marker = MARKER_none;
1015 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
1016 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
1017 // since we don't even allow 1<<30 pixels
1018}
1019
1020static int parse_entropy_coded_data(jpeg *z)
1021{
1022 reset(z);
1023 if (z->scan_n == 1) {
1024 int i,j;
1025 #if STBI_SIMD
1026 __declspec(align(16))
1027 #endif
1028 short data[64];
1029 int n = z->order[0];
1030 // non-interleaved data, we just need to process one block at a time,
1031 // in trivial scanline order
1032 // number of blocks to do just depends on how many actual "pixels" this
1033 // component has, independent of interleaved MCU blocking and such
1034 int w = (z->img_comp[n].x+7) >> 3;
1035 int h = (z->img_comp[n].y+7) >> 3;
1036 for (j=0; j < h; ++j) {
1037 for (i=0; i < w; ++i) {
1038 if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1039 #if STBI_SIMD
1040 stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1041 #else
1042 idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1043 #endif
1044 // every data block is an MCU, so countdown the restart interval
1045 if (--z->todo <= 0) {
1046 if (z->code_bits < 24) grow_buffer_unsafe(z);
1047 // if it's NOT a restart, then just bail, so we get corrupt data
1048 // rather than no data
1049 if (!RESTART(z->marker)) return 1;
1050 reset(z);
1051 }
1052 }
1053 }
1054 } else { // interleaved!
1055 int i,j,k,x,y;
1056 short data[64];
1057 for (j=0; j < z->img_mcu_y; ++j) {
1058 for (i=0; i < z->img_mcu_x; ++i) {
1059 // scan an interleaved mcu... process scan_n components in order
1060 for (k=0; k < z->scan_n; ++k) {
1061 int n = z->order[k];
1062 // scan out an mcu's worth of this component; that's just determined
1063 // by the basic H and V specified for the component
1064 for (y=0; y < z->img_comp[n].v; ++y) {
1065 for (x=0; x < z->img_comp[n].h; ++x) {
1066 int x2 = (i*z->img_comp[n].h + x)*8;
1067 int y2 = (j*z->img_comp[n].v + y)*8;
1068 if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1069 #if STBI_SIMD
1070 stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1071 #else
1072 idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1073 #endif
1074 }
1075 }
1076 }
1077 // after all interleaved components, that's an interleaved MCU,
1078 // so now count down the restart interval
1079 if (--z->todo <= 0) {
1080 if (z->code_bits < 24) grow_buffer_unsafe(z);
1081 // if it's NOT a restart, then just bail, so we get corrupt data
1082 // rather than no data
1083 if (!RESTART(z->marker)) return 1;
1084 reset(z);
1085 }
1086 }
1087 }
1088 }
1089 return 1;
1090}
1091
1092static int process_marker(jpeg *z, int m)
1093{
1094 int L;
1095 switch (m) {
1096 case MARKER_none: // no marker found
1097 return e("expected marker","Corrupt JPEG");
1098
1099 case 0xC2: // SOF - progressive
1100 return e("progressive jpeg","JPEG format not supported (progressive)");
1101
1102 case 0xDD: // DRI - specify restart interval
1103 if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG");
1104 z->restart_interval = get16(&z->s);
1105 return 1;
1106
1107 case 0xDB: // DQT - define quantization table
1108 L = get16(&z->s)-2;
1109 while (L > 0) {
1110 int q = get8(&z->s);
1111 int p = q >> 4;
1112 int t = q & 15,i;
1113 if (p != 0) return e("bad DQT type","Corrupt JPEG");
1114 if (t > 3) return e("bad DQT table","Corrupt JPEG");
1115 for (i=0; i < 64; ++i)
1116 z->dequant[t][dezigzag[i]] = get8u(&z->s);
1117 #if STBI_SIMD
1118 for (i=0; i < 64; ++i)
1119 z->dequant2[t][i] = dequant[t][i];
1120 #endif
1121 L -= 65;
1122 }
1123 return L==0;
1124
1125 case 0xC4: // DHT - define huffman table
1126 L = get16(&z->s)-2;
1127 while (L > 0) {
1128 uint8 *v;
1129 int sizes[16],i,m=0;
1130 int q = get8(&z->s);
1131 int tc = q >> 4;
1132 int th = q & 15;
1133 if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG");
1134 for (i=0; i < 16; ++i) {
1135 sizes[i] = get8(&z->s);
1136 m += sizes[i];
1137 }
1138 L -= 17;
1139 if (tc == 0) {
1140 if (!build_huffman(z->huff_dc+th, sizes)) return 0;
1141 v = z->huff_dc[th].values;
1142 } else {
1143 if (!build_huffman(z->huff_ac+th, sizes)) return 0;
1144 v = z->huff_ac[th].values;
1145 }
1146 for (i=0; i < m; ++i)
1147 v[i] = get8u(&z->s);
1148 L -= m;
1149 }
1150 return L==0;
1151 }
1152 // check for comment block or APP blocks
1153 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
1154 skip(&z->s, get16(&z->s)-2);
1155 return 1;
1156 }
1157 return 0;
1158}
1159
1160// after we see SOS
1161static int process_scan_header(jpeg *z)
1162{
1163 int i;
1164 int Ls = get16(&z->s);
1165 z->scan_n = get8(&z->s);
1166 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG");
1167 if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG");
1168 for (i=0; i < z->scan_n; ++i) {
1169 int id = get8(&z->s), which;
1170 int q = get8(&z->s);
1171 for (which = 0; which < z->s.img_n; ++which)
1172 if (z->img_comp[which].id == id)
1173 break;
1174 if (which == z->s.img_n) return 0;
1175 z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG");
1176 z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG");
1177 z->order[i] = which;
1178 }
1179 if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1180 get8(&z->s); // should be 63, but might be 0
1181 if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1182
1183 return 1;
1184}
1185
// Parse the SOF (start of frame) header and, when scan == SCAN_load,
// allocate the per-component decode planes.
//   z:    decoder state; image size and component info are stored in it
//   scan: if not SCAN_load, the function returns after validating the
//         header, without computing MCU layout or allocating anything
// Returns 1 on success, 0 on failure (failure reason set via e()).
static int process_frame_header(jpeg *z, int scan)
{
   stbi *s = &z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = get16(s);         if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG
   p  = get8(s);          if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = get16(s);   if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = get16(s);   if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires
   c = get8(s);
   if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG");    // JFIF requires
   s->img_n = c;
   // clear the buffer pointers first, so cleanup_jpeg can tell which
   // components own memory if a later step fails
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   // the header must be exactly 8 bytes plus 3 bytes per component
   if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG");

   for (i=0; i < s->img_n; ++i) {
      z->img_comp[i].id = get8(s);
      if (z->img_comp[i].id != i+1)   // JFIF requires
         if (z->img_comp[i].id != i)  // some version of jpegtran outputs non-JFIF-compliant files!
            return e("bad component ID","Corrupt JPEG");
      // sampling factors: high nibble horizontal, low nibble vertical, each 1..4
      q = get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG");
      // quantization table selector
      z->img_comp[i].tq = get8(s);  if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG");
   }

   if (scan != SCAN_load) return 1;

   // reject images with more than 1<<30 samples (width * height * components)
   if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");

   // the largest sampling factors determine the MCU size
   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // number of MCUs across and down, rounded up
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      // 15 spare bytes leave room to round the start up to a 16-byte boundary
      z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
      if (z->img_comp[i].raw_data == NULL) {
         // release the planes of the components allocated so far and clear
         // their 'data' so cleanup_jpeg won't free them a second time
         for(--i; i >= 0; --i) {
            free(z->img_comp[i].raw_data);
            z->img_comp[i].data = NULL;
         }
         return e("outofmem", "Out of memory");
      }
      // align blocks for installable-idct using mmx/sse
      z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      z->img_comp[i].linebuf = NULL;
   }

   return 1;
}
1257
1258// use comparisons since in some cases we handle more than one case (e.g. SOF)
1259#define DNL(x) ((x) == 0xdc)
1260#define SOI(x) ((x) == 0xd8)
1261#define EOI(x) ((x) == 0xd9)
1262#define SOF(x) ((x) == 0xc0 || (x) == 0xc1)
1263#define SOS(x) ((x) == 0xda)
1264
1265static int decode_jpeg_header(jpeg *z, int scan)
1266{
1267 int m;
1268 z->marker = MARKER_none; // initialize cached marker to empty
1269 m = get_marker(z);
1270 if (!SOI(m)) return e("no SOI","Corrupt JPEG");
1271 if (scan == SCAN_type) return 1;
1272 m = get_marker(z);
1273 while (!SOF(m)) {
1274 if (!process_marker(z,m)) return 0;
1275 m = get_marker(z);
1276 while (m == MARKER_none) {
1277 // some files have extra padding after their blocks, so ok, we'll scan
1278 if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG");
1279 m = get_marker(z);
1280 }
1281 }
1282 if (!process_frame_header(z, scan)) return 0;
1283 return 1;
1284}
1285
1286static int decode_jpeg_image(jpeg *j)
1287{
1288 int m;
1289 j->restart_interval = 0;
1290 if (!decode_jpeg_header(j, SCAN_load)) return 0;
1291 m = get_marker(j);
1292 while (!EOI(m)) {
1293 if (SOS(m)) {
1294 if (!process_scan_header(j)) return 0;
1295 if (!parse_entropy_coded_data(j)) return 0;
1296 } else {
1297 if (!process_marker(j, m)) return 0;
1298 }
1299 m = get_marker(j);
1300 }
1301 return 1;
1302}
1303
// static jfif-centered resampling (across block boundaries)

// A row resampler produces one output row for a component.
//   out:      buffer the resampler may write the row into
//   in0, in1: two input rows (the implementations below name them
//             in_near and in_far)
//   w:        number of input samples per row
//   hs:       horizontal expansion factor
// Returns a pointer to the finished row, which is not necessarily 'out'.
typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
                                    int w, int hs);

// divide by 4 (by shifting) and truncate to 8 bits
#define div4(x) ((uint8) ((x) >> 2))

// no resampling needed: the near input row is returned as-is and 'out'
// is left untouched
static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   return in_near;
}
1315
// Vertical 2x upsampling: every output sample is the rounded blend
// (3*near + far) / 4 of the two input rows. 'hs' is not used.
// Writes w samples to 'out' and returns 'out'.
static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   int col = 0;
   while (col < w) {
      // +2 rounds to nearest before the divide by 4
      int blend = 3*in_near[col] + in_far[col] + 2;
      out[col] = div4(blend);
      ++col;
   }
   return out;
}
1324
// Horizontal 2x upsampling: produce two output samples for every input
// sample of in_near ('in_far' and 'hs' are not used).
//   out[2*i]   = (3*in[i] + in[i-1] + 2) / 4
//   out[2*i+1] = (3*in[i] + in[i+1] + 2) / 4
// with the first and last output samples copied straight from the ends of
// the input. Writes 2*w samples to 'out' and returns 'out'.
static uint8*  resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   // need to generate two samples horizontally for every one in input
   int i;
   uint8 *input = in_near;
   if (w == 1) {
      // if only one sample, can't do any interpolation
      out[0] = out[1] = input[0];
      return out;
   }

   out[0] = input[0];
   out[1] = div4(input[0]*3 + input[1] + 2);
   for (i=1; i < w-1; ++i) {
      int n = 3*input[i]+2;
      out[i*2+0] = div4(n+input[i-1]);
      out[i*2+1] = div4(n+input[i+1]);
   }
   // i == w-1 here. The left-hand output of the last input sample is
   // weighted 3:1 towards that last sample, exactly like out[i*2+0] in the
   // loop above (the weights used to be swapped towards input[w-2]).
   out[i*2+0] = div4(input[w-1]*3 + input[w-2] + 2);
   out[i*2+1] = input[w-1];
   return out;
}
1347
#define div16(x) ((uint8) ((x) >> 4))

// 2x upsampling in both directions at once. Each input column is first
// blended vertically as 3*near + far; neighbouring column blends are then
// mixed 3:1 horizontally, and the two outermost output samples use the
// vertical blend alone. 'hs' is not used.
// Writes 2*w samples to 'out' and returns 'out'.
static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   int i, prev, cur;

   // vertical blend of the first column, scaled by 4
   cur = 3*in_near[0] + in_far[0];
   if (w == 1) {
      // a single column: nothing to interpolate horizontally
      uint8 only = div4(cur + 2);
      out[1] = only;
      out[0] = only;
      return out;
   }

   out[0] = div4(cur + 2);
   for (i = 1; i < w; ++i) {
      prev = cur;
      cur = 3*in_near[i] + in_far[i];
      // the two output samples lying between columns i-1 and i; sums are
      // scaled by 16, and +8 rounds to nearest
      out[2*i - 1] = div16(3*prev + cur + 8);
      out[2*i    ] = div16(3*cur + prev + 8);
   }
   out[2*w - 1] = div4(cur + 2);
   return out;
}
1370
// Fallback resampler: nearest-neighbour expansion that repeats every
// sample of in_near 'hs' times ('in_far' is not used).
// Writes w*hs samples to 'out' and returns 'out'.
static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   uint8 *dst = out;
   int src, rep;
   for (src = 0; src < w; ++src) {
      for (rep = 0; rep < hs; ++rep)
         *dst++ = in_near[src];
   }
   return out;
}
1380
#define float2fixed(x)  ((int) ((x) * 65536 + 0.5))

// Convert one row of YCbCr samples to RGB using 16.16 fixed-point math.
//   out:      destination; pixels are 'step' bytes apart
//   y/pcb/pcr: 'count' samples each of luma, Cb and Cr
//   step:     bytes per output pixel
// Four bytes (R, G, B, 255) are stored for every pixel regardless of
// 'step', so with step == 3 the alpha byte is overwritten by the next
// pixel and the final pixel writes one byte beyond count*step.
static void YCbCr_to_RGB_row(uint8 *out, uint8 *y, uint8 *pcb, uint8 *pcr, int count, int step)
{
   int px;
   uint8 *dst = out;
   for (px = 0; px < count; ++px, dst += step) {
      int luma = (y[px] << 16) + 32768; // rounding
      int cr = pcr[px] - 128;
      int cb = pcb[px] - 128;
      int r = (luma + cr*float2fixed(1.40200f)) >> 16;
      int g = (luma - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f)) >> 16;
      int b = (luma + cb*float2fixed(1.77200f)) >> 16;
      // clamp every channel to 0..255
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;
      dst[0] = (uint8)r;
      dst[1] = (uint8)g;
      dst[2] = (uint8)b;
      dst[3] = 255;
   }
}
1409
1410#if STBI_SIMD
// the YCbCr-to-RGB row converter load_jpeg_image calls when STBI_SIMD is
// enabled; it starts out pointing at the built-in YCbCr_to_RGB_row
static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;

// install 'func' as the YCbCr-to-RGB row converter used from now on.
// 'func' is stored as-is; no validation is performed.
void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
{
   stbi_YCbCr_installed = func;
}
1417#endif
1418
1419
1420// clean up the temporary component buffers
1421static void cleanup_jpeg(jpeg *j)
1422{
1423 int i;
1424 for (i=0; i < j->s.img_n; ++i) {
1425 if (j->img_comp[i].data) {
1426 free(j->img_comp[i].raw_data);
1427 j->img_comp[i].data = NULL;
1428 }
1429 if (j->img_comp[i].linebuf) {
1430 free(j->img_comp[i].linebuf);
1431 j->img_comp[i].linebuf = NULL;
1432 }
1433 }
1434}
1435
// per-component upsampling state used by load_jpeg_image while it walks
// the output rows
typedef struct
{
   resample_row_func resample;  // row resampler chosen from hs/vs
   uint8 *line0,*line1;         // the two input rows currently being blended
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi_resample;
1445
// Decode a JPEG from the stream already attached to z->s and return the
// pixels as a malloc'd buffer of img_x * img_y * n bytes, where n is
// req_comp, or the file's own component count when req_comp is 0.
//   out_x, out_y: receive the image dimensions
//   comp:         if non-NULL, receives the component count of the file
//                 (not of the returned buffer)
//   req_comp:     0..4; 0 means "whatever the file has"
// Returns NULL on failure. The temporary decode buffers are released
// before returning, on success and on failure.
static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n;
   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
   // zero components until a frame header has been parsed, so that
   // cleanup_jpeg has nothing to visit after an early failure
   z->s.img_n = 0;

   // load a jpeg image from whichever source
   if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s.img_n;

   // grey output from a 3-component image only needs the first component
   if (z->s.img_n == 3 && n < 3)
      decode_n = 1;
   else
      decode_n = z->s.img_n;

   // resample and color-convert
   {
      int k;
      uint i,j;
      uint8 *output;
      uint8 *coutput[4];   // one resampled row per decoded component

      stbi_resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi_resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (uint8 *) malloc(z->s.img_x + 3);
         if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }

         // expansion factors relative to the most finely sampled component
         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s.img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // dedicated resamplers for the 1x and 2x cases, generic otherwise
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2;
         else                               r->resample = resample_row_generic;
      }

      // this allocation is the last step that can fail.
      // the extra byte absorbs the 4th byte YCbCr_to_RGB_row stores for the
      // final pixel when n == 3
      output = (uint8 *) malloc(n * z->s.img_x * z->s.img_y + 1);
      if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s.img_y; ++j) {
         uint8 *out = output + n * z->s.img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi_resample *r = &res_comp[k];
            // in the second half of the vertical expansion, line1 is the
            // "near" row
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            // after vs output rows, step down one input row; line1 stops
            // advancing once the component's last row has been reached
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            uint8 *y = coutput[0];
            if (z->s.img_n == 3) {
               #if STBI_SIMD
               stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
               #else
               YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n);
               #endif
            } else
               // single-component source: replicate it into R, G and B
               for (i=0; i < z->s.img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // 1 or 2 output components: first component, plus opaque alpha
            // when n == 2
            uint8 *y = coutput[0];
            if (n == 1)
               for (i=0; i < z->s.img_x; ++i) out[i] = y[i];
            else
               for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255;
         }
      }
      cleanup_jpeg(z);
      *out_x = z->s.img_x;
      *out_y = z->s.img_y;
      if (comp) *comp  = z->s.img_n; // report original components, not output
      return output;
   }
}
1545
1546#ifndef STBI_NO_STDIO
// Decode a JPEG from an already-open FILE. The file is not closed here.
// x, y, comp and req_comp are passed straight through to load_jpeg_image.
// Returns the pixel buffer, or NULL on failure.
unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   jpeg j;
   start_file(&j.s, f);
   return load_jpeg_image(&j, x,y,comp,req_comp);
}
1553
// Decode the JPEG stored in the file called 'filename'. The file is opened
// in binary mode and closed again before returning.
// Returns the pixel buffer, or NULL if the file can't be opened or
// decoding fails.
unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   FILE *f = fopen(filename, "rb");
   if (f != NULL) {
      unsigned char *pixels = stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
      fclose(f);
      return pixels;
   }
   return NULL;
}
1563#endif
1564
// Decode a JPEG held in the 'len'-byte memory block at 'buffer'.
// x, y, comp and req_comp are passed straight through to load_jpeg_image.
// Returns the pixel buffer, or NULL on failure.
unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   jpeg j;
   start_mem(&j.s, buffer,len);
   return load_jpeg_image(&j, x,y,comp,req_comp);
}
1571
1572#ifndef STBI_NO_STDIO
1573int stbi_jpeg_test_file(FILE *f)
1574{
1575 int n,r;
1576 jpeg j;
1577 n = ftell(f);
1578 start_file(&j.s, f);
1579 r = decode_jpeg_header(&j, SCAN_type);
1580 fseek(f,n,SEEK_SET);
1581 return r;
1582}
1583#endif
1584
// Check whether the 'len'-byte memory block at 'buffer' starts like a
// JPEG (only the SOI marker is examined).
// Returns non-zero if it looks like a JPEG.
int stbi_jpeg_test_memory(stbi_uc const *buffer, int len)
{
   jpeg j;
   start_mem(&j.s, buffer,len);
   return decode_jpeg_header(&j, SCAN_type);
}
1591
1592// @TODO:
1593#ifndef STBI_NO_STDIO
1594extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp);
1595extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp);
1596#endif
1597extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
1598
1599// public domain zlib decode v0.2 Sean Barrett 2006-11-18
1600// simple implementation
1601// - all input must be provided in an upfront buffer
1602// - all output is written to a single output buffer (can malloc/realloc)
1603// performance
1604// - fast huffman
1605
// fast-way is faster to check than jpeg huffman, but slow way is slower
#define ZFAST_BITS  9 // accelerate all cases in default tables
#define ZFAST_MASK  ((1 << ZFAST_BITS) - 1)

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
typedef struct
{
   uint16 fast[1 << ZFAST_BITS]; // next ZFAST_BITS input bits -> index into size/value; 0xffff = not resolved by the fast table
   uint16 firstcode[16];         // first code assigned to each code length
   int maxcode[17];              // one past the last code of each length, preshifted to 16 bits; [16] is a sentinel
   uint16 firstsymbol[16];       // index into size/value of the first symbol of each length
   uint8  size[288];             // code length of each table entry
   uint16 value[288];            // symbol of each table entry
} zhuffman;
1621
// Reverse the order of the low 16 bits of n (bit 0 <-> bit 15, and so on).
// Bits above the low 16 are ignored; the result is in 0..0xFFFF.
__forceinline static int bitreverse16(int n)
{
   int v = n & 0xFFFF;
   // swap neighbouring groups of 1, 2 and 4 bits, then the two bytes
   v = ((v & 0x5555) << 1) | ((v & 0xAAAA) >> 1);
   v = ((v & 0x3333) << 2) | ((v & 0xCCCC) >> 2);
   v = ((v & 0x0F0F) << 4) | ((v & 0xF0F0) >> 4);
   return ((v & 0x00FF) << 8) | ((v & 0xFF00) >> 8);
}
1630
1631__forceinline static int bit_reverse(int v, int bits)
1632{
1633 assert(bits <= 16);
1634 // to bit reverse n bits, reverse 16 and shift
1635 // e.g. 11 bits, bit reverse and shift away 5
1636 return bitreverse16(v) >> (16-bits);
1637}
1638
1639static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
1640{
1641 int i,k=0;
1642 int code, next_code[16], sizes[17];
1643
1644 // DEFLATE spec for generating codes
1645 memset(sizes, 0, sizeof(sizes));
1646 memset(z->fast, 255, sizeof(z->fast));
1647 for (i=0; i < num; ++i)
1648 ++sizes[sizelist[i]];
1649 sizes[0] = 0;
1650 for (i=1; i < 16; ++i)
1651 assert(sizes[i] <= (1 << i));
1652 code = 0;
1653 for (i=1; i < 16; ++i) {
1654 next_code[i] = code;
1655 z->firstcode[i] = (uint16) code;
1656 z->firstsymbol[i] = (uint16) k;
1657 code = (code + sizes[i]);
1658 if (sizes[i])
1659 if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG");
1660 z->maxcode[i] = code << (16-i); // preshift for inner loop
1661 code <<= 1;
1662 k += sizes[i];
1663 }
1664 z->maxcode[16] = 0x10000; // sentinel
1665 for (i=0; i < num; ++i) {
1666 int s = sizelist[i];
1667 if (s) {
1668 int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
1669 z->size[c] = (uint8)s;
1670 z->value[c] = (uint16)i;
1671 if (s <= ZFAST_BITS) {
1672 int k = bit_reverse(next_code[s],s);
1673 while (k < (1 << ZFAST_BITS)) {
1674 z->fast[k] = (uint16) c;
1675 k += (1 << s);
1676 }
1677 }
1678 ++next_code[s];
1679 }
1680 }
1681 return 1;
1682}
1683
1684// zlib-from-memory implementation for PNG reading
1685// because PNG allows splitting the zlib stream arbitrarily,
1686// and it's annoying structurally to have PNG call ZLIB call PNG,
1687// we require PNG read all the IDATs and combine them into a single
1688// memory buffer
1689
// state of one zlib decode: input cursor, bit buffer, output buffer and
// the two huffman tables of the block being decoded
typedef struct
{
   uint8 *zbuffer, *zbuffer_end;  // next input byte / end of input
   int num_bits;                  // number of valid bits in code_buffer
   uint32 code_buffer;            // bits read but not yet consumed, lowest bit first

   char *zout;                    // next output position
   char *zout_start;              // start of the output buffer
   char *zout_end;                // end of the output buffer
   int   z_expandable;            // non-zero if expand() may realloc the output buffer

   zhuffman z_length, z_distance; // literal/length and distance codes
} zbuf;
1703
1704__forceinline static int zget8(zbuf *z)
1705{
1706 if (z->zbuffer >= z->zbuffer_end) return 0;
1707 return *z->zbuffer++;
1708}
1709
1710static void fill_bits(zbuf *z)
1711{
1712 do {
1713 assert(z->code_buffer < (1U << z->num_bits));
1714 z->code_buffer |= zget8(z) << z->num_bits;
1715 z->num_bits += 8;
1716 } while (z->num_bits <= 24);
1717}
1718
1719__forceinline static unsigned int zreceive(zbuf *z, int n)
1720{
1721 unsigned int k;
1722 if (z->num_bits < n) fill_bits(z);
1723 k = z->code_buffer & ((1 << n) - 1);
1724 z->code_buffer >>= n;
1725 z->num_bits -= n;
1726 return k;
1727}
1728
// Decode one symbol from the input using table 'z'.
// Returns the symbol, or -1 if the upcoming bits match no code of up to
// 15 bits.
__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z)
{
   int b,s,k;
   if (a->num_bits < 16) fill_bits(a);
   // fast path: look the next ZFAST_BITS bits up directly
   b = z->fast[a->code_buffer & ZFAST_MASK];
   if (b < 0xffff) {
      s = z->size[b];
      a->code_buffer >>= s;
      a->num_bits -= s;
      return z->value[b];
   }

   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = bit_reverse(a->code_buffer, 16);
   // find the shortest length whose preshifted maxcode exceeds k; the
   // sentinel in maxcode[16] guarantees the loop terminates
   for (s=ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s == 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   assert(z->size[b] == s);
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
1755
1756static int expand(zbuf *z, int n) // need to make room for n bytes
1757{
1758 char *q;
1759 int cur, limit;
1760 if (!z->z_expandable) return e("output buffer limit","Corrupt PNG");
1761 cur = (int) (z->zout - z->zout_start);
1762 limit = (int) (z->zout_end - z->zout_start);
1763 while (cur + n > limit)
1764 limit *= 2;
1765 q = (char *) realloc(z->zout_start, limit);
1766 if (q == NULL) return e("outofmem", "Out of memory");
1767 z->zout_start = q;
1768 z->zout = q + cur;
1769 z->zout_end = q + limit;
1770 return 1;
1771}
1772
// base match length for each length symbol; indexed by (symbol - 257).
// the last two entries are zero.
static int length_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

// number of extra bits to read and add to length_base, same indexing
static int length_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// base match distance for each distance symbol; the last two entries are zero
static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// number of extra bits to read and add to dist_base; only 30 entries are
// listed, so the remaining two are implicitly zero
static int dist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
1786
1787static int parse_huffman_block(zbuf *a)
1788{
1789 for(;;) {
1790 int z = zhuffman_decode(a, &a->z_length);
1791 if (z < 256) {
1792 if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes
1793 if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0;
1794 *a->zout++ = (char) z;
1795 } else {
1796 uint8 *p;
1797 int len,dist;
1798 if (z == 256) return 1;
1799 z -= 257;
1800 len = length_base[z];
1801 if (length_extra[z]) len += zreceive(a, length_extra[z]);
1802 z = zhuffman_decode(a, &a->z_distance);
1803 if (z < 0) return e("bad huffman code","Corrupt PNG");
1804 dist = dist_base[z];
1805 if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
1806 if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG");
1807 if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0;
1808 p = (uint8 *) (a->zout - dist);
1809 while (len--)
1810 *a->zout++ = *p++;
1811 }
1812 }
1813}
1814
1815static int compute_huffman_codes(zbuf *a)
1816{
1817 static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
1818 static zhuffman z_codelength; // static just to save stack space
1819 uint8 lencodes[286+32+137];//padding for maximum single op
1820 uint8 codelength_sizes[19];
1821 int i,n;
1822
1823 int hlit = zreceive(a,5) + 257;
1824 int hdist = zreceive(a,5) + 1;
1825 int hclen = zreceive(a,4) + 4;
1826
1827 memset(codelength_sizes, 0, sizeof(codelength_sizes));
1828 for (i=0; i < hclen; ++i) {
1829 int s = zreceive(a,3);
1830 codelength_sizes[length_dezigzag[i]] = (uint8) s;
1831 }
1832 if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
1833
1834 n = 0;
1835 while (n < hlit + hdist) {
1836 int c = zhuffman_decode(a, &z_codelength);
1837 assert(c >= 0 && c < 19);
1838 if (c < 16)
1839 lencodes[n++] = (uint8) c;
1840 else if (c == 16) {
1841 c = zreceive(a,2)+3;
1842 memset(lencodes+n, lencodes[n-1], c);
1843 n += c;
1844 } else if (c == 17) {
1845 c = zreceive(a,3)+3;
1846 memset(lencodes+n, 0, c);
1847 n += c;
1848 } else {
1849 assert(c == 18);
1850 c = zreceive(a,7)+11;
1851 memset(lencodes+n, 0, c);
1852 n += c;
1853 }
1854 }
1855 if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG");
1856 if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
1857 if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
1858 return 1;
1859}
1860
1861static int parse_uncompressed_block(zbuf *a)
1862{
1863 uint8 header[4];
1864 int len,nlen,k;
1865 if (a->num_bits & 7)
1866 zreceive(a, a->num_bits & 7); // discard
1867 // drain the bit-packed data into header
1868 k = 0;
1869 while (a->num_bits > 0) {
1870 header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns?
1871 a->code_buffer >>= 8;
1872 a->num_bits -= 8;
1873 }
1874 assert(a->num_bits == 0);
1875 // now fill header the normal way
1876 while (k < 4)
1877 header[k++] = (uint8) zget8(a);
1878 len = header[1] * 256 + header[0];
1879 nlen = header[3] * 256 + header[2];
1880 if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG");
1881 if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG");
1882 if (a->zout + len > a->zout_end)
1883 if (!expand(a, len)) return 0;
1884 memcpy(a->zout, a->zbuffer, len);
1885 a->zbuffer += len;
1886 a->zout += len;
1887 return 1;
1888}
1889
1890static int parse_zlib_header(zbuf *a)
1891{
1892 int cmf = zget8(a);
1893 int cm = cmf & 15;
1894 /* int cinfo = cmf >> 4; */
1895 int flg = zget8(a);
1896 if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec
1897 if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
1898 if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png
1899 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
1900 return 1;
1901}
1902
// @TODO: should statically initialize these for optimal thread safety
static uint8 default_length[288], default_distance[32];

// Fill in the code lengths of the fixed huffman tables: literal/length
// symbols 0..143 get 8 bits, 144..255 get 9, 256..279 get 7, 280..287 get
// 8; all 32 distance symbols get 5 bits. The distance table is written
// last, in ascending order.
static void init_defaults(void)
{
   int sym;
   for (sym = 0; sym < 288; ++sym) {
      // upper bounds written with <= to match clearly with the spec
      if      (sym <= 143) default_length[sym] = 8;
      else if (sym <= 255) default_length[sym] = 9;
      else if (sym <= 279) default_length[sym] = 7;
      else                 default_length[sym] = 8;
   }
   for (sym = 0; sym < 32; ++sym)
      default_distance[sym] = 5;
}
1915
1916static int parse_zlib(zbuf *a, int parse_header)
1917{
1918 int final, type;
1919 if (parse_header)
1920 if (!parse_zlib_header(a)) return 0;
1921 a->num_bits = 0;
1922 a->code_buffer = 0;
1923 do {
1924 final = zreceive(a,1);
1925 type = zreceive(a,2);
1926 if (type == 0) {
1927 if (!parse_uncompressed_block(a)) return 0;
1928 } else if (type == 3) {
1929 return 0;
1930 } else {
1931 if (type == 1) {
1932 // use fixed code lengths
1933 if (!default_distance[31]) init_defaults();
1934 if (!zbuild_huffman(&a->z_length , default_length , 288)) return 0;
1935 if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0;
1936 } else {
1937 if (!compute_huffman_codes(a)) return 0;
1938 }
1939 if (!parse_huffman_block(a)) return 0;
1940 }
1941 } while (!final);
1942 return 1;
1943}
1944
// Point 'a' at the output buffer obuf (olen bytes) and run the decoder.
//   exp:          non-zero if the output buffer may be grown with realloc
//   parse_header: non-zero if the input starts with a zlib header
// The input fields of 'a' (zbuffer, zbuffer_end) must already be set.
// Returns the result of parse_zlib: 1 on success, 0 on failure.
static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->zout_start = obuf;
   a->zout       = obuf;
   a->zout_end   = obuf + olen;
   a->z_expandable = exp;

   return parse_zlib(a, parse_header);
}
1954
1955char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
1956{
1957 zbuf a;
1958 char *p = (char *) malloc(initial_size);
1959 if (p == NULL) return NULL;
1960 a.zbuffer = (uint8 *) buffer;
1961 a.zbuffer_end = (uint8 *) buffer + len;
1962 if (do_zlib(&a, p, initial_size, 1, 1)) {
1963 if (outlen) *outlen = (int) (a.zout - a.zout_start);
1964 return a.zout_start;
1965 } else {
1966 free(a.zout_start);
1967 return NULL;
1968 }
1969}
1970
// Same as stbi_zlib_decode_malloc_guesssize with an initial output buffer
// of 16384 bytes.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
}
1975
1976int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
1977{
1978 zbuf a;
1979 a.zbuffer = (uint8 *) ibuffer;
1980 a.zbuffer_end = (uint8 *) ibuffer + ilen;
1981 if (do_zlib(&a, obuffer, olen, 0, 1))
1982 return (int) (a.zout - a.zout_start);
1983 else
1984 return -1;
1985}
1986
1987char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
1988{
1989 zbuf a;
1990 char *p = (char *) malloc(16384);
1991 if (p == NULL) return NULL;
1992 a.zbuffer = (uint8 *) buffer;
1993 a.zbuffer_end = (uint8 *) buffer+len;
1994 if (do_zlib(&a, p, 16384, 1, 0)) {
1995 if (outlen) *outlen = (int) (a.zout - a.zout_start);
1996 return a.zout_start;
1997 } else {
1998 free(a.zout_start);
1999 return NULL;
2000 }
2001}
2002
2003int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2004{
2005 zbuf a;
2006 a.zbuffer = (uint8 *) ibuffer;
2007 a.zbuffer_end = (uint8 *) ibuffer + ilen;
2008 if (do_zlib(&a, obuffer, olen, 0, 0))
2009 return (int) (a.zout - a.zout_start);
2010 else
2011 return -1;
2012}
2013
2014// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
2015// simple implementation
2016// - only 8-bit samples
2017// - no CRC checking
2018// - allocates lots of intermediate memory
2019// - avoids problem of streaming data between subsystems
2020// - avoids explicit window management
2021// performance
2022// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
2023
2024
// Header of one PNG chunk, as read by get_chunk_header().
typedef struct
{
   uint32 length;   // first 32-bit value of the chunk header (payload size in bytes)
   uint32 type;     // second 32-bit value; compared against PNG_TYPE(...) codes
} chunk;
2030
// Packs four chunk-name characters into one 32-bit code, first character in
// the most significant byte, so it can be compared with chunk.type.
#define PNG_TYPE(c0,c1,c2,c3) (((c0) << 24) + ((c1) << 16) + ((c2) << 8) + (c3))
2032
2033static chunk get_chunk_header(stbi *s)
2034{
2035 chunk c;
2036 c.length = get32(s);
2037 c.type = get32(s);
2038 return c;
2039}
2040
2041static int check_png_header(stbi *s)
2042{
2043 static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
2044 int i;
2045 for (i=0; i < 8; ++i)
2046 if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG");
2047 return 1;
2048}
2049
// Decoder state for one PNG: the input stream plus the three heap buffers
// used while loading. do_png() NULLs these before parsing and frees them after.
typedef struct
{
   stbi s;            // input stream and image dimensions/component counts
   // idata:    concatenated IDAT payloads (grown with realloc in parse_png_file)
   // expanded: output of stbi_zlib_decode_malloc on idata
   // out:      decoded pixels produced by create_png_image / expand_palette
   uint8 *idata, *expanded, *out;
} png;
2055
2056
// Row filter codes. 0..4 are the values accepted from the data stream
// (create_png_image rejects anything above 4); the two *_first values are
// internal substitutes used for row 0, which has no previous row to sample.
enum {
   F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
   F_avg_first, F_paeth_first,
};
2061
// Indexed by the stream's filter code (0..4): the filter to apply on the
// first row instead, chosen so the previous row is never read.
// F_up becomes F_none; F_avg and F_paeth become their *_first variants.
static uint8 first_row_filter[5] =
{
   F_none, F_sub, F_none, F_avg_first, F_paeth_first
};
2066
// Paeth predictor: returns whichever of a, b, c is closest to a + b - c,
// preferring a, then b, then c when distances tie.
static int paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a <= dist_b && dist_a <= dist_c)
      return a;
   return (dist_b <= dist_c) ? b : c;
}
2077
2078// create the png data from post-deflated data
// create the png data from post-deflated data
//
// Un-filters the inflated scanline data in 'raw' into a newly malloc'd
// a->out holding img_x * img_y pixels of out_n components each.
//   raw     - img_y rows, each one filter byte followed by img_n*img_x bytes
//   raw_len - must equal (img_n*img_x + 1) * img_y exactly
//   out_n   - img_n, or img_n+1 to append a component set to 255 per pixel
// Returns 1 on success, otherwise the result of e(...). a->out may already be
// allocated when a failure is reported; this function does not free it.
static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n)
{
   stbi *s = &a->s;
   uint32 i,j,stride = s->img_x*out_n;
   int k;
   int img_n = s->img_n; // copy it into a local for later
   assert(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (uint8 *) malloc(s->img_x * s->img_y * out_n);
   if (!a->out) return e("outofmem", "Out of memory");
   if (raw_len != (img_n * s->img_x + 1) * s->img_y) return e("not enough pixels","Corrupt PNG");
   for (j=0; j < s->img_y; ++j) {
      uint8 *cur = a->out + stride*j;
      // previous output row; for j == 0 this points before the buffer and is
      // never dereferenced because of the first_row_filter remap below
      uint8 *prior = cur - stride;
      int filter = *raw++;
      if (filter > 4) return e("invalid filter","Corrupt PNG");
      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];
      // handle first pixel explicitly
      // (it has no pixel to its left, so the left/upper-left terms are zero)
      for (k=0; k < img_n; ++k) {
         switch(filter) {
            case F_none : cur[k] = raw[k]; break;
            case F_sub : cur[k] = raw[k]; break;
            case F_up : cur[k] = raw[k] + prior[k]; break;
            case F_avg : cur[k] = raw[k] + (prior[k]>>1); break;
            case F_paeth : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break;
            case F_avg_first : cur[k] = raw[k]; break;
            case F_paeth_first: cur[k] = raw[k]; break;
         }
      }
      // fill the extra output component of the first pixel
      if (img_n != out_n) cur[img_n] = 255;
      raw += img_n;
      cur += out_n;
      prior += out_n;
      // this is a little gross, so that we don't switch per-pixel or per-component
      // Each CASE(f) expands to "case f:" plus a loop over the remaining
      // img_x-1 pixels and their img_n components; the statement following
      // the macro is the per-component body.
      if (img_n == out_n) {
         // input and output pixels have the same size: all pointers step by img_n
         #define CASE(f) \
             case f: \
                for (i=s->img_x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \
                   for (k=0; k < img_n; ++k)
         switch(filter) {
            CASE(F_none) cur[k] = raw[k]; break;
            CASE(F_sub) cur[k] = raw[k] + cur[k-img_n]; break;
            CASE(F_up) cur[k] = raw[k] + prior[k]; break;
            CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break;
            CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
            CASE(F_avg_first) cur[k] = raw[k] + (cur[k-img_n] >> 1); break;
            CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break;
         }
         #undef CASE
      } else {
         assert(img_n+1 == out_n);
         // output pixels are one component wider: raw steps by img_n, cur/prior
         // step by out_n, and the loop increment writes 255 into the extra slot
         #define CASE(f) \
             case f: \
                for (i=s->img_x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
                   for (k=0; k < img_n; ++k)
         switch(filter) {
            CASE(F_none) cur[k] = raw[k]; break;
            CASE(F_sub) cur[k] = raw[k] + cur[k-out_n]; break;
            CASE(F_up) cur[k] = raw[k] + prior[k]; break;
            CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break;
            CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
            CASE(F_avg_first) cur[k] = raw[k] + (cur[k-out_n] >> 1); break;
            CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break;
         }
         #undef CASE
      }
   }
   return 1;
}
2148
2149static int compute_transparency(png *z, uint8 tc[3], int out_n)
2150{
2151 stbi *s = &z->s;
2152 uint32 i, pixel_count = s->img_x * s->img_y;
2153 uint8 *p = z->out;
2154
2155 // compute color-based transparency, assuming we've
2156 // already got 255 as the alpha value in the output
2157 assert(out_n == 2 || out_n == 4);
2158
2159 if (out_n == 2) {
2160 for (i=0; i < pixel_count; ++i) {
2161 p[1] = (p[0] == tc[0] ? 0 : 255);
2162 p += 2;
2163 }
2164 } else {
2165 for (i=0; i < pixel_count; ++i) {
2166 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
2167 p[3] = 0;
2168 p += 4;
2169 }
2170 }
2171 return 1;
2172}
2173
2174static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
2175{
2176 uint32 i, pixel_count = a->s.img_x * a->s.img_y;
2177 uint8 *p, *temp_out, *orig = a->out;
2178
2179 p = (uint8 *) malloc(pixel_count * pal_img_n);
2180 if (p == NULL) return e("outofmem", "Out of memory");
2181
2182 // between here and free(out) below, exitting would leak
2183 temp_out = p;
2184
2185 if (pal_img_n == 3) {
2186 for (i=0; i < pixel_count; ++i) {
2187 int n = orig[i]*4;
2188 p[0] = palette[n ];
2189 p[1] = palette[n+1];
2190 p[2] = palette[n+2];
2191 p += 3;
2192 }
2193 } else {
2194 for (i=0; i < pixel_count; ++i) {
2195 int n = orig[i]*4;
2196 p[0] = palette[n ];
2197 p[1] = palette[n+1];
2198 p[2] = palette[n+2];
2199 p[3] = palette[n+3];
2200 p += 4;
2201 }
2202 }
2203 free(a->out);
2204 a->out = temp_out;
2205 return 1;
2206}
2207
// Walks the PNG chunk stream of z->s.
//   scan == SCAN_type   : only verify the 8-byte signature
//   scan == SCAN_header : stop once the component count is known
//   scan == SCAN_load   : collect IDAT data and, at IEND, decode into z->out
//   req_comp            : requested component count; only influences whether
//                         an extra component is added while decoding
// Returns 1 on success; on failure returns 0 or the result of e(...).
// Buffers held in z->idata / z->expanded / z->out are not freed on the failure
// paths here; do_png() frees them after this function returns.
static int parse_png_file(png *z, int scan, int req_comp)
{
   // palette: 4 bytes per entry; pal_img_n: 0 = not paletted, else 3 or 4
   uint8 palette[1024], pal_img_n=0;
   // tc: color-key values taken from tRNS for non-paletted images
   uint8 has_trans=0, tc[3];
   // ioff: bytes of IDAT data stored so far; idata_limit: capacity of z->idata
   uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k;
   stbi *s = &z->s;

   if (!check_png_header(s)) return 0;

   if (scan == SCAN_type) return 1;

   for(;;first=0) {
      chunk c = get_chunk_header(s);
      if (first && c.type != PNG_TYPE('I','H','D','R'))
         return e("first not IHDR","Corrupt PNG");
      switch (c.type) {
         case PNG_TYPE('I','H','D','R'): {
            int depth,color,interlace,comp,filter;
            if (!first) return e("multiple IHDR","Corrupt PNG");
            if (c.length != 13) return e("bad IHDR len","Corrupt PNG");
            s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)");
            s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)");
            depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only");
            color = get8(s); if (color > 6) return e("bad ctype","Corrupt PNG");
            // color type 3 = paletted; any other odd value is rejected
            if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG");
            comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG");
            filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG");
            interlace = get8(s); if (interlace) return e("interlaced","PNG not supported: interlaced mode");
            if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               // bit 1 of color selects RGB vs grey, bit 2 adds alpha
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               // keep img_x * img_y * img_n within 1<<30
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
               if (scan == SCAN_header) return 1;
            } else {
               // if paletted, then pal_img_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case PNG_TYPE('P','L','T','E'): {
            if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG");
            // store as 4 bytes per entry with the fourth byte defaulting to 255
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = get8u(s);
               palette[i*4+1] = get8u(s);
               palette[i*4+2] = get8u(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case PNG_TYPE('t','R','N','S'): {
            if (z->idata) return e("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: tRNS supplies the fourth byte of the leading palette entries
               if (scan == SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = get8u(s);
            } else {
               // non-paletted: tRNS supplies one 16-bit key value per component
               // and is only accepted when img_n is odd (1 or 3)
               if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG");
               if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               for (k=0; k < s->img_n; ++k)
                  tc[k] = (uint8) get16(s); // non 8-bit images will be larger
            }
            break;
         }

         case PNG_TYPE('I','D','A','T'): {
            if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG");
            if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
            // grow z->idata (doubling from at least 4096) until this chunk fits
            if (ioff + c.length > idata_limit) {
               uint8 *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               p = (uint8 *) realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory");
               z->idata = p;
            }
            #ifndef STBI_NO_STDIO
            if (s->img_file)
            {
               if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG");
            }
            else
            #endif
            {
               // NOTE(review): c.length comes from the file and is not checked
               // here against the bytes remaining in the memory buffer --
               // confirm a bound is enforced elsewhere or add one.
               memcpy(z->idata+ioff, s->img_buffer, c.length);
               s->img_buffer += c.length;
            }
            ioff += c.length;
            break;
         }

         case PNG_TYPE('I','E','N','D'): {
            uint32 raw_len;
            if (scan != SCAN_load) return 1;
            if (z->idata == NULL) return e("no IDAT","Corrupt PNG");
            z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len);
            if (z->expanded == NULL) return 0; // zlib should set error
            free(z->idata); z->idata = NULL;
            // decode with one extra component when the caller asked for exactly
            // img_n+1 (other than 3) on a non-paletted image, or when a color
            // key has to be turned into alpha
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!create_png_image(z, z->expanded, raw_len, s->img_out_n)) return 0;
            if (has_trans)
               if (!compute_transparency(z, tc, s->img_out_n)) return 0;
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!expand_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            }
            free(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            // (bit 29 of the type clear means the chunk may not be skipped)
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX chunk not known";
               invalid_chunk[0] = (uint8) (c.type >> 24);
               invalid_chunk[1] = (uint8) (c.type >> 16);
               invalid_chunk[2] = (uint8) (c.type >> 8);
               invalid_chunk[3] = (uint8) (c.type >> 0);
               #endif
               return e(invalid_chunk, "PNG not supported: unknown chunk type");
            }
            skip(s, c.length);
            break;
      }
      // end of chunk, read and skip CRC
      get32(s);
   }
}
2356
2357static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
2358{
2359 unsigned char *result=NULL;
2360 p->expanded = NULL;
2361 p->idata = NULL;
2362 p->out = NULL;
2363 if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
2364 if (parse_png_file(p, SCAN_load, req_comp)) {
2365 result = p->out;
2366 p->out = NULL;
2367 if (req_comp && req_comp != p->s.img_out_n) {
2368 result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y);
2369 p->s.img_out_n = req_comp;
2370 if (result == NULL) return result;
2371 }
2372 *x = p->s.img_x;
2373 *y = p->s.img_y;
2374 if (n) *n = p->s.img_n;
2375 }
2376 free(p->out); p->out = NULL;
2377 free(p->expanded); p->expanded = NULL;
2378 free(p->idata); p->idata = NULL;
2379
2380 return result;
2381}
2382
2383#ifndef STBI_NO_STDIO
2384unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2385{
2386 png p;
2387 start_file(&p.s, f);
2388 return do_png(&p, x,y,comp,req_comp);
2389}
2390
// Opens 'filename' in binary mode, decodes it with stbi_png_load_from_file
// and closes it again. Returns NULL if the file cannot be opened.
unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return NULL;
   pixels = stbi_png_load_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
2400#endif
2401
2402unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2403{
2404 png p;
2405 start_mem(&p.s, buffer,len);
2406 return do_png(&p, x,y,comp,req_comp);
2407}
2408
2409#ifndef STBI_NO_STDIO
2410int stbi_png_test_file(FILE *f)
2411{
2412 png p;
2413 int n,r;
2414 n = ftell(f);
2415 start_file(&p.s, f);
2416 r = parse_png_file(&p, SCAN_type,STBI_default);
2417 fseek(f,n,SEEK_SET);
2418 return r;
2419}
2420#endif
2421
2422int stbi_png_test_memory(stbi_uc const *buffer, int len)
2423{
2424 png p;
2425 start_mem(&p.s, buffer, len);
2426 return parse_png_file(&p, SCAN_type,STBI_default);
2427}
2428
2429// TODO: load header from png
2430#ifndef STBI_NO_STDIO
2431extern int stbi_png_info (char const *filename, int *x, int *y, int *comp);
2432extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp);
2433#endif
2434extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
2435
2436// Microsoft/Windows BMP image
2437
2438static int bmp_test(stbi *s)
2439{
2440 int sz;
2441 if (get8(s) != 'B') return 0;
2442 if (get8(s) != 'M') return 0;
2443 get32le(s); // discard filesize
2444 get16le(s); // discard reserved
2445 get16le(s); // discard reserved
2446 get32le(s); // discard data offset
2447 sz = get32le(s);
2448 if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1;
2449 return 0;
2450}
2451
2452#ifndef STBI_NO_STDIO
2453int stbi_bmp_test_file (FILE *f)
2454{
2455 stbi s;
2456 int r,n = ftell(f);
2457 start_file(&s,f);
2458 r = bmp_test(&s);
2459 fseek(f,n,SEEK_SET);
2460 return r;
2461}
2462#endif
2463
2464int stbi_bmp_test_memory (stbi_uc const *buffer, int len)
2465{
2466 stbi s;
2467 start_mem(&s, buffer, len);
2468 return bmp_test(&s);
2469}
2470
2471// returns 0..31 for the highest set bit
// returns 0..31 for the highest set bit, or -1 when z is 0
// (binary search: test the upper half of the remaining range, then narrow)
static int high_bit(unsigned int z)
{
   static const int steps[5] = { 16, 8, 4, 2, 1 };
   int pos = 0, k;
   if (z == 0) return -1;
   for (k = 0; k < 5; ++k) {
      if (z >> steps[k]) {
         pos += steps[k];
         z >>= steps[k];
      }
   }
   return pos;
}
2483
// Counts the set bits of a 32-bit value by summing adjacent bit fields of
// doubling width (pairs, nibbles, bytes) and folding the byte sums together.
static int bitcount(unsigned int a)
{
   unsigned int pairs, nibbles, bytes;
   pairs   = (a & 0x55555555) + ((a >> 1) & 0x55555555);         // max 2 per field
   nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333); // max 4 per field
   bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;            // max 8 per byte
   bytes  += bytes >> 8;                                         // max 16 in low byte pairs
   bytes  += bytes >> 16;                                        // max 32 in the low byte
   return bytes & 0xff;
}
2493
// Shifts v right by 'shift' (left when shift is negative) and then adds
// copies of the result shifted right by bits, 2*bits, ... while that amount
// is below 8. With v a 'bits'-wide field whose top bit has been moved to
// bit 7, this replicates the field downwards to fill 8 bits.
// 'bits' must be positive, otherwise the loop never ends.
static int shiftsigned(int v, int shift, int bits)
{
   int total, step;

   if (shift >= 0) v >>= shift;
   else            v <<= -shift;

   total = v;
   for (step = bits; step < 8; step += bits)
      total += v >> step;
   return total;
}
2510
2511static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
2512{
2513 uint8 *out;
2514 unsigned int mr=0,mg=0,mb=0,ma=0;
2515 stbi_uc pal[256][4];
2516 int psize=0,i,j,compress=0,width;
2517 int bpp, flip_vertically, pad, target, offset, hsz;
2518 if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP");
2519 get32le(s); // discard filesize
2520 get16le(s); // discard reserved
2521 get16le(s); // discard reserved
2522 offset = get32le(s);
2523 hsz = get32le(s);
2524 if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown");
2525 failure_reason = "bad BMP";
2526 if (hsz == 12) {
2527 s->img_x = get16le(s);
2528 s->img_y = get16le(s);
2529 } else {
2530 s->img_x = get32le(s);
2531 s->img_y = get32le(s);
2532 }
2533 if (get16le(s) != 1) return 0;
2534 bpp = get16le(s);
2535 if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit");
2536 flip_vertically = ((int) s->img_y) > 0;
2537 s->img_y = abs((int) s->img_y);
2538 if (hsz == 12) {
2539 if (bpp < 24)
2540 psize = (offset - 14 - 24) / 3;
2541 } else {
2542 compress = get32le(s);
2543 if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE");
2544 get32le(s); // discard sizeof
2545 get32le(s); // discard hres
2546 get32le(s); // discard vres
2547 get32le(s); // discard colorsused
2548 get32le(s); // discard max important
2549 if (hsz == 40 || hsz == 56) {
2550 if (hsz == 56) {
2551 get32le(s);
2552 get32le(s);
2553 get32le(s);
2554 get32le(s);
2555 }
2556 if (bpp == 16 || bpp == 32) {
2557 mr = mg = mb = 0;
2558 if (compress == 0) {
2559 if (bpp == 32) {
2560 mr = 0xff << 16;
2561 mg = 0xff << 8;
2562 mb = 0xff << 0;
2563 } else {
2564 mr = 31 << 10;
2565 mg = 31 << 5;
2566 mb = 31 << 0;
2567 }
2568 } else if (compress == 3) {
2569 mr = get32le(s);
2570 mg = get32le(s);
2571 mb = get32le(s);
2572 // not documented, but generated by photoshop and handled by mspaint
2573 if (mr == mg && mg == mb) {
2574 // ?!?!?
2575 return NULL;
2576 }
2577 } else
2578 return NULL;
2579 }
2580 } else {
2581 assert(hsz == 108);
2582 mr = get32le(s);
2583 mg = get32le(s);
2584 mb = get32le(s);
2585 ma = get32le(s);
2586 get32le(s); // discard color space
2587 for (i=0; i < 12; ++i)
2588 get32le(s); // discard color space parameters
2589 }
2590 if (bpp < 16)
2591 psize = (offset - 14 - hsz) >> 2;
2592 }
2593 s->img_n = ma ? 4 : 3;
2594 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
2595 target = req_comp;
2596 else
2597 target = s->img_n; // if they want monochrome, we'll post-convert
2598 out = (stbi_uc *) malloc(target * s->img_x * s->img_y);
2599 if (!out) return epuc("outofmem", "Out of memory");
2600 if (bpp < 16) {
2601 int z=0;
2602 if (psize == 0 || psize > 256) { free(out); return epuc("invalid", "Corrupt BMP"); }
2603 for (i=0; i < psize; ++i) {
2604 pal[i][2] = get8(s);
2605 pal[i][1] = get8(s);
2606 pal[i][0] = get8(s);
2607 if (hsz != 12) get8(s);
2608 pal[i][3] = 255;
2609 }
2610 skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
2611 if (bpp == 4) width = (s->img_x + 1) >> 1;
2612 else if (bpp == 8) width = s->img_x;
2613 else { free(out); return epuc("bad bpp", "Corrupt BMP"); }
2614 pad = (-width)&3;
2615 for (j=0; j < (int) s->img_y; ++j) {
2616 for (i=0; i < (int) s->img_x; i += 2) {
2617 int v=get8(s),v2=0;
2618 if (bpp == 4) {
2619 v2 = v & 15;
2620 v >>= 4;
2621 }
2622 out[z++] = pal[v][0];
2623 out[z++] = pal[v][1];
2624 out[z++] = pal[v][2];
2625 if (target == 4) out[z++] = 255;
2626 if (i+1 == (int) s->img_x) break;
2627 v = (bpp == 8) ? get8(s) : v2;
2628 out[z++] = pal[v][0];
2629 out[z++] = pal[v][1];
2630 out[z++] = pal[v][2];
2631 if (target == 4) out[z++] = 255;
2632 }
2633 skip(s, pad);
2634 }
2635 } else {
2636 int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
2637 int z = 0;
2638 int easy=0;
2639 skip(s, offset - 14 - hsz);
2640 if (bpp == 24) width = 3 * s->img_x;
2641 else if (bpp == 16) width = 2*s->img_x;
2642 else /* bpp = 32 and pad = 0 */ width=0;
2643 pad = (-width) & 3;
2644 if (bpp == 24) {
2645 easy = 1;
2646 } else if (bpp == 32) {
2647 if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000)
2648 easy = 2;
2649 }
2650 if (!easy) {
2651 if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP");
2652 // right shift amt to put high bit in position #7
2653 rshift = high_bit(mr)-7; rcount = bitcount(mr);
2654 gshift = high_bit(mg)-7; gcount = bitcount(mr);
2655 bshift = high_bit(mb)-7; bcount = bitcount(mr);
2656 ashift = high_bit(ma)-7; acount = bitcount(mr);
2657 }
2658 for (j=0; j < (int) s->img_y; ++j) {
2659 if (easy) {
2660 for (i=0; i < (int) s->img_x; ++i) {
2661 int a;
2662 out[z+2] = get8(s);
2663 out[z+1] = get8(s);
2664 out[z+0] = get8(s);
2665 z += 3;
2666 a = (easy == 2 ? get8(s) : 255);
2667 if (target == 4) out[z++] = a;
2668 }
2669 } else {
2670 for (i=0; i < (int) s->img_x; ++i) {
2671 uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
2672 int a;
2673 out[z++] = shiftsigned(v & mr, rshift, rcount);
2674 out[z++] = shiftsigned(v & mg, gshift, gcount);
2675 out[z++] = shiftsigned(v & mb, bshift, bcount);
2676 a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
2677 if (target == 4) out[z++] = a;
2678 }
2679 }
2680 skip(s, pad);
2681 }
2682 }
2683 if (flip_vertically) {
2684 stbi_uc t;
2685 for (j=0; j < (int) s->img_y>>1; ++j) {
2686 stbi_uc *p1 = out + j *s->img_x*target;
2687 stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
2688 for (i=0; i < (int) s->img_x*target; ++i) {
2689 t = p1[i], p1[i] = p2[i], p2[i] = t;
2690 }
2691 }
2692 }
2693
2694 if (req_comp && req_comp != target) {
2695 out = convert_format(out, target, req_comp, s->img_x, s->img_y);
2696 if (out == NULL) return out; // convert_format frees input on failure
2697 }
2698
2699 *x = s->img_x;
2700 *y = s->img_y;
2701 if (comp) *comp = target;
2702 return out;
2703}
2704
2705#ifndef STBI_NO_STDIO
2706stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp)
2707{
2708 stbi_uc *data;
2709 FILE *f = fopen(filename, "rb");
2710 if (!f) return NULL;
2711 data = stbi_bmp_load_from_file(f, x,y,comp,req_comp);
2712 fclose(f);
2713 return data;
2714}
2715
2716stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp)
2717{
2718 stbi s;
2719 start_file(&s, f);
2720 return bmp_load(&s, x,y,comp,req_comp);
2721}
2722#endif
2723
2724stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2725{
2726 stbi s;
2727 start_mem(&s, buffer, len);
2728 return bmp_load(&s, x,y,comp,req_comp);
2729}
2730
2731// Targa Truevision - TGA
2732// by Jonathan Dummer
2733
2734static int tga_test(stbi *s)
2735{
2736 int sz;
2737 get8u(s); // discard Offset
2738 sz = get8u(s); // color type
2739 if( sz > 1 ) return 0; // only RGB or indexed allowed
2740 sz = get8u(s); // image type
2741 if( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE
2742 get16(s); // discard palette start
2743 get16(s); // discard palette length
2744 get8(s); // discard bits per palette color entry
2745 get16(s); // discard x origin
2746 get16(s); // discard y origin
2747 if( get16(s) < 1 ) return 0; // test width
2748 if( get16(s) < 1 ) return 0; // test height
2749 sz = get8(s); // bits per pixel
2750 if( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed
2751 return 1; // seems to have passed everything
2752}
2753
2754#ifndef STBI_NO_STDIO
2755int stbi_tga_test_file (FILE *f)
2756{
2757 stbi s;
2758 int r,n = ftell(f);
2759 start_file(&s, f);
2760 r = tga_test(&s);
2761 fseek(f,n,SEEK_SET);
2762 return r;
2763}
2764#endif
2765
2766int stbi_tga_test_memory (stbi_uc const *buffer, int len)
2767{
2768 stbi s;
2769 start_mem(&s, buffer, len);
2770 return tga_test(&s);
2771}
2772
2773static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
2774{
2775 // read in the TGA header stuff
2776 int tga_offset = get8u(s);
2777 int tga_indexed = get8u(s);
2778 int tga_image_type = get8u(s);
2779 int tga_is_RLE = 0;
2780 int tga_palette_start = get16le(s);
2781 int tga_palette_len = get16le(s);
2782 int tga_palette_bits = get8u(s);
2783 int tga_x_origin = get16le(s);
2784 int tga_y_origin = get16le(s);
2785 int tga_width = get16le(s);
2786 int tga_height = get16le(s);
2787 int tga_bits_per_pixel = get8u(s);
2788 int tga_inverted = get8u(s);
2789 // image data
2790 unsigned char *tga_data;
2791 unsigned char *tga_palette = NULL;
2792 int i, j;
2793 unsigned char raw_data[4];
2794 unsigned char trans_data[] = { 0,0,0,0 };
2795 int RLE_count = 0;
2796 int RLE_repeating = 0;
2797 int read_next_pixel = 1;
2798 // do a tiny bit of precessing
2799 if( tga_image_type >= 8 )
2800 {
2801 tga_image_type -= 8;
2802 tga_is_RLE = 1;
2803 }
2804 /* int tga_alpha_bits = tga_inverted & 15; */
2805 tga_inverted = 1 - ((tga_inverted >> 5) & 1);
2806
2807 // error check
2808 if( //(tga_indexed) ||
2809 (tga_width < 1) || (tga_height < 1) ||
2810 (tga_image_type < 1) || (tga_image_type > 3) ||
2811 ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
2812 (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
2813 )
2814 {
2815 return NULL;
2816 }
2817
2818 // If I'm paletted, then I'll use the number of bits from the palette
2819 if( tga_indexed )
2820 {
2821 tga_bits_per_pixel = tga_palette_bits;
2822 }
2823
2824 // tga info
2825 *x = tga_width;
2826 *y = tga_height;
2827 if( (req_comp < 1) || (req_comp > 4) )
2828 {
2829 // just use whatever the file was
2830 req_comp = tga_bits_per_pixel / 8;
2831 *comp = req_comp;
2832 } else
2833 {
2834 // force a new number of components
2835 *comp = tga_bits_per_pixel/8;
2836 }
2837 tga_data = (unsigned char*)malloc( tga_width * tga_height * req_comp );
2838
2839 // skip to the data's starting position (offset usually = 0)
2840 skip(s, tga_offset );
2841 // do I need to load a palette?
2842 if( tga_indexed )
2843 {
2844 // any data to skip? (offset usually = 0)
2845 skip(s, tga_palette_start );
2846 // load the palette
2847 tga_palette = (unsigned char*)malloc( tga_palette_len * tga_palette_bits / 8 );
2848 getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 );
2849 }
2850 // load the data
2851 for( i = 0; i < tga_width * tga_height; ++i )
2852 {
2853 // if I'm in RLE mode, do I need to get a RLE chunk?
2854 if( tga_is_RLE )
2855 {
2856 if( RLE_count == 0 )
2857 {
2858 // yep, get the next byte as a RLE command
2859 int RLE_cmd = get8u(s);
2860 RLE_count = 1 + (RLE_cmd & 127);
2861 RLE_repeating = RLE_cmd >> 7;
2862 read_next_pixel = 1;
2863 } else if( !RLE_repeating )
2864 {
2865 read_next_pixel = 1;
2866 }
2867 } else
2868 {
2869 read_next_pixel = 1;
2870 }
2871 // OK, if I need to read a pixel, do it now
2872 if( read_next_pixel )
2873 {
2874 // load however much data we did have
2875 if( tga_indexed )
2876 {
2877 // read in 1 byte, then perform the lookup
2878 int pal_idx = get8u(s);
2879 if( pal_idx >= tga_palette_len )
2880 {
2881 // invalid index
2882 pal_idx = 0;
2883 }
2884 pal_idx *= tga_bits_per_pixel / 8;
2885 for( j = 0; j*8 < tga_bits_per_pixel; ++j )
2886 {
2887 raw_data[j] = tga_palette[pal_idx+j];
2888 }
2889 } else
2890 {
2891 // read in the data raw
2892 for( j = 0; j*8 < tga_bits_per_pixel; ++j )
2893 {
2894 raw_data[j] = get8u(s);
2895 }
2896 }
2897 // convert raw to the intermediate format
2898 switch( tga_bits_per_pixel )
2899 {
2900 case 8:
2901 // Luminous => RGBA
2902 trans_data[0] = raw_data[0];
2903 trans_data[1] = raw_data[0];
2904 trans_data[2] = raw_data[0];
2905 trans_data[3] = 255;
2906 break;
2907 case 16:
2908 // Luminous,Alpha => RGBA
2909 trans_data[0] = raw_data[0];
2910 trans_data[1] = raw_data[0];
2911 trans_data[2] = raw_data[0];
2912 trans_data[3] = raw_data[1];
2913 break;
2914 case 24:
2915 // BGR => RGBA
2916 trans_data[0] = raw_data[2];
2917 trans_data[1] = raw_data[1];
2918 trans_data[2] = raw_data[0];
2919 trans_data[3] = 255;
2920 break;
2921 case 32:
2922 // BGRA => RGBA
2923 trans_data[0] = raw_data[2];
2924 trans_data[1] = raw_data[1];
2925 trans_data[2] = raw_data[0];
2926 trans_data[3] = raw_data[3];
2927 break;
2928 }
2929 // clear the reading flag for the next pixel
2930 read_next_pixel = 0;
2931 } // end of reading a pixel
2932 // convert to final format
2933 switch( req_comp )
2934 {
2935 case 1:
2936 // RGBA => Luminance
2937 tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
2938 break;
2939 case 2:
2940 // RGBA => Luminance,Alpha
2941 tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
2942 tga_data[i*req_comp+1] = trans_data[3];
2943 break;
2944 case 3:
2945 // RGBA => RGB
2946 tga_data[i*req_comp+0] = trans_data[0];
2947 tga_data[i*req_comp+1] = trans_data[1];
2948 tga_data[i*req_comp+2] = trans_data[2];
2949 break;
2950 case 4:
2951 // RGBA => RGBA
2952 tga_data[i*req_comp+0] = trans_data[0];
2953 tga_data[i*req_comp+1] = trans_data[1];
2954 tga_data[i*req_comp+2] = trans_data[2];
2955 tga_data[i*req_comp+3] = trans_data[3];
2956 break;
2957 }
2958 // in case we're in RLE mode, keep counting down
2959 --RLE_count;
2960 }
2961 // do I need to invert the image?
2962 if( tga_inverted )
2963 {
2964 for( j = 0; j*2 < tga_height; ++j )
2965 {
2966 int index1 = j * tga_width * req_comp;
2967 int index2 = (tga_height - 1 - j) * tga_width * req_comp;
2968 for( i = tga_width * req_comp; i > 0; --i )
2969 {
2970 unsigned char temp = tga_data[index1];
2971 tga_data[index1] = tga_data[index2];
2972 tga_data[index2] = temp;
2973 ++index1;
2974 ++index2;
2975 }
2976 }
2977 }
2978 // clear my palette, if I had one
2979 if( tga_palette != NULL )
2980 {
2981 free( tga_palette );
2982 }
2983 // the things I do to get rid of an error message, and yet keep
2984 // Microsoft's C compilers happy... [8^(
2985 tga_palette_start = tga_palette_len = tga_palette_bits =
2986 tga_x_origin = tga_y_origin = 0;
2987 // OK, done
2988 return tga_data;
2989}
2990
2991#ifndef STBI_NO_STDIO
2992stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp)
2993{
2994 stbi_uc *data;
2995 FILE *f = fopen(filename, "rb");
2996 if (!f) return NULL;
2997 data = stbi_tga_load_from_file(f, x,y,comp,req_comp);
2998 fclose(f);
2999 return data;
3000}
3001
3002stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp)
3003{
3004 stbi s;
3005 start_file(&s, f);
3006 return tga_load(&s, x,y,comp,req_comp);
3007}
3008#endif
3009
3010stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3011{
3012 stbi s;
3013 start_mem(&s, buffer, len);
3014 return tga_load(&s, x,y,comp,req_comp);
3015}
3016
3017
3018// *************************************************************************************************
3019// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB
3020
3021static int psd_test(stbi *s)
3022{
3023 if (get32(s) != 0x38425053) return 0; // "8BPS"
3024 else return 1;
3025}
3026
3027#ifndef STBI_NO_STDIO
3028int stbi_psd_test_file(FILE *f)
3029{
3030 stbi s;
3031 int r,n = ftell(f);
3032 start_file(&s, f);
3033 r = psd_test(&s);
3034 fseek(f,n,SEEK_SET);
3035 return r;
3036}
3037#endif
3038
3039int stbi_psd_test_memory(stbi_uc const *buffer, int len)
3040{
3041 stbi s;
3042 start_mem(&s, buffer, len);
3043 return psd_test(&s);
3044}
3045
3046static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3047{
3048 int pixelCount;
3049 int channelCount, compression;
3050 int channel, i, count, len;
3051 int w,h;
3052 uint8 *out;
3053
3054 // Check identifier
3055 if (get32(s) != 0x38425053) // "8BPS"
3056 return epuc("not PSD", "Corrupt PSD image");
3057
3058 // Check file type version.
3059 if (get16(s) != 1)
3060 return epuc("wrong version", "Unsupported version of PSD image");
3061
3062 // Skip 6 reserved bytes.
3063 skip(s, 6 );
3064
3065 // Read the number of channels (R, G, B, A, etc).
3066 channelCount = get16(s);
3067 if (channelCount < 0 || channelCount > 16)
3068 return epuc("wrong channel count", "Unsupported number of channels in PSD image");
3069
3070 // Read the rows and columns of the image.
3071 h = get32(s);
3072 w = get32(s);
3073
3074 // Make sure the depth is 8 bits.
3075 if (get16(s) != 8)
3076 return epuc("unsupported bit depth", "PSD bit depth is not 8 bit");
3077
3078 // Make sure the color mode is RGB.
3079 // Valid options are:
3080 // 0: Bitmap
3081 // 1: Grayscale
3082 // 2: Indexed color
3083 // 3: RGB color
3084 // 4: CMYK color
3085 // 7: Multichannel
3086 // 8: Duotone
3087 // 9: Lab color
3088 if (get16(s) != 3)
3089 return epuc("wrong color format", "PSD is not in RGB color format");
3090
3091 // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
3092 skip(s,get32(s) );
3093
3094 // Skip the image resources. (resolution, pen tool paths, etc)
3095 skip(s, get32(s) );
3096
3097 // Skip the reserved data.
3098 skip(s, get32(s) );
3099
3100 // Find out if the data is compressed.
3101 // Known values:
3102 // 0: no compression
3103 // 1: RLE compressed
3104 compression = get16(s);
3105 if (compression > 1)
3106 return epuc("bad compression", "PSD has an unknown compression format");
3107
3108 // Create the destination image.
3109 out = (stbi_uc *) malloc(4 * w*h);
3110 if (!out) return epuc("outofmem", "Out of memory");
3111 pixelCount = w*h;
3112
3113 // Initialize the data to zero.
3114 //memset( out, 0, pixelCount * 4 );
3115
3116 // Finally, the image data.
3117 if (compression) {
3118 // RLE as used by .PSD and .TIFF
3119 // Loop until you get the number of unpacked bytes you are expecting:
3120 // Read the next source byte into n.
3121 // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
3122 // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
3123 // Else if n is 128, noop.
3124 // Endloop
3125
3126 // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
3127 // which we're going to just skip.
3128 skip(s, h * channelCount * 2 );
3129
3130 // Read the RLE data by channel.
3131 for (channel = 0; channel < 4; channel++) {
3132 uint8 *p;
3133
3134 p = out+channel;
3135 if (channel >= channelCount) {
3136 // Fill this channel with default data.
3137 for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
3138 } else {
3139 // Read the RLE data.
3140 count = 0;
3141 while (count < pixelCount) {
3142 len = get8(s);
3143 if (len == 128) {
3144 // No-op.
3145 } else if (len < 128) {
3146 // Copy next len+1 bytes literally.
3147 len++;
3148 count += len;
3149 while (len) {
3150 *p = get8(s);
3151 p += 4;
3152 len--;
3153 }
3154 } else if (len > 128) {
3155 uint32 val;
3156 // Next -len+1 bytes in the dest are replicated from next source byte.
3157 // (Interpret len as a negative 8-bit int.)
3158 len ^= 0x0FF;
3159 len += 2;
3160 val = get8(s);
3161 count += len;
3162 while (len) {
3163 *p = val;
3164 p += 4;
3165 len--;
3166 }
3167 }
3168 }
3169 }
3170 }
3171
3172 } else {
3173 // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
3174 // where each channel consists of an 8-bit value for each pixel in the image.
3175
3176 // Read the data by channel.
3177 for (channel = 0; channel < 4; channel++) {
3178 uint8 *p;
3179
3180 p = out + channel;
3181 if (channel > channelCount) {
3182 // Fill this channel with default data.
3183 for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
3184 } else {
3185 // Read the data.
3186 count = 0;
3187 for (i = 0; i < pixelCount; i++)
3188 *p = get8(s), p += 4;
3189 }
3190 }
3191 }
3192
3193 if (req_comp && req_comp != 4) {
3194 out = convert_format(out, 4, req_comp, w, h);
3195 if (out == NULL) return out; // convert_format frees input on failure
3196 }
3197
3198 if (comp) *comp = channelCount;
3199 *y = h;
3200 *x = w;
3201
3202 return out;
3203}
3204
3205#ifndef STBI_NO_STDIO
3206stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3207{
3208 stbi_uc *data;
3209 FILE *f = fopen(filename, "rb");
3210 if (!f) return NULL;
3211 data = stbi_psd_load_from_file(f, x,y,comp,req_comp);
3212 fclose(f);
3213 return data;
3214}
3215
3216stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3217{
3218 stbi s;
3219 start_file(&s, f);
3220 return psd_load(&s, x,y,comp,req_comp);
3221}
3222#endif
3223
3224stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3225{
3226 stbi s;
3227 start_mem(&s, buffer, len);
3228 return psd_load(&s, x,y,comp,req_comp);
3229}
3230
3231
3232// *************************************************************************************************
3233// Radiance RGBE HDR loader
3234// originally by Nicolas Schulz
3235#ifndef STBI_NO_HDR
3236static int hdr_test(stbi *s)
3237{
3238 char *signature = "#?RADIANCE\n";
3239 int i;
3240 for (i=0; signature[i]; ++i)
3241 if (get8(s) != signature[i])
3242 return 0;
3243 return 1;
3244}
3245
3246int stbi_hdr_test_memory(stbi_uc const *buffer, int len)
3247{
3248 stbi s;
3249 start_mem(&s, buffer, len);
3250 return hdr_test(&s);
3251}
3252
3253#ifndef STBI_NO_STDIO
3254int stbi_hdr_test_file(FILE *f)
3255{
3256 stbi s;
3257 int r,n = ftell(f);
3258 start_file(&s, f);
3259 r = hdr_test(&s);
3260 fseek(f,n,SEEK_SET);
3261 return r;
3262}
3263#endif
3264
3265#define HDR_BUFLEN 1024
3266static char *hdr_gettoken(stbi *z, char *buffer)
3267{
3268 int len=0;
3269 //char *s = buffer,
3270 char c = '\0';
3271
3272 c = get8(z);
3273
3274 while (!at_eof(z) && c != '\n') {
3275 buffer[len++] = c;
3276 if (len == HDR_BUFLEN-1) {
3277 // flush to end of line
3278 while (!at_eof(z) && get8(z) != '\n')
3279 ;
3280 break;
3281 }
3282 c = get8(z);
3283 }
3284
3285 buffer[len] = 0;
3286 return buffer;
3287}
3288
3289static void hdr_convert(float *output, stbi_uc *input, int req_comp)
3290{
3291 if( input[3] != 0 ) {
3292 float f1;
3293 // Exponent
3294 f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
3295 if (req_comp <= 2)
3296 output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
3297 else {
3298 output[0] = input[0] * f1;
3299 output[1] = input[1] * f1;
3300 output[2] = input[2] * f1;
3301 }
3302 if (req_comp == 2) output[1] = 1;
3303 if (req_comp == 4) output[3] = 1;
3304 } else {
3305 switch (req_comp) {
3306 case 4: output[3] = 1; /* fallthrough */
3307 case 3: output[0] = output[1] = output[2] = 0;
3308 break;
3309 case 2: output[1] = 1; /* fallthrough */
3310 case 1: output[0] = 0;
3311 break;
3312 }
3313 }
3314}
3315
3316
3317static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3318{
3319 char buffer[HDR_BUFLEN];
3320 char *token;
3321 int valid = 0;
3322 int width, height;
3323 stbi_uc *scanline;
3324 float *hdr_data;
3325 int len;
3326 unsigned char count, value;
3327 int i, j, k, c1,c2, z;
3328
3329
3330 // Check identifier
3331 if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
3332 return epf("not HDR", "Corrupt HDR image");
3333
3334 // Parse header
3335 while(1) {
3336 token = hdr_gettoken(s,buffer);
3337 if (token[0] == 0) break;
3338 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
3339 }
3340
3341 if (!valid) return epf("unsupported format", "Unsupported HDR format");
3342
3343 // Parse width and height
3344 // can't use sscanf() if we're not using stdio!
3345 token = hdr_gettoken(s,buffer);
3346 if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format");
3347 token += 3;
3348 height = strtol(token, &token, 10);
3349 while (*token == ' ') ++token;
3350 if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format");
3351 token += 3;
3352 width = strtol(token, NULL, 10);
3353
3354 *x = width;
3355 *y = height;
3356
3357 *comp = 3;
3358 if (req_comp == 0) req_comp = 3;
3359
3360 // Read data
3361 hdr_data = (float *) malloc(height * width * req_comp * sizeof(float));
3362
3363 // Load image data
3364 // image data is stored as some number of sca
3365 if( width < 8 || width >= 32768) {
3366 // Read flat data
3367 for (j=0; j < height; ++j) {
3368 for (i=0; i < width; ++i) {
3369 stbi_uc rgbe[4];
3370 main_decode_loop:
3371 getn(s, rgbe, 4);
3372 hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
3373 }
3374 }
3375 } else {
3376 // Read RLE-encoded data
3377 scanline = NULL;
3378
3379 for (j = 0; j < height; ++j) {
3380 c1 = get8(s);
3381 c2 = get8(s);
3382 len = get8(s);
3383 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
3384 // not run-length encoded, so we have to actually use THIS data as a decoded
3385 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
3386 stbi_uc rgbe[4] = { c1,c2,len, get8(s) };
3387 hdr_convert(hdr_data, rgbe, req_comp);
3388 i = 1;
3389 j = 0;
3390 free(scanline);
3391 goto main_decode_loop; // yes, this is fucking insane; blame the fucking insane format
3392 }
3393 len <<= 8;
3394 len |= get8(s);
3395 if (len != width) { free(hdr_data); free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); }
3396 if (scanline == NULL) scanline = (stbi_uc *) malloc(width * 4);
3397
3398 for (k = 0; k < 4; ++k) {
3399 i = 0;
3400 while (i < width) {
3401 count = get8(s);
3402 if (count > 128) {
3403 // Run
3404 value = get8(s);
3405 count -= 128;
3406 for (z = 0; z < count; ++z)
3407 scanline[i++ * 4 + k] = value;
3408 } else {
3409 // Dump
3410 for (z = 0; z < count; ++z)
3411 scanline[i++ * 4 + k] = get8(s);
3412 }
3413 }
3414 }
3415 for (i=0; i < width; ++i)
3416 hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
3417 }
3418 free(scanline);
3419 }
3420
3421 return hdr_data;
3422}
3423
3424static stbi_uc *hdr_load_rgbe(stbi *s, int *x, int *y, int *comp, int req_comp)
3425{
3426 char buffer[HDR_BUFLEN];
3427 char *token;
3428 int valid = 0;
3429 int width, height;
3430 stbi_uc *scanline;
3431 stbi_uc *rgbe_data;
3432 int len;
3433 unsigned char count, value;
3434 int i, j, k, c1,c2, z;
3435
3436
3437 // Check identifier
3438 if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
3439 return epuc("not HDR", "Corrupt HDR image");
3440
3441 // Parse header
3442 while(1) {
3443 token = hdr_gettoken(s,buffer);
3444 if (token[0] == 0) break;
3445 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
3446 }
3447
3448 if (!valid) return epuc("unsupported format", "Unsupported HDR format");
3449
3450 // Parse width and height
3451 // can't use sscanf() if we're not using stdio!
3452 token = hdr_gettoken(s,buffer);
3453 if (strncmp(token, "-Y ", 3)) return epuc("unsupported data layout", "Unsupported HDR format");
3454 token += 3;
3455 height = strtol(token, &token, 10);
3456 while (*token == ' ') ++token;
3457 if (strncmp(token, "+X ", 3)) return epuc("unsupported data layout", "Unsupported HDR format");
3458 token += 3;
3459 width = strtol(token, NULL, 10);
3460
3461 *x = width;
3462 *y = height;
3463
3464 // RGBE _MUST_ come out as 4 components
3465 *comp = 4;
3466 req_comp = 4;
3467
3468 // Read data
3469 rgbe_data = (stbi_uc *) malloc(height * width * req_comp * sizeof(stbi_uc));
3470 // point to the beginning
3471 scanline = rgbe_data;
3472
3473 // Load image data
3474 // image data is stored as some number of scan lines
3475 if( width < 8 || width >= 32768) {
3476 // Read flat data
3477 for (j=0; j < height; ++j) {
3478 for (i=0; i < width; ++i) {
3479 main_decode_loop:
3480 //getn(rgbe, 4);
3481 getn(s,scanline, 4);
3482 scanline += 4;
3483 }
3484 }
3485 } else {
3486 // Read RLE-encoded data
3487 for (j = 0; j < height; ++j) {
3488 c1 = get8(s);
3489 c2 = get8(s);
3490 len = get8(s);
3491 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
3492 // not run-length encoded, so we have to actually use THIS data as a decoded
3493 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
3494 scanline[0] = c1;
3495 scanline[1] = c2;
3496 scanline[2] = len;
3497 scanline[3] = get8(s);
3498 scanline += 4;
3499 i = 1;
3500 j = 0;
3501 goto main_decode_loop; // yes, this is insane; blame the insane format
3502 }
3503 len <<= 8;
3504 len |= get8(s);
3505 if (len != width) { free(rgbe_data); return epuc("invalid decoded scanline length", "corrupt HDR"); }
3506 for (k = 0; k < 4; ++k) {
3507 i = 0;
3508 while (i < width) {
3509 count = get8(s);
3510 if (count > 128) {
3511 // Run
3512 value = get8(s);
3513 count -= 128;
3514 for (z = 0; z < count; ++z)
3515 scanline[i++ * 4 + k] = value;
3516 } else {
3517 // Dump
3518 for (z = 0; z < count; ++z)
3519 scanline[i++ * 4 + k] = get8(s);
3520 }
3521 }
3522 }
3523 // move the scanline on
3524 scanline += 4 * width;
3525 }
3526 }
3527
3528 return rgbe_data;
3529}
3530
3531#ifndef STBI_NO_STDIO
3532float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3533{
3534 stbi s;
3535 start_file(&s,f);
3536 return hdr_load(&s,x,y,comp,req_comp);
3537}
3538
3539stbi_uc *stbi_hdr_load_rgbe_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3540{
3541 stbi s;
3542 start_file(&s,f);
3543 return hdr_load_rgbe(&s,x,y,comp,req_comp);
3544}
3545
3546stbi_uc *stbi_hdr_load_rgbe (char const *filename, int *x, int *y, int *comp, int req_comp)
3547{
3548 FILE *f = fopen(filename, "rb");
3549 unsigned char *result;
3550 if (!f) return epuc("can't fopen", "Unable to open file");
3551 result = stbi_hdr_load_rgbe_file(f,x,y,comp,req_comp);
3552 fclose(f);
3553 return result;
3554}
3555#endif
3556
3557float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3558{
3559 stbi s;
3560 start_mem(&s,buffer, len);
3561 return hdr_load(&s,x,y,comp,req_comp);
3562}
3563
3564stbi_uc *stbi_hdr_load_rgbe_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp)
3565{
3566 stbi s;
3567 start_mem(&s,buffer, len);
3568 return hdr_load_rgbe(&s,x,y,comp,req_comp);
3569}
3570
3571#endif // STBI_NO_HDR
3572
3573/////////////////////// write image ///////////////////////
3574
3575#ifndef STBI_NO_WRITE
3576
3577static void write8(FILE *f, int x) { uint8 z = (uint8) x; fwrite(&z,1,1,f); }
3578
3579static void writefv(FILE *f, char *fmt, va_list v)
3580{
3581 while (*fmt) {
3582 switch (*fmt++) {
3583 case ' ': break;
3584 case '1': { uint8 x = va_arg(v, int); write8(f,x); break; }
3585 case '2': { int16 x = va_arg(v, int); write8(f,x); write8(f,x>>8); break; }
3586 case '4': { int32 x = va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; }
3587 default:
3588 assert(0);
3589 va_end(v);
3590 return;
3591 }
3592 }
3593}
3594
// Variadic front end for writefv: writes the integer arguments following
// `fmt` to `f`, one per format character as interpreted by writefv.
static void writef(FILE *f, char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   writefv(f, fmt, args);
   va_end(args);
}
3602
3603static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad)
3604{
3605 uint8 bg[3] = { 255, 0, 255}, px[3];
3606 uint32 zero = 0;
3607 int i,j,k, j_end;
3608
3609 if (vdir < 0)
3610 j_end = -1, j = y-1;
3611 else
3612 j_end = y, j = 0;
3613
3614 for (; j != j_end; j += vdir) {
3615 for (i=0; i < x; ++i) {
3616 uint8 *d = (uint8 *) data + (j*x+i)*comp;
3617 if (write_alpha < 0)
3618 fwrite(&d[comp-1], 1, 1, f);
3619 switch (comp) {
3620 case 1:
3621 case 2: writef(f, "111", d[0],d[0],d[0]);
3622 break;
3623 case 4:
3624 if (!write_alpha) {
3625 for (k=0; k < 3; ++k)
3626 px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255;
3627 writef(f, "111", px[1-rgb_dir],px[1],px[1+rgb_dir]);
3628 break;
3629 }
3630 /* FALLTHROUGH */
3631 case 3:
3632 writef(f, "111", d[1-rgb_dir],d[1],d[1+rgb_dir]);
3633 break;
3634 }
3635 if (write_alpha > 0)
3636 fwrite(&d[comp-1], 1, 1, f);
3637 }
3638 fwrite(&zero,scanline_pad,1,f);
3639 }
3640}
3641
// Create `filename`, write a header described by `fmt` and the variadic
// integers that follow it (see writefv), then the pixel data via
// write_pixels, and close the file.
// rgb_dir, vdir, x, y, comp, data, alpha and pad are forwarded to
// write_pixels unchanged.
// Returns 1 if the file could be opened for writing, 0 otherwise; write
// errors after opening are not detected.
static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, void *data, int alpha, int pad, char *fmt, ...)
{
   va_list header_args;
   FILE *fp = fopen(filename, "wb");

   if (fp == NULL)
      return 0;

   va_start(header_args, fmt);
   writefv(fp, fmt, header_args);
   va_end(header_args);

   write_pixels(fp, rgb_dir, vdir, x, y, comp, data, alpha, pad);
   fclose(fp);
   return 1;
}
3655
// Write an x-by-y image with `comp` bytes per pixel to `filename` as a
// 24-bit BMP. Rows are emitted last-to-first with the color channels in
// reverse order, each row padded to a multiple of 4 bytes; no alpha is stored.
// Returns outfile's result (non-zero if the file could be opened).
int stbi_write_bmp(char const *filename, int x, int y, int comp, void *data)
{
   int pad = (-x*3) & 3;                       // bytes needed to round a row up to 4
   int header_bytes = 14+40;                   // file header + bitmap header
   int file_bytes = 14+40+(x*3+pad)*y;

   return outfile(filename,-1,-1,x,y,comp,data,0,pad,
           "11 4 22 4" "4 44 22 444444",
           'B', 'M', file_bytes, 0,0, header_bytes,  // file header
            40, x,y, 1,24, 0,0,0,0,0,0);             // bitmap header
}
3664
// Write an x-by-y image with `comp` bytes per pixel to `filename` as an
// uncompressed TGA (image type 2). An even `comp` is treated as carrying
// alpha: the file is then 32 bits per pixel with the last channel appended
// to each pixel, otherwise 24 bits per pixel. Rows are emitted last-to-first
// with the color channels in reverse order.
// Returns outfile's result (non-zero if the file could be opened).
int stbi_write_tga(char const *filename, int x, int y, int comp, void *data)
{
   int has_alpha = (comp & 1) == 0;
   int bits_per_pixel = 24+8*has_alpha;
   int alpha_bits = 8*has_alpha;

   return outfile(filename, -1,-1, x, y, comp, data, has_alpha, 0,
                  "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, bits_per_pixel, alpha_bits);
}
3671
3672// any other image formats that do interleaved rgb data?
3673// PNG: requires adler32,crc32 -- significant amount of code
3674// PSD: no, channels output separately
3675// TIFF: no, stripwise-interleaved... i think
3676
3677#endif // STBI_NO_WRITE
3678
3679// add in my DDS loading support
3680#ifndef STBI_NO_DDS
3681#include "stbi_DDS_aug_c.h"
3682#endif
3683