#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <limits.h>
#include <string.h>
5
6static fz_image *pdf_load_jpx(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int forcemask);
7
8static fz_image *
9pdf_load_jpx_imp(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask)
10{
11 fz_image *image = pdf_load_jpx(ctx, doc, dict, forcemask);
12
13 if (forcemask)
14 {
15 fz_pixmap_image *cimg = (fz_pixmap_image *)image;
16 fz_pixmap *mask_pixmap;
17 fz_pixmap *tile = fz_pixmap_image_tile(ctx, cimg);
18
19 if (tile->n != 1)
20 {
21 fz_pixmap *gray = fz_convert_pixmap(ctx, tile, fz_device_gray(ctx), NULL, NULL, fz_default_color_params, 0);
22 fz_drop_pixmap(ctx, tile);
23 tile = gray;
24 }
25
26 mask_pixmap = fz_alpha_from_gray(ctx, tile);
27 fz_drop_pixmap(ctx, tile);
28 fz_set_pixmap_image_tile(ctx, cimg, mask_pixmap);
29 }
30
31 return image;
32}
33
34static fz_image *
35pdf_load_image_imp(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *cstm, int forcemask)
36{
37 fz_image *image = NULL;
38 pdf_obj *obj, *res;
39
40 int w, h, bpc, n;
41 int imagemask;
42 int interpolate;
43 int indexed;
44 fz_image *mask = NULL; /* explicit mask/soft mask image */
45 int use_colorkey = 0;
46 fz_colorspace *colorspace = NULL;
47 float decode[FZ_MAX_COLORS * 2];
48 int colorkey[FZ_MAX_COLORS * 2];
49 int stride;
50
51 int i;
52 fz_compressed_buffer *buffer;
53
54 /* special case for JPEG2000 images */
55 if (pdf_is_jpx_image(ctx, dict))
56 return pdf_load_jpx_imp(ctx, doc, rdb, dict, cstm, forcemask);
57
58 w = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Width), PDF_NAME(W)));
59 h = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Height), PDF_NAME(H)));
60 bpc = pdf_to_int(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(BitsPerComponent), PDF_NAME(BPC)));
61 if (bpc == 0)
62 bpc = 8;
63 imagemask = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(ImageMask), PDF_NAME(IM)));
64 interpolate = pdf_to_bool(ctx, pdf_dict_geta(ctx, dict, PDF_NAME(Interpolate), PDF_NAME(I)));
65
66 indexed = 0;
67 use_colorkey = 0;
68
69 if (imagemask)
70 bpc = 1;
71
72 if (w <= 0)
73 fz_throw(ctx, FZ_ERROR_GENERIC, "image width is zero (or less)");
74 if (h <= 0)
75 fz_throw(ctx, FZ_ERROR_GENERIC, "image height is zero (or less)");
76 if (bpc <= 0)
77 fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is zero (or less)");
78 if (bpc > 16)
79 fz_throw(ctx, FZ_ERROR_GENERIC, "image depth is too large: %d", bpc);
80 if (w > (1 << 16))
81 fz_throw(ctx, FZ_ERROR_GENERIC, "image is too wide");
82 if (h > (1 << 16))
83 fz_throw(ctx, FZ_ERROR_GENERIC, "image is too high");
84
85 fz_var(mask);
86 fz_var(image);
87 fz_var(colorspace);
88
89 fz_try(ctx)
90 {
91 obj = pdf_dict_geta(ctx, dict, PDF_NAME(ColorSpace), PDF_NAME(CS));
92 if (obj && !imagemask && !forcemask)
93 {
94 /* colorspace resource lookup is only done for inline images */
95 if (pdf_is_name(ctx, obj))
96 {
97 res = pdf_dict_get(ctx, pdf_dict_get(ctx, rdb, PDF_NAME(ColorSpace)), obj);
98 if (res)
99 obj = res;
100 }
101
102 colorspace = pdf_load_colorspace(ctx, obj);
103 indexed = fz_colorspace_is_indexed(ctx, colorspace);
104
105 n = fz_colorspace_n(ctx, colorspace);
106 }
107 else
108 {
109 n = 1;
110 }
111
112 obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D));
113 if (obj)
114 {
115 for (i = 0; i < n * 2; i++)
116 decode[i] = pdf_array_get_real(ctx, obj, i);
117 }
118 else if (fz_colorspace_is_lab(ctx, colorspace))
119 {
120 decode[0] = 0;
121 decode[1] = 100;
122 decode[2] = -128;
123 decode[3] = 127;
124 decode[4] = -128;
125 decode[5] = 127;
126 }
127 else
128 {
129 float maxval = indexed ? (1 << bpc) - 1 : 1;
130 for (i = 0; i < n * 2; i++)
131 decode[i] = i & 1 ? maxval : 0;
132 }
133
134 obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask));
135 if (pdf_is_dict(ctx, obj))
136 {
137 /* Not allowed for inline images or soft masks */
138 if (cstm)
139 fz_warn(ctx, "Ignoring invalid inline image soft mask");
140 else if (forcemask)
141 fz_warn(ctx, "Ignoring recursive image soft mask");
142 else
143 {
144 mask = pdf_load_image_imp(ctx, doc, rdb, obj, NULL, 1);
145 obj = pdf_dict_get(ctx, obj, PDF_NAME(Matte));
146 if (pdf_is_array(ctx, obj))
147 {
148 use_colorkey = 1;
149 for (i = 0; i < n; i++)
150 colorkey[i] = pdf_array_get_real(ctx, obj, i) * 255;
151 }
152 }
153 }
154 else if (pdf_is_array(ctx, obj))
155 {
156 use_colorkey = 1;
157 for (i = 0; i < n * 2; i++)
158 {
159 if (!pdf_is_int(ctx, pdf_array_get(ctx, obj, i)))
160 {
161 fz_warn(ctx, "invalid value in color key mask");
162 use_colorkey = 0;
163 }
164 colorkey[i] = pdf_array_get_int(ctx, obj, i);
165 }
166 }
167
168 /* Do we load from a ref, or do we load an inline stream? */
169 if (cstm == NULL)
170 {
171 /* Just load the compressed image data now and we can decode it on demand. */
172 buffer = pdf_load_compressed_stream(ctx, doc, pdf_to_num(ctx, dict));
173 image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? colorkey : NULL, buffer, mask);
174 image->invert_cmyk_jpeg = 0;
175 }
176 else
177 {
178 /* Inline stream */
179 stride = (w * n * bpc + 7) / 8;
180 image = fz_new_image_from_compressed_buffer(ctx, w, h, bpc, colorspace, 96, 96, interpolate, imagemask, decode, use_colorkey ? colorkey : NULL, NULL, mask);
181 image->invert_cmyk_jpeg = 0;
182 pdf_load_compressed_inline_image(ctx, doc, dict, stride * h, cstm, indexed, (fz_compressed_image *)image);
183 }
184 }
185 fz_always(ctx)
186 {
187 fz_drop_colorspace(ctx, colorspace);
188 fz_drop_image(ctx, mask);
189 }
190 fz_catch(ctx)
191 {
192 fz_drop_image(ctx, image);
193 fz_rethrow(ctx);
194 }
195 return image;
196}
197
198fz_image *
199pdf_load_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict, fz_stream *file)
200{
201 return pdf_load_image_imp(ctx, doc, rdb, dict, file, 0);
202}
203
204int
205pdf_is_jpx_image(fz_context *ctx, pdf_obj *dict)
206{
207 pdf_obj *filter;
208 int i, n;
209
210 filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
211 if (pdf_name_eq(ctx, filter, PDF_NAME(JPXDecode)))
212 return 1;
213 n = pdf_array_len(ctx, filter);
214 for (i = 0; i < n; i++)
215 if (pdf_name_eq(ctx, pdf_array_get(ctx, filter, i), PDF_NAME(JPXDecode)))
216 return 1;
217 return 0;
218}
219
220static fz_image *
221pdf_load_jpx(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int forcemask)
222{
223 fz_buffer *buf = NULL;
224 fz_colorspace *colorspace = NULL;
225 fz_pixmap *pix = NULL;
226 pdf_obj *obj;
227 fz_image *mask = NULL;
228 fz_image *img = NULL;
229
230 fz_var(pix);
231 fz_var(buf);
232 fz_var(colorspace);
233 fz_var(mask);
234
235 buf = pdf_load_stream(ctx, dict);
236
237 /* FIXME: We can't handle decode arrays for indexed images currently */
238 fz_try(ctx)
239 {
240 unsigned char *data;
241 size_t len;
242
243 obj = pdf_dict_get(ctx, dict, PDF_NAME(ColorSpace));
244 if (obj)
245 colorspace = pdf_load_colorspace(ctx, obj);
246
247 len = fz_buffer_storage(ctx, buf, &data);
248 pix = fz_load_jpx(ctx, data, len, colorspace);
249
250 obj = pdf_dict_geta(ctx, dict, PDF_NAME(SMask), PDF_NAME(Mask));
251 if (pdf_is_dict(ctx, obj))
252 {
253 if (forcemask)
254 fz_warn(ctx, "Ignoring recursive JPX soft mask");
255 else
256 mask = pdf_load_image_imp(ctx, doc, NULL, obj, NULL, 1);
257 }
258
259 obj = pdf_dict_geta(ctx, dict, PDF_NAME(Decode), PDF_NAME(D));
260 if (obj && !fz_colorspace_is_indexed(ctx, colorspace))
261 {
262 float decode[FZ_MAX_COLORS * 2];
263 int i;
264
265 for (i = 0; i < pix->n * 2; i++)
266 decode[i] = pdf_array_get_real(ctx, obj, i);
267
268 fz_decode_tile(ctx, pix, decode);
269 }
270
271 img = fz_new_image_from_pixmap(ctx, pix, mask);
272 }
273 fz_always(ctx)
274 {
275 fz_drop_image(ctx, mask);
276 fz_drop_pixmap(ctx, pix);
277 fz_drop_colorspace(ctx, colorspace);
278 fz_drop_buffer(ctx, buf);
279 }
280 fz_catch(ctx)
281 {
282 fz_rethrow(ctx);
283 }
284
285 return img;
286}
287
288fz_image *
289pdf_load_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
290{
291 fz_image *image;
292
293 if ((image = pdf_find_item(ctx, fz_drop_image_imp, dict)) != NULL)
294 return image;
295
296 image = pdf_load_image_imp(ctx, doc, NULL, dict, NULL, 0);
297 pdf_store_item(ctx, dict, image, fz_image_size(ctx, image));
298 return image;
299}
300
301pdf_obj *
302pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
303{
304 fz_pixmap *pixmap = NULL;
305 pdf_obj *imobj = NULL;
306 pdf_obj *dp;
307 fz_buffer *buffer = NULL;
308 pdf_obj *imref = NULL;
309 fz_compressed_buffer *cbuffer;
310 unsigned char digest[16];
311 int i, n;
312
313 /* If we can maintain compression, do so */
314 cbuffer = fz_compressed_image_buffer(ctx, image);
315
316 fz_var(pixmap);
317 fz_var(buffer);
318 fz_var(imobj);
319 fz_var(imref);
320
321 /* Check if the same image already exists in this doc. */
322 imref = pdf_find_image_resource(ctx, doc, image, digest);
323 if (imref)
324 return imref;
325
326 imobj = pdf_add_new_dict(ctx, doc, 3);
327 fz_try(ctx)
328 {
329 dp = pdf_dict_put_dict(ctx, imobj, PDF_NAME(DecodeParms), 3);
330 pdf_dict_put(ctx, imobj, PDF_NAME(Type), PDF_NAME(XObject));
331 pdf_dict_put(ctx, imobj, PDF_NAME(Subtype), PDF_NAME(Image));
332
333 if (cbuffer)
334 {
335 fz_compression_params *cp = &cbuffer->params;
336 switch (cp ? cp->type : FZ_IMAGE_UNKNOWN)
337 {
338 default:
339 goto raw_or_unknown_compression;
340 case FZ_IMAGE_JPEG:
341 if (cp->u.jpeg.color_transform != -1)
342 pdf_dict_put_int(ctx, dp, PDF_NAME(ColorTransform), cp->u.jpeg.color_transform);
343 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(DCTDecode));
344 break;
345 case FZ_IMAGE_JPX:
346 if (cp->u.jpx.smask_in_data)
347 pdf_dict_put_int(ctx, dp, PDF_NAME(SMaskInData), cp->u.jpx.smask_in_data);
348 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JPXDecode));
349 break;
350 case FZ_IMAGE_FAX:
351 if (cp->u.fax.columns)
352 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.fax.columns);
353 if (cp->u.fax.rows)
354 pdf_dict_put_int(ctx, dp, PDF_NAME(Rows), cp->u.fax.rows);
355 if (cp->u.fax.k)
356 pdf_dict_put_int(ctx, dp, PDF_NAME(K), cp->u.fax.k);
357 if (cp->u.fax.end_of_line)
358 pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfLine), cp->u.fax.end_of_line);
359 if (cp->u.fax.encoded_byte_align)
360 pdf_dict_put_bool(ctx, dp, PDF_NAME(EncodedByteAlign), cp->u.fax.encoded_byte_align);
361 if (cp->u.fax.end_of_block)
362 pdf_dict_put_bool(ctx, dp, PDF_NAME(EndOfBlock), cp->u.fax.end_of_block);
363 if (cp->u.fax.black_is_1)
364 pdf_dict_put_bool(ctx, dp, PDF_NAME(BlackIs1), cp->u.fax.black_is_1);
365 if (cp->u.fax.damaged_rows_before_error)
366 pdf_dict_put_int(ctx, dp, PDF_NAME(DamagedRowsBeforeError), cp->u.fax.damaged_rows_before_error);
367 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
368 break;
369 case FZ_IMAGE_FLATE:
370 if (cp->u.flate.columns)
371 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.flate.columns);
372 if (cp->u.flate.colors)
373 pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.flate.colors);
374 if (cp->u.flate.predictor)
375 pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.flate.predictor);
376 if (cp->u.flate.bpc)
377 pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.flate.bpc);
378 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
379 pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), image->bpc);
380 break;
381 case FZ_IMAGE_LZW:
382 if (cp->u.lzw.columns)
383 pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.lzw.columns);
384 if (cp->u.lzw.colors)
385 pdf_dict_put_int(ctx, dp, PDF_NAME(Colors), cp->u.lzw.colors);
386 if (cp->u.lzw.predictor)
387 pdf_dict_put_int(ctx, dp, PDF_NAME(Predictor), cp->u.lzw.predictor);
388 if (cp->u.lzw.early_change)
389 pdf_dict_put_int(ctx, dp, PDF_NAME(EarlyChange), cp->u.lzw.early_change);
390 if (cp->u.lzw.bpc)
391 pdf_dict_put_int(ctx, dp, PDF_NAME(BitsPerComponent), cp->u.lzw.bpc);
392 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(LZWDecode));
393 break;
394 case FZ_IMAGE_RLD:
395 pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(RunLengthDecode));
396 break;
397 }
398
399 if (!pdf_dict_len(ctx, dp))
400 pdf_dict_del(ctx, imobj, PDF_NAME(DecodeParms));
401
402 buffer = fz_keep_buffer(ctx, cbuffer->buffer);
403
404 if (image->use_decode)
405 {
406 pdf_obj *ary = pdf_dict_put_array(ctx, imobj, PDF_NAME(Decode), image->n * 2);
407 for (i = 0; i < image->n * 2; ++i)
408 pdf_array_push_real(ctx, ary, image->decode[i]);
409 }
410 }
411 else
412 {
413 unsigned int size;
414 int h;
415 unsigned char *d, *s;
416
417raw_or_unknown_compression:
418 /* Currently, set to maintain resolution; should we consider
419 * subsampling here according to desired output res? */
420 pixmap = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL);
421 n = pixmap->n - pixmap->alpha - pixmap->s; /* number of colorants */
422 if (n == 0)
423 n = 1; /* treat pixmaps with only alpha or spots as grayscale */
424
425 size = image->w * n;
426 h = image->h;
427 s = pixmap->samples;
428 d = fz_malloc(ctx, size * h);
429 buffer = fz_new_buffer_from_data(ctx, d, size * h);
430
431 if (n == pixmap->n)
432 {
433 /* If we use all channels, we can copy the data as is. */
434 while (h--)
435 {
436 memcpy(d, s, size);
437 d += size;
438 s += pixmap->stride;
439 }
440 }
441 else
442 {
443 /* Need to remove the alpha and spot planes. */
444 /* TODO: extract alpha plane to a soft mask. */
445 /* TODO: convert spots to colors. */
446
447 int line_skip = pixmap->stride - pixmap->w * pixmap->n;
448 int skip = pixmap->n - n;
449 while (h--)
450 {
451 int w = pixmap->w;
452 while (w--)
453 {
454 int k;
455 for (k = 0; k < n; ++k)
456 *d++ = *s++;
457 s += skip;
458 }
459 s += line_skip;
460 }
461 }
462 }
463
464 pdf_dict_put_int(ctx, imobj, PDF_NAME(Width), pixmap ? pixmap->w : image->w);
465 pdf_dict_put_int(ctx, imobj, PDF_NAME(Height), pixmap ? pixmap->h : image->h);
466
467 if (image->imagemask)
468 {
469 pdf_dict_put_bool(ctx, imobj, PDF_NAME(ImageMask), 1);
470 }
471 else
472 {
473 fz_colorspace *cs;
474
475 pdf_dict_put_int(ctx, imobj, PDF_NAME(BitsPerComponent), image->bpc);
476
477 cs = pixmap ? pixmap->colorspace : image->colorspace;
478 switch (fz_colorspace_type(ctx, cs))
479 {
480 case FZ_COLORSPACE_INDEXED:
481 {
482 fz_colorspace *basecs;
483 unsigned char *lookup = NULL;
484 int high = 0;
485 int basen;
486 pdf_obj *arr;
487
488 basecs = cs->u.indexed.base;
489 high = cs->u.indexed.high;
490 lookup = cs->u.indexed.lookup;
491 basen = basecs->n;
492
493 arr = pdf_dict_put_array(ctx, imobj, PDF_NAME(ColorSpace), 4);
494
495 pdf_array_push(ctx, arr, PDF_NAME(Indexed));
496 switch (fz_colorspace_type(ctx, basecs))
497 {
498 case FZ_COLORSPACE_GRAY:
499 pdf_array_push(ctx, arr, PDF_NAME(DeviceGray));
500 break;
501 case FZ_COLORSPACE_RGB:
502 pdf_array_push(ctx, arr, PDF_NAME(DeviceRGB));
503 break;
504 case FZ_COLORSPACE_CMYK:
505 pdf_array_push(ctx, arr, PDF_NAME(DeviceCMYK));
506 break;
507 default:
508 // TODO: convert to RGB!
509 fz_throw(ctx, FZ_ERROR_GENERIC, "only indexed Gray, RGB, and CMYK colorspaces supported");
510 break;
511 }
512
513 pdf_array_push_int(ctx, arr, high);
514 pdf_array_push_string(ctx, arr, (char *) lookup, basen * (high + 1));
515 }
516 break;
517 case FZ_COLORSPACE_NONE:
518 case FZ_COLORSPACE_GRAY:
519 pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceGray));
520 break;
521 case FZ_COLORSPACE_RGB:
522 pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceRGB));
523 break;
524 case FZ_COLORSPACE_CMYK:
525 pdf_dict_put(ctx, imobj, PDF_NAME(ColorSpace), PDF_NAME(DeviceCMYK));
526 break;
527 default:
528 // TODO: convert to RGB!
529 fz_throw(ctx, FZ_ERROR_GENERIC, "only Gray, RGB, and CMYK colorspaces supported");
530 break;
531 }
532 }
533
534 if (image->mask)
535 {
536 pdf_dict_put_drop(ctx, imobj, PDF_NAME(SMask), pdf_add_image(ctx, doc, image->mask));
537 }
538
539 pdf_update_stream(ctx, doc, imobj, buffer, 1);
540
541 /* Add ref to our image resource hash table. */
542 imref = pdf_insert_image_resource(ctx, doc, digest, imobj);
543 }
544 fz_always(ctx)
545 {
546 fz_drop_pixmap(ctx, pixmap);
547 fz_drop_buffer(ctx, buffer);
548 pdf_drop_obj(ctx, imobj);
549 }
550 fz_catch(ctx)
551 fz_rethrow(ctx);
552 return imref;
553}
554