1 | #include "mupdf/fitz.h" |
2 | #include "mupdf/pdf.h" |
3 | |
4 | #include <string.h> |
5 | |
6 | fz_rect |
7 | pdf_to_rect(fz_context *ctx, pdf_obj *array) |
8 | { |
9 | if (!pdf_is_array(ctx, array)) |
10 | return fz_empty_rect; |
11 | else |
12 | { |
13 | float a = pdf_array_get_real(ctx, array, 0); |
14 | float b = pdf_array_get_real(ctx, array, 1); |
15 | float c = pdf_array_get_real(ctx, array, 2); |
16 | float d = pdf_array_get_real(ctx, array, 3); |
17 | fz_rect r; |
18 | r.x0 = fz_min(a, c); |
19 | r.y0 = fz_min(b, d); |
20 | r.x1 = fz_max(a, c); |
21 | r.y1 = fz_max(b, d); |
22 | return r; |
23 | } |
24 | } |
25 | |
26 | fz_quad |
27 | pdf_to_quad(fz_context *ctx, pdf_obj *array, int offset) |
28 | { |
29 | fz_quad q; |
30 | q.ul.x = pdf_array_get_real(ctx, array, offset+0); |
31 | q.ul.y = pdf_array_get_real(ctx, array, offset+1); |
32 | q.ur.x = pdf_array_get_real(ctx, array, offset+2); |
33 | q.ur.y = pdf_array_get_real(ctx, array, offset+3); |
34 | q.ll.x = pdf_array_get_real(ctx, array, offset+4); |
35 | q.ll.y = pdf_array_get_real(ctx, array, offset+5); |
36 | q.lr.x = pdf_array_get_real(ctx, array, offset+6); |
37 | q.lr.y = pdf_array_get_real(ctx, array, offset+7); |
38 | return q; |
39 | } |
40 | |
41 | fz_matrix |
42 | pdf_to_matrix(fz_context *ctx, pdf_obj *array) |
43 | { |
44 | if (!pdf_is_array(ctx, array)) |
45 | return fz_identity; |
46 | else |
47 | { |
48 | fz_matrix m; |
49 | m.a = pdf_array_get_real(ctx, array, 0); |
50 | m.b = pdf_array_get_real(ctx, array, 1); |
51 | m.c = pdf_array_get_real(ctx, array, 2); |
52 | m.d = pdf_array_get_real(ctx, array, 3); |
53 | m.e = pdf_array_get_real(ctx, array, 4); |
54 | m.f = pdf_array_get_real(ctx, array, 5); |
55 | return m; |
56 | } |
57 | } |
58 | |
/*
 * Decode one code point from big-endian UTF-16 data in [s, end).
 *
 * Stores the decoded code point in *out and returns the number of bytes
 * consumed:
 *   4 - a high surrogate followed by a low surrogate (one astral code point)
 *   2 - a BMP code unit, or an unpaired surrogate, which is reported as
 *       FZ_REPLACEMENT_CHARACTER rather than being combined with whatever
 *       unit happens to follow it
 *   1 - fewer than two bytes available; *out is FZ_REPLACEMENT_CHARACTER
 */
static int
rune_from_utf16be(int *out, const unsigned char *s, const unsigned char *end)
{
	/* Compare lengths via (end - s) so no pointer past 'end' is ever formed. */
	if (end - s >= 2)
	{
		int a = s[0] << 8 | s[1];

		if (a >= 0xD800 && a <= 0xDBFF)
		{
			/* High surrogate: only valid if a low surrogate follows. */
			if (end - s >= 4)
			{
				int b = s[2] << 8 | s[3];
				if (b >= 0xDC00 && b <= 0xDFFF)
				{
					*out = ((a - 0xD800) << 10) + (b - 0xDC00) + 0x10000;
					return 4;
				}
			}
			*out = FZ_REPLACEMENT_CHARACTER;
			return 2;
		}

		if (a >= 0xDC00 && a <= 0xDFFF)
		{
			/* Low surrogate without a preceding high surrogate. */
			*out = FZ_REPLACEMENT_CHARACTER;
			return 2;
		}

		*out = a;
		return 2;
	}

	*out = FZ_REPLACEMENT_CHARACTER;
	return 1;
}
77 | |
/*
 * Decode one code point from little-endian UTF-16 data in [s, end).
 *
 * Stores the decoded code point in *out and returns the number of bytes
 * consumed:
 *   4 - a high surrogate followed by a low surrogate (one astral code point)
 *   2 - a BMP code unit, or an unpaired surrogate, which is reported as
 *       FZ_REPLACEMENT_CHARACTER rather than being combined with whatever
 *       unit happens to follow it
 *   1 - fewer than two bytes available; *out is FZ_REPLACEMENT_CHARACTER
 */
static int
rune_from_utf16le(int *out, const unsigned char *s, const unsigned char *end)
{
	/* Compare lengths via (end - s) so no pointer past 'end' is ever formed. */
	if (end - s >= 2)
	{
		int a = s[1] << 8 | s[0];

		if (a >= 0xD800 && a <= 0xDBFF)
		{
			/* High surrogate: only valid if a low surrogate follows. */
			if (end - s >= 4)
			{
				int b = s[3] << 8 | s[2];
				if (b >= 0xDC00 && b <= 0xDFFF)
				{
					*out = ((a - 0xD800) << 10) + (b - 0xDC00) + 0x10000;
					return 4;
				}
			}
			*out = FZ_REPLACEMENT_CHARACTER;
			return 2;
		}

		if (a >= 0xDC00 && a <= 0xDFFF)
		{
			/* Low surrogate without a preceding high surrogate. */
			*out = FZ_REPLACEMENT_CHARACTER;
			return 2;
		}

		*out = a;
		return 2;
	}

	*out = FZ_REPLACEMENT_CHARACTER;
	return 1;
}
96 | |
/*
 * Detect a language escape sequence at byte offset 'i' of the little-endian
 * UTF-16 data s[0..n).
 *
 * A sequence starts with an ESC code unit (bytes 27, 0) and is closed by a
 * second ESC code unit either 4 or 6 bytes later. Returns the total length
 * in bytes (6 or 8) of the sequence, or 0 if there is none at 'i'.
 */
static size_t
skip_language_code_utf16le(const unsigned char *s, size_t n, size_t i)
{
	const unsigned char *p = s + i;

	if (i + 6 <= n)
	{
		if (p[1] == 0 && p[0] == 27 && p[5] == 0 && p[4] == 27)
			return 6;
	}
	if (i + 8 <= n)
	{
		if (p[1] == 0 && p[0] == 27 && p[7] == 0 && p[6] == 27)
			return 8;
	}
	return 0;
}
107 | |
/*
 * Detect a language escape sequence at byte offset 'i' of the big-endian
 * UTF-16 data s[0..n).
 *
 * A sequence starts with an ESC code unit (bytes 0, 27) and is closed by a
 * second ESC code unit either 4 or 6 bytes later. Returns the total length
 * in bytes (6 or 8) of the sequence, or 0 if there is none at 'i'.
 */
static size_t
skip_language_code_utf16be(const unsigned char *s, size_t n, size_t i)
{
	const unsigned char *p = s + i;

	if (i + 6 <= n)
	{
		if (p[0] == 0 && p[1] == 27 && p[4] == 0 && p[5] == 27)
			return 6;
	}
	if (i + 8 <= n)
	{
		if (p[0] == 0 && p[1] == 27 && p[6] == 0 && p[7] == 27)
			return 8;
	}
	return 0;
}
118 | |
/*
 * Detect a language escape sequence at byte offset 'i' of the UTF-8 data
 * s[0..n).
 *
 * A sequence is ESC (27), a two byte language code, optionally a two byte
 * country code, and a closing ESC: 4 or 6 bytes in total. Returns that
 * length so the caller skips the whole sequence including the closing ESC,
 * or 0 if there is no complete sequence at 'i'.
 *
 * Every byte that is inspected is bounds-checked against 'n' first.
 */
static size_t
skip_language_code_utf8(const unsigned char *s, size_t n, size_t i)
{
	/* Nothing to do unless an ESC opens the sequence. */
	if (i >= n || s[i] != 27)
		return 0;

	/* ESC + language + ESC: the closing ESC lives at i+3, so need i+4 <= n. */
	if (i + 4 <= n && s[i+3] == 27)
		return 4;

	/* ESC + language + country + ESC: closing ESC at i+5, so need i+6 <= n. */
	if (i + 6 <= n && s[i+5] == 27)
		return 6;

	return 0;
}
129 | |
130 | /* Convert Unicode/PdfDocEncoding string into utf-8 */ |
131 | char * |
132 | pdf_new_utf8_from_pdf_string(fz_context *ctx, const char *ssrcptr, size_t srclen) |
133 | { |
134 | const unsigned char *srcptr = (const unsigned char*)ssrcptr; |
135 | char *dstptr, *dst; |
136 | size_t dstlen = 0; |
137 | int ucs; |
138 | size_t i, n; |
139 | |
140 | /* UTF-16BE */ |
141 | if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) |
142 | { |
143 | i = 2; |
144 | while (i + 2 <= srclen) |
145 | { |
146 | n = skip_language_code_utf16be(srcptr, srclen, i); |
147 | if (n) |
148 | i += n; |
149 | else |
150 | { |
151 | i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen); |
152 | dstlen += fz_runelen(ucs); |
153 | } |
154 | } |
155 | |
156 | dstptr = dst = fz_malloc(ctx, dstlen + 1); |
157 | |
158 | i = 2; |
159 | while (i + 2 <= srclen) |
160 | { |
161 | n = skip_language_code_utf16be(srcptr, srclen, i); |
162 | if (n) |
163 | i += n; |
164 | else |
165 | { |
166 | i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen); |
167 | dstptr += fz_runetochar(dstptr, ucs); |
168 | } |
169 | } |
170 | } |
171 | |
172 | /* UTF-16LE */ |
173 | else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) |
174 | { |
175 | i = 2; |
176 | while (i + 2 <= srclen) |
177 | { |
178 | n = skip_language_code_utf16le(srcptr, srclen, i); |
179 | if (n) |
180 | i += n; |
181 | else |
182 | { |
183 | i += rune_from_utf16le(&ucs, srcptr + i, srcptr + srclen); |
184 | dstlen += fz_runelen(ucs); |
185 | } |
186 | } |
187 | |
188 | dstptr = dst = fz_malloc(ctx, dstlen + 1); |
189 | |
190 | i = 2; |
191 | while (i + 2 <= srclen) |
192 | { |
193 | n = skip_language_code_utf16le(srcptr, srclen, i); |
194 | if (n) |
195 | i += n; |
196 | else |
197 | { |
198 | i += rune_from_utf16le(&ucs, srcptr + i, srcptr + srclen); |
199 | dstptr += fz_runetochar(dstptr, ucs); |
200 | } |
201 | } |
202 | } |
203 | |
204 | /* UTF-8 */ |
205 | else if (srclen >= 3 && srcptr[0] == 239 && srcptr[1] == 187 && srcptr[2] == 191) |
206 | { |
207 | i = 3; |
208 | while (i < srclen) |
209 | { |
210 | n = skip_language_code_utf8(srcptr, srclen, i); |
211 | if (n) |
212 | i += n; |
213 | else |
214 | { |
215 | i += 1; |
216 | dstlen += 1; |
217 | } |
218 | } |
219 | |
220 | dstptr = dst = fz_malloc(ctx, dstlen + 1); |
221 | |
222 | i = 3; |
223 | while (i < srclen) |
224 | { |
225 | n = skip_language_code_utf8(srcptr, srclen, i); |
226 | if (n) |
227 | i += n; |
228 | else |
229 | *dstptr++ = srcptr[i++]; |
230 | } |
231 | } |
232 | |
233 | /* PDFDocEncoding */ |
234 | else |
235 | { |
236 | for (i = 0; i < srclen; i++) |
237 | dstlen += fz_runelen(fz_unicode_from_pdf_doc_encoding[srcptr[i]]); |
238 | |
239 | dstptr = dst = fz_malloc(ctx, dstlen + 1); |
240 | |
241 | for (i = 0; i < srclen; i++) |
242 | { |
243 | ucs = fz_unicode_from_pdf_doc_encoding[srcptr[i]]; |
244 | dstptr += fz_runetochar(dstptr, ucs); |
245 | } |
246 | } |
247 | |
248 | *dstptr = 0; |
249 | return dst; |
250 | } |
251 | |
252 | /* Convert text string object to UTF-8 */ |
253 | char * |
254 | pdf_new_utf8_from_pdf_string_obj(fz_context *ctx, pdf_obj *src) |
255 | { |
256 | const char *srcptr; |
257 | size_t srclen; |
258 | srcptr = pdf_to_string(ctx, src, &srclen); |
259 | return pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen); |
260 | } |
261 | |
262 | /* Load text stream and convert to UTF-8 */ |
263 | char * |
264 | pdf_new_utf8_from_pdf_stream_obj(fz_context *ctx, pdf_obj *src) |
265 | { |
266 | fz_buffer *stmbuf; |
267 | char *srcptr; |
268 | size_t srclen; |
269 | char *dst = NULL; |
270 | |
271 | stmbuf = pdf_load_stream(ctx, src); |
272 | srclen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&srcptr); |
273 | fz_try(ctx) |
274 | dst = pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen); |
275 | fz_always(ctx) |
276 | fz_drop_buffer(ctx, stmbuf); |
277 | fz_catch(ctx) |
278 | fz_rethrow(ctx); |
279 | return dst; |
280 | } |
281 | |
282 | /* Load text stream or text string and convert to UTF-8 */ |
283 | char * |
284 | pdf_load_stream_or_string_as_utf8(fz_context *ctx, pdf_obj *src) |
285 | { |
286 | if (pdf_is_stream(ctx, src)) |
287 | return pdf_new_utf8_from_pdf_stream_obj(ctx, src); |
288 | return pdf_new_utf8_from_pdf_string_obj(ctx, src); |
289 | } |
290 | |
291 | static pdf_obj * |
292 | pdf_new_text_string_utf16be(fz_context *ctx, const char *s) |
293 | { |
294 | int c, i = 0, n = fz_utflen(s); |
295 | unsigned char *p = fz_malloc(ctx, n * 2 + 2); |
296 | pdf_obj *obj; |
297 | p[i++] = 254; |
298 | p[i++] = 255; |
299 | while (*s) |
300 | { |
301 | s += fz_chartorune(&c, s); |
302 | p[i++] = (c>>8) & 0xff; |
303 | p[i++] = (c) & 0xff; |
304 | } |
305 | fz_try(ctx) |
306 | obj = pdf_new_string(ctx, (char*)p, i); |
307 | fz_always(ctx) |
308 | fz_free(ctx, p); |
309 | fz_catch(ctx) |
310 | fz_rethrow(ctx); |
311 | return obj; |
312 | } |
313 | |
314 | /* |
315 | * Create a PDF 'text string' by encoding input string as either ASCII or UTF-16BE. |
316 | * In theory, we could also use PDFDocEncoding. |
317 | */ |
318 | pdf_obj * |
319 | pdf_new_text_string(fz_context *ctx, const char *s) |
320 | { |
321 | int i = 0; |
322 | while (s[i] != 0) |
323 | { |
324 | if (((unsigned char)s[i]) >= 128) |
325 | return pdf_new_text_string_utf16be(ctx, s); |
326 | ++i; |
327 | } |
328 | return pdf_new_string(ctx, s, i); |
329 | } |
330 | |
331 | pdf_obj * |
332 | pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) |
333 | { |
334 | pdf_obj *ary = NULL; |
335 | pdf_obj *obj = NULL; |
336 | int64_t a = 0, b = 0, n = 0; |
337 | pdf_token tok; |
338 | pdf_obj *op = NULL; |
339 | |
340 | fz_var(obj); |
341 | |
342 | ary = pdf_new_array(ctx, doc, 4); |
343 | |
344 | fz_try(ctx) |
345 | { |
346 | while (1) |
347 | { |
348 | tok = pdf_lex(ctx, file, buf); |
349 | |
350 | if (tok != PDF_TOK_INT && tok != PDF_TOK_R) |
351 | { |
352 | if (n > 0) |
353 | pdf_array_push_int(ctx, ary, a); |
354 | if (n > 1) |
355 | pdf_array_push_int(ctx, ary, b); |
356 | n = 0; |
357 | } |
358 | |
359 | if (tok == PDF_TOK_INT && n == 2) |
360 | { |
361 | pdf_array_push_int(ctx, ary, a); |
362 | a = b; |
363 | n --; |
364 | } |
365 | |
366 | switch (tok) |
367 | { |
368 | case PDF_TOK_EOF: |
369 | fz_throw(ctx, FZ_ERROR_SYNTAX, "array not closed before end of file" ); |
370 | |
371 | case PDF_TOK_CLOSE_ARRAY: |
372 | op = ary; |
373 | goto end; |
374 | |
375 | case PDF_TOK_INT: |
376 | if (n == 0) |
377 | a = buf->i; |
378 | if (n == 1) |
379 | b = buf->i; |
380 | n ++; |
381 | break; |
382 | |
383 | case PDF_TOK_R: |
384 | if (n != 2) |
385 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot parse indirect reference in array" ); |
386 | pdf_array_push_drop(ctx, ary, pdf_new_indirect(ctx, doc, a, b)); |
387 | n = 0; |
388 | break; |
389 | |
390 | case PDF_TOK_OPEN_ARRAY: |
391 | obj = pdf_parse_array(ctx, doc, file, buf); |
392 | pdf_array_push_drop(ctx, ary, obj); |
393 | break; |
394 | |
395 | case PDF_TOK_OPEN_DICT: |
396 | obj = pdf_parse_dict(ctx, doc, file, buf); |
397 | pdf_array_push_drop(ctx, ary, obj); |
398 | break; |
399 | |
400 | case PDF_TOK_NAME: |
401 | pdf_array_push_name(ctx, ary, buf->scratch); |
402 | break; |
403 | case PDF_TOK_REAL: |
404 | pdf_array_push_real(ctx, ary, buf->f); |
405 | break; |
406 | case PDF_TOK_STRING: |
407 | pdf_array_push_string(ctx, ary, buf->scratch, buf->len); |
408 | break; |
409 | case PDF_TOK_TRUE: |
410 | pdf_array_push_bool(ctx, ary, 1); |
411 | break; |
412 | case PDF_TOK_FALSE: |
413 | pdf_array_push_bool(ctx, ary, 0); |
414 | break; |
415 | case PDF_TOK_NULL: |
416 | pdf_array_push(ctx, ary, PDF_NULL); |
417 | break; |
418 | |
419 | default: |
420 | pdf_array_push(ctx, ary, PDF_NULL); |
421 | break; |
422 | } |
423 | } |
424 | end: |
425 | {} |
426 | } |
427 | fz_catch(ctx) |
428 | { |
429 | pdf_drop_obj(ctx, ary); |
430 | fz_rethrow(ctx); |
431 | } |
432 | return op; |
433 | } |
434 | |
435 | pdf_obj * |
436 | pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) |
437 | { |
438 | pdf_obj *dict; |
439 | pdf_obj *key = NULL; |
440 | pdf_obj *val = NULL; |
441 | pdf_token tok; |
442 | int64_t a, b; |
443 | |
444 | dict = pdf_new_dict(ctx, doc, 8); |
445 | |
446 | fz_var(key); |
447 | fz_var(val); |
448 | |
449 | fz_try(ctx) |
450 | { |
451 | while (1) |
452 | { |
453 | tok = pdf_lex(ctx, file, buf); |
454 | skip: |
455 | if (tok == PDF_TOK_CLOSE_DICT) |
456 | break; |
457 | |
458 | /* for BI .. ID .. EI in content streams */ |
459 | if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID" )) |
460 | break; |
461 | |
462 | if (tok != PDF_TOK_NAME) |
463 | fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid key in dict" ); |
464 | |
465 | key = pdf_new_name(ctx, buf->scratch); |
466 | |
467 | tok = pdf_lex(ctx, file, buf); |
468 | |
469 | switch (tok) |
470 | { |
471 | case PDF_TOK_OPEN_ARRAY: |
472 | val = pdf_parse_array(ctx, doc, file, buf); |
473 | break; |
474 | |
475 | case PDF_TOK_OPEN_DICT: |
476 | val = pdf_parse_dict(ctx, doc, file, buf); |
477 | break; |
478 | |
479 | case PDF_TOK_NAME: val = pdf_new_name(ctx, buf->scratch); break; |
480 | case PDF_TOK_REAL: val = pdf_new_real(ctx, buf->f); break; |
481 | case PDF_TOK_STRING: val = pdf_new_string(ctx, buf->scratch, buf->len); break; |
482 | case PDF_TOK_TRUE: val = PDF_TRUE; break; |
483 | case PDF_TOK_FALSE: val = PDF_FALSE; break; |
484 | case PDF_TOK_NULL: val = PDF_NULL; break; |
485 | |
486 | case PDF_TOK_INT: |
487 | /* 64-bit to allow for numbers > INT_MAX and overflow */ |
488 | a = buf->i; |
489 | tok = pdf_lex(ctx, file, buf); |
490 | if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME || |
491 | (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID" ))) |
492 | { |
493 | val = pdf_new_int(ctx, a); |
494 | pdf_dict_put(ctx, dict, key, val); |
495 | pdf_drop_obj(ctx, val); |
496 | val = NULL; |
497 | pdf_drop_obj(ctx, key); |
498 | key = NULL; |
499 | goto skip; |
500 | } |
501 | if (tok == PDF_TOK_INT) |
502 | { |
503 | b = buf->i; |
504 | tok = pdf_lex(ctx, file, buf); |
505 | if (tok == PDF_TOK_R) |
506 | { |
507 | val = pdf_new_indirect(ctx, doc, a, b); |
508 | break; |
509 | } |
510 | } |
511 | fz_warn(ctx, "invalid indirect reference in dict" ); |
512 | val = PDF_NULL; |
513 | break; |
514 | |
515 | default: |
516 | val = PDF_NULL; |
517 | break; |
518 | } |
519 | |
520 | pdf_dict_put(ctx, dict, key, val); |
521 | pdf_drop_obj(ctx, val); |
522 | val = NULL; |
523 | pdf_drop_obj(ctx, key); |
524 | key = NULL; |
525 | } |
526 | } |
527 | fz_catch(ctx) |
528 | { |
529 | pdf_drop_obj(ctx, dict); |
530 | pdf_drop_obj(ctx, key); |
531 | pdf_drop_obj(ctx, val); |
532 | fz_rethrow(ctx); |
533 | } |
534 | return dict; |
535 | } |
536 | |
537 | pdf_obj * |
538 | pdf_parse_stm_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf) |
539 | { |
540 | pdf_token tok; |
541 | |
542 | tok = pdf_lex(ctx, file, buf); |
543 | |
544 | switch (tok) |
545 | { |
546 | case PDF_TOK_OPEN_ARRAY: |
547 | return pdf_parse_array(ctx, doc, file, buf); |
548 | case PDF_TOK_OPEN_DICT: |
549 | return pdf_parse_dict(ctx, doc, file, buf); |
550 | case PDF_TOK_NAME: return pdf_new_name(ctx, buf->scratch); |
551 | case PDF_TOK_REAL: return pdf_new_real(ctx, buf->f); |
552 | case PDF_TOK_STRING: return pdf_new_string(ctx, buf->scratch, buf->len); |
553 | case PDF_TOK_TRUE: return PDF_TRUE; |
554 | case PDF_TOK_FALSE: return PDF_FALSE; |
555 | case PDF_TOK_NULL: return PDF_NULL; |
556 | case PDF_TOK_INT: return pdf_new_int(ctx, buf->i); |
557 | default: fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown token in object stream" ); |
558 | } |
559 | } |
560 | |
561 | pdf_obj * |
562 | pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, |
563 | fz_stream *file, pdf_lexbuf *buf, |
564 | int *onum, int *ogen, int64_t *ostmofs, int *try_repair) |
565 | { |
566 | pdf_obj *obj = NULL; |
567 | int num = 0, gen = 0; |
568 | int64_t stm_ofs; |
569 | pdf_token tok; |
570 | int64_t a, b; |
571 | int read_next_token = 1; |
572 | |
573 | fz_var(obj); |
574 | |
575 | tok = pdf_lex(ctx, file, buf); |
576 | if (tok != PDF_TOK_INT) |
577 | { |
578 | if (try_repair) |
579 | *try_repair = 1; |
580 | fz_throw(ctx, FZ_ERROR_SYNTAX, "expected object number" ); |
581 | } |
582 | num = buf->i; |
583 | if (num < 0 || num > PDF_MAX_OBJECT_NUMBER) |
584 | fz_throw(ctx, FZ_ERROR_SYNTAX, "object number out of range" ); |
585 | |
586 | tok = pdf_lex(ctx, file, buf); |
587 | if (tok != PDF_TOK_INT) |
588 | { |
589 | if (try_repair) |
590 | *try_repair = 1; |
591 | fz_throw(ctx, FZ_ERROR_SYNTAX, "expected generation number (%d ? obj)" , num); |
592 | } |
593 | gen = buf->i; |
594 | |
595 | tok = pdf_lex(ctx, file, buf); |
596 | if (tok != PDF_TOK_OBJ) |
597 | { |
598 | if (try_repair) |
599 | *try_repair = 1; |
600 | fz_throw(ctx, FZ_ERROR_SYNTAX, "expected 'obj' keyword (%d %d ?)" , num, gen); |
601 | } |
602 | |
603 | tok = pdf_lex(ctx, file, buf); |
604 | |
605 | switch (tok) |
606 | { |
607 | case PDF_TOK_OPEN_ARRAY: |
608 | obj = pdf_parse_array(ctx, doc, file, buf); |
609 | break; |
610 | |
611 | case PDF_TOK_OPEN_DICT: |
612 | obj = pdf_parse_dict(ctx, doc, file, buf); |
613 | break; |
614 | |
615 | case PDF_TOK_NAME: obj = pdf_new_name(ctx, buf->scratch); break; |
616 | case PDF_TOK_REAL: obj = pdf_new_real(ctx, buf->f); break; |
617 | case PDF_TOK_STRING: obj = pdf_new_string(ctx, buf->scratch, buf->len); break; |
618 | case PDF_TOK_TRUE: obj = PDF_TRUE; break; |
619 | case PDF_TOK_FALSE: obj = PDF_FALSE; break; |
620 | case PDF_TOK_NULL: obj = PDF_NULL; break; |
621 | |
622 | case PDF_TOK_INT: |
623 | a = buf->i; |
624 | tok = pdf_lex(ctx, file, buf); |
625 | |
626 | if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) |
627 | { |
628 | obj = pdf_new_int(ctx, a); |
629 | read_next_token = 0; |
630 | break; |
631 | } |
632 | else if (tok == PDF_TOK_INT) |
633 | { |
634 | b = buf->i; |
635 | tok = pdf_lex(ctx, file, buf); |
636 | if (tok == PDF_TOK_R) |
637 | { |
638 | obj = pdf_new_indirect(ctx, doc, a, b); |
639 | break; |
640 | } |
641 | } |
642 | fz_throw(ctx, FZ_ERROR_SYNTAX, "expected 'R' keyword (%d %d R)" , num, gen); |
643 | |
644 | case PDF_TOK_ENDOBJ: |
645 | obj = PDF_NULL; |
646 | read_next_token = 0; |
647 | break; |
648 | |
649 | default: |
650 | fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in object (%d %d R)" , num, gen); |
651 | } |
652 | |
653 | fz_try(ctx) |
654 | { |
655 | if (read_next_token) |
656 | tok = pdf_lex(ctx, file, buf); |
657 | |
658 | if (tok == PDF_TOK_STREAM) |
659 | { |
660 | int c = fz_read_byte(ctx, file); |
661 | while (c == ' ') |
662 | c = fz_read_byte(ctx, file); |
663 | if (c == '\r') |
664 | { |
665 | c = fz_peek_byte(ctx, file); |
666 | if (c != '\n') |
667 | fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)" , num, gen); |
668 | else |
669 | fz_read_byte(ctx, file); |
670 | } |
671 | stm_ofs = fz_tell(ctx, file); |
672 | } |
673 | else if (tok == PDF_TOK_ENDOBJ) |
674 | { |
675 | stm_ofs = 0; |
676 | } |
677 | else |
678 | { |
679 | fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)" , num, gen); |
680 | stm_ofs = 0; |
681 | } |
682 | } |
683 | fz_catch(ctx) |
684 | { |
685 | pdf_drop_obj(ctx, obj); |
686 | fz_rethrow(ctx); |
687 | } |
688 | |
689 | if (onum) *onum = num; |
690 | if (ogen) *ogen = gen; |
691 | if (ostmofs) *ostmofs = stm_ofs; |
692 | |
693 | return obj; |
694 | } |
695 | |