1 | #include "mupdf/fitz.h" |
2 | |
3 | #include <float.h> |
4 | |
5 | fz_display_list * |
6 | fz_new_display_list_from_page(fz_context *ctx, fz_page *page) |
7 | { |
8 | fz_display_list *list; |
9 | fz_device *dev = NULL; |
10 | |
11 | fz_var(dev); |
12 | |
13 | list = fz_new_display_list(ctx, fz_bound_page(ctx, page)); |
14 | fz_try(ctx) |
15 | { |
16 | dev = fz_new_list_device(ctx, list); |
17 | fz_run_page(ctx, page, dev, fz_identity, NULL); |
18 | fz_close_device(ctx, dev); |
19 | } |
20 | fz_always(ctx) |
21 | { |
22 | fz_drop_device(ctx, dev); |
23 | } |
24 | fz_catch(ctx) |
25 | { |
26 | fz_drop_display_list(ctx, list); |
27 | fz_rethrow(ctx); |
28 | } |
29 | |
30 | return list; |
31 | } |
32 | |
33 | fz_display_list * |
34 | fz_new_display_list_from_page_number(fz_context *ctx, fz_document *doc, int number) |
35 | { |
36 | fz_page *page; |
37 | fz_display_list *list = NULL; |
38 | |
39 | page = fz_load_page(ctx, doc, number); |
40 | fz_try(ctx) |
41 | list = fz_new_display_list_from_page(ctx, page); |
42 | fz_always(ctx) |
43 | fz_drop_page(ctx, page); |
44 | fz_catch(ctx) |
45 | fz_rethrow(ctx); |
46 | return list; |
47 | } |
48 | |
49 | fz_display_list * |
50 | fz_new_display_list_from_page_contents(fz_context *ctx, fz_page *page) |
51 | { |
52 | fz_display_list *list; |
53 | fz_device *dev = NULL; |
54 | |
55 | fz_var(dev); |
56 | |
57 | list = fz_new_display_list(ctx, fz_bound_page(ctx, page)); |
58 | fz_try(ctx) |
59 | { |
60 | dev = fz_new_list_device(ctx, list); |
61 | fz_run_page_contents(ctx, page, dev, fz_identity, NULL); |
62 | fz_close_device(ctx, dev); |
63 | } |
64 | fz_always(ctx) |
65 | { |
66 | fz_drop_device(ctx, dev); |
67 | } |
68 | fz_catch(ctx) |
69 | { |
70 | fz_drop_display_list(ctx, list); |
71 | fz_rethrow(ctx); |
72 | } |
73 | |
74 | return list; |
75 | } |
76 | |
77 | /* |
78 | Render the page to a pixmap using the transform and colorspace. |
79 | */ |
80 | fz_pixmap * |
81 | fz_new_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, fz_matrix ctm, fz_colorspace *cs, int alpha) |
82 | { |
83 | return fz_new_pixmap_from_display_list_with_separations(ctx, list, ctm, cs, NULL, alpha); |
84 | } |
85 | |
86 | fz_pixmap * |
87 | fz_new_pixmap_from_display_list_with_separations(fz_context *ctx, fz_display_list *list, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha) |
88 | { |
89 | fz_rect rect; |
90 | fz_irect bbox; |
91 | fz_pixmap *pix; |
92 | fz_device *dev = NULL; |
93 | |
94 | fz_var(dev); |
95 | |
96 | rect = fz_bound_display_list(ctx, list); |
97 | rect = fz_transform_rect(rect, ctm); |
98 | bbox = fz_round_rect(rect); |
99 | |
100 | pix = fz_new_pixmap_with_bbox(ctx, cs, bbox, seps, alpha); |
101 | if (alpha) |
102 | fz_clear_pixmap(ctx, pix); |
103 | else |
104 | fz_clear_pixmap_with_value(ctx, pix, 0xFF); |
105 | |
106 | fz_try(ctx) |
107 | { |
108 | dev = fz_new_draw_device(ctx, ctm, pix); |
109 | fz_run_display_list(ctx, list, dev, fz_identity, fz_infinite_rect, NULL); |
110 | fz_close_device(ctx, dev); |
111 | } |
112 | fz_always(ctx) |
113 | { |
114 | fz_drop_device(ctx, dev); |
115 | } |
116 | fz_catch(ctx) |
117 | { |
118 | fz_drop_pixmap(ctx, pix); |
119 | fz_rethrow(ctx); |
120 | } |
121 | |
122 | return pix; |
123 | } |
124 | |
125 | /* |
126 | Render the page contents without annotations. |
127 | */ |
128 | fz_pixmap * |
129 | fz_new_pixmap_from_page_contents(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha) |
130 | { |
131 | return fz_new_pixmap_from_page_contents_with_separations(ctx, page, ctm, cs, NULL, alpha); |
132 | } |
133 | |
134 | fz_pixmap * |
135 | fz_new_pixmap_from_page_contents_with_separations(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha) |
136 | { |
137 | fz_rect rect; |
138 | fz_irect bbox; |
139 | fz_pixmap *pix; |
140 | fz_device *dev = NULL; |
141 | |
142 | fz_var(dev); |
143 | |
144 | rect = fz_bound_page(ctx, page); |
145 | rect = fz_transform_rect(rect, ctm); |
146 | bbox = fz_round_rect(rect); |
147 | |
148 | pix = fz_new_pixmap_with_bbox(ctx, cs, bbox, seps, alpha); |
149 | if (alpha) |
150 | fz_clear_pixmap(ctx, pix); |
151 | else |
152 | fz_clear_pixmap_with_value(ctx, pix, 0xFF); |
153 | |
154 | fz_try(ctx) |
155 | { |
156 | dev = fz_new_draw_device(ctx, ctm, pix); |
157 | fz_run_page_contents(ctx, page, dev, fz_identity, NULL); |
158 | fz_close_device(ctx, dev); |
159 | } |
160 | fz_always(ctx) |
161 | { |
162 | fz_drop_device(ctx, dev); |
163 | } |
164 | fz_catch(ctx) |
165 | { |
166 | fz_drop_pixmap(ctx, pix); |
167 | fz_rethrow(ctx); |
168 | } |
169 | |
170 | return pix; |
171 | } |
172 | |
173 | fz_pixmap * |
174 | fz_new_pixmap_from_page(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha) |
175 | { |
176 | return fz_new_pixmap_from_page_with_separations(ctx, page, ctm, cs, NULL, alpha); |
177 | } |
178 | |
179 | fz_pixmap * |
180 | fz_new_pixmap_from_page_with_separations(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha) |
181 | { |
182 | fz_rect rect; |
183 | fz_irect bbox; |
184 | fz_pixmap *pix; |
185 | fz_device *dev = NULL; |
186 | |
187 | fz_var(dev); |
188 | |
189 | rect = fz_bound_page(ctx, page); |
190 | rect = fz_transform_rect(rect, ctm); |
191 | bbox = fz_round_rect(rect); |
192 | |
193 | pix = fz_new_pixmap_with_bbox(ctx, cs, bbox, seps, alpha); |
194 | |
195 | fz_try(ctx) |
196 | { |
197 | if (alpha) |
198 | fz_clear_pixmap(ctx, pix); |
199 | else |
200 | fz_clear_pixmap_with_value(ctx, pix, 0xFF); |
201 | |
202 | dev = fz_new_draw_device(ctx, ctm, pix); |
203 | fz_run_page(ctx, page, dev, fz_identity, NULL); |
204 | fz_close_device(ctx, dev); |
205 | } |
206 | fz_always(ctx) |
207 | { |
208 | fz_drop_device(ctx, dev); |
209 | } |
210 | fz_catch(ctx) |
211 | { |
212 | fz_drop_pixmap(ctx, pix); |
213 | fz_rethrow(ctx); |
214 | } |
215 | |
216 | return pix; |
217 | } |
218 | |
219 | fz_pixmap * |
220 | fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_matrix ctm, fz_colorspace *cs, int alpha) |
221 | { |
222 | return fz_new_pixmap_from_page_number_with_separations(ctx, doc, number, ctm, cs, NULL, alpha); |
223 | } |
224 | |
225 | fz_pixmap * |
226 | fz_new_pixmap_from_page_number_with_separations(fz_context *ctx, fz_document *doc, int number, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha) |
227 | { |
228 | fz_page *page; |
229 | fz_pixmap *pix = NULL; |
230 | |
231 | page = fz_load_page(ctx, doc, number); |
232 | fz_try(ctx) |
233 | pix = fz_new_pixmap_from_page_with_separations(ctx, page, ctm, cs, seps, alpha); |
234 | fz_always(ctx) |
235 | fz_drop_page(ctx, page); |
236 | fz_catch(ctx) |
237 | fz_rethrow(ctx); |
238 | return pix; |
239 | } |
240 | |
241 | fz_stext_page * |
242 | fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, const fz_stext_options *options) |
243 | { |
244 | fz_stext_page *text; |
245 | fz_device *dev = NULL; |
246 | |
247 | fz_var(dev); |
248 | |
249 | if (list == NULL) |
250 | return NULL; |
251 | |
252 | text = fz_new_stext_page(ctx, fz_bound_display_list(ctx, list)); |
253 | fz_try(ctx) |
254 | { |
255 | dev = fz_new_stext_device(ctx, text, options); |
256 | fz_run_display_list(ctx, list, dev, fz_identity, fz_infinite_rect, NULL); |
257 | fz_close_device(ctx, dev); |
258 | } |
259 | fz_always(ctx) |
260 | { |
261 | fz_drop_device(ctx, dev); |
262 | } |
263 | fz_catch(ctx) |
264 | { |
265 | fz_drop_stext_page(ctx, text); |
266 | fz_rethrow(ctx); |
267 | } |
268 | |
269 | return text; |
270 | } |
271 | |
272 | fz_stext_page * |
273 | fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, const fz_stext_options *options) |
274 | { |
275 | fz_stext_page *text; |
276 | fz_device *dev = NULL; |
277 | |
278 | fz_var(dev); |
279 | |
280 | if (page == NULL) |
281 | return NULL; |
282 | |
283 | text = fz_new_stext_page(ctx, fz_bound_page(ctx, page)); |
284 | fz_try(ctx) |
285 | { |
286 | dev = fz_new_stext_device(ctx, text, options); |
287 | fz_run_page_contents(ctx, page, dev, fz_identity, NULL); |
288 | fz_close_device(ctx, dev); |
289 | } |
290 | fz_always(ctx) |
291 | { |
292 | fz_drop_device(ctx, dev); |
293 | } |
294 | fz_catch(ctx) |
295 | { |
296 | fz_drop_stext_page(ctx, text); |
297 | fz_rethrow(ctx); |
298 | } |
299 | |
300 | return text; |
301 | } |
302 | |
303 | fz_stext_page * |
304 | fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_stext_options *options) |
305 | { |
306 | fz_page *page; |
307 | fz_stext_page *text = NULL; |
308 | |
309 | page = fz_load_page(ctx, doc, number); |
310 | fz_try(ctx) |
311 | text = fz_new_stext_page_from_page(ctx, page, options); |
312 | fz_always(ctx) |
313 | fz_drop_page(ctx, page); |
314 | fz_catch(ctx) |
315 | fz_rethrow(ctx); |
316 | return text; |
317 | } |
318 | |
319 | int |
320 | fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_quad *hit_bbox, int hit_max) |
321 | { |
322 | fz_stext_page *text; |
323 | int count = 0; |
324 | |
325 | text = fz_new_stext_page_from_display_list(ctx, list, NULL); |
326 | fz_try(ctx) |
327 | count = fz_search_stext_page(ctx, text, needle, hit_bbox, hit_max); |
328 | fz_always(ctx) |
329 | fz_drop_stext_page(ctx, text); |
330 | fz_catch(ctx) |
331 | fz_rethrow(ctx); |
332 | return count; |
333 | } |
334 | |
335 | /* |
336 | Search for the 'needle' text on the page. |
337 | Record the hits in the hit_bbox array and return the number of hits. |
338 | Will stop looking once it has filled hit_max rectangles. |
339 | */ |
340 | int |
341 | fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_quad *hit_bbox, int hit_max) |
342 | { |
343 | fz_stext_page *text; |
344 | int count = 0; |
345 | |
346 | text = fz_new_stext_page_from_page(ctx, page, NULL); |
347 | fz_try(ctx) |
348 | count = fz_search_stext_page(ctx, text, needle, hit_bbox, hit_max); |
349 | fz_always(ctx) |
350 | fz_drop_stext_page(ctx, text); |
351 | fz_catch(ctx) |
352 | fz_rethrow(ctx); |
353 | return count; |
354 | } |
355 | |
356 | int |
357 | fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, fz_quad *hit_bbox, int hit_max) |
358 | { |
359 | fz_page *page; |
360 | int count = 0; |
361 | |
362 | page = fz_load_page(ctx, doc, number); |
363 | fz_try(ctx) |
364 | count = fz_search_page(ctx, page, needle, hit_bbox, hit_max); |
365 | fz_always(ctx) |
366 | fz_drop_page(ctx, page); |
367 | fz_catch(ctx) |
368 | fz_rethrow(ctx); |
369 | return count; |
370 | } |
371 | |
372 | /* |
373 | Convert structured text into plain text. |
374 | */ |
375 | fz_buffer * |
376 | fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *page) |
377 | { |
378 | fz_stext_block *block; |
379 | fz_stext_line *line; |
380 | fz_stext_char *ch; |
381 | fz_buffer *buf; |
382 | |
383 | buf = fz_new_buffer(ctx, 256); |
384 | fz_try(ctx) |
385 | { |
386 | for (block = page->first_block; block; block = block->next) |
387 | { |
388 | if (block->type == FZ_STEXT_BLOCK_TEXT) |
389 | { |
390 | for (line = block->u.t.first_line; line; line = line->next) |
391 | { |
392 | for (ch = line->first_char; ch; ch = ch->next) |
393 | fz_append_rune(ctx, buf, ch->c); |
394 | fz_append_byte(ctx, buf, '\n'); |
395 | } |
396 | fz_append_byte(ctx, buf, '\n'); |
397 | } |
398 | } |
399 | } |
400 | fz_catch(ctx) |
401 | { |
402 | fz_drop_buffer(ctx, buf); |
403 | fz_rethrow(ctx); |
404 | } |
405 | |
406 | return buf; |
407 | } |
408 | |
409 | fz_buffer * |
410 | fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_stext_options *options) |
411 | { |
412 | fz_stext_page *text; |
413 | fz_buffer *buf = NULL; |
414 | |
415 | text = fz_new_stext_page_from_display_list(ctx, list, options); |
416 | fz_try(ctx) |
417 | buf = fz_new_buffer_from_stext_page(ctx, text); |
418 | fz_always(ctx) |
419 | fz_drop_stext_page(ctx, text); |
420 | fz_catch(ctx) |
421 | fz_rethrow(ctx); |
422 | return buf; |
423 | } |
424 | |
425 | fz_buffer * |
426 | fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_stext_options *options) |
427 | { |
428 | fz_stext_page *text; |
429 | fz_buffer *buf = NULL; |
430 | |
431 | text = fz_new_stext_page_from_page(ctx, page, options); |
432 | fz_try(ctx) |
433 | buf = fz_new_buffer_from_stext_page(ctx, text); |
434 | fz_always(ctx) |
435 | fz_drop_stext_page(ctx, text); |
436 | fz_catch(ctx) |
437 | fz_rethrow(ctx); |
438 | return buf; |
439 | } |
440 | |
441 | fz_buffer * |
442 | fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_stext_options *options) |
443 | { |
444 | fz_page *page; |
445 | fz_buffer *buf = NULL; |
446 | |
447 | page = fz_load_page(ctx, doc, number); |
448 | fz_try(ctx) |
449 | buf = fz_new_buffer_from_page(ctx, page, options); |
450 | fz_always(ctx) |
451 | fz_drop_page(ctx, page); |
452 | fz_catch(ctx) |
453 | fz_rethrow(ctx); |
454 | return buf; |
455 | } |
456 | |
457 | /* |
458 | Write image as a data URI (for HTML and SVG output). |
459 | */ |
460 | void |
461 | fz_write_image_as_data_uri(fz_context *ctx, fz_output *out, fz_image *image) |
462 | { |
463 | fz_compressed_buffer *cbuf; |
464 | fz_buffer *buf; |
465 | |
466 | cbuf = fz_compressed_image_buffer(ctx, image); |
467 | |
468 | if (cbuf && cbuf->params.type == FZ_IMAGE_JPEG) |
469 | { |
470 | int type = fz_colorspace_type(ctx, image->colorspace); |
471 | if (type == FZ_COLORSPACE_GRAY || type == FZ_COLORSPACE_RGB) |
472 | { |
473 | fz_write_string(ctx, out, "data:image/jpeg;base64," ); |
474 | fz_write_base64_buffer(ctx, out, cbuf->buffer, 1); |
475 | return; |
476 | } |
477 | } |
478 | if (cbuf && cbuf->params.type == FZ_IMAGE_PNG) |
479 | { |
480 | fz_write_string(ctx, out, "data:image/png;base64," ); |
481 | fz_write_base64_buffer(ctx, out, cbuf->buffer, 1); |
482 | return; |
483 | } |
484 | |
485 | buf = fz_new_buffer_from_image_as_png(ctx, image, fz_default_color_params); |
486 | fz_try(ctx) |
487 | { |
488 | fz_write_string(ctx, out, "data:image/png;base64," ); |
489 | fz_write_base64_buffer(ctx, out, buf, 1); |
490 | } |
491 | fz_always(ctx) |
492 | fz_drop_buffer(ctx, buf); |
493 | fz_catch(ctx) |
494 | fz_rethrow(ctx); |
495 | } |
496 | |
497 | void |
498 | fz_write_pixmap_as_data_uri(fz_context *ctx, fz_output *out, fz_pixmap *pixmap) |
499 | { |
500 | fz_buffer *buf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params); |
501 | fz_try(ctx) |
502 | { |
503 | fz_write_string(ctx, out, "data:image/png;base64," ); |
504 | fz_write_base64_buffer(ctx, out, buf, 1); |
505 | } |
506 | fz_always(ctx) |
507 | fz_drop_buffer(ctx, buf); |
508 | fz_catch(ctx) |
509 | fz_rethrow(ctx); |
510 | } |
511 | |
512 | /* |
513 | * Use text extraction to convert the input document into XHTML, then |
514 | * open the result as a new document that can be reflowed. |
515 | */ |
516 | fz_document * |
517 | fz_new_xhtml_document_from_document(fz_context *ctx, fz_document *old_doc) |
518 | { |
519 | fz_stext_options opts = { FZ_STEXT_PRESERVE_IMAGES }; |
520 | fz_document *new_doc; |
521 | fz_buffer *buf = NULL; |
522 | fz_output *out = NULL; |
523 | fz_stream *stm = NULL; |
524 | fz_stext_page *text = NULL; |
525 | int i; |
526 | |
527 | fz_var(buf); |
528 | fz_var(out); |
529 | fz_var(stm); |
530 | fz_var(text); |
531 | |
532 | fz_try(ctx) |
533 | { |
534 | buf = fz_new_buffer(ctx, 8192); |
535 | out = fz_new_output_with_buffer(ctx, buf); |
536 | fz_print_stext_header_as_xhtml(ctx, out); |
537 | |
538 | for (i = 0; i < fz_count_pages(ctx, old_doc); ++i) |
539 | { |
540 | text = fz_new_stext_page_from_page_number(ctx, old_doc, i, &opts); |
541 | fz_print_stext_page_as_xhtml(ctx, out, text, i+1); |
542 | fz_drop_stext_page(ctx, text); |
543 | text = NULL; |
544 | } |
545 | |
546 | fz_print_stext_trailer_as_xhtml(ctx, out); |
547 | fz_close_output(ctx, out); |
548 | fz_terminate_buffer(ctx, buf); |
549 | |
550 | stm = fz_open_buffer(ctx, buf); |
551 | new_doc = fz_open_document_with_stream(ctx, "application/html+xml" , stm); |
552 | } |
553 | fz_always(ctx) |
554 | { |
555 | fz_drop_stream(ctx, stm); |
556 | fz_drop_buffer(ctx, buf); |
557 | fz_drop_output(ctx, out); |
558 | fz_drop_stext_page(ctx, text); |
559 | } |
560 | fz_catch(ctx) |
561 | fz_rethrow(ctx); |
562 | |
563 | return new_doc; |
564 | } |
565 | |