1 | #include "fitz-imp.h" |
2 | |
/*
	Vertical offset constants. Neither is referenced in the part of this
	file visible here; presumably they are fractions of the font size used
	to shift sub- and superscripts -- TODO confirm against other users.
	The negative value is parenthesised so it expands as a single operand.
*/
#define SUBSCRIPT_OFFSET (0.2f)
#define SUPERSCRIPT_OFFSET (-0.2f)
5 | |
6 | #include <ft2build.h> |
7 | #include FT_FREETYPE_H |
8 | |
9 | /* HTML output (visual formatting with preserved layout) */ |
10 | |
11 | static int |
12 | detect_super_script(fz_stext_line *line, fz_stext_char *ch) |
13 | { |
14 | if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0) |
15 | return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f; |
16 | return 0; |
17 | } |
18 | |
19 | static const char * |
20 | font_full_name(fz_context *ctx, fz_font *font) |
21 | { |
22 | const char *name = fz_font_name(ctx, font); |
23 | const char *s = strchr(name, '+'); |
24 | return s ? s + 1 : name; |
25 | } |
26 | |
27 | static void |
28 | font_family_name(fz_context *ctx, fz_font *font, char *buf, int size, int is_mono, int is_serif) |
29 | { |
30 | const char *name = font_full_name(ctx, font); |
31 | char *s; |
32 | fz_strlcpy(buf, name, size); |
33 | s = strrchr(buf, '-'); |
34 | if (s) |
35 | *s = 0; |
36 | if (is_mono) |
37 | fz_strlcat(buf, ",monospace" , size); |
38 | else |
39 | fz_strlcat(buf, is_serif ? ",serif" : ",sans-serif" , size); |
40 | } |
41 | |
42 | static void |
43 | fz_print_style_begin_html(fz_context *ctx, fz_output *out, fz_font *font, float size, int sup, int color) |
44 | { |
45 | char family[80]; |
46 | |
47 | int is_bold = fz_font_is_bold(ctx, font); |
48 | int is_italic = fz_font_is_italic(ctx, font); |
49 | int is_serif = fz_font_is_serif(ctx, font); |
50 | int is_mono = fz_font_is_monospaced(ctx, font); |
51 | |
52 | font_family_name(ctx, font, family, sizeof family, is_mono, is_serif); |
53 | |
54 | if (sup) fz_write_string(ctx, out, "<sup>" ); |
55 | if (is_mono) fz_write_string(ctx, out, "<tt>" ); |
56 | if (is_bold) fz_write_string(ctx, out, "<b>" ); |
57 | if (is_italic) fz_write_string(ctx, out, "<i>" ); |
58 | fz_write_printf(ctx, out, "<span style=\"font-family:%s;font-size:%gpt" , family, size); |
59 | if (color != 0) |
60 | fz_write_printf(ctx, out, ";color:#%06x" , color); |
61 | fz_write_printf(ctx, out, "\">" ); |
62 | } |
63 | |
64 | static void |
65 | fz_print_style_end_html(fz_context *ctx, fz_output *out, fz_font *font, float size, int sup) |
66 | { |
67 | int is_mono = fz_font_is_monospaced(ctx, font); |
68 | int is_bold = fz_font_is_bold(ctx,font); |
69 | int is_italic = fz_font_is_italic(ctx, font); |
70 | |
71 | fz_write_string(ctx, out, "</span>" ); |
72 | if (is_italic) fz_write_string(ctx, out, "</i>" ); |
73 | if (is_bold) fz_write_string(ctx, out, "</b>" ); |
74 | if (is_mono) fz_write_string(ctx, out, "</tt>" ); |
75 | if (sup) fz_write_string(ctx, out, "</sup>" ); |
76 | } |
77 | |
78 | static void |
79 | fz_print_stext_image_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block) |
80 | { |
81 | int x = block->bbox.x0; |
82 | int y = block->bbox.y0; |
83 | int w = block->bbox.x1 - block->bbox.x0; |
84 | int h = block->bbox.y1 - block->bbox.y0; |
85 | |
86 | fz_write_printf(ctx, out, "<img style=\"position:absolute;top:%dpt;left:%dpt;width:%dpt;height:%dpt\" src=\"" , y, x, w, h); |
87 | fz_write_image_as_data_uri(ctx, out, block->u.i.image); |
88 | fz_write_string(ctx, out, "\">\n" ); |
89 | } |
90 | |
91 | void |
92 | fz_print_stext_block_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block) |
93 | { |
94 | fz_stext_line *line; |
95 | fz_stext_char *ch; |
96 | int x, y; |
97 | |
98 | fz_font *font = NULL; |
99 | float size = 0; |
100 | int sup = 0; |
101 | int color = 0; |
102 | |
103 | for (line = block->u.t.first_line; line; line = line->next) |
104 | { |
105 | x = line->bbox.x0; |
106 | y = line->bbox.y0; |
107 | |
108 | fz_write_printf(ctx, out, "<p style=\"position:absolute;white-space:pre;margin:0;padding:0;top:%dpt;left:%dpt\">" , y, x); |
109 | font = NULL; |
110 | |
111 | for (ch = line->first_char; ch; ch = ch->next) |
112 | { |
113 | int ch_sup = detect_super_script(line, ch); |
114 | if (ch->font != font || ch->size != size || ch_sup != sup || ch->color != color) |
115 | { |
116 | if (font) |
117 | fz_print_style_end_html(ctx, out, font, size, sup); |
118 | font = ch->font; |
119 | size = ch->size; |
120 | color = ch->color; |
121 | sup = ch_sup; |
122 | fz_print_style_begin_html(ctx, out, font, size, sup, color); |
123 | } |
124 | |
125 | switch (ch->c) |
126 | { |
127 | default: |
128 | if (ch->c >= 32 && ch->c <= 127) |
129 | fz_write_byte(ctx, out, ch->c); |
130 | else |
131 | fz_write_printf(ctx, out, "&#x%x;" , ch->c); |
132 | break; |
133 | case '<': fz_write_string(ctx, out, "<" ); break; |
134 | case '>': fz_write_string(ctx, out, ">" ); break; |
135 | case '&': fz_write_string(ctx, out, "&" ); break; |
136 | case '"': fz_write_string(ctx, out, """ ); break; |
137 | case '\'': fz_write_string(ctx, out, "'" ); break; |
138 | } |
139 | } |
140 | |
141 | if (font) |
142 | fz_print_style_end_html(ctx, out, font, size, sup); |
143 | |
144 | fz_write_string(ctx, out, "</p>\n" ); |
145 | } |
146 | } |
147 | |
148 | /* |
149 | Output a page to a file in HTML (visual) format. |
150 | */ |
151 | void |
152 | fz_print_stext_page_as_html(fz_context *ctx, fz_output *out, fz_stext_page *page, int id) |
153 | { |
154 | fz_stext_block *block; |
155 | |
156 | int w = page->mediabox.x1 - page->mediabox.x0; |
157 | int h = page->mediabox.y1 - page->mediabox.y0; |
158 | |
159 | fz_write_printf(ctx, out, "<div id=\"page%d\" style=\"position:relative;width:%dpt;height:%dpt;background-color:white\">\n" , id, w, h); |
160 | |
161 | for (block = page->first_block; block; block = block->next) |
162 | { |
163 | if (block->type == FZ_STEXT_BLOCK_IMAGE) |
164 | fz_print_stext_image_as_html(ctx, out, block); |
165 | else if (block->type == FZ_STEXT_BLOCK_TEXT) |
166 | fz_print_stext_block_as_html(ctx, out, block); |
167 | } |
168 | |
169 | fz_write_string(ctx, out, "</div>\n" ); |
170 | } |
171 | |
172 | void |
173 | (fz_context *ctx, fz_output *out) |
174 | { |
175 | fz_write_string(ctx, out, "<!DOCTYPE html>\n" ); |
176 | fz_write_string(ctx, out, "<html>\n" ); |
177 | fz_write_string(ctx, out, "<head>\n" ); |
178 | fz_write_string(ctx, out, "<style>\n" ); |
179 | fz_write_string(ctx, out, "body{background-color:gray}\n" ); |
180 | fz_write_string(ctx, out, "div{margin:1em auto}\n" ); |
181 | fz_write_string(ctx, out, "</style>\n" ); |
182 | fz_write_string(ctx, out, "</head>\n" ); |
183 | fz_write_string(ctx, out, "<body>\n" ); |
184 | } |
185 | |
186 | void |
187 | fz_print_stext_trailer_as_html(fz_context *ctx, fz_output *out) |
188 | { |
189 | fz_write_string(ctx, out, "</body>\n" ); |
190 | fz_write_string(ctx, out, "</html>\n" ); |
191 | } |
192 | |
193 | /* XHTML output (semantic, little layout, suitable for reflow) */ |
194 | |
195 | static void |
196 | fz_print_stext_image_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block) |
197 | { |
198 | int w = block->bbox.x1 - block->bbox.x0; |
199 | int h = block->bbox.y1 - block->bbox.y0; |
200 | |
201 | fz_write_printf(ctx, out, "<p><img width=\"%d\" height=\"%d\" src=\"" , w, h); |
202 | fz_write_image_as_data_uri(ctx, out, block->u.i.image); |
203 | fz_write_string(ctx, out, "\"/></p>\n" ); |
204 | } |
205 | |
206 | static void |
207 | fz_print_style_begin_xhtml(fz_context *ctx, fz_output *out, fz_font *font, int sup) |
208 | { |
209 | int is_mono = fz_font_is_monospaced(ctx, font); |
210 | int is_bold = fz_font_is_bold(ctx, font); |
211 | int is_italic = fz_font_is_italic(ctx, font); |
212 | |
213 | if (sup) |
214 | fz_write_string(ctx, out, "<sup>" ); |
215 | if (is_mono) |
216 | fz_write_string(ctx, out, "<tt>" ); |
217 | if (is_bold) |
218 | fz_write_string(ctx, out, "<b>" ); |
219 | if (is_italic) |
220 | fz_write_string(ctx, out, "<i>" ); |
221 | } |
222 | |
223 | static void |
224 | fz_print_style_end_xhtml(fz_context *ctx, fz_output *out, fz_font *font, int sup) |
225 | { |
226 | int is_mono = fz_font_is_monospaced(ctx, font); |
227 | int is_bold = fz_font_is_bold(ctx, font); |
228 | int is_italic = fz_font_is_italic(ctx, font); |
229 | |
230 | if (is_italic) |
231 | fz_write_string(ctx, out, "</i>" ); |
232 | if (is_bold) |
233 | fz_write_string(ctx, out, "</b>" ); |
234 | if (is_mono) |
235 | fz_write_string(ctx, out, "</tt>" ); |
236 | if (sup) |
237 | fz_write_string(ctx, out, "</sup>" ); |
238 | } |
239 | |
240 | static float avg_font_size_of_line(fz_stext_char *ch) |
241 | { |
242 | float size = 0; |
243 | int n = 0; |
244 | if (!ch) |
245 | return 0; |
246 | while (ch) |
247 | { |
248 | size += ch->size; |
249 | ++n; |
250 | ch = ch->next; |
251 | } |
252 | return size / n; |
253 | } |
254 | |
/*
	Map a font size to the XHTML element used for text of that size:
	"h1" for sizes of at least 20, "h2" for at least 15, "h3" for at
	least 12, and "p" for everything else.

	The same pointer is returned every time a given tag is selected.
*/
static const char *tag_from_font_size(float size)
{
	/* Thresholds in descending order; the first one reached wins. */
	static const struct
	{
		float min_size;
		const char *tag;
	} headings[] = {
		{ 20, "h1" },
		{ 15, "h2" },
		{ 12, "h3" },
	};
	int i;

	for (i = 0; i < (int)(sizeof headings / sizeof headings[0]); i++)
	{
		if (size >= headings[i].min_size)
			return headings[i].tag;
	}
	return "p";
}
262 | |
263 | static void fz_print_stext_block_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block) |
264 | { |
265 | fz_stext_line *line; |
266 | fz_stext_char *ch; |
267 | |
268 | fz_font *font = NULL; |
269 | int sup = 0; |
270 | int sp = 1; |
271 | const char *tag = NULL; |
272 | const char *new_tag; |
273 | |
274 | for (line = block->u.t.first_line; line; line = line->next) |
275 | { |
276 | new_tag = tag_from_font_size(avg_font_size_of_line(line->first_char)); |
277 | if (tag != new_tag) |
278 | { |
279 | if (tag) |
280 | { |
281 | if (font) |
282 | fz_print_style_end_xhtml(ctx, out, font, sup); |
283 | fz_write_printf(ctx, out, "</%s>" , tag); |
284 | } |
285 | tag = new_tag; |
286 | fz_write_printf(ctx, out, "<%s>" , tag); |
287 | if (font) |
288 | fz_print_style_begin_xhtml(ctx, out, font, sup); |
289 | } |
290 | |
291 | if (!sp) |
292 | fz_write_byte(ctx, out, ' '); |
293 | |
294 | for (ch = line->first_char; ch; ch = ch->next) |
295 | { |
296 | int ch_sup = detect_super_script(line, ch); |
297 | if (ch->font != font || ch_sup != sup) |
298 | { |
299 | if (font) |
300 | fz_print_style_end_xhtml(ctx, out, font, sup); |
301 | font = ch->font; |
302 | sup = ch_sup; |
303 | fz_print_style_begin_xhtml(ctx, out, font, sup); |
304 | } |
305 | |
306 | sp = (ch->c == ' '); |
307 | switch (ch->c) |
308 | { |
309 | default: |
310 | if (ch->c >= 32 && ch->c <= 127) |
311 | fz_write_byte(ctx, out, ch->c); |
312 | else |
313 | fz_write_printf(ctx, out, "&#x%x;" , ch->c); |
314 | break; |
315 | case '<': fz_write_string(ctx, out, "<" ); break; |
316 | case '>': fz_write_string(ctx, out, ">" ); break; |
317 | case '&': fz_write_string(ctx, out, "&" ); break; |
318 | case '"': fz_write_string(ctx, out, """ ); break; |
319 | case '\'': fz_write_string(ctx, out, "'" ); break; |
320 | } |
321 | } |
322 | } |
323 | |
324 | if (font) |
325 | fz_print_style_end_xhtml(ctx, out, font, sup); |
326 | fz_write_printf(ctx, out, "</%s>\n" , tag); |
327 | } |
328 | |
329 | /* |
330 | Output a page to a file in XHTML (semantic) format. |
331 | */ |
332 | void |
333 | fz_print_stext_page_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id) |
334 | { |
335 | fz_stext_block *block; |
336 | |
337 | fz_write_printf(ctx, out, "<div id=\"page%d\">\n" , id); |
338 | |
339 | for (block = page->first_block; block; block = block->next) |
340 | { |
341 | if (block->type == FZ_STEXT_BLOCK_IMAGE) |
342 | fz_print_stext_image_as_xhtml(ctx, out, block); |
343 | else if (block->type == FZ_STEXT_BLOCK_TEXT) |
344 | fz_print_stext_block_as_xhtml(ctx, out, block); |
345 | } |
346 | |
347 | fz_write_string(ctx, out, "</div>\n" ); |
348 | } |
349 | |
350 | void |
351 | (fz_context *ctx, fz_output *out) |
352 | { |
353 | fz_write_string(ctx, out, "<?xml version=\"1.0\"?>\n" ); |
354 | fz_write_string(ctx, out, "<!DOCTYPE html" ); |
355 | fz_write_string(ctx, out, " PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"" ); |
356 | fz_write_string(ctx, out, " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ); |
357 | fz_write_string(ctx, out, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" ); |
358 | fz_write_string(ctx, out, "<head>\n" ); |
359 | fz_write_string(ctx, out, "<style>\n" ); |
360 | fz_write_string(ctx, out, "p{white-space:pre-wrap}\n" ); |
361 | fz_write_string(ctx, out, "</style>\n" ); |
362 | fz_write_string(ctx, out, "</head>\n" ); |
363 | fz_write_string(ctx, out, "<body>\n" ); |
364 | } |
365 | |
366 | void |
367 | fz_print_stext_trailer_as_xhtml(fz_context *ctx, fz_output *out) |
368 | { |
369 | fz_write_string(ctx, out, "</body>\n" ); |
370 | fz_write_string(ctx, out, "</html>\n" ); |
371 | } |
372 | |
373 | /* Detailed XML dump of the entire structured text data */ |
374 | |
375 | /* |
376 | Output a page to a file in XML format. |
377 | */ |
378 | void |
379 | fz_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id) |
380 | { |
381 | fz_stext_block *block; |
382 | fz_stext_line *line; |
383 | fz_stext_char *ch; |
384 | |
385 | fz_write_printf(ctx, out, "<page id=\"page%d\" width=\"%g\" height=\"%g\">\n" , id, |
386 | page->mediabox.x1 - page->mediabox.x0, |
387 | page->mediabox.y1 - page->mediabox.y0); |
388 | |
389 | for (block = page->first_block; block; block = block->next) |
390 | { |
391 | switch (block->type) |
392 | { |
393 | case FZ_STEXT_BLOCK_TEXT: |
394 | fz_write_printf(ctx, out, "<block bbox=\"%g %g %g %g\">\n" , |
395 | block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1); |
396 | for (line = block->u.t.first_line; line; line = line->next) |
397 | { |
398 | fz_font *font = NULL; |
399 | float size = 0; |
400 | const char *name = NULL; |
401 | |
402 | fz_write_printf(ctx, out, "<line bbox=\"%g %g %g %g\" wmode=\"%d\" dir=\"%g %g\">\n" , |
403 | line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1, |
404 | line->wmode, |
405 | line->dir.x, line->dir.y); |
406 | |
407 | for (ch = line->first_char; ch; ch = ch->next) |
408 | { |
409 | if (ch->font != font || ch->size != size) |
410 | { |
411 | if (font) |
412 | fz_write_string(ctx, out, "</font>\n" ); |
413 | font = ch->font; |
414 | size = ch->size; |
415 | name = font_full_name(ctx, font); |
416 | fz_write_printf(ctx, out, "<font name=\"%s\" size=\"%g\">\n" , name, size); |
417 | } |
418 | fz_write_printf(ctx, out, "<char quad=\"%g %g %g %g %g %g %g %g\" x=\"%g\" y=\"%g\" color=\"#%06x\" c=\"" , |
419 | ch->quad.ul.x, ch->quad.ul.y, |
420 | ch->quad.ur.x, ch->quad.ur.y, |
421 | ch->quad.ll.x, ch->quad.ll.y, |
422 | ch->quad.lr.x, ch->quad.lr.y, |
423 | ch->origin.x, ch->origin.y, |
424 | ch->color); |
425 | switch (ch->c) |
426 | { |
427 | case '<': fz_write_string(ctx, out, "<" ); break; |
428 | case '>': fz_write_string(ctx, out, ">" ); break; |
429 | case '&': fz_write_string(ctx, out, "&" ); break; |
430 | case '"': fz_write_string(ctx, out, """ ); break; |
431 | case '\'': fz_write_string(ctx, out, "'" ); break; |
432 | default: |
433 | if (ch->c >= 32 && ch->c <= 127) |
434 | fz_write_printf(ctx, out, "%c" , ch->c); |
435 | else |
436 | fz_write_printf(ctx, out, "&#x%x;" , ch->c); |
437 | break; |
438 | } |
439 | fz_write_string(ctx, out, "\"/>\n" ); |
440 | } |
441 | |
442 | if (font) |
443 | fz_write_string(ctx, out, "</font>\n" ); |
444 | |
445 | fz_write_string(ctx, out, "</line>\n" ); |
446 | } |
447 | fz_write_string(ctx, out, "</block>\n" ); |
448 | break; |
449 | |
450 | case FZ_STEXT_BLOCK_IMAGE: |
451 | fz_write_printf(ctx, out, "<image bbox=\"%g %g %g %g\" />\n" , |
452 | block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1); |
453 | break; |
454 | } |
455 | } |
456 | fz_write_string(ctx, out, "</page>\n" ); |
457 | } |
458 | |
459 | /* Plain text */ |
460 | |
461 | /* |
462 | Output a page to a file in UTF-8 format. |
463 | */ |
464 | void |
465 | fz_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page) |
466 | { |
467 | fz_stext_block *block; |
468 | fz_stext_line *line; |
469 | fz_stext_char *ch; |
470 | char utf[10]; |
471 | int i, n; |
472 | |
473 | for (block = page->first_block; block; block = block->next) |
474 | { |
475 | if (block->type == FZ_STEXT_BLOCK_TEXT) |
476 | { |
477 | for (line = block->u.t.first_line; line; line = line->next) |
478 | { |
479 | for (ch = line->first_char; ch; ch = ch->next) |
480 | { |
481 | n = fz_runetochar(utf, ch->c); |
482 | for (i = 0; i < n; i++) |
483 | fz_write_byte(ctx, out, utf[i]); |
484 | } |
485 | fz_write_string(ctx, out, "\n" ); |
486 | } |
487 | fz_write_string(ctx, out, "\n" ); |
488 | } |
489 | } |
490 | } |
491 | |
492 | /* Text output writer */ |
493 | |
/* Output formats supported by the text document writer. */
enum
{
	FZ_FORMAT_TEXT = 0,	/* plain UTF-8 text */
	FZ_FORMAT_HTML = 1,	/* HTML with preserved layout */
	FZ_FORMAT_XHTML = 2,	/* semantic XHTML */
	FZ_FORMAT_STEXT = 3	/* XML dump of the structured text */
};
500 | |
501 | typedef struct fz_text_writer_s fz_text_writer; |
502 | |
503 | struct fz_text_writer_s |
504 | { |
505 | fz_document_writer super; |
506 | int format; |
507 | int number; |
508 | fz_stext_options opts; |
509 | fz_stext_page *page; |
510 | fz_output *out; |
511 | }; |
512 | |
513 | static fz_device * |
514 | text_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox) |
515 | { |
516 | fz_text_writer *wri = (fz_text_writer*)wri_; |
517 | |
518 | if (wri->page) |
519 | { |
520 | fz_drop_stext_page(ctx, wri->page); |
521 | wri->page = NULL; |
522 | } |
523 | |
524 | wri->number++; |
525 | |
526 | wri->page = fz_new_stext_page(ctx, mediabox); |
527 | return fz_new_stext_device(ctx, wri->page, &wri->opts); |
528 | } |
529 | |
530 | static void |
531 | text_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev) |
532 | { |
533 | fz_text_writer *wri = (fz_text_writer*)wri_; |
534 | |
535 | fz_try(ctx) |
536 | { |
537 | fz_close_device(ctx, dev); |
538 | switch (wri->format) |
539 | { |
540 | default: |
541 | case FZ_FORMAT_TEXT: |
542 | fz_print_stext_page_as_text(ctx, wri->out, wri->page); |
543 | break; |
544 | case FZ_FORMAT_HTML: |
545 | fz_print_stext_page_as_html(ctx, wri->out, wri->page, wri->number); |
546 | break; |
547 | case FZ_FORMAT_XHTML: |
548 | fz_print_stext_page_as_xhtml(ctx, wri->out, wri->page, wri->number); |
549 | break; |
550 | case FZ_FORMAT_STEXT: |
551 | fz_print_stext_page_as_xml(ctx, wri->out, wri->page, wri->number); |
552 | break; |
553 | } |
554 | } |
555 | fz_always(ctx) |
556 | { |
557 | fz_drop_device(ctx, dev); |
558 | fz_drop_stext_page(ctx, wri->page); |
559 | wri->page = NULL; |
560 | } |
561 | fz_catch(ctx) |
562 | fz_rethrow(ctx); |
563 | } |
564 | |
565 | static void |
566 | text_close_writer(fz_context *ctx, fz_document_writer *wri_) |
567 | { |
568 | fz_text_writer *wri = (fz_text_writer*)wri_; |
569 | switch (wri->format) |
570 | { |
571 | case FZ_FORMAT_HTML: |
572 | fz_print_stext_trailer_as_html(ctx, wri->out); |
573 | break; |
574 | case FZ_FORMAT_XHTML: |
575 | fz_print_stext_trailer_as_xhtml(ctx, wri->out); |
576 | break; |
577 | case FZ_FORMAT_STEXT: |
578 | fz_write_string(ctx, wri->out, "</document>\n" ); |
579 | break; |
580 | } |
581 | fz_close_output(ctx, wri->out); |
582 | } |
583 | |
584 | static void |
585 | text_drop_writer(fz_context *ctx, fz_document_writer *wri_) |
586 | { |
587 | fz_text_writer *wri = (fz_text_writer*)wri_; |
588 | fz_drop_stext_page(ctx, wri->page); |
589 | fz_drop_output(ctx, wri->out); |
590 | } |
591 | |
592 | fz_document_writer * |
593 | fz_new_text_writer(fz_context *ctx, const char *format, const char *path, const char *args) |
594 | { |
595 | fz_text_writer *wri; |
596 | |
597 | wri = fz_new_derived_document_writer(ctx, fz_text_writer, text_begin_page, text_end_page, text_close_writer, text_drop_writer); |
598 | fz_try(ctx) |
599 | { |
600 | fz_parse_stext_options(ctx, &wri->opts, args); |
601 | |
602 | wri->format = FZ_FORMAT_TEXT; |
603 | if (!strcmp(format, "text" )) |
604 | wri->format = FZ_FORMAT_TEXT; |
605 | else if (!strcmp(format, "html" )) |
606 | wri->format = FZ_FORMAT_HTML; |
607 | else if (!strcmp(format, "xhtml" )) |
608 | wri->format = FZ_FORMAT_XHTML; |
609 | else if (!strcmp(format, "stext" )) |
610 | wri->format = FZ_FORMAT_STEXT; |
611 | |
612 | wri->out = fz_new_output_with_path(ctx, path ? path : "out.txt" , 0); |
613 | |
614 | switch (wri->format) |
615 | { |
616 | case FZ_FORMAT_HTML: |
617 | fz_print_stext_header_as_html(ctx, wri->out); |
618 | break; |
619 | case FZ_FORMAT_XHTML: |
620 | fz_print_stext_header_as_xhtml(ctx, wri->out); |
621 | break; |
622 | case FZ_FORMAT_STEXT: |
623 | fz_write_string(ctx, wri->out, "<?xml version=\"1.0\"?>\n" ); |
624 | fz_write_string(ctx, wri->out, "<document>\n" ); |
625 | break; |
626 | } |
627 | } |
628 | fz_catch(ctx) |
629 | { |
630 | fz_drop_output(ctx, wri->out); |
631 | fz_free(ctx, wri); |
632 | fz_rethrow(ctx); |
633 | } |
634 | |
635 | return (fz_document_writer*)wri; |
636 | } |
637 | |