1#include "mupdf/fitz.h"
2#include "mupdf/ucdn.h"
3
4#include <math.h>
5#include <float.h>
6#include <string.h>
7
8/* Simple layout structure */
9
10fz_layout_block *fz_new_layout(fz_context *ctx)
11{
12 fz_pool *pool = fz_new_pool(ctx);
13 fz_layout_block *block;
14 fz_try(ctx)
15 {
16 block = fz_pool_alloc(ctx, pool, sizeof (fz_layout_block));
17 block->pool = pool;
18 block->head = NULL;
19 block->tailp = &block->head;
20 }
21 fz_catch(ctx)
22 {
23 fz_drop_pool(ctx, pool);
24 fz_rethrow(ctx);
25 }
26 return block;
27}
28
29void fz_drop_layout(fz_context *ctx, fz_layout_block *block)
30{
31 if (block)
32 fz_drop_pool(ctx, block->pool);
33}
34
35void fz_add_layout_line(fz_context *ctx, fz_layout_block *block, float x, float y, float h, const char *p)
36{
37 fz_layout_line *line = fz_pool_alloc(ctx, block->pool, sizeof (fz_layout_line));
38 line->x = x;
39 line->y = y;
40 line->h = h;
41 line->p = p;
42 line->text = NULL;
43 line->next = NULL;
44 *block->tailp = line;
45 block->tailp = &line->next;
46 block->text_tailp = &line->text;
47}
48
49void fz_add_layout_char(fz_context *ctx, fz_layout_block *block, float x, float w, const char *p)
50{
51 fz_layout_char *ch = fz_pool_alloc(ctx, block->pool, sizeof (fz_layout_char));
52 ch->x = x;
53 ch->w = w;
54 ch->p = p;
55 ch->next = NULL;
56 *block->text_tailp = ch;
57 block->text_tailp = &ch->next;
58}
59
60/* Extract text into blocks and lines. */
61
/*
	Layout thresholds, all expressed as multiples of the text size
	(the expansion of the glyph transform).
*/

/* Largest offset from the previous baseline that still counts as a new line rather than a new paragraph. */
#define PARAGRAPH_DIST 1.5f
/* Gaps along the baseline smaller than this are ignored; larger forward gaps get a synthetic space. */
#define SPACE_DIST 0.15f
/* Gaps along the baseline larger than this (in either direction) split the text onto a new line. */
#define SPACE_MAX_DIST 0.8f
65
/* Device that collects drawn text (and optionally images) into an fz_stext_page. */
typedef struct fz_stext_device_s fz_stext_device;

struct fz_stext_device_s
{
	fz_device super; /* base device; kept first because callbacks cast fz_device* to fz_stext_device* */
	fz_stext_page *page; /* page that extracted blocks, lines and characters are added to */
	fz_point pen, start; /* pen: end point of the last positioned character; start: origin of the first character on the current line */
	fz_matrix trm; /* transform of the last positioned character */
	int new_obj; /* set to 1 when a text object starts, cleared once a positioned character has been added */
	int curdir; /* current bidi direction: 1 for LTR, -1 for RTL, 0 for unsupported bidi classes */
	int lastchar; /* last character code added */
	int flags; /* option flags copied from fz_stext_options */
	int color; /* color of the text being extracted, packed as 0xRRGGBB (0 for clip and ignored text) */
	const fz_text *lasttext; /* kept reference to the most recently processed text object, used to skip immediate repeats */
};
81
/*
	Human-readable summary of the option names understood by
	fz_parse_stext_options.
*/
const char *fz_stext_options_usage =
	"Text output options:\n"
	"\tinhibit-spaces: don't add spaces between gaps in the text\n"
	"\tpreserve-images: keep images in output\n"
	"\tpreserve-ligatures: do not expand ligatures into constituent characters\n"
	"\tpreserve-whitespace: do not convert all whitespace into space characters\n"
	"\n";
89
90/*
91 Create an empty text page.
92
93 The text page is filled out by the text device to contain the blocks
94 and lines of text on the page.
95
96 mediabox: optional mediabox information.
97*/
98fz_stext_page *
99fz_new_stext_page(fz_context *ctx, fz_rect mediabox)
100{
101 fz_pool *pool = fz_new_pool(ctx);
102 fz_stext_page *page = NULL;
103 fz_try(ctx)
104 {
105 page = fz_pool_alloc(ctx, pool, sizeof(*page));
106 page->pool = pool;
107 page->mediabox = mediabox;
108 page->first_block = NULL;
109 page->last_block = NULL;
110 }
111 fz_catch(ctx)
112 {
113 fz_drop_pool(ctx, pool);
114 fz_rethrow(ctx);
115 }
116 return page;
117}
118
119void
120fz_drop_stext_page(fz_context *ctx, fz_stext_page *page)
121{
122 if (page)
123 {
124 fz_stext_block *block;
125 for (block = page->first_block; block; block = block->next)
126 if (block->type == FZ_STEXT_BLOCK_IMAGE)
127 fz_drop_image(ctx, block->u.i.image);
128 fz_drop_pool(ctx, page->pool);
129 }
130}
131
132static fz_stext_block *
133add_block_to_page(fz_context *ctx, fz_stext_page *page)
134{
135 fz_stext_block *block = fz_pool_alloc(ctx, page->pool, sizeof *page->first_block);
136 block->prev = page->last_block;
137 if (!page->first_block)
138 page->first_block = page->last_block = block;
139 else
140 {
141 page->last_block->next = block;
142 page->last_block = block;
143 }
144 return block;
145}
146
147static fz_stext_block *
148add_text_block_to_page(fz_context *ctx, fz_stext_page *page)
149{
150 fz_stext_block *block = add_block_to_page(ctx, page);
151 block->type = FZ_STEXT_BLOCK_TEXT;
152 return block;
153}
154
155static fz_stext_block *
156add_image_block_to_page(fz_context *ctx, fz_stext_page *page, fz_matrix ctm, fz_image *image)
157{
158 fz_stext_block *block = add_block_to_page(ctx, page);
159 block->type = FZ_STEXT_BLOCK_IMAGE;
160 block->u.i.transform = ctm;
161 block->u.i.image = fz_keep_image(ctx, image);
162 block->bbox = fz_transform_rect(fz_unit_rect, ctm);
163 return block;
164}
165
166static fz_stext_line *
167add_line_to_block(fz_context *ctx, fz_stext_page *page, fz_stext_block *block, const fz_point *dir, int wmode)
168{
169 fz_stext_line *line = fz_pool_alloc(ctx, page->pool, sizeof *block->u.t.first_line);
170 line->prev = block->u.t.last_line;
171 if (!block->u.t.first_line)
172 block->u.t.first_line = block->u.t.last_line = line;
173 else
174 {
175 block->u.t.last_line->next = line;
176 block->u.t.last_line = line;
177 }
178
179 line->dir = *dir;
180 line->wmode = wmode;
181
182 return line;
183}
184
185static fz_stext_char *
186add_char_to_line(fz_context *ctx, fz_stext_page *page, fz_stext_line *line, fz_matrix trm, fz_font *font, float size, int c, fz_point *p, fz_point *q, int color)
187{
188 fz_stext_char *ch = fz_pool_alloc(ctx, page->pool, sizeof *line->first_char);
189 fz_point a, d;
190
191 if (!line->first_char)
192 line->first_char = line->last_char = ch;
193 else
194 {
195 line->last_char->next = ch;
196 line->last_char = ch;
197 }
198
199 ch->c = c;
200 ch->color = color;
201 ch->origin = *p;
202 ch->size = size;
203 ch->font = font; /* TODO: keep and drop */
204
205 if (line->wmode == 0)
206 {
207 a.x = 0;
208 d.x = 0;
209 a.y = fz_font_ascender(ctx, font);
210 d.y = fz_font_descender(ctx, font);
211 }
212 else
213 {
214 fz_rect bbox = fz_font_bbox(ctx, font);
215 a.x = bbox.x1;
216 d.x = bbox.x0;
217 a.y = 0;
218 d.y = 0;
219 }
220 a = fz_transform_vector(a, trm);
221 d = fz_transform_vector(d, trm);
222
223 ch->quad.ll = fz_make_point(p->x + d.x, p->y + d.y);
224 ch->quad.ul = fz_make_point(p->x + a.x, p->y + a.y);
225 ch->quad.lr = fz_make_point(q->x + d.x, q->y + d.y);
226 ch->quad.ur = fz_make_point(q->x + a.x, q->y + a.y);
227
228 return ch;
229}
230
231static int
232direction_from_bidi_class(int bidiclass, int curdir)
233{
234 switch (bidiclass)
235 {
236 /* strong */
237 case UCDN_BIDI_CLASS_L: return 1;
238 case UCDN_BIDI_CLASS_R: return -1;
239 case UCDN_BIDI_CLASS_AL: return -1;
240
241 /* weak */
242 case UCDN_BIDI_CLASS_EN:
243 case UCDN_BIDI_CLASS_ES:
244 case UCDN_BIDI_CLASS_ET:
245 case UCDN_BIDI_CLASS_AN:
246 case UCDN_BIDI_CLASS_CS:
247 case UCDN_BIDI_CLASS_NSM:
248 case UCDN_BIDI_CLASS_BN:
249 return curdir;
250
251 /* neutral */
252 case UCDN_BIDI_CLASS_B:
253 case UCDN_BIDI_CLASS_S:
254 case UCDN_BIDI_CLASS_WS:
255 case UCDN_BIDI_CLASS_ON:
256 return curdir;
257
258 /* embedding, override, pop ... we don't support them */
259 default:
260 return 0;
261 }
262}
263
264static float
265vec_dot(const fz_point *a, const fz_point *b)
266{
267 return a->x * b->x + a->y * b->y;
268}
269
/*
	Add one character to the page, deciding from its position whether it
	continues the current line, starts a new line, or starts a new block.

	font: font recorded on the character and used for its quad metrics.
	c: character code to store.
	glyph: glyph id; a negative value marks a character with no glyph of
		its own, which is appended at the pen without moving it.
	trm: glyph transform; e and f give the glyph origin.
	adv: advance, applied along the transformed writing direction.
	wmode: 0 for horizontal writing, otherwise vertical.

	Updates dev->curdir, and for positioned characters also dev->pen,
	dev->lastchar, dev->start (on a new line), dev->new_obj and dev->trm.
*/
static void
fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_font *font, int c, int glyph, fz_matrix trm, float adv, int wmode)
{
	fz_stext_page *page = dev->page;
	fz_stext_block *cur_block;
	fz_stext_line *cur_line;

	int new_para = 0;
	int new_line = 1;
	int add_space = 0;
	fz_point dir, ndir, p, q;
	float size;
	fz_point delta;
	float spacing = 0;
	float base_offset = 0;
	/* NOTE(review): rtl is assigned below but never read within this function. */
	int rtl = 0;

	dev->curdir = direction_from_bidi_class(ucdn_get_bidi_class(c), dev->curdir);

	/* dir = direction vector for motion. ndir = normalised(dir) */
	if (wmode == 0)
	{
		dir.x = 1;
		dir.y = 0;
	}
	else
	{
		dir.x = 0;
		dir.y = -1;
	}
	dir = fz_transform_vector(dir, trm);
	ndir = fz_normalize_vector(dir);

	/* All distance thresholds below are scaled by this size. */
	size = fz_matrix_expansion(trm);

	/* We need to identify where glyphs 'start' (p) and 'stop' (q).
	 * Each glyph holds its 'start' position, and the next glyph in the
	 * span (or span->max if there is no next glyph) holds its 'end'
	 * position.
	 *
	 * For both horizontal and vertical motion, trm->{e,f} gives the
	 * origin (usually the bottom left) of the glyph.
	 *
	 * In horizontal mode:
	 *   + p is bottom left.
	 *   + q is the bottom right
	 * In vertical mode:
	 *   + p is top left (where it advanced from)
	 *   + q is bottom left
	 */
	if (wmode == 0)
	{
		p.x = trm.e;
		p.y = trm.f;
		q.x = trm.e + adv * dir.x;
		q.y = trm.f + adv * dir.y;
	}
	else
	{
		p.x = trm.e - adv * dir.x;
		p.y = trm.f - adv * dir.y;
		q.x = trm.e;
		q.y = trm.f;
	}

	/* Find current position to enter new text. */
	/* Only a trailing text block can be appended to; a trailing image block forces a new block. */
	cur_block = page->last_block;
	if (cur_block && cur_block->type != FZ_STEXT_BLOCK_TEXT)
		cur_block = NULL;
	cur_line = cur_block ? cur_block->u.t.last_line : NULL;

	if (cur_line && glyph < 0)
	{
		/* Don't advance pen or break lines for no-glyph characters in a cluster */
		/* The character starts and ends at the pen, so it has no extent along the line. */
		add_char_to_line(ctx, page, cur_line, trm, font, size, c, &dev->pen, &dev->pen, dev->color);
		dev->lastchar = c;
		return;
	}

	if (cur_line == NULL || cur_line->wmode != wmode || vec_dot(&ndir, &cur_line->dir) < 0.999f)
	{
		/* If the matrix has changed rotation, or the wmode is different (or if we don't have a line at all),
		 * then we can't append to the current block/line. */
		new_para = 1;
		new_line = 1;
	}
	else
	{
		/* Detect fake bold where text is printed twice in the same place. */
		/* The repeat ends exactly where the previous character ended and has the same code, so it is dropped. */
		delta.x = fabsf(q.x - dev->pen.x);
		delta.y = fabsf(q.y - dev->pen.y);
		if (delta.x < FLT_EPSILON && delta.y < FLT_EPSILON && c == dev->lastchar)
			return;

		/* Calculate how far we've moved since the last character. */
		delta.x = p.x - dev->pen.x;
		delta.y = p.y - dev->pen.y;

		/* The transform has not changed, so we know we're in the same
		 * direction. Calculate 2 distances; how far off the previous
		 * baseline we are, together with how far along the baseline
		 * we are from the expected position. */
		spacing = ndir.x * delta.x + ndir.y * delta.y;
		base_offset = -ndir.y * delta.x + ndir.x * delta.y;

		/* Only a small amount off the baseline - we'll take this */
		if (fabsf(base_offset) < size * 0.8f)
		{
			/* LTR or neutral character */
			if (dev->curdir >= 0)
			{
				if (fabsf(spacing) < size * SPACE_DIST)
				{
					/* Motion is in line and small enough to ignore. */
					new_line = 0;
				}
				else if (fabsf(spacing) > size * SPACE_MAX_DIST)
				{
					/* Motion is in line and large enough to warrant splitting to a new line */
					new_line = 1;
				}
				else if (spacing < 0)
				{
					/* Motion is backward in line! Ignore this odd spacing. */
					new_line = 0;
				}
				else
				{
					/* Motion is forward in line and large enough to warrant us adding a space. */
					/* No space is added after an existing space, or in vertical writing mode. */
					if (dev->lastchar != ' ' && wmode == 0)
						add_space = 1;
					new_line = 0;
				}
			}

			/* RTL character -- disable space character and column detection heuristics */
			else
			{
				new_line = 0;
				if (spacing > size * SPACE_DIST || spacing < 0)
					rtl = 0; /* backward (or big jump to 'right' side) means logical order */
				else
					rtl = 1; /* visual order, we need to reverse in a post process pass */
			}
		}

		/* Enough for a new line, but not enough for a new paragraph */
		else if (fabsf(base_offset) <= size * PARAGRAPH_DIST)
		{
			/* Check indent to spot text-indent style paragraphs */
			/* Only the first character of a text object is compared against the start of the current line. */
			if (wmode == 0 && cur_line && dev->new_obj)
				if (fabsf(p.x - dev->start.x) > size * 0.5f)
					new_para = 1;
			new_line = 1;
		}

		/* Way off the baseline - open a new paragraph */
		else
		{
			new_para = 1;
			new_line = 1;
		}
	}

	/* Start a new block (but only at the beginning of a text object) */
	if (new_para || !cur_block)
	{
		cur_block = add_text_block_to_page(ctx, page);
		/* Taken from the new block, which has had no lines added, so the check below is what creates its first line. */
		cur_line = cur_block->u.t.last_line;
	}

	/* Start a new line */
	if (new_line || !cur_line)
	{
		cur_line = add_line_to_block(ctx, page, cur_block, &ndir, wmode);
		/* Remember where this line begins for the indent check above. */
		dev->start = p;
	}

	/* Add synthetic space */
	/* The space covers the gap from the previous pen position to this character's start. */
	if (add_space && !(dev->flags & FZ_STEXT_INHIBIT_SPACES))
		add_char_to_line(ctx, page, cur_line, trm, font, size, ' ', &dev->pen, &p, dev->color);

	add_char_to_line(ctx, page, cur_line, trm, font, size, c, &p, &q, dev->color);
	dev->lastchar = c;
	dev->pen = q;

	dev->new_obj = 0;
	dev->trm = trm;
}
459
460static void
461fz_add_stext_char(fz_context *ctx, fz_stext_device *dev, fz_font *font, int c, int glyph, fz_matrix trm, float adv, int wmode)
462{
463 /* ignore when one unicode character maps to multiple glyphs */
464 if (c == -1)
465 return;
466
467 if (!(dev->flags & FZ_STEXT_PRESERVE_LIGATURES))
468 {
469 switch (c)
470 {
471 case 0xFB00: /* ff */
472 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode);
473 fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode);
474 return;
475 case 0xFB01: /* fi */
476 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode);
477 fz_add_stext_char_imp(ctx, dev, font, 'i', -1, trm, 0, wmode);
478 return;
479 case 0xFB02: /* fl */
480 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode);
481 fz_add_stext_char_imp(ctx, dev, font, 'l', -1, trm, 0, wmode);
482 return;
483 case 0xFB03: /* ffi */
484 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode);
485 fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode);
486 fz_add_stext_char_imp(ctx, dev, font, 'i', -1, trm, 0, wmode);
487 return;
488 case 0xFB04: /* ffl */
489 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode);
490 fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode);
491 fz_add_stext_char_imp(ctx, dev, font, 'l', -1, trm, 0, wmode);
492 return;
493 case 0xFB05: /* long st */
494 case 0xFB06: /* st */
495 fz_add_stext_char_imp(ctx, dev, font, 's', glyph, trm, adv, wmode);
496 fz_add_stext_char_imp(ctx, dev, font, 't', -1, trm, 0, wmode);
497 return;
498 }
499 }
500
501 if (!(dev->flags & FZ_STEXT_PRESERVE_WHITESPACE))
502 {
503 switch (c)
504 {
505 case 0x0009: /* tab */
506 case 0x0020: /* space */
507 case 0x00A0: /* no-break space */
508 case 0x1680: /* ogham space mark */
509 case 0x180E: /* mongolian vowel separator */
510 case 0x2000: /* en quad */
511 case 0x2001: /* em quad */
512 case 0x2002: /* en space */
513 case 0x2003: /* em space */
514 case 0x2004: /* three-per-em space */
515 case 0x2005: /* four-per-em space */
516 case 0x2006: /* six-per-em space */
517 case 0x2007: /* figure space */
518 case 0x2008: /* punctuation space */
519 case 0x2009: /* thin space */
520 case 0x200A: /* hair space */
521 case 0x202F: /* narrow no-break space */
522 case 0x205F: /* medium mathematical space */
523 case 0x3000: /* ideographic space */
524 c = ' ';
525 }
526 }
527
528 fz_add_stext_char_imp(ctx, dev, font, c, glyph, trm, adv, wmode);
529}
530
531static void
532fz_stext_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, fz_matrix ctm)
533{
534 fz_font *font = span->font;
535 fz_matrix tm = span->trm;
536 fz_matrix trm;
537 float adv;
538 int i;
539
540 if (span->len == 0)
541 return;
542
543 tm.e = 0;
544 tm.f = 0;
545 trm = fz_concat(tm, ctm);
546
547 for (i = 0; i < span->len; i++)
548 {
549 /* Calculate new pen location and delta */
550 tm.e = span->items[i].x;
551 tm.f = span->items[i].y;
552 trm = fz_concat(tm, ctm);
553
554 /* Calculate bounding box and new pen position based on font metrics */
555 if (span->items[i].gid >= 0)
556 adv = fz_advance_glyph(ctx, font, span->items[i].gid, span->wmode);
557 else
558 adv = 0;
559
560 fz_add_stext_char(ctx, dev, font, span->items[i].ucs, span->items[i].gid, trm, adv, span->wmode);
561 }
562}
563
564static int hexrgb_from_color(fz_context *ctx, fz_colorspace *colorspace, const float *color)
565{
566 float rgb[3];
567 fz_convert_color(ctx, colorspace, color, fz_device_rgb(ctx), rgb, NULL, fz_default_color_params);
568 return
569 (fz_clampi(rgb[0] * 255, 0, 255) << 16) |
570 (fz_clampi(rgb[1] * 255, 0, 255) << 8) |
571 (fz_clampi(rgb[2] * 255, 0, 255));
572}
573
574static void
575fz_stext_fill_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm,
576 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
577{
578 fz_stext_device *tdev = (fz_stext_device*)dev;
579 fz_text_span *span;
580 if (text == tdev->lasttext)
581 return;
582 tdev->color = hexrgb_from_color(ctx, colorspace, color);
583 tdev->new_obj = 1;
584 for (span = text->head; span; span = span->next)
585 fz_stext_extract(ctx, tdev, span, ctm);
586 fz_drop_text(ctx, tdev->lasttext);
587 tdev->lasttext = fz_keep_text(ctx, text);
588}
589
590static void
591fz_stext_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
592 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
593{
594 fz_stext_device *tdev = (fz_stext_device*)dev;
595 fz_text_span *span;
596 if (text == tdev->lasttext)
597 return;
598 tdev->color = hexrgb_from_color(ctx, colorspace, color);
599 tdev->new_obj = 1;
600 for (span = text->head; span; span = span->next)
601 fz_stext_extract(ctx, tdev, span, ctm);
602 fz_drop_text(ctx, tdev->lasttext);
603 tdev->lasttext = fz_keep_text(ctx, text);
604}
605
606static void
607fz_stext_clip_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm, fz_rect scissor)
608{
609 fz_stext_device *tdev = (fz_stext_device*)dev;
610 fz_text_span *span;
611 if (text == tdev->lasttext)
612 return;
613 tdev->color = 0;
614 tdev->new_obj = 1;
615 for (span = text->head; span; span = span->next)
616 fz_stext_extract(ctx, tdev, span, ctm);
617 fz_drop_text(ctx, tdev->lasttext);
618 tdev->lasttext = fz_keep_text(ctx, text);
619}
620
621static void
622fz_stext_clip_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
623{
624 fz_stext_device *tdev = (fz_stext_device*)dev;
625 fz_text_span *span;
626 if (text == tdev->lasttext)
627 return;
628 tdev->color = 0;
629 tdev->new_obj = 1;
630 for (span = text->head; span; span = span->next)
631 fz_stext_extract(ctx, tdev, span, ctm);
632 fz_drop_text(ctx, tdev->lasttext);
633 tdev->lasttext = fz_keep_text(ctx, text);
634}
635
636static void
637fz_stext_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm)
638{
639 fz_stext_device *tdev = (fz_stext_device*)dev;
640 fz_text_span *span;
641 if (text == tdev->lasttext)
642 return;
643 tdev->color = 0;
644 tdev->new_obj = 1;
645 for (span = text->head; span; span = span->next)
646 fz_stext_extract(ctx, tdev, span, ctm);
647 fz_drop_text(ctx, tdev->lasttext);
648 tdev->lasttext = fz_keep_text(ctx, text);
649}
650
651/* Images and shadings */
652
653static void
654fz_stext_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
655{
656 fz_stext_device *tdev = (fz_stext_device*)dev;
657
658 /* If the alpha is less than 50% then it's probably a watermark or effect or something. Skip it. */
659 if (alpha < 0.5f)
660 return;
661
662 add_image_block_to_page(ctx, tdev->page, ctm, img);
663}
664
665static void
666fz_stext_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm,
667 fz_colorspace *cspace, const float *color, float alpha, fz_color_params color_params)
668{
669 fz_stext_fill_image(ctx, dev, img, ctm, alpha, color_params);
670}
671
672static fz_image *
673fz_new_image_from_shade(fz_context *ctx, fz_shade *shade, fz_matrix *in_out_ctm, fz_color_params color_params, fz_rect scissor)
674{
675 fz_matrix ctm = *in_out_ctm;
676 fz_pixmap *pix;
677 fz_image *img = NULL;
678 fz_rect bounds;
679 fz_irect bbox;
680
681 bounds = fz_bound_shade(ctx, shade, ctm);
682 bounds = fz_intersect_rect(bounds, scissor);
683 bbox = fz_irect_from_rect(bounds);
684
685 pix = fz_new_pixmap_with_bbox(ctx, fz_device_rgb(ctx), bbox, NULL, !shade->use_background);
686 fz_try(ctx)
687 {
688 if (shade->use_background)
689 fz_fill_pixmap_with_color(ctx, pix, shade->colorspace, shade->background, color_params);
690 else
691 fz_clear_pixmap(ctx, pix);
692 fz_paint_shade(ctx, shade, NULL, ctm, pix, color_params, bbox, NULL);
693 img = fz_new_image_from_pixmap(ctx, pix, NULL);
694 }
695 fz_always(ctx)
696 fz_drop_pixmap(ctx, pix);
697 fz_catch(ctx)
698 fz_rethrow(ctx);
699
700 in_out_ctm->a = pix->w;
701 in_out_ctm->b = 0;
702 in_out_ctm->c = 0;
703 in_out_ctm->d = pix->h;
704 in_out_ctm->e = pix->x;
705 in_out_ctm->f = pix->y;
706 return img;
707}
708
709static void
710fz_stext_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shade, fz_matrix ctm, float alpha, fz_color_params color_params)
711{
712 fz_matrix local_ctm = ctm;
713 fz_rect scissor = fz_device_current_scissor(ctx, dev);
714 fz_image *image = fz_new_image_from_shade(ctx, shade, &local_ctm, color_params, scissor);
715 fz_try(ctx)
716 fz_stext_fill_image(ctx, dev, image, local_ctm, alpha, color_params);
717 fz_always(ctx)
718 fz_drop_image(ctx, image);
719 fz_catch(ctx)
720 fz_rethrow(ctx);
721}
722
723static void
724fz_stext_close_device(fz_context *ctx, fz_device *dev)
725{
726 fz_stext_device *tdev = (fz_stext_device*)dev;
727 fz_stext_page *page = tdev->page;
728 fz_stext_block *block;
729 fz_stext_line *line;
730 fz_stext_char *ch;
731
732 for (block = page->first_block; block; block = block->next)
733 {
734 if (block->type != FZ_STEXT_BLOCK_TEXT)
735 continue;
736 for (line = block->u.t.first_line; line; line = line->next)
737 {
738 for (ch = line->first_char; ch; ch = ch->next)
739 {
740 fz_rect ch_box = fz_rect_from_quad(ch->quad);
741 if (ch == line->first_char)
742 line->bbox = ch_box;
743 else
744 line->bbox = fz_union_rect(line->bbox, ch_box);
745 }
746 block->bbox = fz_union_rect(block->bbox, line->bbox);
747 }
748 }
749
750 /* TODO: smart sorting of blocks and lines in reading order */
751 /* TODO: unicode NFC normalization */
752}
753
754static void
755fz_stext_drop_device(fz_context *ctx, fz_device *dev)
756{
757 fz_stext_device *tdev = (fz_stext_device*)dev;
758 fz_drop_text(ctx, tdev->lasttext);
759}
760
761/*
762 Parse stext device options from a comma separated key-value string.
763*/
764fz_stext_options *
765fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *string)
766{
767 const char *val;
768
769 memset(opts, 0, sizeof *opts);
770
771 if (fz_has_option(ctx, string, "preserve-ligatures", &val) && fz_option_eq(val, "yes"))
772 opts->flags |= FZ_STEXT_PRESERVE_LIGATURES;
773 if (fz_has_option(ctx, string, "preserve-whitespace", &val) && fz_option_eq(val, "yes"))
774 opts->flags |= FZ_STEXT_PRESERVE_WHITESPACE;
775 if (fz_has_option(ctx, string, "preserve-images", &val) && fz_option_eq(val, "yes"))
776 opts->flags |= FZ_STEXT_PRESERVE_IMAGES;
777 if (fz_has_option(ctx, string, "inhibit-spaces", &val) && fz_option_eq(val, "yes"))
778 opts->flags |= FZ_STEXT_INHIBIT_SPACES;
779
780 return opts;
781}
782
783/*
784 Create a device to extract the text on a page.
785
786 Gather the text on a page into blocks and lines.
787
788 The reading order is taken from the order the text is drawn in the
789 source file, so may not be accurate.
790
791 page: The text page to which content should be added. This will
792 usually be a newly created (empty) text page, but it can be one
793 containing data already (for example when merging multiple pages,
794 or watermarking).
795
796 options: Options to configure the stext device.
797*/
798fz_device *
799fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options *opts)
800{
801 fz_stext_device *dev = fz_new_derived_device(ctx, fz_stext_device);
802
803 dev->super.close_device = fz_stext_close_device;
804 dev->super.drop_device = fz_stext_drop_device;
805
806 dev->super.fill_text = fz_stext_fill_text;
807 dev->super.stroke_text = fz_stext_stroke_text;
808 dev->super.clip_text = fz_stext_clip_text;
809 dev->super.clip_stroke_text = fz_stext_clip_stroke_text;
810 dev->super.ignore_text = fz_stext_ignore_text;
811
812 if (opts && (opts->flags & FZ_STEXT_PRESERVE_IMAGES))
813 {
814 dev->super.fill_shade = fz_stext_fill_shade;
815 dev->super.fill_image = fz_stext_fill_image;
816 dev->super.fill_image_mask = fz_stext_fill_image_mask;
817 }
818
819 if (opts)
820 dev->flags = opts->flags;
821 dev->page = page;
822 dev->pen.x = 0;
823 dev->pen.y = 0;
824 dev->trm = fz_identity;
825 dev->lastchar = ' ';
826 dev->curdir = 1;
827 dev->lasttext = NULL;
828
829 return (fz_device*)dev;
830}
831