1#include "mupdf/fitz.h"
2#include "mupdf/pdf.h"
3
4#include <string.h>
5
typedef struct filter_gstate_s filter_gstate;

/*
 * Bit flags selecting which groups of pending graphics state
 * filter_flush() should bring up to date in the chained processor.
 */
typedef enum
{
	/* Single-bit flags. */
	FLUSH_CTM = 1 << 0,
	FLUSH_COLOR_F = 1 << 1,
	FLUSH_COLOR_S = 1 << 2,
	FLUSH_TEXT = 1 << 3,

	/* Combinations of the single-bit flags above. */
	FLUSH_ALL = FLUSH_CTM | FLUSH_COLOR_F | FLUSH_COLOR_S | FLUSH_TEXT,
	FLUSH_STROKE = FLUSH_CTM | FLUSH_COLOR_S,
	FLUSH_FILL = FLUSH_CTM | FLUSH_COLOR_F
} gstate_flush_flags;
19
20typedef struct pdf_filter_gstate_s pdf_filter_gstate;
21
22struct pdf_filter_gstate_s
23{
24 fz_matrix ctm;
25 struct
26 {
27 char name[256];
28 fz_colorspace *cs;
29 } cs, CS;
30 struct
31 {
32 char name[256];
33 pdf_pattern *pat;
34 fz_shade *shd;
35 int n;
36 float c[FZ_MAX_COLORS];
37 } sc, SC;
38 struct
39 {
40 fz_linecap linecap;
41 fz_linejoin linejoin;
42 float linewidth;
43 float miterlimit;
44 } stroke;
45 pdf_text_state text;
46};
47
48struct filter_gstate_s
49{
50 filter_gstate *next;
51 int pushed;
52 pdf_filter_gstate pending;
53 pdf_filter_gstate sent;
54};
55
/*
 * A UTF-8 string that is stepped through (and possibly edited in place)
 * by walk_string() as characters are processed.
 */
typedef struct editable_str_s
{
	/* The string buffer; NULL when there is no string to track. */
	char *utf8;

	/* Set to 1 once walk_string() has removed something from utf8. */
	int edited;

	/* Byte offset of the current position within utf8, or -1 once
	 * the string has stopped matching the processed characters. */
	int pos;
} editable_str;
62
63typedef struct tag_record_s
64{
65 int bdc;
66 char *tag;
67 pdf_obj *raw;
68 pdf_obj *cooked;
69
70 int mcid_num;
71 pdf_obj *mcid_obj;
72 editable_str alt;
73 editable_str actualtext;
74
75 struct tag_record_s *prev;
76} tag_record;
77
78typedef struct pdf_filter_processor_s
79{
80 pdf_processor super;
81 pdf_document *doc;
82 int structparents;
83 pdf_obj *structarray;
84 pdf_processor *chain;
85 filter_gstate *gstate;
86 pdf_text_object_state tos;
87 int Tm_pending;
88 int BT_pending;
89 float Tm_adjust;
90 void *font_name;
91 tag_record *current_tags;
92 tag_record *pending_tags;
93 pdf_text_filter_fn *text_filter;
94 pdf_after_text_object_fn *after_text;
95 void *opaque;
96 pdf_obj *old_rdb, *new_rdb;
97} pdf_filter_processor;
98
99static void
100copy_resource(fz_context *ctx, pdf_filter_processor *p, pdf_obj *key, const char *name)
101{
102 pdf_obj *res, *obj;
103
104 if (!name || name[0] == 0)
105 return;
106
107 res = pdf_dict_get(ctx, p->old_rdb, key);
108 obj = pdf_dict_gets(ctx, res, name);
109 if (obj)
110 {
111 res = pdf_dict_get(ctx, p->new_rdb, key);
112 if (!res)
113 {
114 res = pdf_new_dict(ctx, pdf_get_bound_document(ctx, p->new_rdb), 1);
115 pdf_dict_put_drop(ctx, p->new_rdb, key, res);
116 }
117 pdf_dict_putp(ctx, res, name, obj);
118 }
119}
120
121static void
122filter_push(fz_context *ctx, pdf_filter_processor *p)
123{
124 filter_gstate *gstate = p->gstate;
125 filter_gstate *new_gstate = fz_malloc_struct(ctx, filter_gstate);
126 *new_gstate = *gstate;
127 new_gstate->pushed = 0;
128 new_gstate->next = gstate;
129 p->gstate = new_gstate;
130
131 pdf_keep_font(ctx, new_gstate->pending.text.font);
132 pdf_keep_font(ctx, new_gstate->sent.text.font);
133}
134
135static int
136filter_pop(fz_context *ctx, pdf_filter_processor *p)
137{
138 filter_gstate *gstate = p->gstate;
139 filter_gstate *old = gstate->next;
140
141 /* We are at the top, so nothing to pop! */
142 if (old == NULL)
143 return 1;
144
145 if (gstate->pushed)
146 if (p->chain->op_Q)
147 p->chain->op_Q(ctx, p->chain);
148
149 pdf_drop_font(ctx, gstate->pending.text.font);
150 pdf_drop_font(ctx, gstate->sent.text.font);
151 fz_free(ctx, gstate);
152 p->gstate = old;
153 return 0;
154}
155
156/* We never allow the topmost gstate to be changed. This allows us
157 * to pop back to the zeroth level and be sure that our gstate is
158 * sane. This is important for being able to add new operators at
159 * the end of pages in a sane way. */
160static filter_gstate *
161gstate_to_update(fz_context *ctx, pdf_filter_processor *p)
162{
163 filter_gstate *gstate = p->gstate;
164
165 /* If we're not the top, that's fine */
166 if (gstate->next != NULL)
167 return gstate;
168
169 /* We are the top. Push a group, so we're not */
170 filter_push(ctx, p);
171 gstate = p->gstate;
172 gstate->pushed = 1;
173 if (p->chain->op_q)
174 p->chain->op_q(ctx, p->chain);
175
176 return p->gstate;
177}
178
179static void flush_tags(fz_context *ctx, pdf_filter_processor *p, tag_record **tags)
180{
181 tag_record *tag = *tags;
182
183 if (tag == NULL)
184 return;
185 if (tag->prev)
186 flush_tags(ctx, p, &tag->prev);
187 if (tag->bdc)
188 {
189 if (p->chain->op_BDC)
190 p->chain->op_BDC(ctx, p->chain, tag->tag, tag->raw, tag->cooked);
191 }
192 else if (p->chain->op_BMC)
193 p->chain->op_BMC(ctx, p->chain, tag->tag);
194 tag->prev = p->current_tags;
195 p->current_tags = tag;
196 *tags = NULL;
197}
198
199static void filter_flush(fz_context *ctx, pdf_filter_processor *p, int flush)
200{
201 filter_gstate *gstate = gstate_to_update(ctx, p);
202 int i;
203
204 if (gstate->pushed == 0)
205 {
206 gstate->pushed = 1;
207 if (p->chain->op_q)
208 p->chain->op_q(ctx, p->chain);
209 }
210
211 if (flush)
212 flush_tags(ctx, p, &p->pending_tags);
213
214 if (flush & FLUSH_CTM)
215 {
216 if (gstate->pending.ctm.a != 1 || gstate->pending.ctm.b != 0 ||
217 gstate->pending.ctm.c != 0 || gstate->pending.ctm.d != 1 ||
218 gstate->pending.ctm.e != 0 || gstate->pending.ctm.f != 0)
219 {
220 fz_matrix current = gstate->sent.ctm;
221
222 if (p->chain->op_cm)
223 p->chain->op_cm(ctx, p->chain,
224 gstate->pending.ctm.a,
225 gstate->pending.ctm.b,
226 gstate->pending.ctm.c,
227 gstate->pending.ctm.d,
228 gstate->pending.ctm.e,
229 gstate->pending.ctm.f);
230
231 gstate->sent.ctm = fz_concat(current, gstate->pending.ctm);
232 gstate->pending.ctm.a = 1;
233 gstate->pending.ctm.b = 0;
234 gstate->pending.ctm.c = 0;
235 gstate->pending.ctm.d = 1;
236 gstate->pending.ctm.e = 0;
237 gstate->pending.ctm.f = 0;
238 }
239 }
240
241 if (flush & FLUSH_COLOR_F)
242 {
243 if (gstate->pending.cs.cs == fz_device_gray(ctx) && !gstate->pending.sc.pat && !gstate->pending.sc.shd && gstate->pending.sc.n == 1 &&
244 (gstate->sent.cs.cs != fz_device_gray(ctx) || gstate->sent.sc.pat || gstate->sent.sc.shd || gstate->sent.sc.n != 1 || gstate->pending.sc.c[0] != gstate->sent.sc.c[0]))
245 {
246 if (p->chain->op_g)
247 p->chain->op_g(ctx, p->chain, gstate->pending.sc.c[0]);
248 goto done_sc;
249 }
250 if (gstate->pending.cs.cs == fz_device_rgb(ctx) && !gstate->pending.sc.pat && !gstate->pending.sc.shd && gstate->pending.sc.n == 3 &&
251 (gstate->sent.cs.cs != fz_device_rgb(ctx) || gstate->sent.sc.pat || gstate->sent.sc.shd || gstate->sent.sc.n != 3 || gstate->pending.sc.c[0] != gstate->sent.sc.c[0] ||
252 gstate->pending.sc.c[1] != gstate->sent.sc.c[1] || gstate->pending.sc.c[1] != gstate->sent.sc.c[1]))
253 {
254 if (p->chain->op_rg)
255 p->chain->op_rg(ctx, p->chain, gstate->pending.sc.c[0], gstate->pending.sc.c[1], gstate->pending.sc.c[2]);
256 goto done_sc;
257 }
258 if (gstate->pending.cs.cs == fz_device_cmyk(ctx) && !gstate->pending.sc.pat && !gstate->pending.sc.shd && gstate->pending.sc.n == 4 &&
259 (gstate->sent.cs.cs != fz_device_cmyk(ctx) || gstate->sent.sc.pat || gstate->sent.sc.shd || gstate->pending.sc.n != 4 || gstate->pending.sc.c[0] != gstate->sent.sc.c[0] ||
260 gstate->pending.sc.c[1] != gstate->sent.sc.c[1] || gstate->pending.sc.c[2] != gstate->sent.sc.c[2] || gstate->pending.sc.c[3] != gstate->sent.sc.c[3]))
261 {
262 if (p->chain->op_k)
263 p->chain->op_k(ctx, p->chain, gstate->pending.sc.c[0], gstate->pending.sc.c[1], gstate->pending.sc.c[2], gstate->pending.sc.c[3]);
264 goto done_sc;
265 }
266
267 if (strcmp(gstate->pending.cs.name, gstate->sent.cs.name))
268 {
269 if (p->chain->op_cs)
270 p->chain->op_cs(ctx, p->chain, gstate->pending.cs.name, gstate->pending.cs.cs);
271 }
272
273 /* pattern or shading */
274 if (gstate->pending.sc.name[0])
275 {
276 int emit = 0;
277 if (strcmp(gstate->pending.sc.name, gstate->sent.sc.name))
278 emit = 1;
279 if (gstate->pending.sc.n != gstate->sent.sc.n)
280 emit = 1;
281 else
282 for (i = 0; i < gstate->pending.sc.n; ++i)
283 if (gstate->pending.sc.c[i] != gstate->sent.sc.c[i])
284 emit = 1;
285 if (emit)
286 {
287 if (gstate->pending.sc.pat)
288 if (p->chain->op_sc_pattern)
289 p->chain->op_sc_pattern(ctx, p->chain, gstate->pending.sc.name, gstate->pending.sc.pat, gstate->pending.sc.n, gstate->pending.sc.c);
290 if (gstate->pending.sc.shd)
291 if (p->chain->op_sc_shade)
292 p->chain->op_sc_shade(ctx, p->chain, gstate->pending.sc.name, gstate->pending.sc.shd);
293 }
294 }
295
296 /* plain color */
297 else
298 {
299 int emit = 0;
300 if (gstate->pending.sc.n != gstate->sent.sc.n)
301 emit = 1;
302 else
303 for (i = 0; i < gstate->pending.sc.n; ++i)
304 if (gstate->pending.sc.c[i] != gstate->sent.sc.c[i])
305 emit = 1;
306 if (emit)
307 {
308 if (p->chain->op_sc_color)
309 p->chain->op_sc_color(ctx, p->chain, gstate->pending.sc.n, gstate->pending.sc.c);
310 }
311 }
312
313done_sc:
314 gstate->sent.cs = gstate->pending.cs;
315 gstate->sent.sc = gstate->pending.sc;
316 }
317
318 if (flush & FLUSH_COLOR_S)
319 {
320 if (gstate->pending.CS.cs == fz_device_gray(ctx) && !gstate->pending.SC.pat && !gstate->pending.SC.shd && gstate->pending.SC.n == 1 &&
321 (gstate->sent.CS.cs != fz_device_gray(ctx) || gstate->sent.SC.pat || gstate->sent.SC.shd || gstate->sent.SC.n != 0 || gstate->pending.SC.c[0] != gstate->sent.SC.c[0]))
322 {
323 if (p->chain->op_G)
324 p->chain->op_G(ctx, p->chain, gstate->pending.SC.c[0]);
325 goto done_SC;
326 }
327 if (gstate->pending.CS.cs == fz_device_rgb(ctx) && !gstate->pending.SC.pat && !gstate->pending.SC.shd && gstate->pending.SC.n == 3 &&
328 (gstate->sent.CS.cs != fz_device_rgb(ctx) || gstate->sent.SC.pat || gstate->sent.SC.shd || gstate->sent.SC.n != 3 || gstate->pending.SC.c[0] != gstate->sent.SC.c[0] ||
329 gstate->pending.SC.c[1] != gstate->sent.SC.c[1] || gstate->pending.SC.c[1] != gstate->sent.SC.c[1]))
330 {
331 if (p->chain->op_RG)
332 p->chain->op_RG(ctx, p->chain, gstate->pending.SC.c[0], gstate->pending.SC.c[1], gstate->pending.SC.c[2]);
333 goto done_SC;
334 }
335 if (gstate->pending.CS.cs == fz_device_cmyk(ctx) && !gstate->pending.SC.pat && !gstate->pending.SC.shd && gstate->pending.SC.n == 4 &&
336 (gstate->sent.CS.cs != fz_device_cmyk(ctx) || gstate->sent.SC.pat || gstate->sent.SC.shd || gstate->pending.SC.n != 4 || gstate->pending.SC.c[0] != gstate->sent.SC.c[0] ||
337 gstate->pending.SC.c[1] != gstate->sent.SC.c[1] || gstate->pending.SC.c[2] != gstate->sent.SC.c[2] || gstate->pending.SC.c[3] != gstate->sent.SC.c[3]))
338 {
339 if (p->chain->op_K)
340 p->chain->op_K(ctx, p->chain, gstate->pending.SC.c[0], gstate->pending.SC.c[1], gstate->pending.SC.c[2], gstate->pending.SC.c[3]);
341 goto done_SC;
342 }
343
344 if (strcmp(gstate->pending.CS.name, gstate->sent.CS.name))
345 {
346 if (p->chain->op_CS)
347 p->chain->op_CS(ctx, p->chain, gstate->pending.CS.name, gstate->pending.CS.cs);
348 }
349
350 /* pattern or shading */
351 if (gstate->pending.SC.name[0])
352 {
353 int emit = 0;
354 if (strcmp(gstate->pending.SC.name, gstate->sent.SC.name))
355 emit = 1;
356 if (gstate->pending.SC.n != gstate->sent.SC.n)
357 emit = 1;
358 else
359 for (i = 0; i < gstate->pending.SC.n; ++i)
360 if (gstate->pending.SC.c[i] != gstate->sent.SC.c[i])
361 emit = 1;
362 if (emit)
363 {
364 if (gstate->pending.SC.pat)
365 if (p->chain->op_SC_pattern)
366 p->chain->op_SC_pattern(ctx, p->chain, gstate->pending.SC.name, gstate->pending.SC.pat, gstate->pending.SC.n, gstate->pending.SC.c);
367 if (gstate->pending.SC.shd)
368 if (p->chain->op_SC_shade)
369 p->chain->op_SC_shade(ctx, p->chain, gstate->pending.SC.name, gstate->pending.SC.shd);
370 }
371 }
372
373 /* plain color */
374 else
375 {
376 int emit = 0;
377 if (gstate->pending.SC.n != gstate->sent.SC.n)
378 emit = 1;
379 else
380 for (i = 0; i < gstate->pending.SC.n; ++i)
381 if (gstate->pending.SC.c[i] != gstate->sent.SC.c[i])
382 emit = 1;
383 if (emit)
384 {
385 if (p->chain->op_SC_color)
386 p->chain->op_SC_color(ctx, p->chain, gstate->pending.SC.n, gstate->pending.SC.c);
387 }
388 }
389
390done_SC:
391 gstate->sent.CS = gstate->pending.CS;
392 gstate->sent.SC = gstate->pending.SC;
393 }
394
395 if (flush & FLUSH_STROKE)
396 {
397 if (gstate->pending.stroke.linecap != gstate->sent.stroke.linecap)
398 {
399 if (p->chain->op_J)
400 p->chain->op_J(ctx, p->chain, gstate->pending.stroke.linecap);
401 }
402 if (gstate->pending.stroke.linejoin != gstate->sent.stroke.linejoin)
403 {
404 if (p->chain->op_j)
405 p->chain->op_j(ctx, p->chain, gstate->pending.stroke.linejoin);
406 }
407 if (gstate->pending.stroke.linewidth != gstate->sent.stroke.linewidth)
408 {
409 if (p->chain->op_w)
410 p->chain->op_w(ctx, p->chain, gstate->pending.stroke.linewidth);
411 }
412 if (gstate->pending.stroke.miterlimit != gstate->sent.stroke.miterlimit)
413 {
414 if (p->chain->op_M)
415 p->chain->op_M(ctx, p->chain, gstate->pending.stroke.miterlimit);
416 }
417 gstate->sent.stroke = gstate->pending.stroke;
418 }
419
420 if (flush & FLUSH_TEXT)
421 {
422 if (p->BT_pending)
423 {
424 if (p->chain->op_BT)
425 p->chain->op_BT(ctx, p->chain);
426 p->BT_pending = 0;
427 }
428 if (gstate->pending.text.char_space != gstate->sent.text.char_space)
429 {
430 if (p->chain->op_Tc)
431 p->chain->op_Tc(ctx, p->chain, gstate->pending.text.char_space);
432 }
433 if (gstate->pending.text.word_space != gstate->sent.text.word_space)
434 {
435 if (p->chain->op_Tw)
436 p->chain->op_Tw(ctx, p->chain, gstate->pending.text.word_space);
437 }
438 if (gstate->pending.text.scale != gstate->sent.text.scale)
439 {
440 /* The value of scale in the gstate is divided by 100 from what is written in the file */
441 if (p->chain->op_Tz)
442 p->chain->op_Tz(ctx, p->chain, gstate->pending.text.scale*100);
443 }
444 if (gstate->pending.text.leading != gstate->sent.text.leading)
445 {
446 if (p->chain->op_TL)
447 p->chain->op_TL(ctx, p->chain, gstate->pending.text.leading);
448 }
449 if (gstate->pending.text.font != gstate->sent.text.font ||
450 gstate->pending.text.size != gstate->sent.text.size)
451 {
452 if (p->chain->op_Tf)
453 p->chain->op_Tf(ctx, p->chain, p->font_name, gstate->pending.text.font, gstate->pending.text.size);
454 }
455 if (gstate->pending.text.render != gstate->sent.text.render)
456 {
457 if (p->chain->op_Tr)
458 p->chain->op_Tr(ctx, p->chain, gstate->pending.text.render);
459 }
460 if (gstate->pending.text.rise != gstate->sent.text.rise)
461 {
462 if (p->chain->op_Ts)
463 p->chain->op_Ts(ctx, p->chain, gstate->pending.text.rise);
464 }
465 pdf_drop_font(ctx, gstate->sent.text.font);
466 gstate->sent.text = gstate->pending.text;
467 gstate->sent.text.font = pdf_keep_font(ctx, gstate->pending.text.font);
468 if (p->Tm_pending != 0)
469 {
470 if (p->chain->op_Tm)
471 p->chain->op_Tm(ctx, p->chain, p->tos.tlm.a, p->tos.tlm.b, p->tos.tlm.c, p->tos.tlm.d, p->tos.tlm.e, p->tos.tlm.f);
472 p->Tm_pending = 0;
473 }
474 }
475}
476
477static int
478filter_show_char(fz_context *ctx, pdf_filter_processor *p, int cid, int *unicode)
479{
480 filter_gstate *gstate = p->gstate;
481 pdf_font_desc *fontdesc = gstate->pending.text.font;
482 fz_matrix trm;
483 int ucsbuf[8];
484 int ucslen;
485 int remove = 0;
486
487 (void)pdf_tos_make_trm(ctx, &p->tos, &gstate->pending.text, fontdesc, cid, &trm);
488
489 ucslen = 0;
490 if (fontdesc->to_unicode)
491 ucslen = pdf_lookup_cmap_full(fontdesc->to_unicode, cid, ucsbuf);
492 if (ucslen == 0 && (size_t)cid < fontdesc->cid_to_ucs_len)
493 {
494 ucsbuf[0] = fontdesc->cid_to_ucs[cid];
495 ucslen = 1;
496 }
497 if (ucslen == 0 || (ucslen == 1 && ucsbuf[0] == 0))
498 {
499 ucsbuf[0] = FZ_REPLACEMENT_CHARACTER;
500 ucslen = 1;
501 }
502 *unicode = ucsbuf[0];
503
504 if (p->text_filter)
505 {
506 fz_matrix ctm = fz_concat(gstate->sent.ctm, gstate->pending.ctm);
507 fz_rect bbox;
508
509 if (fontdesc->wmode == 0)
510 {
511 bbox.x0 = 0;
512 bbox.y0 = fz_font_descender(ctx, fontdesc->font);
513 bbox.x1 = fz_advance_glyph(ctx, fontdesc->font, p->tos.gid, 0);
514 bbox.y1 = fz_font_ascender(ctx, fontdesc->font);
515 }
516 else
517 {
518 fz_rect font_bbox = fz_font_bbox(ctx, fontdesc->font);
519 bbox.x0 = font_bbox.x0;
520 bbox.x1 = font_bbox.x1;
521 bbox.y0 = 0;
522 bbox.y1 = fz_advance_glyph(ctx, fontdesc->font, p->tos.gid, 1);
523 }
524
525 remove = p->text_filter(ctx, p->opaque, ucsbuf, ucslen, trm, ctm, bbox);
526 }
527
528 pdf_tos_move_after_char(ctx, &p->tos);
529
530 return remove;
531}
532
533static void
534filter_show_space(fz_context *ctx, pdf_filter_processor *p, float tadj)
535{
536 filter_gstate *gstate = p->gstate;
537 pdf_font_desc *fontdesc = gstate->pending.text.font;
538
539 if (fontdesc->wmode == 0)
540 p->tos.tm = fz_pre_translate(p->tos.tm, tadj * gstate->pending.text.scale, 0);
541 else
542 p->tos.tm = fz_pre_translate(p->tos.tm, 0, tadj);
543}
544
545static void
546walk_string(fz_context *ctx, int uni, int remove, editable_str *str)
547{
548 int rune;
549
550 if (str->utf8 == NULL || str->pos == -1)
551 return;
552
553 do
554 {
555 char *s = &str->utf8[str->pos];
556 size_t len;
557 int n = fz_chartorune(&rune, s);
558 if (rune == uni)
559 {
560 /* Match. Skip over that one. */
561 str->pos += n;
562 }
563 else if (uni == 32) {
564 /* We don't care if we're given whitespace
565 * and it doesn't match the string. Don't
566 * skip forward. Nothing to remove. */
567 break;
568 }
569 else if (rune == 32) {
570 /* The string has a whitespace, and we
571 * don't match it; that's forgivable as
572 * PDF often misses out spaces. Remove this
573 * if we are removing stuff. */
574 }
575 else
576 {
577 /* Mismatch. No point in tracking through any more. */
578 str->pos = -1;
579 break;
580 }
581 if (remove)
582 {
583 len = strlen(s+n);
584 memmove(s, s+n, len+1);
585 str->edited = 1;
586 }
587 }
588 while (rune != uni);
589}
590
591/* For a given character we've processed (removed or not)
592 * consider it in the tag_record. Try and step over it in
593 * the Alt or ActualText strings, removing if possible.
594 * If we can't marry up the Alt/ActualText strings with
595 * what we're meeting, just take the easy route and delete
596 * the whole lot. */
597static void
598mcid_char_imp(fz_context *ctx, pdf_filter_processor *p, tag_record *tr, int uni, int remove)
599{
600 if (tr->mcid_obj == NULL)
601 /* No object, or already deleted */
602 return;
603
604 if (remove)
605 {
606 /* Remove the expanded abbreviation, if there is one. */
607 pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(E));
608 /* Remove the structure title, if there is one. */
609 pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(T));
610 }
611
612 /* Edit the Alt string */
613 walk_string(ctx, uni, remove, &tr->alt);
614 /* Edit the ActualText string */
615 walk_string(ctx, uni, remove, &tr->actualtext);
616
617 /* If we're removing a character, and either of the strings
618 * haven't matched up to what we were expecting, then just
619 * delete the whole string. */
620 if (remove)
621 remove = (tr->alt.pos == -1 || tr->actualtext.pos == -1);
622 else if (tr->alt.pos >= 0 || tr->actualtext.pos >= 0)
623 {
624 /* The strings are making sense so far */
625 remove = 0;
626 }
627
628 if (remove)
629 {
630 /* Anything else we have to err on the side of caution and
631 * delete everything that might leak info. */
632 if (tr->actualtext.pos == -1)
633 pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(ActualText));
634 if (tr->alt.pos == -1)
635 pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(Alt));
636 pdf_drop_obj(ctx, tr->mcid_obj);
637 tr->mcid_obj = NULL;
638 fz_free(ctx, tr->alt.utf8);
639 tr->alt.utf8 = NULL;
640 fz_free(ctx, tr->actualtext.utf8);
641 tr->actualtext.utf8 = NULL;
642 }
643}
644
645/* For every character that is processed, consider that character in
646 * every pending/current MCID. */
647static void
648mcid_char(fz_context *ctx, pdf_filter_processor *p, int uni, int remove)
649{
650 tag_record *tr = p->pending_tags;
651
652 for (tr = p->pending_tags; tr != NULL; tr = tr->prev)
653 mcid_char_imp(ctx, p, tr, uni, remove);
654 for (tr = p->current_tags; tr != NULL; tr = tr->prev)
655 mcid_char_imp(ctx, p, tr, uni, remove);
656}
657
658static void
659update_mcid(fz_context *ctx, pdf_filter_processor *p)
660{
661 tag_record *tag = p->current_tags;
662
663 if (tag == NULL)
664 return;
665 if (tag->alt.edited)
666 pdf_dict_put_text_string(ctx, tag->mcid_obj, PDF_NAME(Alt), tag->alt.utf8);
667 if (tag->actualtext.edited)
668 pdf_dict_put_text_string(ctx, tag->mcid_obj, PDF_NAME(Alt), tag->actualtext.utf8);
669}
670
671/* Process a string (from buf, of length len), from position *pos onwards.
672 * Stop when we hit the end, or when we find a character to remove. The
673 * caller will restart us again later. On exit, *pos = the point we got to,
674 * *inc = The number of bytes to skip to step over the next character (unless
675 * we hit the end).
676 */
677static void
678filter_string_to_segment(fz_context *ctx, pdf_filter_processor *p, unsigned char *buf, int len, int *pos, int *inc, int *removed_space)
679{
680 filter_gstate *gstate = p->gstate;
681 pdf_font_desc *fontdesc = gstate->pending.text.font;
682 unsigned char *end = buf + len;
683 unsigned int cpt;
684 int cid;
685 int remove;
686
687 buf += *pos;
688
689 *removed_space = 0;
690
691 while (buf < end)
692 {
693 int uni;
694 *inc = pdf_decode_cmap(fontdesc->encoding, buf, end, &cpt);
695 buf += *inc;
696
697 cid = pdf_lookup_cmap(fontdesc->encoding, cpt);
698 if (cid < 0)
699 {
700 uni = FZ_REPLACEMENT_CHARACTER;
701 fz_warn(ctx, "cannot encode character");
702 }
703 else
704 remove = filter_show_char(ctx, p, cid, &uni);
705 if (cpt == 32 && *inc == 1)
706 filter_show_space(ctx, p, gstate->pending.text.word_space);
707 /* For every character we process (whether we remove it
708 * or not), we consider any MCIDs that are in effect. */
709 mcid_char(ctx, p, uni, remove);
710 if (remove)
711 {
712 *removed_space = (cpt == 32 && *inc == 1);
713 return;
714 }
715 *pos += *inc;
716 }
717}
718
719static void
720adjust_text(fz_context *ctx, pdf_filter_processor *p, float x, float y)
721{
722 float skip_dist = p->tos.fontdesc->wmode == 1 ? -y : -x;
723 skip_dist = skip_dist / p->gstate->pending.text.size;
724 p->Tm_adjust += skip_dist;
725}
726
727static void
728adjust_for_removed_space(fz_context *ctx, pdf_filter_processor *p)
729{
730 filter_gstate *gstate = p->gstate;
731 float adj = gstate->pending.text.word_space;
732 adjust_text(ctx, p, adj * gstate->pending.text.scale, adj);
733}
734
735static void
736flush_adjustment(fz_context *ctx, pdf_filter_processor *p)
737{
738 pdf_obj *arr;
739
740 if (p->Tm_adjust == 0)
741 return;
742
743 filter_flush(ctx, p, FLUSH_ALL);
744 arr = pdf_new_array(ctx, p->doc, 1);
745 fz_try(ctx)
746 {
747 pdf_array_push_real(ctx, arr, p->Tm_adjust * 1000);
748 if (p->chain->op_TJ)
749 p->chain->op_TJ(ctx, p->chain, arr);
750 }
751 fz_always(ctx)
752 pdf_drop_obj(ctx, arr);
753 fz_catch(ctx)
754 fz_rethrow(ctx);
755
756 p->Tm_adjust = 0;
757}
758
759static void
760push_adjustment_to_array(fz_context *ctx, pdf_filter_processor *p, pdf_obj *arr)
761{
762 if (p->Tm_adjust == 0)
763 return;
764 pdf_array_push_real(ctx, arr, p->Tm_adjust * 1000);
765 p->Tm_adjust = 0;
766}
767
768static void
769filter_show_string(fz_context *ctx, pdf_filter_processor *p, unsigned char *buf, int len)
770{
771 filter_gstate *gstate = p->gstate;
772 pdf_font_desc *fontdesc = gstate->pending.text.font;
773 int i, inc, removed_space;
774
775 if (!fontdesc)
776 return;
777
778 i = 0;
779 while (i < len)
780 {
781 int start = i;
782 filter_string_to_segment(ctx, p, buf, len, &i, &inc, &removed_space);
783 if (start != i)
784 {
785 /* We have *some* chars to send at least */
786 filter_flush(ctx, p, FLUSH_ALL);
787 flush_adjustment(ctx, p);
788 if (p->chain->op_Tj)
789 p->chain->op_Tj(ctx, p->chain, (char *)buf+start, i-start);
790 }
791 if (i != len)
792 {
793 adjust_text(ctx, p, p->tos.char_tx, p->tos.char_ty);
794 i += inc;
795 }
796 if (removed_space)
797 adjust_for_removed_space(ctx, p);
798 }
799}
800
801static void
802filter_show_text(fz_context *ctx, pdf_filter_processor *p, pdf_obj *text)
803{
804 filter_gstate *gstate = p->gstate;
805 pdf_font_desc *fontdesc = gstate->pending.text.font;
806 int i, n;
807 pdf_obj *new_arr;
808 pdf_document *doc;
809
810 if (!fontdesc)
811 return;
812
813 if (pdf_is_string(ctx, text))
814 {
815 filter_show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, text), pdf_to_str_len(ctx, text));
816 return;
817 }
818 if (!pdf_is_array(ctx, text))
819 return;
820
821 p->tos.fontdesc = fontdesc;
822 n = pdf_array_len(ctx, text);
823 doc = pdf_get_bound_document(ctx, text);
824 new_arr = pdf_new_array(ctx, doc, 4);
825 fz_try(ctx)
826 {
827 for (i = 0; i < n; i++)
828 {
829 pdf_obj *item = pdf_array_get(ctx, text, i);
830 if (pdf_is_string(ctx, item))
831 {
832 unsigned char *buf = (unsigned char *)pdf_to_str_buf(ctx, item);
833 int len = pdf_to_str_len(ctx, item);
834 int j = 0;
835 int removed_space;
836 while (j < len)
837 {
838 int inc;
839 int start = j;
840 filter_string_to_segment(ctx, p, buf, len, &j, &inc, &removed_space);
841 if (start != j)
842 {
843 /* We have *some* chars to send at least */
844 filter_flush(ctx, p, FLUSH_ALL);
845 push_adjustment_to_array(ctx, p, new_arr);
846 pdf_array_push_string(ctx, new_arr, (char *)buf+start, j-start);
847 }
848 if (j != len)
849 {
850 adjust_text(ctx, p, p->tos.char_tx, p->tos.char_ty);
851 j += inc;
852 }
853 if (removed_space)
854 adjust_for_removed_space(ctx, p);
855 }
856 }
857 else
858 {
859 float tadj = - pdf_to_real(ctx, item) * gstate->pending.text.size * 0.001f;
860 if (fontdesc->wmode == 0)
861 {
862 adjust_text(ctx, p, tadj, 0);
863 p->tos.tm = fz_pre_translate(p->tos.tm, tadj * p->gstate->pending.text.scale, 0);
864 }
865 else
866 {
867 adjust_text(ctx, p, 0, tadj);
868 p->tos.tm = fz_pre_translate(p->tos.tm, 0, tadj);
869 }
870 }
871 }
872 if (p->chain->op_TJ && pdf_array_len(ctx, new_arr))
873 p->chain->op_TJ(ctx, p->chain, new_arr);
874 }
875 fz_always(ctx)
876 pdf_drop_obj(ctx, new_arr);
877 fz_catch(ctx)
878 fz_rethrow(ctx);
879}
880
881/* general graphics state */
882
883static void
884pdf_filter_w(fz_context *ctx, pdf_processor *proc, float linewidth)
885{
886 pdf_filter_processor *p = (pdf_filter_processor*)proc;
887 filter_gstate *gstate = gstate_to_update(ctx, p);
888 gstate->pending.stroke.linewidth = linewidth;
889}
890
891static void
892pdf_filter_j(fz_context *ctx, pdf_processor *proc, int linejoin)
893{
894 pdf_filter_processor *p = (pdf_filter_processor*)proc;
895 filter_gstate *gstate = gstate_to_update(ctx, p);
896 gstate->pending.stroke.linejoin = linejoin;
897}
898
899static void
900pdf_filter_J(fz_context *ctx, pdf_processor *proc, int linecap)
901{
902 pdf_filter_processor *p = (pdf_filter_processor*)proc;
903 filter_gstate *gstate = gstate_to_update(ctx, p);
904 gstate->pending.stroke.linecap = linecap;
905}
906
907static void
908pdf_filter_M(fz_context *ctx, pdf_processor *proc, float miterlimit)
909{
910 pdf_filter_processor *p = (pdf_filter_processor*)proc;
911 filter_gstate *gstate = gstate_to_update(ctx, p);
912 gstate->pending.stroke.miterlimit = miterlimit;
913}
914
915static void
916pdf_filter_d(fz_context *ctx, pdf_processor *proc, pdf_obj *array, float phase)
917{
918 pdf_filter_processor *p = (pdf_filter_processor*)proc;
919 filter_flush(ctx, p, 0);
920 if (p->chain->op_d)
921 p->chain->op_d(ctx, p->chain, array, phase);
922}
923
924static void
925pdf_filter_ri(fz_context *ctx, pdf_processor *proc, const char *intent)
926{
927 pdf_filter_processor *p = (pdf_filter_processor*)proc;
928 filter_flush(ctx, p, 0);
929 if (p->chain->op_ri)
930 p->chain->op_ri(ctx, p->chain, intent);
931}
932
933static void
934pdf_filter_gs_OP(fz_context *ctx, pdf_processor *proc, int b)
935{
936 pdf_filter_processor *p = (pdf_filter_processor*)proc;
937 filter_flush(ctx, p, 0);
938 if (p->chain->op_gs_OP)
939 p->chain->op_gs_OP(ctx, p->chain, b);
940}
941
942static void
943pdf_filter_gs_op(fz_context *ctx, pdf_processor *proc, int b)
944{
945 pdf_filter_processor *p = (pdf_filter_processor*)proc;
946 filter_flush(ctx, p, 0);
947 if (p->chain->op_gs_op)
948 p->chain->op_gs_op(ctx, p->chain, b);
949}
950
951static void
952pdf_filter_gs_OPM(fz_context *ctx, pdf_processor *proc, int i)
953{
954 pdf_filter_processor *p = (pdf_filter_processor*)proc;
955 filter_flush(ctx, p, 0);
956 if (p->chain->op_gs_OPM)
957 p->chain->op_gs_OPM(ctx, p->chain, i);
958}
959
960static void
961pdf_filter_gs_UseBlackPtComp(fz_context *ctx, pdf_processor *proc, pdf_obj *name)
962{
963 pdf_filter_processor *p = (pdf_filter_processor*)proc;
964 filter_flush(ctx, p, 0);
965 if (p->chain->op_gs_UseBlackPtComp)
966 p->chain->op_gs_UseBlackPtComp(ctx, p->chain, name);
967}
968
969static void
970pdf_filter_i(fz_context *ctx, pdf_processor *proc, float flatness)
971{
972 pdf_filter_processor *p = (pdf_filter_processor*)proc;
973 filter_flush(ctx, p, 0);
974 if (p->chain->op_i)
975 p->chain->op_i(ctx, p->chain, flatness);
976}
977
978static void
979pdf_filter_gs_begin(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *extgstate)
980{
981 pdf_filter_processor *p = (pdf_filter_processor*)proc;
982 filter_flush(ctx, p, FLUSH_ALL);
983 if (p->chain->op_gs_begin)
984 p->chain->op_gs_begin(ctx, p->chain, name, extgstate);
985 copy_resource(ctx, p, PDF_NAME(ExtGState), name);
986}
987
988static void
989pdf_filter_gs_BM(fz_context *ctx, pdf_processor *proc, const char *blendmode)
990{
991 pdf_filter_processor *p = (pdf_filter_processor*)proc;
992 if (p->chain->op_gs_BM)
993 p->chain->op_gs_BM(ctx, p->chain, blendmode);
994}
995
996static void
997pdf_filter_gs_CA(fz_context *ctx, pdf_processor *proc, float alpha)
998{
999 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1000 if (p->chain->op_gs_CA)
1001 p->chain->op_gs_CA(ctx, p->chain, alpha);
1002}
1003
1004static void
1005pdf_filter_gs_ca(fz_context *ctx, pdf_processor *proc, float alpha)
1006{
1007 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1008 if (p->chain->op_gs_ca)
1009 p->chain->op_gs_ca(ctx, p->chain, alpha);
1010}
1011
1012static void
1013pdf_filter_gs_SMask(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, pdf_obj *page_resources, float *bc, int luminosity)
1014{
1015 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1016 if (p->chain->op_gs_SMask)
1017 p->chain->op_gs_SMask(ctx, p->chain, smask, page_resources, bc, luminosity);
1018}
1019
1020static void
1021pdf_filter_gs_end(fz_context *ctx, pdf_processor *proc)
1022{
1023 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1024 if (p->chain->op_gs_end)
1025 p->chain->op_gs_end(ctx, p->chain);
1026}
1027
1028/* special graphics state */
1029
1030static void
1031pdf_filter_q(fz_context *ctx, pdf_processor *proc)
1032{
1033 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1034 filter_push(ctx, p);
1035}
1036
1037static void
1038pdf_filter_Q(fz_context *ctx, pdf_processor *proc)
1039{
1040 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1041 filter_pop(ctx, p);
1042}
1043
1044static void
1045pdf_filter_cm(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f)
1046{
1047 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1048 filter_gstate *gstate = gstate_to_update(ctx, p);
1049 fz_matrix ctm;
1050
1051 /* If we're being given an identity matrix, don't bother sending it */
1052 if (a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0)
1053 return;
1054
1055 ctm.a = a;
1056 ctm.b = b;
1057 ctm.c = c;
1058 ctm.d = d;
1059 ctm.e = e;
1060 ctm.f = f;
1061
1062 gstate->pending.ctm = fz_concat(ctm, gstate->pending.ctm);
1063}
1064
1065/* path construction */
1066
1067static void
1068pdf_filter_m(fz_context *ctx, pdf_processor *proc, float x, float y)
1069{
1070 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1071 filter_flush(ctx, p, FLUSH_CTM);
1072 if (p->chain->op_m)
1073 p->chain->op_m(ctx, p->chain, x, y);
1074}
1075
1076static void
1077pdf_filter_l(fz_context *ctx, pdf_processor *proc, float x, float y)
1078{
1079 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1080 filter_flush(ctx, p, FLUSH_CTM);
1081 if (p->chain->op_l)
1082 p->chain->op_l(ctx, p->chain, x, y);
1083}
1084
1085static void
1086pdf_filter_c(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x2, float y2, float x3, float y3)
1087{
1088 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1089 filter_flush(ctx, p, FLUSH_CTM);
1090 if (p->chain->op_c)
1091 p->chain->op_c(ctx, p->chain, x1, y1, x2, y2, x3, y3);
1092}
1093
1094static void
1095pdf_filter_v(fz_context *ctx, pdf_processor *proc, float x2, float y2, float x3, float y3)
1096{
1097 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1098 filter_flush(ctx, p, FLUSH_CTM);
1099 if (p->chain->op_v)
1100 p->chain->op_v(ctx, p->chain, x2, y2, x3, y3);
1101}
1102
1103static void
1104pdf_filter_y(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x3, float y3)
1105{
1106 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1107 filter_flush(ctx, p, FLUSH_CTM);
1108 if (p->chain->op_y)
1109 p->chain->op_y(ctx, p->chain, x1, y1, x3, y3);
1110}
1111
1112static void
1113pdf_filter_h(fz_context *ctx, pdf_processor *proc)
1114{
1115 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1116 filter_flush(ctx, p, FLUSH_CTM);
1117 if (p->chain->op_h)
1118 p->chain->op_h(ctx, p->chain);
1119}
1120
1121static void
1122pdf_filter_re(fz_context *ctx, pdf_processor *proc, float x, float y, float w, float h)
1123{
1124 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1125 filter_flush(ctx, p, FLUSH_CTM);
1126 if (p->chain->op_re)
1127 p->chain->op_re(ctx, p->chain, x, y, w, h);
1128}
1129
1130/* path painting */
1131
1132static void
1133pdf_filter_S(fz_context *ctx, pdf_processor *proc)
1134{
1135 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1136 filter_flush(ctx, p, FLUSH_STROKE);
1137 if (p->chain->op_S)
1138 p->chain->op_S(ctx, p->chain);
1139}
1140
1141static void
1142pdf_filter_s(fz_context *ctx, pdf_processor *proc)
1143{
1144 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1145 filter_flush(ctx, p, FLUSH_STROKE);
1146 if (p->chain->op_s)
1147 p->chain->op_s(ctx, p->chain);
1148}
1149
1150static void
1151pdf_filter_F(fz_context *ctx, pdf_processor *proc)
1152{
1153 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1154 filter_flush(ctx, p, FLUSH_FILL);
1155 if (p->chain->op_F)
1156 p->chain->op_F(ctx, p->chain);
1157}
1158
1159static void
1160pdf_filter_f(fz_context *ctx, pdf_processor *proc)
1161{
1162 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1163 filter_flush(ctx, p, FLUSH_FILL);
1164 if (p->chain->op_f)
1165 p->chain->op_f(ctx, p->chain);
1166}
1167
1168static void
1169pdf_filter_fstar(fz_context *ctx, pdf_processor *proc)
1170{
1171 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1172 filter_flush(ctx, p, FLUSH_FILL);
1173 if (p->chain->op_fstar)
1174 p->chain->op_fstar(ctx, p->chain);
1175}
1176
1177static void
1178pdf_filter_B(fz_context *ctx, pdf_processor *proc)
1179{
1180 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1181 filter_flush(ctx, p, FLUSH_ALL);
1182 if (p->chain->op_B)
1183 p->chain->op_B(ctx, p->chain);
1184}
1185
1186static void
1187pdf_filter_Bstar(fz_context *ctx, pdf_processor *proc)
1188{
1189 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1190 filter_flush(ctx, p, FLUSH_ALL);
1191 if (p->chain->op_Bstar)
1192 p->chain->op_Bstar(ctx, p->chain);
1193}
1194
1195static void
1196pdf_filter_b(fz_context *ctx, pdf_processor *proc)
1197{
1198 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1199 filter_flush(ctx, p, FLUSH_ALL);
1200 if (p->chain->op_b)
1201 p->chain->op_b(ctx, p->chain);
1202}
1203
1204static void
1205pdf_filter_bstar(fz_context *ctx, pdf_processor *proc)
1206{
1207 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1208 filter_flush(ctx, p, FLUSH_ALL);
1209 if (p->chain->op_bstar)
1210 p->chain->op_bstar(ctx, p->chain);
1211}
1212
1213static void
1214pdf_filter_n(fz_context *ctx, pdf_processor *proc)
1215{
1216 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1217 filter_flush(ctx, p, FLUSH_CTM);
1218 if (p->chain->op_n)
1219 p->chain->op_n(ctx, p->chain);
1220}
1221
1222/* clipping paths */
1223
1224static void
1225pdf_filter_W(fz_context *ctx, pdf_processor *proc)
1226{
1227 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1228 filter_flush(ctx, p, FLUSH_CTM);
1229 if (p->chain->op_W)
1230 p->chain->op_W(ctx, p->chain);
1231}
1232
1233static void
1234pdf_filter_Wstar(fz_context *ctx, pdf_processor *proc)
1235{
1236 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1237 filter_flush(ctx, p, FLUSH_CTM);
1238 if (p->chain->op_Wstar)
1239 p->chain->op_Wstar(ctx, p->chain);
1240}
1241
1242/* text objects */
1243
1244static void
1245pdf_filter_BT(fz_context *ctx, pdf_processor *proc)
1246{
1247 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1248 filter_flush(ctx, p, 0);
1249 p->tos.tm = fz_identity;
1250 p->tos.tlm = fz_identity;
1251 p->BT_pending = 1;
1252}
1253
1254static void
1255pdf_filter_ET(fz_context *ctx, pdf_processor *proc)
1256{
1257 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1258
1259 if (!p->BT_pending)
1260 {
1261 filter_flush(ctx, p, 0);
1262 if (p->chain->op_ET)
1263 p->chain->op_ET(ctx, p->chain);
1264 }
1265 p->BT_pending = 0;
1266 if (p->after_text)
1267 {
1268 fz_matrix ctm = fz_concat(p->gstate->sent.ctm, p->gstate->pending.ctm);
1269 if (p->chain->op_q)
1270 p->chain->op_q(ctx, p->chain);
1271 p->after_text(ctx, p->opaque, p->doc, p->chain, ctm);
1272 if (p->chain->op_Q)
1273 p->chain->op_Q(ctx, p->chain);
1274 }
1275}
1276
1277/* text state */
1278
1279static void
1280pdf_filter_Tc(fz_context *ctx, pdf_processor *proc, float charspace)
1281{
1282 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1283 filter_flush(ctx, p, 0);
1284 p->gstate->pending.text.char_space = charspace;
1285}
1286
1287static void
1288pdf_filter_Tw(fz_context *ctx, pdf_processor *proc, float wordspace)
1289{
1290 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1291 filter_flush(ctx, p, 0);
1292 p->gstate->pending.text.word_space = wordspace;
1293}
1294
1295static void
1296pdf_filter_Tz(fz_context *ctx, pdf_processor *proc, float scale)
1297{
1298 /* scale is as written in the file. It is 100 times smaller
1299 * in the gstate. */
1300 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1301 filter_flush(ctx, p, 0);
1302 p->gstate->pending.text.scale = scale / 100;
1303}
1304
1305static void
1306pdf_filter_TL(fz_context *ctx, pdf_processor *proc, float leading)
1307{
1308 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1309 filter_flush(ctx, p, 0);
1310 p->gstate->pending.text.leading = leading;
1311}
1312
1313static void
1314pdf_filter_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size)
1315{
1316 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1317 filter_flush(ctx, p, 0);
1318 fz_free(ctx, p->font_name);
1319 p->font_name = NULL;
1320 p->font_name = name ? fz_strdup(ctx, name) : NULL;
1321 pdf_drop_font(ctx, p->gstate->pending.text.font);
1322 p->gstate->pending.text.font = pdf_keep_font(ctx, font);
1323 p->gstate->pending.text.size = size;
1324 copy_resource(ctx, p, PDF_NAME(Font), name);
1325}
1326
1327static void
1328pdf_filter_Tr(fz_context *ctx, pdf_processor *proc, int render)
1329{
1330 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1331 filter_flush(ctx, p, 0);
1332 p->gstate->pending.text.render = render;
1333}
1334
1335static void
1336pdf_filter_Ts(fz_context *ctx, pdf_processor *proc, float rise)
1337{
1338 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1339 filter_flush(ctx, p, 0);
1340 p->gstate->pending.text.rise = rise;
1341}
1342
1343/* text positioning */
1344
1345static void
1346pdf_filter_Td(fz_context *ctx, pdf_processor *proc, float tx, float ty)
1347{
1348 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1349 p->Tm_adjust = 0;
1350 pdf_tos_translate(&p->tos, tx, ty);
1351 p->Tm_pending = 1;
1352}
1353
1354static void
1355pdf_filter_TD(fz_context *ctx, pdf_processor *proc, float tx, float ty)
1356{
1357 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1358 p->Tm_adjust = 0;
1359 pdf_tos_translate(&p->tos, tx, ty);
1360 p->gstate->pending.text.leading = -ty;
1361 p->Tm_pending = 1;
1362}
1363
1364static void
1365pdf_filter_Tm(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f)
1366{
1367 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1368 pdf_tos_set_matrix(&p->tos, a, b, c, d, e, f);
1369 p->Tm_pending = 1;
1370 p->Tm_adjust = 0;
1371}
1372
1373static void
1374pdf_filter_Tstar(fz_context *ctx, pdf_processor *proc)
1375{
1376 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1377 pdf_tos_newline(&p->tos, p->gstate->pending.text.leading);
1378 /* If Tm_pending, then just adjusting the matrix (as
1379 * pdf_tos_newline has done) is enough. Otherwise we
1380 * need to actually call the operator. */
1381 if (!p->Tm_pending && p->chain->op_Tstar)
1382 p->chain->op_Tstar(ctx, p->chain);
1383}
1384
1385/* text showing */
1386
1387static void
1388pdf_filter_TJ(fz_context *ctx, pdf_processor *proc, pdf_obj *array)
1389{
1390 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1391 filter_show_text(ctx, p, array);
1392}
1393
1394static void
1395pdf_filter_Tj(fz_context *ctx, pdf_processor *proc, char *str, int len)
1396{
1397 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1398 filter_show_string(ctx, p, (unsigned char *)str, len);
1399}
1400
1401static void
1402pdf_filter_squote(fz_context *ctx, pdf_processor *proc, char *str, int len)
1403{
1404 /* Note, we convert all T' operators to (maybe) a T* and a Tj */
1405 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1406 pdf_tos_newline(&p->tos, p->gstate->pending.text.leading);
1407 /* If Tm_pending, then just adjusting the matrix (as
1408 * pdf_tos_newline has done) is enough. Otherwise we
1409 * need to do it manually. */
1410 if (!p->Tm_pending && p->chain->op_Tstar)
1411 p->chain->op_Tstar(ctx, p->chain);
1412 filter_show_string(ctx, p, (unsigned char *)str, len);
1413}
1414
1415static void
1416pdf_filter_dquote(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, int len)
1417{
1418 /* Note, we convert all T" operators to (maybe) a T*,
1419 * (maybe) Tc, (maybe) Tw and a Tj. */
1420 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1421 p->gstate->pending.text.word_space = aw;
1422 p->gstate->pending.text.char_space = ac;
1423 pdf_tos_newline(&p->tos, p->gstate->pending.text.leading);
1424 /* If Tm_pending, then just adjusting the matrix (as
1425 * pdf_tos_newline has done) is enough. Otherwise we
1426 * need to do it manually. */
1427 if (!p->Tm_pending && p->chain->op_Tstar)
1428 p->chain->op_Tstar(ctx, p->chain);
1429 filter_show_string(ctx, p, (unsigned char*)str, len);
1430}
1431
1432/* type 3 fonts */
1433
1434static void
1435pdf_filter_d0(fz_context *ctx, pdf_processor *proc, float wx, float wy)
1436{
1437 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1438 filter_flush(ctx, p, 0);
1439 if (p->chain->op_d0)
1440 p->chain->op_d0(ctx, p->chain, wx, wy);
1441}
1442
1443static void
1444pdf_filter_d1(fz_context *ctx, pdf_processor *proc, float wx, float wy, float llx, float lly, float urx, float ury)
1445{
1446 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1447 filter_flush(ctx, p, 0);
1448 if (p->chain->op_d1)
1449 p->chain->op_d1(ctx, p->chain, wx, wy, llx, lly, urx, ury);
1450}
1451
1452/* color */
1453
1454static void
1455pdf_filter_CS(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs)
1456{
1457 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1458 filter_gstate *gstate = gstate_to_update(ctx, p);
1459 fz_strlcpy(gstate->pending.CS.name, name, sizeof gstate->pending.CS.name);
1460 gstate->pending.CS.cs = cs;
1461 copy_resource(ctx, p, PDF_NAME(ColorSpace), name);
1462}
1463
1464static void
1465pdf_filter_cs(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs)
1466{
1467 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1468 filter_gstate *gstate = gstate_to_update(ctx, p);
1469 fz_strlcpy(gstate->pending.cs.name, name, sizeof gstate->pending.cs.name);
1470 gstate->pending.cs.cs = cs;
1471 copy_resource(ctx, p, PDF_NAME(ColorSpace), name);
1472}
1473
1474static void
1475pdf_filter_SC_pattern(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color)
1476{
1477 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1478 filter_gstate *gstate = gstate_to_update(ctx, p);
1479 int i;
1480 fz_strlcpy(gstate->pending.SC.name, name, sizeof gstate->pending.SC.name);
1481 gstate->pending.SC.pat = pat;
1482 gstate->pending.SC.shd = NULL;
1483 gstate->pending.SC.n = n;
1484 for (i = 0; i < n; ++i)
1485 gstate->pending.SC.c[i] = color[i];
1486 copy_resource(ctx, p, PDF_NAME(Pattern), name);
1487}
1488
1489static void
1490pdf_filter_sc_pattern(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color)
1491{
1492 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1493 filter_gstate *gstate = gstate_to_update(ctx, p);
1494 int i;
1495 fz_strlcpy(gstate->pending.sc.name, name, sizeof gstate->pending.sc.name);
1496 gstate->pending.sc.pat = pat;
1497 gstate->pending.sc.shd = NULL;
1498 gstate->pending.sc.n = n;
1499 for (i = 0; i < n; ++i)
1500 gstate->pending.sc.c[i] = color[i];
1501 copy_resource(ctx, p, PDF_NAME(Pattern), name);
1502}
1503
1504static void
1505pdf_filter_SC_shade(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade)
1506{
1507 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1508 filter_gstate *gstate = gstate_to_update(ctx, p);
1509 fz_strlcpy(gstate->pending.SC.name, name, sizeof gstate->pending.SC.name);
1510 gstate->pending.SC.pat = NULL;
1511 gstate->pending.SC.shd = shade;
1512 gstate->pending.SC.n = 0;
1513 copy_resource(ctx, p, PDF_NAME(Pattern), name);
1514}
1515
1516static void
1517pdf_filter_sc_shade(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade)
1518{
1519 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1520 filter_gstate *gstate = gstate_to_update(ctx, p);
1521 fz_strlcpy(gstate->pending.sc.name, name, sizeof gstate->pending.sc.name);
1522 gstate->pending.sc.pat = NULL;
1523 gstate->pending.sc.shd = shade;
1524 gstate->pending.sc.n = 0;
1525 copy_resource(ctx, p, PDF_NAME(Pattern), name);
1526}
1527
1528static void
1529pdf_filter_SC_color(fz_context *ctx, pdf_processor *proc, int n, float *color)
1530{
1531 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1532 filter_gstate *gstate = gstate_to_update(ctx, p);
1533 int i;
1534 gstate->pending.SC.name[0] = 0;
1535 gstate->pending.SC.pat = NULL;
1536 gstate->pending.SC.shd = NULL;
1537 gstate->pending.SC.n = n;
1538 for (i = 0; i < n; ++i)
1539 gstate->pending.SC.c[i] = color[i];
1540}
1541
1542static void
1543pdf_filter_sc_color(fz_context *ctx, pdf_processor *proc, int n, float *color)
1544{
1545 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1546 filter_gstate *gstate = gstate_to_update(ctx, p);
1547 int i;
1548 gstate->pending.sc.name[0] = 0;
1549 gstate->pending.sc.pat = NULL;
1550 gstate->pending.sc.shd = NULL;
1551 gstate->pending.sc.n = n;
1552 for (i = 0; i < n; ++i)
1553 gstate->pending.sc.c[i] = color[i];
1554}
1555
1556static void
1557pdf_filter_G(fz_context *ctx, pdf_processor *proc, float g)
1558{
1559 float color[1] = { g };
1560 pdf_filter_CS(ctx, proc, "DeviceGray", fz_device_gray(ctx));
1561 pdf_filter_SC_color(ctx, proc, 1, color);
1562}
1563
1564static void
1565pdf_filter_g(fz_context *ctx, pdf_processor *proc, float g)
1566{
1567 float color[1] = { g };
1568 pdf_filter_cs(ctx, proc, "DeviceGray", fz_device_gray(ctx));
1569 pdf_filter_sc_color(ctx, proc, 1, color);
1570}
1571
1572static void
1573pdf_filter_RG(fz_context *ctx, pdf_processor *proc, float r, float g, float b)
1574{
1575 float color[3] = { r, g, b };
1576 pdf_filter_CS(ctx, proc, "DeviceRGB", fz_device_rgb(ctx));
1577 pdf_filter_SC_color(ctx, proc, 3, color);
1578}
1579
1580static void
1581pdf_filter_rg(fz_context *ctx, pdf_processor *proc, float r, float g, float b)
1582{
1583 float color[3] = { r, g, b };
1584 pdf_filter_cs(ctx, proc, "DeviceRGB", fz_device_rgb(ctx));
1585 pdf_filter_sc_color(ctx, proc, 3, color);
1586}
1587
1588static void
1589pdf_filter_K(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k)
1590{
1591 float color[4] = { c, m, y, k };
1592 pdf_filter_CS(ctx, proc, "DeviceCMYK", fz_device_cmyk(ctx));
1593 pdf_filter_SC_color(ctx, proc, 4, color);
1594}
1595
1596static void
1597pdf_filter_k(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k)
1598{
1599 float color[4] = { c, m, y, k };
1600 pdf_filter_cs(ctx, proc, "DeviceCMYK", fz_device_cmyk(ctx));
1601 pdf_filter_sc_color(ctx, proc, 4, color);
1602}
1603
1604/* shadings, images, xobjects */
1605
1606static void
1607pdf_filter_BI(fz_context *ctx, pdf_processor *proc, fz_image *img, const char *colorspace)
1608{
1609 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1610 filter_flush(ctx, p, FLUSH_ALL);
1611 if (p->chain->op_BI)
1612 p->chain->op_BI(ctx, p->chain, img, colorspace);
1613}
1614
1615static void
1616pdf_filter_sh(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade)
1617{
1618 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1619 filter_flush(ctx, p, FLUSH_ALL);
1620 if (p->chain->op_sh)
1621 p->chain->op_sh(ctx, p->chain, name, shade);
1622 copy_resource(ctx, p, PDF_NAME(Shading), name);
1623}
1624
1625static void
1626pdf_filter_Do_image(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image)
1627{
1628 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1629 filter_flush(ctx, p, FLUSH_ALL);
1630 if (p->chain->op_Do_image)
1631 p->chain->op_Do_image(ctx, p->chain, name, image);
1632 copy_resource(ctx, p, PDF_NAME(XObject), name);
1633}
1634
1635static void
1636pdf_filter_Do_form(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *xobj, pdf_obj *page_resources)
1637{
1638 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1639 filter_flush(ctx, p, FLUSH_ALL);
1640 if (p->chain->op_Do_form)
1641 p->chain->op_Do_form(ctx, p->chain, name, xobj, page_resources);
1642 copy_resource(ctx, p, PDF_NAME(XObject), name);
1643}
1644
1645/* marked content */
1646
1647static void
1648pdf_filter_MP(fz_context *ctx, pdf_processor *proc, const char *tag)
1649{
1650 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1651 filter_flush(ctx, p, 0);
1652 if (p->chain->op_MP)
1653 p->chain->op_MP(ctx, p->chain, tag);
1654}
1655
1656static void
1657pdf_filter_DP(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked)
1658{
1659 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1660 filter_flush(ctx, p, 0);
1661 if (p->chain->op_DP)
1662 p->chain->op_DP(ctx, p->chain, tag, raw, cooked);
1663}
1664
1665static void
1666pdf_filter_BMC(fz_context *ctx, pdf_processor *proc, const char *tag)
1667{
1668 /* Create a tag, and push it onto pending_tags. If it gets
1669 * flushed to the stream, it'll be moved from there onto
1670 * current_tags. */
1671 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1672 tag_record *bmc = fz_malloc_struct(ctx, tag_record);
1673
1674 fz_try(ctx)
1675 bmc->tag = fz_strdup(ctx, tag);
1676 fz_catch(ctx)
1677 {
1678 fz_free(ctx, bmc);
1679 fz_rethrow(ctx);
1680 }
1681 bmc->prev = p->pending_tags;
1682 p->pending_tags = bmc;
1683}
1684
1685static void
1686pdf_filter_BDC(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked)
1687{
1688 /* Create a tag, and push it onto pending_tags. If it gets
1689 * flushed to the stream, it'll be moved from there onto
1690 * current_tags. */
1691 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1692 tag_record *bdc = fz_malloc_struct(ctx, tag_record);
1693 pdf_obj *mcid;
1694 pdf_obj *str;
1695
1696 fz_try(ctx)
1697 {
1698 bdc->bdc = 1;
1699 bdc->tag = fz_strdup(ctx, tag);
1700 bdc->raw = pdf_keep_obj(ctx, raw);
1701 bdc->cooked = pdf_keep_obj(ctx, raw);
1702 }
1703 fz_catch(ctx)
1704 {
1705 fz_free(ctx, bdc->tag);
1706 pdf_drop_obj(ctx, bdc->raw);
1707 pdf_drop_obj(ctx, bdc->cooked);
1708 fz_free(ctx, bdc);
1709 fz_rethrow(ctx);
1710 }
1711 bdc->prev = p->pending_tags;
1712 p->pending_tags = bdc;
1713
1714 /* Look to see if this has an mcid object */
1715 mcid = pdf_dict_get(ctx, cooked, PDF_NAME(MCID));
1716 if (!pdf_is_number(ctx, mcid))
1717 return;
1718 bdc->mcid_num = pdf_to_int(ctx, mcid);
1719 bdc->mcid_obj = pdf_keep_obj(ctx, pdf_array_get(ctx, p->structarray, bdc->mcid_num));
1720 str = pdf_dict_get(ctx, bdc->mcid_obj, PDF_NAME(Alt));
1721 if (str)
1722 bdc->alt.utf8 = pdf_new_utf8_from_pdf_string_obj(ctx, str);
1723 str = pdf_dict_get(ctx, bdc->mcid_obj, PDF_NAME(ActualText));
1724 if (str)
1725 bdc->actualtext.utf8 = pdf_new_utf8_from_pdf_string_obj(ctx, str);
1726}
1727
1728/* Bin the topmost (most recent) tag from a tag list. */
1729static void
1730pop_tag(fz_context *ctx, pdf_filter_processor *p, tag_record **tags)
1731{
1732 tag_record *tag = *tags;
1733
1734 if (tag == NULL)
1735 return;
1736 *tags = tag->prev;
1737 fz_free(ctx, tag->tag);
1738 if (tag->bdc)
1739 {
1740 pdf_drop_obj(ctx, tag->raw);
1741 pdf_drop_obj(ctx, tag->cooked);
1742 }
1743 fz_free(ctx, tag->alt.utf8);
1744 fz_free(ctx, tag->actualtext.utf8);
1745 pdf_drop_obj(ctx, tag->mcid_obj);
1746 fz_free(ctx, tag);
1747}
1748
1749static void
1750pdf_filter_EMC(fz_context *ctx, pdf_processor *proc)
1751{
1752 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1753
1754 /* If we have any pending tags, pop one of those. If not,
1755 * pop one of the current ones, and pass the EMC on. */
1756 if (p->pending_tags != NULL)
1757 pop_tag(ctx, p, &p->pending_tags);
1758 else
1759 {
1760 update_mcid(ctx, p);
1761 pop_tag(ctx, p, &p->current_tags);
1762 if (p->chain->op_EMC)
1763 p->chain->op_EMC(ctx, p->chain);
1764 }
1765}
1766
1767/* compatibility */
1768
1769static void
1770pdf_filter_BX(fz_context *ctx, pdf_processor *proc)
1771{
1772 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1773 filter_flush(ctx, p, 0);
1774 if (p->chain->op_BX)
1775 p->chain->op_BX(ctx, p->chain);
1776}
1777
1778static void
1779pdf_filter_EX(fz_context *ctx, pdf_processor *proc)
1780{
1781 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1782 filter_flush(ctx, p, 0);
1783 if (p->chain->op_EX)
1784 p->chain->op_EX(ctx, p->chain);
1785}
1786
1787static void
1788pdf_filter_END(fz_context *ctx, pdf_processor *proc)
1789{
1790 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1791 while (!filter_pop(ctx, p))
1792 {
1793 /* Nothing to do in the loop, all work done above */
1794 }
1795}
1796
1797static void
1798pdf_drop_filter_processor(fz_context *ctx, pdf_processor *proc)
1799{
1800 pdf_filter_processor *p = (pdf_filter_processor*)proc;
1801 filter_gstate *gs = p->gstate;
1802 while (gs)
1803 {
1804 filter_gstate *next = gs->next;
1805 pdf_drop_font(ctx, gs->pending.text.font);
1806 pdf_drop_font(ctx, gs->sent.text.font);
1807 fz_free(ctx, gs);
1808 gs = next;
1809 }
1810 while (p->pending_tags)
1811 pop_tag(ctx, p, &p->pending_tags);
1812 while (p->current_tags)
1813 pop_tag(ctx, p, &p->current_tags);
1814 pdf_drop_obj(ctx, p->structarray);
1815 pdf_drop_document(ctx, p->doc);
1816 fz_free(ctx, p->font_name);
1817}
1818
1819/*
1820 Create a filter processor. This
1821 filters the PDF operators it is fed, and passes them down
1822 (with some changes) to the child filter.
1823
1824 The changes made by the filter are:
1825
1826 * No operations are allowed to change the top level gstate.
1827 Additional q/Q operators are inserted to prevent this.
1828
1829 * Repeated/unnecessary colour operators are removed (so,
1830 for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to
1831 "0.5 g")
1832
1833 The intention of these changes is to provide a simpler,
1834 but equivalent stream, repairing problems with mismatched
1835 operators, maintaining structure (such as BMC, EMC calls)
1836 and leaving the graphics state in an known (default) state
1837 so that subsequent operations (such as synthesising new
1838 operators to be appended to the stream) are easier.
1839
1840 The net graphical effect of the filtered operator stream
1841 should be identical to the incoming operator stream.
1842
1843 chain: The child processor to which the filtered operators
1844 will be fed.
1845
1846 old_res: The incoming resource dictionary.
1847
1848 new_res: An (initially empty) resource dictionary that will
1849 be populated by copying entries from the old dictionary to
1850 the new one as they are used. At the end therefore, this
1851 contains exactly those resource objects actually required.
1852
1853*/
1854pdf_processor *
1855pdf_new_filter_processor(fz_context *ctx, pdf_document *doc, pdf_processor *chain, pdf_obj *old_rdb, pdf_obj *new_rdb)
1856{
1857 return pdf_new_filter_processor_with_text_filter(ctx, doc, -1, chain, old_rdb, new_rdb, NULL, NULL, NULL);
1858}
1859
1860/*
1861 Create a filter
1862 processor with a filter function for text. This filters the
1863 PDF operators it is fed, and passes them down (with some
1864 changes) to the child filter.
1865
1866 See pdf_new_filter_processor for documentation.
1867
1868 text_filter: A function called to assess whether a given
1869 character should be removed or not.
1870
1871 after_text_object: A function to be called after each text object.
1872 This allows the caller to insert some extra content if
1873 required.
1874
1875 text_filter_opaque: Opaque value to be passed to the
1876 text_filter function.
1877*/
1878pdf_processor *
1879pdf_new_filter_processor_with_text_filter(fz_context *ctx, pdf_document *doc, int structparents, pdf_processor *chain, pdf_obj *old_rdb, pdf_obj *new_rdb, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after, void *text_filter_opaque)
1880{
1881 pdf_filter_processor *proc = pdf_new_processor(ctx, sizeof *proc);
1882 {
1883 proc->super.drop_processor = pdf_drop_filter_processor;
1884
1885 /* general graphics state */
1886 proc->super.op_w = pdf_filter_w;
1887 proc->super.op_j = pdf_filter_j;
1888 proc->super.op_J = pdf_filter_J;
1889 proc->super.op_M = pdf_filter_M;
1890 proc->super.op_d = pdf_filter_d;
1891 proc->super.op_ri = pdf_filter_ri;
1892 proc->super.op_i = pdf_filter_i;
1893 proc->super.op_gs_begin = pdf_filter_gs_begin;
1894 proc->super.op_gs_end = pdf_filter_gs_end;
1895
1896 /* transparency graphics state */
1897 proc->super.op_gs_BM = pdf_filter_gs_BM;
1898 proc->super.op_gs_CA = pdf_filter_gs_CA;
1899 proc->super.op_gs_ca = pdf_filter_gs_ca;
1900 proc->super.op_gs_SMask = pdf_filter_gs_SMask;
1901
1902 /* special graphics state */
1903 proc->super.op_q = pdf_filter_q;
1904 proc->super.op_Q = pdf_filter_Q;
1905 proc->super.op_cm = pdf_filter_cm;
1906
1907 /* path construction */
1908 proc->super.op_m = pdf_filter_m;
1909 proc->super.op_l = pdf_filter_l;
1910 proc->super.op_c = pdf_filter_c;
1911 proc->super.op_v = pdf_filter_v;
1912 proc->super.op_y = pdf_filter_y;
1913 proc->super.op_h = pdf_filter_h;
1914 proc->super.op_re = pdf_filter_re;
1915
1916 /* path painting */
1917 proc->super.op_S = pdf_filter_S;
1918 proc->super.op_s = pdf_filter_s;
1919 proc->super.op_F = pdf_filter_F;
1920 proc->super.op_f = pdf_filter_f;
1921 proc->super.op_fstar = pdf_filter_fstar;
1922 proc->super.op_B = pdf_filter_B;
1923 proc->super.op_Bstar = pdf_filter_Bstar;
1924 proc->super.op_b = pdf_filter_b;
1925 proc->super.op_bstar = pdf_filter_bstar;
1926 proc->super.op_n = pdf_filter_n;
1927
1928 /* clipping paths */
1929 proc->super.op_W = pdf_filter_W;
1930 proc->super.op_Wstar = pdf_filter_Wstar;
1931
1932 /* text objects */
1933 proc->super.op_BT = pdf_filter_BT;
1934 proc->super.op_ET = pdf_filter_ET;
1935
1936 /* text state */
1937 proc->super.op_Tc = pdf_filter_Tc;
1938 proc->super.op_Tw = pdf_filter_Tw;
1939 proc->super.op_Tz = pdf_filter_Tz;
1940 proc->super.op_TL = pdf_filter_TL;
1941 proc->super.op_Tf = pdf_filter_Tf;
1942 proc->super.op_Tr = pdf_filter_Tr;
1943 proc->super.op_Ts = pdf_filter_Ts;
1944
1945 /* text positioning */
1946 proc->super.op_Td = pdf_filter_Td;
1947 proc->super.op_TD = pdf_filter_TD;
1948 proc->super.op_Tm = pdf_filter_Tm;
1949 proc->super.op_Tstar = pdf_filter_Tstar;
1950
1951 /* text showing */
1952 proc->super.op_TJ = pdf_filter_TJ;
1953 proc->super.op_Tj = pdf_filter_Tj;
1954 proc->super.op_squote = pdf_filter_squote;
1955 proc->super.op_dquote = pdf_filter_dquote;
1956
1957 /* type 3 fonts */
1958 proc->super.op_d0 = pdf_filter_d0;
1959 proc->super.op_d1 = pdf_filter_d1;
1960
1961 /* color */
1962 proc->super.op_CS = pdf_filter_CS;
1963 proc->super.op_cs = pdf_filter_cs;
1964 proc->super.op_SC_color = pdf_filter_SC_color;
1965 proc->super.op_sc_color = pdf_filter_sc_color;
1966 proc->super.op_SC_pattern = pdf_filter_SC_pattern;
1967 proc->super.op_sc_pattern = pdf_filter_sc_pattern;
1968 proc->super.op_SC_shade = pdf_filter_SC_shade;
1969 proc->super.op_sc_shade = pdf_filter_sc_shade;
1970
1971 proc->super.op_G = pdf_filter_G;
1972 proc->super.op_g = pdf_filter_g;
1973 proc->super.op_RG = pdf_filter_RG;
1974 proc->super.op_rg = pdf_filter_rg;
1975 proc->super.op_K = pdf_filter_K;
1976 proc->super.op_k = pdf_filter_k;
1977
1978 /* shadings, images, xobjects */
1979 proc->super.op_BI = pdf_filter_BI;
1980 proc->super.op_sh = pdf_filter_sh;
1981 proc->super.op_Do_image = pdf_filter_Do_image;
1982 proc->super.op_Do_form = pdf_filter_Do_form;
1983
1984 /* marked content */
1985 proc->super.op_MP = pdf_filter_MP;
1986 proc->super.op_DP = pdf_filter_DP;
1987 proc->super.op_BMC = pdf_filter_BMC;
1988 proc->super.op_BDC = pdf_filter_BDC;
1989 proc->super.op_EMC = pdf_filter_EMC;
1990
1991 /* compatibility */
1992 proc->super.op_BX = pdf_filter_BX;
1993 proc->super.op_EX = pdf_filter_EX;
1994
1995 /* extgstate */
1996 proc->super.op_gs_OP = pdf_filter_gs_OP;
1997 proc->super.op_gs_op = pdf_filter_gs_op;
1998 proc->super.op_gs_OPM = pdf_filter_gs_OPM;
1999 proc->super.op_gs_UseBlackPtComp = pdf_filter_gs_UseBlackPtComp;
2000
2001 proc->super.op_END = pdf_filter_END;
2002 }
2003
2004 proc->doc = pdf_keep_document(ctx, doc);
2005 proc->structparents = structparents;
2006 if (structparents != -1)
2007 proc->structarray = pdf_keep_obj(ctx, pdf_lookup_number(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/StructTreeRoot/ParentTree"), structparents));
2008 proc->chain = chain;
2009 proc->old_rdb = old_rdb;
2010 proc->new_rdb = new_rdb;
2011
2012 proc->text_filter = text_filter;
2013 proc->after_text = after;
2014 proc->opaque = text_filter_opaque;
2015
2016 fz_try(ctx)
2017 {
2018 proc->gstate = fz_malloc_struct(ctx, filter_gstate);
2019 proc->gstate->pending.ctm = fz_identity;
2020 proc->gstate->sent.ctm = fz_identity;
2021
2022 proc->gstate->pending.stroke = proc->gstate->pending.stroke; /* ? */
2023 proc->gstate->sent.stroke = proc->gstate->pending.stroke;
2024 proc->gstate->pending.text.char_space = 0;
2025 proc->gstate->pending.text.word_space = 0;
2026 proc->gstate->pending.text.scale = 1;
2027 proc->gstate->pending.text.leading = 0;
2028 proc->gstate->pending.text.font = NULL;
2029 proc->gstate->pending.text.size = -1;
2030 proc->gstate->pending.text.render = 0;
2031 proc->gstate->pending.text.rise = 0;
2032 proc->gstate->sent.text.char_space = 0;
2033 proc->gstate->sent.text.word_space = 0;
2034 proc->gstate->sent.text.scale = 1;
2035 proc->gstate->sent.text.leading = 0;
2036 proc->gstate->sent.text.font = NULL;
2037 proc->gstate->sent.text.size = -1;
2038 proc->gstate->sent.text.render = 0;
2039 proc->gstate->sent.text.rise = 0;
2040 }
2041 fz_catch(ctx)
2042 {
2043 pdf_drop_processor(ctx, (pdf_processor *) proc);
2044 fz_rethrow(ctx);
2045 }
2046
2047 return (pdf_processor*)proc;
2048}
2049