1 | #include "mupdf/fitz.h" |
2 | #include "mupdf/pdf.h" |
3 | |
4 | #include <string.h> |
5 | |
typedef struct filter_gstate_s filter_gstate;

/*
 * Bit flags passed to filter_flush() selecting which parts of the
 * pending graphics state must be sent downstream.
 */
typedef enum
{
	FLUSH_CTM = 1 << 0,	/* pending transformation matrix */
	FLUSH_COLOR_F = 1 << 1,	/* fill colorspace and color */
	FLUSH_COLOR_S = 1 << 2,	/* stroke colorspace and color */
	FLUSH_TEXT = 1 << 3,	/* text state, plus pending BT/Tm */

	FLUSH_ALL = FLUSH_CTM | FLUSH_COLOR_F | FLUSH_COLOR_S | FLUSH_TEXT,
	FLUSH_STROKE = FLUSH_CTM | FLUSH_COLOR_S,
	FLUSH_FILL = FLUSH_CTM | FLUSH_COLOR_F
} gstate_flush_flags;
19 | |
20 | typedef struct pdf_filter_gstate_s pdf_filter_gstate; |
21 | |
22 | struct pdf_filter_gstate_s |
23 | { |
24 | fz_matrix ctm; |
25 | struct |
26 | { |
27 | char name[256]; |
28 | fz_colorspace *cs; |
29 | } cs, CS; |
30 | struct |
31 | { |
32 | char name[256]; |
33 | pdf_pattern *pat; |
34 | fz_shade *shd; |
35 | int n; |
36 | float c[FZ_MAX_COLORS]; |
37 | } sc, SC; |
38 | struct |
39 | { |
40 | fz_linecap linecap; |
41 | fz_linejoin linejoin; |
42 | float linewidth; |
43 | float miterlimit; |
44 | } stroke; |
45 | pdf_text_state text; |
46 | }; |
47 | |
48 | struct filter_gstate_s |
49 | { |
50 | filter_gstate *next; |
51 | int pushed; |
52 | pdf_filter_gstate pending; |
53 | pdf_filter_gstate sent; |
54 | }; |
55 | |
typedef struct editable_str_s editable_str;

/*
 * A UTF-8 string that walk_string() steps through, and optionally
 * deletes characters from, as text is processed.
 */
struct editable_str_s
{
	/* NUL-terminated UTF-8 buffer; NULL when there is no string. */
	char *utf8;
	/* Set to 1 once any bytes have been removed from utf8. */
	int edited;
	/* Byte offset of the next character to match, or -1 once the
	 * string has stopped matching the processed text. */
	int pos;
};
62 | |
63 | typedef struct tag_record_s |
64 | { |
65 | int bdc; |
66 | char *tag; |
67 | pdf_obj *raw; |
68 | pdf_obj *cooked; |
69 | |
70 | int mcid_num; |
71 | pdf_obj *mcid_obj; |
72 | editable_str alt; |
73 | editable_str actualtext; |
74 | |
75 | struct tag_record_s *prev; |
76 | } tag_record; |
77 | |
78 | typedef struct pdf_filter_processor_s |
79 | { |
80 | pdf_processor super; |
81 | pdf_document *doc; |
82 | int structparents; |
83 | pdf_obj *structarray; |
84 | pdf_processor *chain; |
85 | filter_gstate *gstate; |
86 | pdf_text_object_state tos; |
87 | int Tm_pending; |
88 | int BT_pending; |
89 | float Tm_adjust; |
90 | void *font_name; |
91 | tag_record *current_tags; |
92 | tag_record *pending_tags; |
93 | pdf_text_filter_fn *text_filter; |
94 | pdf_after_text_object_fn *after_text; |
95 | void *opaque; |
96 | pdf_obj *old_rdb, *new_rdb; |
97 | } pdf_filter_processor; |
98 | |
99 | static void |
100 | copy_resource(fz_context *ctx, pdf_filter_processor *p, pdf_obj *key, const char *name) |
101 | { |
102 | pdf_obj *res, *obj; |
103 | |
104 | if (!name || name[0] == 0) |
105 | return; |
106 | |
107 | res = pdf_dict_get(ctx, p->old_rdb, key); |
108 | obj = pdf_dict_gets(ctx, res, name); |
109 | if (obj) |
110 | { |
111 | res = pdf_dict_get(ctx, p->new_rdb, key); |
112 | if (!res) |
113 | { |
114 | res = pdf_new_dict(ctx, pdf_get_bound_document(ctx, p->new_rdb), 1); |
115 | pdf_dict_put_drop(ctx, p->new_rdb, key, res); |
116 | } |
117 | pdf_dict_putp(ctx, res, name, obj); |
118 | } |
119 | } |
120 | |
121 | static void |
122 | filter_push(fz_context *ctx, pdf_filter_processor *p) |
123 | { |
124 | filter_gstate *gstate = p->gstate; |
125 | filter_gstate *new_gstate = fz_malloc_struct(ctx, filter_gstate); |
126 | *new_gstate = *gstate; |
127 | new_gstate->pushed = 0; |
128 | new_gstate->next = gstate; |
129 | p->gstate = new_gstate; |
130 | |
131 | pdf_keep_font(ctx, new_gstate->pending.text.font); |
132 | pdf_keep_font(ctx, new_gstate->sent.text.font); |
133 | } |
134 | |
135 | static int |
136 | filter_pop(fz_context *ctx, pdf_filter_processor *p) |
137 | { |
138 | filter_gstate *gstate = p->gstate; |
139 | filter_gstate *old = gstate->next; |
140 | |
141 | /* We are at the top, so nothing to pop! */ |
142 | if (old == NULL) |
143 | return 1; |
144 | |
145 | if (gstate->pushed) |
146 | if (p->chain->op_Q) |
147 | p->chain->op_Q(ctx, p->chain); |
148 | |
149 | pdf_drop_font(ctx, gstate->pending.text.font); |
150 | pdf_drop_font(ctx, gstate->sent.text.font); |
151 | fz_free(ctx, gstate); |
152 | p->gstate = old; |
153 | return 0; |
154 | } |
155 | |
156 | /* We never allow the topmost gstate to be changed. This allows us |
157 | * to pop back to the zeroth level and be sure that our gstate is |
158 | * sane. This is important for being able to add new operators at |
159 | * the end of pages in a sane way. */ |
160 | static filter_gstate * |
161 | gstate_to_update(fz_context *ctx, pdf_filter_processor *p) |
162 | { |
163 | filter_gstate *gstate = p->gstate; |
164 | |
165 | /* If we're not the top, that's fine */ |
166 | if (gstate->next != NULL) |
167 | return gstate; |
168 | |
169 | /* We are the top. Push a group, so we're not */ |
170 | filter_push(ctx, p); |
171 | gstate = p->gstate; |
172 | gstate->pushed = 1; |
173 | if (p->chain->op_q) |
174 | p->chain->op_q(ctx, p->chain); |
175 | |
176 | return p->gstate; |
177 | } |
178 | |
179 | static void flush_tags(fz_context *ctx, pdf_filter_processor *p, tag_record **tags) |
180 | { |
181 | tag_record *tag = *tags; |
182 | |
183 | if (tag == NULL) |
184 | return; |
185 | if (tag->prev) |
186 | flush_tags(ctx, p, &tag->prev); |
187 | if (tag->bdc) |
188 | { |
189 | if (p->chain->op_BDC) |
190 | p->chain->op_BDC(ctx, p->chain, tag->tag, tag->raw, tag->cooked); |
191 | } |
192 | else if (p->chain->op_BMC) |
193 | p->chain->op_BMC(ctx, p->chain, tag->tag); |
194 | tag->prev = p->current_tags; |
195 | p->current_tags = tag; |
196 | *tags = NULL; |
197 | } |
198 | |
199 | static void filter_flush(fz_context *ctx, pdf_filter_processor *p, int flush) |
200 | { |
201 | filter_gstate *gstate = gstate_to_update(ctx, p); |
202 | int i; |
203 | |
204 | if (gstate->pushed == 0) |
205 | { |
206 | gstate->pushed = 1; |
207 | if (p->chain->op_q) |
208 | p->chain->op_q(ctx, p->chain); |
209 | } |
210 | |
211 | if (flush) |
212 | flush_tags(ctx, p, &p->pending_tags); |
213 | |
214 | if (flush & FLUSH_CTM) |
215 | { |
216 | if (gstate->pending.ctm.a != 1 || gstate->pending.ctm.b != 0 || |
217 | gstate->pending.ctm.c != 0 || gstate->pending.ctm.d != 1 || |
218 | gstate->pending.ctm.e != 0 || gstate->pending.ctm.f != 0) |
219 | { |
220 | fz_matrix current = gstate->sent.ctm; |
221 | |
222 | if (p->chain->op_cm) |
223 | p->chain->op_cm(ctx, p->chain, |
224 | gstate->pending.ctm.a, |
225 | gstate->pending.ctm.b, |
226 | gstate->pending.ctm.c, |
227 | gstate->pending.ctm.d, |
228 | gstate->pending.ctm.e, |
229 | gstate->pending.ctm.f); |
230 | |
231 | gstate->sent.ctm = fz_concat(current, gstate->pending.ctm); |
232 | gstate->pending.ctm.a = 1; |
233 | gstate->pending.ctm.b = 0; |
234 | gstate->pending.ctm.c = 0; |
235 | gstate->pending.ctm.d = 1; |
236 | gstate->pending.ctm.e = 0; |
237 | gstate->pending.ctm.f = 0; |
238 | } |
239 | } |
240 | |
241 | if (flush & FLUSH_COLOR_F) |
242 | { |
243 | if (gstate->pending.cs.cs == fz_device_gray(ctx) && !gstate->pending.sc.pat && !gstate->pending.sc.shd && gstate->pending.sc.n == 1 && |
244 | (gstate->sent.cs.cs != fz_device_gray(ctx) || gstate->sent.sc.pat || gstate->sent.sc.shd || gstate->sent.sc.n != 1 || gstate->pending.sc.c[0] != gstate->sent.sc.c[0])) |
245 | { |
246 | if (p->chain->op_g) |
247 | p->chain->op_g(ctx, p->chain, gstate->pending.sc.c[0]); |
248 | goto done_sc; |
249 | } |
250 | if (gstate->pending.cs.cs == fz_device_rgb(ctx) && !gstate->pending.sc.pat && !gstate->pending.sc.shd && gstate->pending.sc.n == 3 && |
251 | (gstate->sent.cs.cs != fz_device_rgb(ctx) || gstate->sent.sc.pat || gstate->sent.sc.shd || gstate->sent.sc.n != 3 || gstate->pending.sc.c[0] != gstate->sent.sc.c[0] || |
252 | gstate->pending.sc.c[1] != gstate->sent.sc.c[1] || gstate->pending.sc.c[1] != gstate->sent.sc.c[1])) |
253 | { |
254 | if (p->chain->op_rg) |
255 | p->chain->op_rg(ctx, p->chain, gstate->pending.sc.c[0], gstate->pending.sc.c[1], gstate->pending.sc.c[2]); |
256 | goto done_sc; |
257 | } |
258 | if (gstate->pending.cs.cs == fz_device_cmyk(ctx) && !gstate->pending.sc.pat && !gstate->pending.sc.shd && gstate->pending.sc.n == 4 && |
259 | (gstate->sent.cs.cs != fz_device_cmyk(ctx) || gstate->sent.sc.pat || gstate->sent.sc.shd || gstate->pending.sc.n != 4 || gstate->pending.sc.c[0] != gstate->sent.sc.c[0] || |
260 | gstate->pending.sc.c[1] != gstate->sent.sc.c[1] || gstate->pending.sc.c[2] != gstate->sent.sc.c[2] || gstate->pending.sc.c[3] != gstate->sent.sc.c[3])) |
261 | { |
262 | if (p->chain->op_k) |
263 | p->chain->op_k(ctx, p->chain, gstate->pending.sc.c[0], gstate->pending.sc.c[1], gstate->pending.sc.c[2], gstate->pending.sc.c[3]); |
264 | goto done_sc; |
265 | } |
266 | |
267 | if (strcmp(gstate->pending.cs.name, gstate->sent.cs.name)) |
268 | { |
269 | if (p->chain->op_cs) |
270 | p->chain->op_cs(ctx, p->chain, gstate->pending.cs.name, gstate->pending.cs.cs); |
271 | } |
272 | |
273 | /* pattern or shading */ |
274 | if (gstate->pending.sc.name[0]) |
275 | { |
276 | int emit = 0; |
277 | if (strcmp(gstate->pending.sc.name, gstate->sent.sc.name)) |
278 | emit = 1; |
279 | if (gstate->pending.sc.n != gstate->sent.sc.n) |
280 | emit = 1; |
281 | else |
282 | for (i = 0; i < gstate->pending.sc.n; ++i) |
283 | if (gstate->pending.sc.c[i] != gstate->sent.sc.c[i]) |
284 | emit = 1; |
285 | if (emit) |
286 | { |
287 | if (gstate->pending.sc.pat) |
288 | if (p->chain->op_sc_pattern) |
289 | p->chain->op_sc_pattern(ctx, p->chain, gstate->pending.sc.name, gstate->pending.sc.pat, gstate->pending.sc.n, gstate->pending.sc.c); |
290 | if (gstate->pending.sc.shd) |
291 | if (p->chain->op_sc_shade) |
292 | p->chain->op_sc_shade(ctx, p->chain, gstate->pending.sc.name, gstate->pending.sc.shd); |
293 | } |
294 | } |
295 | |
296 | /* plain color */ |
297 | else |
298 | { |
299 | int emit = 0; |
300 | if (gstate->pending.sc.n != gstate->sent.sc.n) |
301 | emit = 1; |
302 | else |
303 | for (i = 0; i < gstate->pending.sc.n; ++i) |
304 | if (gstate->pending.sc.c[i] != gstate->sent.sc.c[i]) |
305 | emit = 1; |
306 | if (emit) |
307 | { |
308 | if (p->chain->op_sc_color) |
309 | p->chain->op_sc_color(ctx, p->chain, gstate->pending.sc.n, gstate->pending.sc.c); |
310 | } |
311 | } |
312 | |
313 | done_sc: |
314 | gstate->sent.cs = gstate->pending.cs; |
315 | gstate->sent.sc = gstate->pending.sc; |
316 | } |
317 | |
318 | if (flush & FLUSH_COLOR_S) |
319 | { |
320 | if (gstate->pending.CS.cs == fz_device_gray(ctx) && !gstate->pending.SC.pat && !gstate->pending.SC.shd && gstate->pending.SC.n == 1 && |
321 | (gstate->sent.CS.cs != fz_device_gray(ctx) || gstate->sent.SC.pat || gstate->sent.SC.shd || gstate->sent.SC.n != 0 || gstate->pending.SC.c[0] != gstate->sent.SC.c[0])) |
322 | { |
323 | if (p->chain->op_G) |
324 | p->chain->op_G(ctx, p->chain, gstate->pending.SC.c[0]); |
325 | goto done_SC; |
326 | } |
327 | if (gstate->pending.CS.cs == fz_device_rgb(ctx) && !gstate->pending.SC.pat && !gstate->pending.SC.shd && gstate->pending.SC.n == 3 && |
328 | (gstate->sent.CS.cs != fz_device_rgb(ctx) || gstate->sent.SC.pat || gstate->sent.SC.shd || gstate->sent.SC.n != 3 || gstate->pending.SC.c[0] != gstate->sent.SC.c[0] || |
329 | gstate->pending.SC.c[1] != gstate->sent.SC.c[1] || gstate->pending.SC.c[1] != gstate->sent.SC.c[1])) |
330 | { |
331 | if (p->chain->op_RG) |
332 | p->chain->op_RG(ctx, p->chain, gstate->pending.SC.c[0], gstate->pending.SC.c[1], gstate->pending.SC.c[2]); |
333 | goto done_SC; |
334 | } |
335 | if (gstate->pending.CS.cs == fz_device_cmyk(ctx) && !gstate->pending.SC.pat && !gstate->pending.SC.shd && gstate->pending.SC.n == 4 && |
336 | (gstate->sent.CS.cs != fz_device_cmyk(ctx) || gstate->sent.SC.pat || gstate->sent.SC.shd || gstate->pending.SC.n != 4 || gstate->pending.SC.c[0] != gstate->sent.SC.c[0] || |
337 | gstate->pending.SC.c[1] != gstate->sent.SC.c[1] || gstate->pending.SC.c[2] != gstate->sent.SC.c[2] || gstate->pending.SC.c[3] != gstate->sent.SC.c[3])) |
338 | { |
339 | if (p->chain->op_K) |
340 | p->chain->op_K(ctx, p->chain, gstate->pending.SC.c[0], gstate->pending.SC.c[1], gstate->pending.SC.c[2], gstate->pending.SC.c[3]); |
341 | goto done_SC; |
342 | } |
343 | |
344 | if (strcmp(gstate->pending.CS.name, gstate->sent.CS.name)) |
345 | { |
346 | if (p->chain->op_CS) |
347 | p->chain->op_CS(ctx, p->chain, gstate->pending.CS.name, gstate->pending.CS.cs); |
348 | } |
349 | |
350 | /* pattern or shading */ |
351 | if (gstate->pending.SC.name[0]) |
352 | { |
353 | int emit = 0; |
354 | if (strcmp(gstate->pending.SC.name, gstate->sent.SC.name)) |
355 | emit = 1; |
356 | if (gstate->pending.SC.n != gstate->sent.SC.n) |
357 | emit = 1; |
358 | else |
359 | for (i = 0; i < gstate->pending.SC.n; ++i) |
360 | if (gstate->pending.SC.c[i] != gstate->sent.SC.c[i]) |
361 | emit = 1; |
362 | if (emit) |
363 | { |
364 | if (gstate->pending.SC.pat) |
365 | if (p->chain->op_SC_pattern) |
366 | p->chain->op_SC_pattern(ctx, p->chain, gstate->pending.SC.name, gstate->pending.SC.pat, gstate->pending.SC.n, gstate->pending.SC.c); |
367 | if (gstate->pending.SC.shd) |
368 | if (p->chain->op_SC_shade) |
369 | p->chain->op_SC_shade(ctx, p->chain, gstate->pending.SC.name, gstate->pending.SC.shd); |
370 | } |
371 | } |
372 | |
373 | /* plain color */ |
374 | else |
375 | { |
376 | int emit = 0; |
377 | if (gstate->pending.SC.n != gstate->sent.SC.n) |
378 | emit = 1; |
379 | else |
380 | for (i = 0; i < gstate->pending.SC.n; ++i) |
381 | if (gstate->pending.SC.c[i] != gstate->sent.SC.c[i]) |
382 | emit = 1; |
383 | if (emit) |
384 | { |
385 | if (p->chain->op_SC_color) |
386 | p->chain->op_SC_color(ctx, p->chain, gstate->pending.SC.n, gstate->pending.SC.c); |
387 | } |
388 | } |
389 | |
390 | done_SC: |
391 | gstate->sent.CS = gstate->pending.CS; |
392 | gstate->sent.SC = gstate->pending.SC; |
393 | } |
394 | |
395 | if (flush & FLUSH_STROKE) |
396 | { |
397 | if (gstate->pending.stroke.linecap != gstate->sent.stroke.linecap) |
398 | { |
399 | if (p->chain->op_J) |
400 | p->chain->op_J(ctx, p->chain, gstate->pending.stroke.linecap); |
401 | } |
402 | if (gstate->pending.stroke.linejoin != gstate->sent.stroke.linejoin) |
403 | { |
404 | if (p->chain->op_j) |
405 | p->chain->op_j(ctx, p->chain, gstate->pending.stroke.linejoin); |
406 | } |
407 | if (gstate->pending.stroke.linewidth != gstate->sent.stroke.linewidth) |
408 | { |
409 | if (p->chain->op_w) |
410 | p->chain->op_w(ctx, p->chain, gstate->pending.stroke.linewidth); |
411 | } |
412 | if (gstate->pending.stroke.miterlimit != gstate->sent.stroke.miterlimit) |
413 | { |
414 | if (p->chain->op_M) |
415 | p->chain->op_M(ctx, p->chain, gstate->pending.stroke.miterlimit); |
416 | } |
417 | gstate->sent.stroke = gstate->pending.stroke; |
418 | } |
419 | |
420 | if (flush & FLUSH_TEXT) |
421 | { |
422 | if (p->BT_pending) |
423 | { |
424 | if (p->chain->op_BT) |
425 | p->chain->op_BT(ctx, p->chain); |
426 | p->BT_pending = 0; |
427 | } |
428 | if (gstate->pending.text.char_space != gstate->sent.text.char_space) |
429 | { |
430 | if (p->chain->op_Tc) |
431 | p->chain->op_Tc(ctx, p->chain, gstate->pending.text.char_space); |
432 | } |
433 | if (gstate->pending.text.word_space != gstate->sent.text.word_space) |
434 | { |
435 | if (p->chain->op_Tw) |
436 | p->chain->op_Tw(ctx, p->chain, gstate->pending.text.word_space); |
437 | } |
438 | if (gstate->pending.text.scale != gstate->sent.text.scale) |
439 | { |
440 | /* The value of scale in the gstate is divided by 100 from what is written in the file */ |
441 | if (p->chain->op_Tz) |
442 | p->chain->op_Tz(ctx, p->chain, gstate->pending.text.scale*100); |
443 | } |
444 | if (gstate->pending.text.leading != gstate->sent.text.leading) |
445 | { |
446 | if (p->chain->op_TL) |
447 | p->chain->op_TL(ctx, p->chain, gstate->pending.text.leading); |
448 | } |
449 | if (gstate->pending.text.font != gstate->sent.text.font || |
450 | gstate->pending.text.size != gstate->sent.text.size) |
451 | { |
452 | if (p->chain->op_Tf) |
453 | p->chain->op_Tf(ctx, p->chain, p->font_name, gstate->pending.text.font, gstate->pending.text.size); |
454 | } |
455 | if (gstate->pending.text.render != gstate->sent.text.render) |
456 | { |
457 | if (p->chain->op_Tr) |
458 | p->chain->op_Tr(ctx, p->chain, gstate->pending.text.render); |
459 | } |
460 | if (gstate->pending.text.rise != gstate->sent.text.rise) |
461 | { |
462 | if (p->chain->op_Ts) |
463 | p->chain->op_Ts(ctx, p->chain, gstate->pending.text.rise); |
464 | } |
465 | pdf_drop_font(ctx, gstate->sent.text.font); |
466 | gstate->sent.text = gstate->pending.text; |
467 | gstate->sent.text.font = pdf_keep_font(ctx, gstate->pending.text.font); |
468 | if (p->Tm_pending != 0) |
469 | { |
470 | if (p->chain->op_Tm) |
471 | p->chain->op_Tm(ctx, p->chain, p->tos.tlm.a, p->tos.tlm.b, p->tos.tlm.c, p->tos.tlm.d, p->tos.tlm.e, p->tos.tlm.f); |
472 | p->Tm_pending = 0; |
473 | } |
474 | } |
475 | } |
476 | |
477 | static int |
478 | filter_show_char(fz_context *ctx, pdf_filter_processor *p, int cid, int *unicode) |
479 | { |
480 | filter_gstate *gstate = p->gstate; |
481 | pdf_font_desc *fontdesc = gstate->pending.text.font; |
482 | fz_matrix trm; |
483 | int ucsbuf[8]; |
484 | int ucslen; |
485 | int remove = 0; |
486 | |
487 | (void)pdf_tos_make_trm(ctx, &p->tos, &gstate->pending.text, fontdesc, cid, &trm); |
488 | |
489 | ucslen = 0; |
490 | if (fontdesc->to_unicode) |
491 | ucslen = pdf_lookup_cmap_full(fontdesc->to_unicode, cid, ucsbuf); |
492 | if (ucslen == 0 && (size_t)cid < fontdesc->cid_to_ucs_len) |
493 | { |
494 | ucsbuf[0] = fontdesc->cid_to_ucs[cid]; |
495 | ucslen = 1; |
496 | } |
497 | if (ucslen == 0 || (ucslen == 1 && ucsbuf[0] == 0)) |
498 | { |
499 | ucsbuf[0] = FZ_REPLACEMENT_CHARACTER; |
500 | ucslen = 1; |
501 | } |
502 | *unicode = ucsbuf[0]; |
503 | |
504 | if (p->text_filter) |
505 | { |
506 | fz_matrix ctm = fz_concat(gstate->sent.ctm, gstate->pending.ctm); |
507 | fz_rect bbox; |
508 | |
509 | if (fontdesc->wmode == 0) |
510 | { |
511 | bbox.x0 = 0; |
512 | bbox.y0 = fz_font_descender(ctx, fontdesc->font); |
513 | bbox.x1 = fz_advance_glyph(ctx, fontdesc->font, p->tos.gid, 0); |
514 | bbox.y1 = fz_font_ascender(ctx, fontdesc->font); |
515 | } |
516 | else |
517 | { |
518 | fz_rect font_bbox = fz_font_bbox(ctx, fontdesc->font); |
519 | bbox.x0 = font_bbox.x0; |
520 | bbox.x1 = font_bbox.x1; |
521 | bbox.y0 = 0; |
522 | bbox.y1 = fz_advance_glyph(ctx, fontdesc->font, p->tos.gid, 1); |
523 | } |
524 | |
525 | remove = p->text_filter(ctx, p->opaque, ucsbuf, ucslen, trm, ctm, bbox); |
526 | } |
527 | |
528 | pdf_tos_move_after_char(ctx, &p->tos); |
529 | |
530 | return remove; |
531 | } |
532 | |
533 | static void |
534 | filter_show_space(fz_context *ctx, pdf_filter_processor *p, float tadj) |
535 | { |
536 | filter_gstate *gstate = p->gstate; |
537 | pdf_font_desc *fontdesc = gstate->pending.text.font; |
538 | |
539 | if (fontdesc->wmode == 0) |
540 | p->tos.tm = fz_pre_translate(p->tos.tm, tadj * gstate->pending.text.scale, 0); |
541 | else |
542 | p->tos.tm = fz_pre_translate(p->tos.tm, 0, tadj); |
543 | } |
544 | |
545 | static void |
546 | walk_string(fz_context *ctx, int uni, int remove, editable_str *str) |
547 | { |
548 | int rune; |
549 | |
550 | if (str->utf8 == NULL || str->pos == -1) |
551 | return; |
552 | |
553 | do |
554 | { |
555 | char *s = &str->utf8[str->pos]; |
556 | size_t len; |
557 | int n = fz_chartorune(&rune, s); |
558 | if (rune == uni) |
559 | { |
560 | /* Match. Skip over that one. */ |
561 | str->pos += n; |
562 | } |
563 | else if (uni == 32) { |
564 | /* We don't care if we're given whitespace |
565 | * and it doesn't match the string. Don't |
566 | * skip forward. Nothing to remove. */ |
567 | break; |
568 | } |
569 | else if (rune == 32) { |
570 | /* The string has a whitespace, and we |
571 | * don't match it; that's forgivable as |
572 | * PDF often misses out spaces. Remove this |
573 | * if we are removing stuff. */ |
574 | } |
575 | else |
576 | { |
577 | /* Mismatch. No point in tracking through any more. */ |
578 | str->pos = -1; |
579 | break; |
580 | } |
581 | if (remove) |
582 | { |
583 | len = strlen(s+n); |
584 | memmove(s, s+n, len+1); |
585 | str->edited = 1; |
586 | } |
587 | } |
588 | while (rune != uni); |
589 | } |
590 | |
591 | /* For a given character we've processed (removed or not) |
592 | * consider it in the tag_record. Try and step over it in |
593 | * the Alt or ActualText strings, removing if possible. |
594 | * If we can't marry up the Alt/ActualText strings with |
595 | * what we're meeting, just take the easy route and delete |
596 | * the whole lot. */ |
597 | static void |
598 | mcid_char_imp(fz_context *ctx, pdf_filter_processor *p, tag_record *tr, int uni, int remove) |
599 | { |
600 | if (tr->mcid_obj == NULL) |
601 | /* No object, or already deleted */ |
602 | return; |
603 | |
604 | if (remove) |
605 | { |
606 | /* Remove the expanded abbreviation, if there is one. */ |
607 | pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(E)); |
608 | /* Remove the structure title, if there is one. */ |
609 | pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(T)); |
610 | } |
611 | |
612 | /* Edit the Alt string */ |
613 | walk_string(ctx, uni, remove, &tr->alt); |
614 | /* Edit the ActualText string */ |
615 | walk_string(ctx, uni, remove, &tr->actualtext); |
616 | |
617 | /* If we're removing a character, and either of the strings |
618 | * haven't matched up to what we were expecting, then just |
619 | * delete the whole string. */ |
620 | if (remove) |
621 | remove = (tr->alt.pos == -1 || tr->actualtext.pos == -1); |
622 | else if (tr->alt.pos >= 0 || tr->actualtext.pos >= 0) |
623 | { |
624 | /* The strings are making sense so far */ |
625 | remove = 0; |
626 | } |
627 | |
628 | if (remove) |
629 | { |
630 | /* Anything else we have to err on the side of caution and |
631 | * delete everything that might leak info. */ |
632 | if (tr->actualtext.pos == -1) |
633 | pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(ActualText)); |
634 | if (tr->alt.pos == -1) |
635 | pdf_dict_del(ctx, tr->mcid_obj, PDF_NAME(Alt)); |
636 | pdf_drop_obj(ctx, tr->mcid_obj); |
637 | tr->mcid_obj = NULL; |
638 | fz_free(ctx, tr->alt.utf8); |
639 | tr->alt.utf8 = NULL; |
640 | fz_free(ctx, tr->actualtext.utf8); |
641 | tr->actualtext.utf8 = NULL; |
642 | } |
643 | } |
644 | |
645 | /* For every character that is processed, consider that character in |
646 | * every pending/current MCID. */ |
647 | static void |
648 | mcid_char(fz_context *ctx, pdf_filter_processor *p, int uni, int remove) |
649 | { |
650 | tag_record *tr = p->pending_tags; |
651 | |
652 | for (tr = p->pending_tags; tr != NULL; tr = tr->prev) |
653 | mcid_char_imp(ctx, p, tr, uni, remove); |
654 | for (tr = p->current_tags; tr != NULL; tr = tr->prev) |
655 | mcid_char_imp(ctx, p, tr, uni, remove); |
656 | } |
657 | |
658 | static void |
659 | update_mcid(fz_context *ctx, pdf_filter_processor *p) |
660 | { |
661 | tag_record *tag = p->current_tags; |
662 | |
663 | if (tag == NULL) |
664 | return; |
665 | if (tag->alt.edited) |
666 | pdf_dict_put_text_string(ctx, tag->mcid_obj, PDF_NAME(Alt), tag->alt.utf8); |
667 | if (tag->actualtext.edited) |
668 | pdf_dict_put_text_string(ctx, tag->mcid_obj, PDF_NAME(Alt), tag->actualtext.utf8); |
669 | } |
670 | |
671 | /* Process a string (from buf, of length len), from position *pos onwards. |
672 | * Stop when we hit the end, or when we find a character to remove. The |
673 | * caller will restart us again later. On exit, *pos = the point we got to, |
674 | * *inc = The number of bytes to skip to step over the next character (unless |
675 | * we hit the end). |
676 | */ |
677 | static void |
678 | filter_string_to_segment(fz_context *ctx, pdf_filter_processor *p, unsigned char *buf, int len, int *pos, int *inc, int *removed_space) |
679 | { |
680 | filter_gstate *gstate = p->gstate; |
681 | pdf_font_desc *fontdesc = gstate->pending.text.font; |
682 | unsigned char *end = buf + len; |
683 | unsigned int cpt; |
684 | int cid; |
685 | int remove; |
686 | |
687 | buf += *pos; |
688 | |
689 | *removed_space = 0; |
690 | |
691 | while (buf < end) |
692 | { |
693 | int uni; |
694 | *inc = pdf_decode_cmap(fontdesc->encoding, buf, end, &cpt); |
695 | buf += *inc; |
696 | |
697 | cid = pdf_lookup_cmap(fontdesc->encoding, cpt); |
698 | if (cid < 0) |
699 | { |
700 | uni = FZ_REPLACEMENT_CHARACTER; |
701 | fz_warn(ctx, "cannot encode character" ); |
702 | } |
703 | else |
704 | remove = filter_show_char(ctx, p, cid, &uni); |
705 | if (cpt == 32 && *inc == 1) |
706 | filter_show_space(ctx, p, gstate->pending.text.word_space); |
707 | /* For every character we process (whether we remove it |
708 | * or not), we consider any MCIDs that are in effect. */ |
709 | mcid_char(ctx, p, uni, remove); |
710 | if (remove) |
711 | { |
712 | *removed_space = (cpt == 32 && *inc == 1); |
713 | return; |
714 | } |
715 | *pos += *inc; |
716 | } |
717 | } |
718 | |
719 | static void |
720 | adjust_text(fz_context *ctx, pdf_filter_processor *p, float x, float y) |
721 | { |
722 | float skip_dist = p->tos.fontdesc->wmode == 1 ? -y : -x; |
723 | skip_dist = skip_dist / p->gstate->pending.text.size; |
724 | p->Tm_adjust += skip_dist; |
725 | } |
726 | |
727 | static void |
728 | adjust_for_removed_space(fz_context *ctx, pdf_filter_processor *p) |
729 | { |
730 | filter_gstate *gstate = p->gstate; |
731 | float adj = gstate->pending.text.word_space; |
732 | adjust_text(ctx, p, adj * gstate->pending.text.scale, adj); |
733 | } |
734 | |
735 | static void |
736 | flush_adjustment(fz_context *ctx, pdf_filter_processor *p) |
737 | { |
738 | pdf_obj *arr; |
739 | |
740 | if (p->Tm_adjust == 0) |
741 | return; |
742 | |
743 | filter_flush(ctx, p, FLUSH_ALL); |
744 | arr = pdf_new_array(ctx, p->doc, 1); |
745 | fz_try(ctx) |
746 | { |
747 | pdf_array_push_real(ctx, arr, p->Tm_adjust * 1000); |
748 | if (p->chain->op_TJ) |
749 | p->chain->op_TJ(ctx, p->chain, arr); |
750 | } |
751 | fz_always(ctx) |
752 | pdf_drop_obj(ctx, arr); |
753 | fz_catch(ctx) |
754 | fz_rethrow(ctx); |
755 | |
756 | p->Tm_adjust = 0; |
757 | } |
758 | |
759 | static void |
760 | push_adjustment_to_array(fz_context *ctx, pdf_filter_processor *p, pdf_obj *arr) |
761 | { |
762 | if (p->Tm_adjust == 0) |
763 | return; |
764 | pdf_array_push_real(ctx, arr, p->Tm_adjust * 1000); |
765 | p->Tm_adjust = 0; |
766 | } |
767 | |
768 | static void |
769 | filter_show_string(fz_context *ctx, pdf_filter_processor *p, unsigned char *buf, int len) |
770 | { |
771 | filter_gstate *gstate = p->gstate; |
772 | pdf_font_desc *fontdesc = gstate->pending.text.font; |
773 | int i, inc, removed_space; |
774 | |
775 | if (!fontdesc) |
776 | return; |
777 | |
778 | i = 0; |
779 | while (i < len) |
780 | { |
781 | int start = i; |
782 | filter_string_to_segment(ctx, p, buf, len, &i, &inc, &removed_space); |
783 | if (start != i) |
784 | { |
785 | /* We have *some* chars to send at least */ |
786 | filter_flush(ctx, p, FLUSH_ALL); |
787 | flush_adjustment(ctx, p); |
788 | if (p->chain->op_Tj) |
789 | p->chain->op_Tj(ctx, p->chain, (char *)buf+start, i-start); |
790 | } |
791 | if (i != len) |
792 | { |
793 | adjust_text(ctx, p, p->tos.char_tx, p->tos.char_ty); |
794 | i += inc; |
795 | } |
796 | if (removed_space) |
797 | adjust_for_removed_space(ctx, p); |
798 | } |
799 | } |
800 | |
801 | static void |
802 | filter_show_text(fz_context *ctx, pdf_filter_processor *p, pdf_obj *text) |
803 | { |
804 | filter_gstate *gstate = p->gstate; |
805 | pdf_font_desc *fontdesc = gstate->pending.text.font; |
806 | int i, n; |
807 | pdf_obj *new_arr; |
808 | pdf_document *doc; |
809 | |
810 | if (!fontdesc) |
811 | return; |
812 | |
813 | if (pdf_is_string(ctx, text)) |
814 | { |
815 | filter_show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, text), pdf_to_str_len(ctx, text)); |
816 | return; |
817 | } |
818 | if (!pdf_is_array(ctx, text)) |
819 | return; |
820 | |
821 | p->tos.fontdesc = fontdesc; |
822 | n = pdf_array_len(ctx, text); |
823 | doc = pdf_get_bound_document(ctx, text); |
824 | new_arr = pdf_new_array(ctx, doc, 4); |
825 | fz_try(ctx) |
826 | { |
827 | for (i = 0; i < n; i++) |
828 | { |
829 | pdf_obj *item = pdf_array_get(ctx, text, i); |
830 | if (pdf_is_string(ctx, item)) |
831 | { |
832 | unsigned char *buf = (unsigned char *)pdf_to_str_buf(ctx, item); |
833 | int len = pdf_to_str_len(ctx, item); |
834 | int j = 0; |
835 | int removed_space; |
836 | while (j < len) |
837 | { |
838 | int inc; |
839 | int start = j; |
840 | filter_string_to_segment(ctx, p, buf, len, &j, &inc, &removed_space); |
841 | if (start != j) |
842 | { |
843 | /* We have *some* chars to send at least */ |
844 | filter_flush(ctx, p, FLUSH_ALL); |
845 | push_adjustment_to_array(ctx, p, new_arr); |
846 | pdf_array_push_string(ctx, new_arr, (char *)buf+start, j-start); |
847 | } |
848 | if (j != len) |
849 | { |
850 | adjust_text(ctx, p, p->tos.char_tx, p->tos.char_ty); |
851 | j += inc; |
852 | } |
853 | if (removed_space) |
854 | adjust_for_removed_space(ctx, p); |
855 | } |
856 | } |
857 | else |
858 | { |
859 | float tadj = - pdf_to_real(ctx, item) * gstate->pending.text.size * 0.001f; |
860 | if (fontdesc->wmode == 0) |
861 | { |
862 | adjust_text(ctx, p, tadj, 0); |
863 | p->tos.tm = fz_pre_translate(p->tos.tm, tadj * p->gstate->pending.text.scale, 0); |
864 | } |
865 | else |
866 | { |
867 | adjust_text(ctx, p, 0, tadj); |
868 | p->tos.tm = fz_pre_translate(p->tos.tm, 0, tadj); |
869 | } |
870 | } |
871 | } |
872 | if (p->chain->op_TJ && pdf_array_len(ctx, new_arr)) |
873 | p->chain->op_TJ(ctx, p->chain, new_arr); |
874 | } |
875 | fz_always(ctx) |
876 | pdf_drop_obj(ctx, new_arr); |
877 | fz_catch(ctx) |
878 | fz_rethrow(ctx); |
879 | } |
880 | |
881 | /* general graphics state */ |
882 | |
883 | static void |
884 | pdf_filter_w(fz_context *ctx, pdf_processor *proc, float linewidth) |
885 | { |
886 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
887 | filter_gstate *gstate = gstate_to_update(ctx, p); |
888 | gstate->pending.stroke.linewidth = linewidth; |
889 | } |
890 | |
891 | static void |
892 | pdf_filter_j(fz_context *ctx, pdf_processor *proc, int linejoin) |
893 | { |
894 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
895 | filter_gstate *gstate = gstate_to_update(ctx, p); |
896 | gstate->pending.stroke.linejoin = linejoin; |
897 | } |
898 | |
899 | static void |
900 | pdf_filter_J(fz_context *ctx, pdf_processor *proc, int linecap) |
901 | { |
902 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
903 | filter_gstate *gstate = gstate_to_update(ctx, p); |
904 | gstate->pending.stroke.linecap = linecap; |
905 | } |
906 | |
907 | static void |
908 | pdf_filter_M(fz_context *ctx, pdf_processor *proc, float miterlimit) |
909 | { |
910 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
911 | filter_gstate *gstate = gstate_to_update(ctx, p); |
912 | gstate->pending.stroke.miterlimit = miterlimit; |
913 | } |
914 | |
915 | static void |
916 | pdf_filter_d(fz_context *ctx, pdf_processor *proc, pdf_obj *array, float phase) |
917 | { |
918 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
919 | filter_flush(ctx, p, 0); |
920 | if (p->chain->op_d) |
921 | p->chain->op_d(ctx, p->chain, array, phase); |
922 | } |
923 | |
924 | static void |
925 | pdf_filter_ri(fz_context *ctx, pdf_processor *proc, const char *intent) |
926 | { |
927 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
928 | filter_flush(ctx, p, 0); |
929 | if (p->chain->op_ri) |
930 | p->chain->op_ri(ctx, p->chain, intent); |
931 | } |
932 | |
933 | static void |
934 | pdf_filter_gs_OP(fz_context *ctx, pdf_processor *proc, int b) |
935 | { |
936 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
937 | filter_flush(ctx, p, 0); |
938 | if (p->chain->op_gs_OP) |
939 | p->chain->op_gs_OP(ctx, p->chain, b); |
940 | } |
941 | |
942 | static void |
943 | pdf_filter_gs_op(fz_context *ctx, pdf_processor *proc, int b) |
944 | { |
945 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
946 | filter_flush(ctx, p, 0); |
947 | if (p->chain->op_gs_op) |
948 | p->chain->op_gs_op(ctx, p->chain, b); |
949 | } |
950 | |
951 | static void |
952 | pdf_filter_gs_OPM(fz_context *ctx, pdf_processor *proc, int i) |
953 | { |
954 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
955 | filter_flush(ctx, p, 0); |
956 | if (p->chain->op_gs_OPM) |
957 | p->chain->op_gs_OPM(ctx, p->chain, i); |
958 | } |
959 | |
960 | static void |
961 | pdf_filter_gs_UseBlackPtComp(fz_context *ctx, pdf_processor *proc, pdf_obj *name) |
962 | { |
963 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
964 | filter_flush(ctx, p, 0); |
965 | if (p->chain->op_gs_UseBlackPtComp) |
966 | p->chain->op_gs_UseBlackPtComp(ctx, p->chain, name); |
967 | } |
968 | |
969 | static void |
970 | pdf_filter_i(fz_context *ctx, pdf_processor *proc, float flatness) |
971 | { |
972 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
973 | filter_flush(ctx, p, 0); |
974 | if (p->chain->op_i) |
975 | p->chain->op_i(ctx, p->chain, flatness); |
976 | } |
977 | |
978 | static void |
979 | pdf_filter_gs_begin(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *extgstate) |
980 | { |
981 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
982 | filter_flush(ctx, p, FLUSH_ALL); |
983 | if (p->chain->op_gs_begin) |
984 | p->chain->op_gs_begin(ctx, p->chain, name, extgstate); |
985 | copy_resource(ctx, p, PDF_NAME(ExtGState), name); |
986 | } |
987 | |
988 | static void |
989 | pdf_filter_gs_BM(fz_context *ctx, pdf_processor *proc, const char *blendmode) |
990 | { |
991 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
992 | if (p->chain->op_gs_BM) |
993 | p->chain->op_gs_BM(ctx, p->chain, blendmode); |
994 | } |
995 | |
996 | static void |
997 | pdf_filter_gs_CA(fz_context *ctx, pdf_processor *proc, float alpha) |
998 | { |
999 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1000 | if (p->chain->op_gs_CA) |
1001 | p->chain->op_gs_CA(ctx, p->chain, alpha); |
1002 | } |
1003 | |
1004 | static void |
1005 | pdf_filter_gs_ca(fz_context *ctx, pdf_processor *proc, float alpha) |
1006 | { |
1007 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1008 | if (p->chain->op_gs_ca) |
1009 | p->chain->op_gs_ca(ctx, p->chain, alpha); |
1010 | } |
1011 | |
1012 | static void |
1013 | pdf_filter_gs_SMask(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, pdf_obj *page_resources, float *bc, int luminosity) |
1014 | { |
1015 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1016 | if (p->chain->op_gs_SMask) |
1017 | p->chain->op_gs_SMask(ctx, p->chain, smask, page_resources, bc, luminosity); |
1018 | } |
1019 | |
1020 | static void |
1021 | pdf_filter_gs_end(fz_context *ctx, pdf_processor *proc) |
1022 | { |
1023 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1024 | if (p->chain->op_gs_end) |
1025 | p->chain->op_gs_end(ctx, p->chain); |
1026 | } |
1027 | |
1028 | /* special graphics state */ |
1029 | |
1030 | static void |
1031 | pdf_filter_q(fz_context *ctx, pdf_processor *proc) |
1032 | { |
1033 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1034 | filter_push(ctx, p); |
1035 | } |
1036 | |
1037 | static void |
1038 | pdf_filter_Q(fz_context *ctx, pdf_processor *proc) |
1039 | { |
1040 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1041 | filter_pop(ctx, p); |
1042 | } |
1043 | |
1044 | static void |
1045 | pdf_filter_cm(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f) |
1046 | { |
1047 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1048 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1049 | fz_matrix ctm; |
1050 | |
1051 | /* If we're being given an identity matrix, don't bother sending it */ |
1052 | if (a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0) |
1053 | return; |
1054 | |
1055 | ctm.a = a; |
1056 | ctm.b = b; |
1057 | ctm.c = c; |
1058 | ctm.d = d; |
1059 | ctm.e = e; |
1060 | ctm.f = f; |
1061 | |
1062 | gstate->pending.ctm = fz_concat(ctm, gstate->pending.ctm); |
1063 | } |
1064 | |
1065 | /* path construction */ |
1066 | |
1067 | static void |
1068 | pdf_filter_m(fz_context *ctx, pdf_processor *proc, float x, float y) |
1069 | { |
1070 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1071 | filter_flush(ctx, p, FLUSH_CTM); |
1072 | if (p->chain->op_m) |
1073 | p->chain->op_m(ctx, p->chain, x, y); |
1074 | } |
1075 | |
1076 | static void |
1077 | pdf_filter_l(fz_context *ctx, pdf_processor *proc, float x, float y) |
1078 | { |
1079 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1080 | filter_flush(ctx, p, FLUSH_CTM); |
1081 | if (p->chain->op_l) |
1082 | p->chain->op_l(ctx, p->chain, x, y); |
1083 | } |
1084 | |
1085 | static void |
1086 | pdf_filter_c(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x2, float y2, float x3, float y3) |
1087 | { |
1088 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1089 | filter_flush(ctx, p, FLUSH_CTM); |
1090 | if (p->chain->op_c) |
1091 | p->chain->op_c(ctx, p->chain, x1, y1, x2, y2, x3, y3); |
1092 | } |
1093 | |
1094 | static void |
1095 | pdf_filter_v(fz_context *ctx, pdf_processor *proc, float x2, float y2, float x3, float y3) |
1096 | { |
1097 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1098 | filter_flush(ctx, p, FLUSH_CTM); |
1099 | if (p->chain->op_v) |
1100 | p->chain->op_v(ctx, p->chain, x2, y2, x3, y3); |
1101 | } |
1102 | |
1103 | static void |
1104 | pdf_filter_y(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x3, float y3) |
1105 | { |
1106 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1107 | filter_flush(ctx, p, FLUSH_CTM); |
1108 | if (p->chain->op_y) |
1109 | p->chain->op_y(ctx, p->chain, x1, y1, x3, y3); |
1110 | } |
1111 | |
1112 | static void |
1113 | pdf_filter_h(fz_context *ctx, pdf_processor *proc) |
1114 | { |
1115 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1116 | filter_flush(ctx, p, FLUSH_CTM); |
1117 | if (p->chain->op_h) |
1118 | p->chain->op_h(ctx, p->chain); |
1119 | } |
1120 | |
1121 | static void |
1122 | pdf_filter_re(fz_context *ctx, pdf_processor *proc, float x, float y, float w, float h) |
1123 | { |
1124 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1125 | filter_flush(ctx, p, FLUSH_CTM); |
1126 | if (p->chain->op_re) |
1127 | p->chain->op_re(ctx, p->chain, x, y, w, h); |
1128 | } |
1129 | |
1130 | /* path painting */ |
1131 | |
1132 | static void |
1133 | pdf_filter_S(fz_context *ctx, pdf_processor *proc) |
1134 | { |
1135 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1136 | filter_flush(ctx, p, FLUSH_STROKE); |
1137 | if (p->chain->op_S) |
1138 | p->chain->op_S(ctx, p->chain); |
1139 | } |
1140 | |
1141 | static void |
1142 | pdf_filter_s(fz_context *ctx, pdf_processor *proc) |
1143 | { |
1144 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1145 | filter_flush(ctx, p, FLUSH_STROKE); |
1146 | if (p->chain->op_s) |
1147 | p->chain->op_s(ctx, p->chain); |
1148 | } |
1149 | |
1150 | static void |
1151 | pdf_filter_F(fz_context *ctx, pdf_processor *proc) |
1152 | { |
1153 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1154 | filter_flush(ctx, p, FLUSH_FILL); |
1155 | if (p->chain->op_F) |
1156 | p->chain->op_F(ctx, p->chain); |
1157 | } |
1158 | |
1159 | static void |
1160 | pdf_filter_f(fz_context *ctx, pdf_processor *proc) |
1161 | { |
1162 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1163 | filter_flush(ctx, p, FLUSH_FILL); |
1164 | if (p->chain->op_f) |
1165 | p->chain->op_f(ctx, p->chain); |
1166 | } |
1167 | |
1168 | static void |
1169 | pdf_filter_fstar(fz_context *ctx, pdf_processor *proc) |
1170 | { |
1171 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1172 | filter_flush(ctx, p, FLUSH_FILL); |
1173 | if (p->chain->op_fstar) |
1174 | p->chain->op_fstar(ctx, p->chain); |
1175 | } |
1176 | |
1177 | static void |
1178 | pdf_filter_B(fz_context *ctx, pdf_processor *proc) |
1179 | { |
1180 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1181 | filter_flush(ctx, p, FLUSH_ALL); |
1182 | if (p->chain->op_B) |
1183 | p->chain->op_B(ctx, p->chain); |
1184 | } |
1185 | |
1186 | static void |
1187 | pdf_filter_Bstar(fz_context *ctx, pdf_processor *proc) |
1188 | { |
1189 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1190 | filter_flush(ctx, p, FLUSH_ALL); |
1191 | if (p->chain->op_Bstar) |
1192 | p->chain->op_Bstar(ctx, p->chain); |
1193 | } |
1194 | |
1195 | static void |
1196 | pdf_filter_b(fz_context *ctx, pdf_processor *proc) |
1197 | { |
1198 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1199 | filter_flush(ctx, p, FLUSH_ALL); |
1200 | if (p->chain->op_b) |
1201 | p->chain->op_b(ctx, p->chain); |
1202 | } |
1203 | |
1204 | static void |
1205 | pdf_filter_bstar(fz_context *ctx, pdf_processor *proc) |
1206 | { |
1207 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1208 | filter_flush(ctx, p, FLUSH_ALL); |
1209 | if (p->chain->op_bstar) |
1210 | p->chain->op_bstar(ctx, p->chain); |
1211 | } |
1212 | |
1213 | static void |
1214 | pdf_filter_n(fz_context *ctx, pdf_processor *proc) |
1215 | { |
1216 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1217 | filter_flush(ctx, p, FLUSH_CTM); |
1218 | if (p->chain->op_n) |
1219 | p->chain->op_n(ctx, p->chain); |
1220 | } |
1221 | |
1222 | /* clipping paths */ |
1223 | |
1224 | static void |
1225 | pdf_filter_W(fz_context *ctx, pdf_processor *proc) |
1226 | { |
1227 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1228 | filter_flush(ctx, p, FLUSH_CTM); |
1229 | if (p->chain->op_W) |
1230 | p->chain->op_W(ctx, p->chain); |
1231 | } |
1232 | |
1233 | static void |
1234 | pdf_filter_Wstar(fz_context *ctx, pdf_processor *proc) |
1235 | { |
1236 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1237 | filter_flush(ctx, p, FLUSH_CTM); |
1238 | if (p->chain->op_Wstar) |
1239 | p->chain->op_Wstar(ctx, p->chain); |
1240 | } |
1241 | |
1242 | /* text objects */ |
1243 | |
1244 | static void |
1245 | pdf_filter_BT(fz_context *ctx, pdf_processor *proc) |
1246 | { |
1247 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1248 | filter_flush(ctx, p, 0); |
1249 | p->tos.tm = fz_identity; |
1250 | p->tos.tlm = fz_identity; |
1251 | p->BT_pending = 1; |
1252 | } |
1253 | |
1254 | static void |
1255 | pdf_filter_ET(fz_context *ctx, pdf_processor *proc) |
1256 | { |
1257 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1258 | |
1259 | if (!p->BT_pending) |
1260 | { |
1261 | filter_flush(ctx, p, 0); |
1262 | if (p->chain->op_ET) |
1263 | p->chain->op_ET(ctx, p->chain); |
1264 | } |
1265 | p->BT_pending = 0; |
1266 | if (p->after_text) |
1267 | { |
1268 | fz_matrix ctm = fz_concat(p->gstate->sent.ctm, p->gstate->pending.ctm); |
1269 | if (p->chain->op_q) |
1270 | p->chain->op_q(ctx, p->chain); |
1271 | p->after_text(ctx, p->opaque, p->doc, p->chain, ctm); |
1272 | if (p->chain->op_Q) |
1273 | p->chain->op_Q(ctx, p->chain); |
1274 | } |
1275 | } |
1276 | |
1277 | /* text state */ |
1278 | |
1279 | static void |
1280 | pdf_filter_Tc(fz_context *ctx, pdf_processor *proc, float charspace) |
1281 | { |
1282 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1283 | filter_flush(ctx, p, 0); |
1284 | p->gstate->pending.text.char_space = charspace; |
1285 | } |
1286 | |
1287 | static void |
1288 | pdf_filter_Tw(fz_context *ctx, pdf_processor *proc, float wordspace) |
1289 | { |
1290 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1291 | filter_flush(ctx, p, 0); |
1292 | p->gstate->pending.text.word_space = wordspace; |
1293 | } |
1294 | |
1295 | static void |
1296 | pdf_filter_Tz(fz_context *ctx, pdf_processor *proc, float scale) |
1297 | { |
1298 | /* scale is as written in the file. It is 100 times smaller |
1299 | * in the gstate. */ |
1300 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1301 | filter_flush(ctx, p, 0); |
1302 | p->gstate->pending.text.scale = scale / 100; |
1303 | } |
1304 | |
1305 | static void |
1306 | pdf_filter_TL(fz_context *ctx, pdf_processor *proc, float leading) |
1307 | { |
1308 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1309 | filter_flush(ctx, p, 0); |
1310 | p->gstate->pending.text.leading = leading; |
1311 | } |
1312 | |
1313 | static void |
1314 | pdf_filter_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size) |
1315 | { |
1316 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1317 | filter_flush(ctx, p, 0); |
1318 | fz_free(ctx, p->font_name); |
1319 | p->font_name = NULL; |
1320 | p->font_name = name ? fz_strdup(ctx, name) : NULL; |
1321 | pdf_drop_font(ctx, p->gstate->pending.text.font); |
1322 | p->gstate->pending.text.font = pdf_keep_font(ctx, font); |
1323 | p->gstate->pending.text.size = size; |
1324 | copy_resource(ctx, p, PDF_NAME(Font), name); |
1325 | } |
1326 | |
1327 | static void |
1328 | pdf_filter_Tr(fz_context *ctx, pdf_processor *proc, int render) |
1329 | { |
1330 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1331 | filter_flush(ctx, p, 0); |
1332 | p->gstate->pending.text.render = render; |
1333 | } |
1334 | |
1335 | static void |
1336 | pdf_filter_Ts(fz_context *ctx, pdf_processor *proc, float rise) |
1337 | { |
1338 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1339 | filter_flush(ctx, p, 0); |
1340 | p->gstate->pending.text.rise = rise; |
1341 | } |
1342 | |
1343 | /* text positioning */ |
1344 | |
1345 | static void |
1346 | pdf_filter_Td(fz_context *ctx, pdf_processor *proc, float tx, float ty) |
1347 | { |
1348 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1349 | p->Tm_adjust = 0; |
1350 | pdf_tos_translate(&p->tos, tx, ty); |
1351 | p->Tm_pending = 1; |
1352 | } |
1353 | |
1354 | static void |
1355 | pdf_filter_TD(fz_context *ctx, pdf_processor *proc, float tx, float ty) |
1356 | { |
1357 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1358 | p->Tm_adjust = 0; |
1359 | pdf_tos_translate(&p->tos, tx, ty); |
1360 | p->gstate->pending.text.leading = -ty; |
1361 | p->Tm_pending = 1; |
1362 | } |
1363 | |
1364 | static void |
1365 | pdf_filter_Tm(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f) |
1366 | { |
1367 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1368 | pdf_tos_set_matrix(&p->tos, a, b, c, d, e, f); |
1369 | p->Tm_pending = 1; |
1370 | p->Tm_adjust = 0; |
1371 | } |
1372 | |
1373 | static void |
1374 | pdf_filter_Tstar(fz_context *ctx, pdf_processor *proc) |
1375 | { |
1376 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1377 | pdf_tos_newline(&p->tos, p->gstate->pending.text.leading); |
1378 | /* If Tm_pending, then just adjusting the matrix (as |
1379 | * pdf_tos_newline has done) is enough. Otherwise we |
1380 | * need to actually call the operator. */ |
1381 | if (!p->Tm_pending && p->chain->op_Tstar) |
1382 | p->chain->op_Tstar(ctx, p->chain); |
1383 | } |
1384 | |
1385 | /* text showing */ |
1386 | |
1387 | static void |
1388 | pdf_filter_TJ(fz_context *ctx, pdf_processor *proc, pdf_obj *array) |
1389 | { |
1390 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1391 | filter_show_text(ctx, p, array); |
1392 | } |
1393 | |
1394 | static void |
1395 | pdf_filter_Tj(fz_context *ctx, pdf_processor *proc, char *str, int len) |
1396 | { |
1397 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1398 | filter_show_string(ctx, p, (unsigned char *)str, len); |
1399 | } |
1400 | |
1401 | static void |
1402 | pdf_filter_squote(fz_context *ctx, pdf_processor *proc, char *str, int len) |
1403 | { |
1404 | /* Note, we convert all T' operators to (maybe) a T* and a Tj */ |
1405 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1406 | pdf_tos_newline(&p->tos, p->gstate->pending.text.leading); |
1407 | /* If Tm_pending, then just adjusting the matrix (as |
1408 | * pdf_tos_newline has done) is enough. Otherwise we |
1409 | * need to do it manually. */ |
1410 | if (!p->Tm_pending && p->chain->op_Tstar) |
1411 | p->chain->op_Tstar(ctx, p->chain); |
1412 | filter_show_string(ctx, p, (unsigned char *)str, len); |
1413 | } |
1414 | |
1415 | static void |
1416 | pdf_filter_dquote(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, int len) |
1417 | { |
1418 | /* Note, we convert all T" operators to (maybe) a T*, |
1419 | * (maybe) Tc, (maybe) Tw and a Tj. */ |
1420 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1421 | p->gstate->pending.text.word_space = aw; |
1422 | p->gstate->pending.text.char_space = ac; |
1423 | pdf_tos_newline(&p->tos, p->gstate->pending.text.leading); |
1424 | /* If Tm_pending, then just adjusting the matrix (as |
1425 | * pdf_tos_newline has done) is enough. Otherwise we |
1426 | * need to do it manually. */ |
1427 | if (!p->Tm_pending && p->chain->op_Tstar) |
1428 | p->chain->op_Tstar(ctx, p->chain); |
1429 | filter_show_string(ctx, p, (unsigned char*)str, len); |
1430 | } |
1431 | |
1432 | /* type 3 fonts */ |
1433 | |
1434 | static void |
1435 | pdf_filter_d0(fz_context *ctx, pdf_processor *proc, float wx, float wy) |
1436 | { |
1437 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1438 | filter_flush(ctx, p, 0); |
1439 | if (p->chain->op_d0) |
1440 | p->chain->op_d0(ctx, p->chain, wx, wy); |
1441 | } |
1442 | |
1443 | static void |
1444 | pdf_filter_d1(fz_context *ctx, pdf_processor *proc, float wx, float wy, float llx, float lly, float urx, float ury) |
1445 | { |
1446 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1447 | filter_flush(ctx, p, 0); |
1448 | if (p->chain->op_d1) |
1449 | p->chain->op_d1(ctx, p->chain, wx, wy, llx, lly, urx, ury); |
1450 | } |
1451 | |
1452 | /* color */ |
1453 | |
1454 | static void |
1455 | pdf_filter_CS(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs) |
1456 | { |
1457 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1458 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1459 | fz_strlcpy(gstate->pending.CS.name, name, sizeof gstate->pending.CS.name); |
1460 | gstate->pending.CS.cs = cs; |
1461 | copy_resource(ctx, p, PDF_NAME(ColorSpace), name); |
1462 | } |
1463 | |
1464 | static void |
1465 | pdf_filter_cs(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs) |
1466 | { |
1467 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1468 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1469 | fz_strlcpy(gstate->pending.cs.name, name, sizeof gstate->pending.cs.name); |
1470 | gstate->pending.cs.cs = cs; |
1471 | copy_resource(ctx, p, PDF_NAME(ColorSpace), name); |
1472 | } |
1473 | |
1474 | static void |
1475 | pdf_filter_SC_pattern(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color) |
1476 | { |
1477 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1478 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1479 | int i; |
1480 | fz_strlcpy(gstate->pending.SC.name, name, sizeof gstate->pending.SC.name); |
1481 | gstate->pending.SC.pat = pat; |
1482 | gstate->pending.SC.shd = NULL; |
1483 | gstate->pending.SC.n = n; |
1484 | for (i = 0; i < n; ++i) |
1485 | gstate->pending.SC.c[i] = color[i]; |
1486 | copy_resource(ctx, p, PDF_NAME(Pattern), name); |
1487 | } |
1488 | |
1489 | static void |
1490 | pdf_filter_sc_pattern(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color) |
1491 | { |
1492 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1493 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1494 | int i; |
1495 | fz_strlcpy(gstate->pending.sc.name, name, sizeof gstate->pending.sc.name); |
1496 | gstate->pending.sc.pat = pat; |
1497 | gstate->pending.sc.shd = NULL; |
1498 | gstate->pending.sc.n = n; |
1499 | for (i = 0; i < n; ++i) |
1500 | gstate->pending.sc.c[i] = color[i]; |
1501 | copy_resource(ctx, p, PDF_NAME(Pattern), name); |
1502 | } |
1503 | |
1504 | static void |
1505 | pdf_filter_SC_shade(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade) |
1506 | { |
1507 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1508 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1509 | fz_strlcpy(gstate->pending.SC.name, name, sizeof gstate->pending.SC.name); |
1510 | gstate->pending.SC.pat = NULL; |
1511 | gstate->pending.SC.shd = shade; |
1512 | gstate->pending.SC.n = 0; |
1513 | copy_resource(ctx, p, PDF_NAME(Pattern), name); |
1514 | } |
1515 | |
1516 | static void |
1517 | pdf_filter_sc_shade(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade) |
1518 | { |
1519 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1520 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1521 | fz_strlcpy(gstate->pending.sc.name, name, sizeof gstate->pending.sc.name); |
1522 | gstate->pending.sc.pat = NULL; |
1523 | gstate->pending.sc.shd = shade; |
1524 | gstate->pending.sc.n = 0; |
1525 | copy_resource(ctx, p, PDF_NAME(Pattern), name); |
1526 | } |
1527 | |
1528 | static void |
1529 | pdf_filter_SC_color(fz_context *ctx, pdf_processor *proc, int n, float *color) |
1530 | { |
1531 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1532 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1533 | int i; |
1534 | gstate->pending.SC.name[0] = 0; |
1535 | gstate->pending.SC.pat = NULL; |
1536 | gstate->pending.SC.shd = NULL; |
1537 | gstate->pending.SC.n = n; |
1538 | for (i = 0; i < n; ++i) |
1539 | gstate->pending.SC.c[i] = color[i]; |
1540 | } |
1541 | |
1542 | static void |
1543 | pdf_filter_sc_color(fz_context *ctx, pdf_processor *proc, int n, float *color) |
1544 | { |
1545 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1546 | filter_gstate *gstate = gstate_to_update(ctx, p); |
1547 | int i; |
1548 | gstate->pending.sc.name[0] = 0; |
1549 | gstate->pending.sc.pat = NULL; |
1550 | gstate->pending.sc.shd = NULL; |
1551 | gstate->pending.sc.n = n; |
1552 | for (i = 0; i < n; ++i) |
1553 | gstate->pending.sc.c[i] = color[i]; |
1554 | } |
1555 | |
1556 | static void |
1557 | pdf_filter_G(fz_context *ctx, pdf_processor *proc, float g) |
1558 | { |
1559 | float color[1] = { g }; |
1560 | pdf_filter_CS(ctx, proc, "DeviceGray" , fz_device_gray(ctx)); |
1561 | pdf_filter_SC_color(ctx, proc, 1, color); |
1562 | } |
1563 | |
1564 | static void |
1565 | pdf_filter_g(fz_context *ctx, pdf_processor *proc, float g) |
1566 | { |
1567 | float color[1] = { g }; |
1568 | pdf_filter_cs(ctx, proc, "DeviceGray" , fz_device_gray(ctx)); |
1569 | pdf_filter_sc_color(ctx, proc, 1, color); |
1570 | } |
1571 | |
1572 | static void |
1573 | pdf_filter_RG(fz_context *ctx, pdf_processor *proc, float r, float g, float b) |
1574 | { |
1575 | float color[3] = { r, g, b }; |
1576 | pdf_filter_CS(ctx, proc, "DeviceRGB" , fz_device_rgb(ctx)); |
1577 | pdf_filter_SC_color(ctx, proc, 3, color); |
1578 | } |
1579 | |
1580 | static void |
1581 | pdf_filter_rg(fz_context *ctx, pdf_processor *proc, float r, float g, float b) |
1582 | { |
1583 | float color[3] = { r, g, b }; |
1584 | pdf_filter_cs(ctx, proc, "DeviceRGB" , fz_device_rgb(ctx)); |
1585 | pdf_filter_sc_color(ctx, proc, 3, color); |
1586 | } |
1587 | |
1588 | static void |
1589 | pdf_filter_K(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k) |
1590 | { |
1591 | float color[4] = { c, m, y, k }; |
1592 | pdf_filter_CS(ctx, proc, "DeviceCMYK" , fz_device_cmyk(ctx)); |
1593 | pdf_filter_SC_color(ctx, proc, 4, color); |
1594 | } |
1595 | |
1596 | static void |
1597 | pdf_filter_k(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k) |
1598 | { |
1599 | float color[4] = { c, m, y, k }; |
1600 | pdf_filter_cs(ctx, proc, "DeviceCMYK" , fz_device_cmyk(ctx)); |
1601 | pdf_filter_sc_color(ctx, proc, 4, color); |
1602 | } |
1603 | |
1604 | /* shadings, images, xobjects */ |
1605 | |
1606 | static void |
1607 | pdf_filter_BI(fz_context *ctx, pdf_processor *proc, fz_image *img, const char *colorspace) |
1608 | { |
1609 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1610 | filter_flush(ctx, p, FLUSH_ALL); |
1611 | if (p->chain->op_BI) |
1612 | p->chain->op_BI(ctx, p->chain, img, colorspace); |
1613 | } |
1614 | |
1615 | static void |
1616 | pdf_filter_sh(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade) |
1617 | { |
1618 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1619 | filter_flush(ctx, p, FLUSH_ALL); |
1620 | if (p->chain->op_sh) |
1621 | p->chain->op_sh(ctx, p->chain, name, shade); |
1622 | copy_resource(ctx, p, PDF_NAME(Shading), name); |
1623 | } |
1624 | |
1625 | static void |
1626 | pdf_filter_Do_image(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image) |
1627 | { |
1628 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1629 | filter_flush(ctx, p, FLUSH_ALL); |
1630 | if (p->chain->op_Do_image) |
1631 | p->chain->op_Do_image(ctx, p->chain, name, image); |
1632 | copy_resource(ctx, p, PDF_NAME(XObject), name); |
1633 | } |
1634 | |
1635 | static void |
1636 | pdf_filter_Do_form(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *xobj, pdf_obj *page_resources) |
1637 | { |
1638 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1639 | filter_flush(ctx, p, FLUSH_ALL); |
1640 | if (p->chain->op_Do_form) |
1641 | p->chain->op_Do_form(ctx, p->chain, name, xobj, page_resources); |
1642 | copy_resource(ctx, p, PDF_NAME(XObject), name); |
1643 | } |
1644 | |
1645 | /* marked content */ |
1646 | |
1647 | static void |
1648 | pdf_filter_MP(fz_context *ctx, pdf_processor *proc, const char *tag) |
1649 | { |
1650 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1651 | filter_flush(ctx, p, 0); |
1652 | if (p->chain->op_MP) |
1653 | p->chain->op_MP(ctx, p->chain, tag); |
1654 | } |
1655 | |
1656 | static void |
1657 | pdf_filter_DP(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked) |
1658 | { |
1659 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1660 | filter_flush(ctx, p, 0); |
1661 | if (p->chain->op_DP) |
1662 | p->chain->op_DP(ctx, p->chain, tag, raw, cooked); |
1663 | } |
1664 | |
1665 | static void |
1666 | pdf_filter_BMC(fz_context *ctx, pdf_processor *proc, const char *tag) |
1667 | { |
1668 | /* Create a tag, and push it onto pending_tags. If it gets |
1669 | * flushed to the stream, it'll be moved from there onto |
1670 | * current_tags. */ |
1671 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1672 | tag_record *bmc = fz_malloc_struct(ctx, tag_record); |
1673 | |
1674 | fz_try(ctx) |
1675 | bmc->tag = fz_strdup(ctx, tag); |
1676 | fz_catch(ctx) |
1677 | { |
1678 | fz_free(ctx, bmc); |
1679 | fz_rethrow(ctx); |
1680 | } |
1681 | bmc->prev = p->pending_tags; |
1682 | p->pending_tags = bmc; |
1683 | } |
1684 | |
1685 | static void |
1686 | pdf_filter_BDC(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked) |
1687 | { |
1688 | /* Create a tag, and push it onto pending_tags. If it gets |
1689 | * flushed to the stream, it'll be moved from there onto |
1690 | * current_tags. */ |
1691 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1692 | tag_record *bdc = fz_malloc_struct(ctx, tag_record); |
1693 | pdf_obj *mcid; |
1694 | pdf_obj *str; |
1695 | |
1696 | fz_try(ctx) |
1697 | { |
1698 | bdc->bdc = 1; |
1699 | bdc->tag = fz_strdup(ctx, tag); |
1700 | bdc->raw = pdf_keep_obj(ctx, raw); |
1701 | bdc->cooked = pdf_keep_obj(ctx, raw); |
1702 | } |
1703 | fz_catch(ctx) |
1704 | { |
1705 | fz_free(ctx, bdc->tag); |
1706 | pdf_drop_obj(ctx, bdc->raw); |
1707 | pdf_drop_obj(ctx, bdc->cooked); |
1708 | fz_free(ctx, bdc); |
1709 | fz_rethrow(ctx); |
1710 | } |
1711 | bdc->prev = p->pending_tags; |
1712 | p->pending_tags = bdc; |
1713 | |
1714 | /* Look to see if this has an mcid object */ |
1715 | mcid = pdf_dict_get(ctx, cooked, PDF_NAME(MCID)); |
1716 | if (!pdf_is_number(ctx, mcid)) |
1717 | return; |
1718 | bdc->mcid_num = pdf_to_int(ctx, mcid); |
1719 | bdc->mcid_obj = pdf_keep_obj(ctx, pdf_array_get(ctx, p->structarray, bdc->mcid_num)); |
1720 | str = pdf_dict_get(ctx, bdc->mcid_obj, PDF_NAME(Alt)); |
1721 | if (str) |
1722 | bdc->alt.utf8 = pdf_new_utf8_from_pdf_string_obj(ctx, str); |
1723 | str = pdf_dict_get(ctx, bdc->mcid_obj, PDF_NAME(ActualText)); |
1724 | if (str) |
1725 | bdc->actualtext.utf8 = pdf_new_utf8_from_pdf_string_obj(ctx, str); |
1726 | } |
1727 | |
1728 | /* Bin the topmost (most recent) tag from a tag list. */ |
1729 | static void |
1730 | pop_tag(fz_context *ctx, pdf_filter_processor *p, tag_record **tags) |
1731 | { |
1732 | tag_record *tag = *tags; |
1733 | |
1734 | if (tag == NULL) |
1735 | return; |
1736 | *tags = tag->prev; |
1737 | fz_free(ctx, tag->tag); |
1738 | if (tag->bdc) |
1739 | { |
1740 | pdf_drop_obj(ctx, tag->raw); |
1741 | pdf_drop_obj(ctx, tag->cooked); |
1742 | } |
1743 | fz_free(ctx, tag->alt.utf8); |
1744 | fz_free(ctx, tag->actualtext.utf8); |
1745 | pdf_drop_obj(ctx, tag->mcid_obj); |
1746 | fz_free(ctx, tag); |
1747 | } |
1748 | |
1749 | static void |
1750 | pdf_filter_EMC(fz_context *ctx, pdf_processor *proc) |
1751 | { |
1752 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1753 | |
1754 | /* If we have any pending tags, pop one of those. If not, |
1755 | * pop one of the current ones, and pass the EMC on. */ |
1756 | if (p->pending_tags != NULL) |
1757 | pop_tag(ctx, p, &p->pending_tags); |
1758 | else |
1759 | { |
1760 | update_mcid(ctx, p); |
1761 | pop_tag(ctx, p, &p->current_tags); |
1762 | if (p->chain->op_EMC) |
1763 | p->chain->op_EMC(ctx, p->chain); |
1764 | } |
1765 | } |
1766 | |
1767 | /* compatibility */ |
1768 | |
1769 | static void |
1770 | pdf_filter_BX(fz_context *ctx, pdf_processor *proc) |
1771 | { |
1772 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1773 | filter_flush(ctx, p, 0); |
1774 | if (p->chain->op_BX) |
1775 | p->chain->op_BX(ctx, p->chain); |
1776 | } |
1777 | |
1778 | static void |
1779 | pdf_filter_EX(fz_context *ctx, pdf_processor *proc) |
1780 | { |
1781 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1782 | filter_flush(ctx, p, 0); |
1783 | if (p->chain->op_EX) |
1784 | p->chain->op_EX(ctx, p->chain); |
1785 | } |
1786 | |
1787 | static void |
1788 | pdf_filter_END(fz_context *ctx, pdf_processor *proc) |
1789 | { |
1790 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1791 | while (!filter_pop(ctx, p)) |
1792 | { |
1793 | /* Nothing to do in the loop, all work done above */ |
1794 | } |
1795 | } |
1796 | |
1797 | static void |
1798 | pdf_drop_filter_processor(fz_context *ctx, pdf_processor *proc) |
1799 | { |
1800 | pdf_filter_processor *p = (pdf_filter_processor*)proc; |
1801 | filter_gstate *gs = p->gstate; |
1802 | while (gs) |
1803 | { |
1804 | filter_gstate *next = gs->next; |
1805 | pdf_drop_font(ctx, gs->pending.text.font); |
1806 | pdf_drop_font(ctx, gs->sent.text.font); |
1807 | fz_free(ctx, gs); |
1808 | gs = next; |
1809 | } |
1810 | while (p->pending_tags) |
1811 | pop_tag(ctx, p, &p->pending_tags); |
1812 | while (p->current_tags) |
1813 | pop_tag(ctx, p, &p->current_tags); |
1814 | pdf_drop_obj(ctx, p->structarray); |
1815 | pdf_drop_document(ctx, p->doc); |
1816 | fz_free(ctx, p->font_name); |
1817 | } |
1818 | |
1819 | /* |
1820 | Create a filter processor. This |
1821 | filters the PDF operators it is fed, and passes them down |
1822 | (with some changes) to the child filter. |
1823 | |
1824 | The changes made by the filter are: |
1825 | |
1826 | * No operations are allowed to change the top level gstate. |
1827 | Additional q/Q operators are inserted to prevent this. |
1828 | |
1829 | * Repeated/unnecessary colour operators are removed (so, |
1830 | for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to |
1831 | "0.5 g") |
1832 | |
1833 | The intention of these changes is to provide a simpler, |
1834 | but equivalent stream, repairing problems with mismatched |
1835 | operators, maintaining structure (such as BMC, EMC calls) |
1836 | and leaving the graphics state in an known (default) state |
1837 | so that subsequent operations (such as synthesising new |
1838 | operators to be appended to the stream) are easier. |
1839 | |
1840 | The net graphical effect of the filtered operator stream |
1841 | should be identical to the incoming operator stream. |
1842 | |
1843 | chain: The child processor to which the filtered operators |
1844 | will be fed. |
1845 | |
1846 | old_res: The incoming resource dictionary. |
1847 | |
1848 | new_res: An (initially empty) resource dictionary that will |
1849 | be populated by copying entries from the old dictionary to |
1850 | the new one as they are used. At the end therefore, this |
1851 | contains exactly those resource objects actually required. |
1852 | |
1853 | */ |
1854 | pdf_processor * |
1855 | pdf_new_filter_processor(fz_context *ctx, pdf_document *doc, pdf_processor *chain, pdf_obj *old_rdb, pdf_obj *new_rdb) |
1856 | { |
1857 | return pdf_new_filter_processor_with_text_filter(ctx, doc, -1, chain, old_rdb, new_rdb, NULL, NULL, NULL); |
1858 | } |
1859 | |
1860 | /* |
1861 | Create a filter |
1862 | processor with a filter function for text. This filters the |
1863 | PDF operators it is fed, and passes them down (with some |
1864 | changes) to the child filter. |
1865 | |
1866 | See pdf_new_filter_processor for documentation. |
1867 | |
1868 | text_filter: A function called to assess whether a given |
1869 | character should be removed or not. |
1870 | |
1871 | after_text_object: A function to be called after each text object. |
1872 | This allows the caller to insert some extra content if |
1873 | required. |
1874 | |
1875 | text_filter_opaque: Opaque value to be passed to the |
1876 | text_filter function. |
1877 | */ |
1878 | pdf_processor * |
1879 | pdf_new_filter_processor_with_text_filter(fz_context *ctx, pdf_document *doc, int structparents, pdf_processor *chain, pdf_obj *old_rdb, pdf_obj *new_rdb, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after, void *text_filter_opaque) |
1880 | { |
1881 | pdf_filter_processor *proc = pdf_new_processor(ctx, sizeof *proc); |
1882 | { |
1883 | proc->super.drop_processor = pdf_drop_filter_processor; |
1884 | |
1885 | /* general graphics state */ |
1886 | proc->super.op_w = pdf_filter_w; |
1887 | proc->super.op_j = pdf_filter_j; |
1888 | proc->super.op_J = pdf_filter_J; |
1889 | proc->super.op_M = pdf_filter_M; |
1890 | proc->super.op_d = pdf_filter_d; |
1891 | proc->super.op_ri = pdf_filter_ri; |
1892 | proc->super.op_i = pdf_filter_i; |
1893 | proc->super.op_gs_begin = pdf_filter_gs_begin; |
1894 | proc->super.op_gs_end = pdf_filter_gs_end; |
1895 | |
1896 | /* transparency graphics state */ |
1897 | proc->super.op_gs_BM = pdf_filter_gs_BM; |
1898 | proc->super.op_gs_CA = pdf_filter_gs_CA; |
1899 | proc->super.op_gs_ca = pdf_filter_gs_ca; |
1900 | proc->super.op_gs_SMask = pdf_filter_gs_SMask; |
1901 | |
1902 | /* special graphics state */ |
1903 | proc->super.op_q = pdf_filter_q; |
1904 | proc->super.op_Q = pdf_filter_Q; |
1905 | proc->super.op_cm = pdf_filter_cm; |
1906 | |
1907 | /* path construction */ |
1908 | proc->super.op_m = pdf_filter_m; |
1909 | proc->super.op_l = pdf_filter_l; |
1910 | proc->super.op_c = pdf_filter_c; |
1911 | proc->super.op_v = pdf_filter_v; |
1912 | proc->super.op_y = pdf_filter_y; |
1913 | proc->super.op_h = pdf_filter_h; |
1914 | proc->super.op_re = pdf_filter_re; |
1915 | |
1916 | /* path painting */ |
1917 | proc->super.op_S = pdf_filter_S; |
1918 | proc->super.op_s = pdf_filter_s; |
1919 | proc->super.op_F = pdf_filter_F; |
1920 | proc->super.op_f = pdf_filter_f; |
1921 | proc->super.op_fstar = pdf_filter_fstar; |
1922 | proc->super.op_B = pdf_filter_B; |
1923 | proc->super.op_Bstar = pdf_filter_Bstar; |
1924 | proc->super.op_b = pdf_filter_b; |
1925 | proc->super.op_bstar = pdf_filter_bstar; |
1926 | proc->super.op_n = pdf_filter_n; |
1927 | |
1928 | /* clipping paths */ |
1929 | proc->super.op_W = pdf_filter_W; |
1930 | proc->super.op_Wstar = pdf_filter_Wstar; |
1931 | |
1932 | /* text objects */ |
1933 | proc->super.op_BT = pdf_filter_BT; |
1934 | proc->super.op_ET = pdf_filter_ET; |
1935 | |
1936 | /* text state */ |
1937 | proc->super.op_Tc = pdf_filter_Tc; |
1938 | proc->super.op_Tw = pdf_filter_Tw; |
1939 | proc->super.op_Tz = pdf_filter_Tz; |
1940 | proc->super.op_TL = pdf_filter_TL; |
1941 | proc->super.op_Tf = pdf_filter_Tf; |
1942 | proc->super.op_Tr = pdf_filter_Tr; |
1943 | proc->super.op_Ts = pdf_filter_Ts; |
1944 | |
1945 | /* text positioning */ |
1946 | proc->super.op_Td = pdf_filter_Td; |
1947 | proc->super.op_TD = pdf_filter_TD; |
1948 | proc->super.op_Tm = pdf_filter_Tm; |
1949 | proc->super.op_Tstar = pdf_filter_Tstar; |
1950 | |
1951 | /* text showing */ |
1952 | proc->super.op_TJ = pdf_filter_TJ; |
1953 | proc->super.op_Tj = pdf_filter_Tj; |
1954 | proc->super.op_squote = pdf_filter_squote; |
1955 | proc->super.op_dquote = pdf_filter_dquote; |
1956 | |
1957 | /* type 3 fonts */ |
1958 | proc->super.op_d0 = pdf_filter_d0; |
1959 | proc->super.op_d1 = pdf_filter_d1; |
1960 | |
1961 | /* color */ |
1962 | proc->super.op_CS = pdf_filter_CS; |
1963 | proc->super.op_cs = pdf_filter_cs; |
1964 | proc->super.op_SC_color = pdf_filter_SC_color; |
1965 | proc->super.op_sc_color = pdf_filter_sc_color; |
1966 | proc->super.op_SC_pattern = pdf_filter_SC_pattern; |
1967 | proc->super.op_sc_pattern = pdf_filter_sc_pattern; |
1968 | proc->super.op_SC_shade = pdf_filter_SC_shade; |
1969 | proc->super.op_sc_shade = pdf_filter_sc_shade; |
1970 | |
1971 | proc->super.op_G = pdf_filter_G; |
1972 | proc->super.op_g = pdf_filter_g; |
1973 | proc->super.op_RG = pdf_filter_RG; |
1974 | proc->super.op_rg = pdf_filter_rg; |
1975 | proc->super.op_K = pdf_filter_K; |
1976 | proc->super.op_k = pdf_filter_k; |
1977 | |
1978 | /* shadings, images, xobjects */ |
1979 | proc->super.op_BI = pdf_filter_BI; |
1980 | proc->super.op_sh = pdf_filter_sh; |
1981 | proc->super.op_Do_image = pdf_filter_Do_image; |
1982 | proc->super.op_Do_form = pdf_filter_Do_form; |
1983 | |
1984 | /* marked content */ |
1985 | proc->super.op_MP = pdf_filter_MP; |
1986 | proc->super.op_DP = pdf_filter_DP; |
1987 | proc->super.op_BMC = pdf_filter_BMC; |
1988 | proc->super.op_BDC = pdf_filter_BDC; |
1989 | proc->super.op_EMC = pdf_filter_EMC; |
1990 | |
1991 | /* compatibility */ |
1992 | proc->super.op_BX = pdf_filter_BX; |
1993 | proc->super.op_EX = pdf_filter_EX; |
1994 | |
1995 | /* extgstate */ |
1996 | proc->super.op_gs_OP = pdf_filter_gs_OP; |
1997 | proc->super.op_gs_op = pdf_filter_gs_op; |
1998 | proc->super.op_gs_OPM = pdf_filter_gs_OPM; |
1999 | proc->super.op_gs_UseBlackPtComp = pdf_filter_gs_UseBlackPtComp; |
2000 | |
2001 | proc->super.op_END = pdf_filter_END; |
2002 | } |
2003 | |
2004 | proc->doc = pdf_keep_document(ctx, doc); |
2005 | proc->structparents = structparents; |
2006 | if (structparents != -1) |
2007 | proc->structarray = pdf_keep_obj(ctx, pdf_lookup_number(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/StructTreeRoot/ParentTree" ), structparents)); |
2008 | proc->chain = chain; |
2009 | proc->old_rdb = old_rdb; |
2010 | proc->new_rdb = new_rdb; |
2011 | |
2012 | proc->text_filter = text_filter; |
2013 | proc->after_text = after; |
2014 | proc->opaque = text_filter_opaque; |
2015 | |
2016 | fz_try(ctx) |
2017 | { |
2018 | proc->gstate = fz_malloc_struct(ctx, filter_gstate); |
2019 | proc->gstate->pending.ctm = fz_identity; |
2020 | proc->gstate->sent.ctm = fz_identity; |
2021 | |
2022 | proc->gstate->pending.stroke = proc->gstate->pending.stroke; /* ? */ |
2023 | proc->gstate->sent.stroke = proc->gstate->pending.stroke; |
2024 | proc->gstate->pending.text.char_space = 0; |
2025 | proc->gstate->pending.text.word_space = 0; |
2026 | proc->gstate->pending.text.scale = 1; |
2027 | proc->gstate->pending.text.leading = 0; |
2028 | proc->gstate->pending.text.font = NULL; |
2029 | proc->gstate->pending.text.size = -1; |
2030 | proc->gstate->pending.text.render = 0; |
2031 | proc->gstate->pending.text.rise = 0; |
2032 | proc->gstate->sent.text.char_space = 0; |
2033 | proc->gstate->sent.text.word_space = 0; |
2034 | proc->gstate->sent.text.scale = 1; |
2035 | proc->gstate->sent.text.leading = 0; |
2036 | proc->gstate->sent.text.font = NULL; |
2037 | proc->gstate->sent.text.size = -1; |
2038 | proc->gstate->sent.text.render = 0; |
2039 | proc->gstate->sent.text.rise = 0; |
2040 | } |
2041 | fz_catch(ctx) |
2042 | { |
2043 | pdf_drop_processor(ctx, (pdf_processor *) proc); |
2044 | fz_rethrow(ctx); |
2045 | } |
2046 | |
2047 | return (pdf_processor*)proc; |
2048 | } |
2049 | |