1 | #include "mupdf/fitz.h" |
2 | #include "mupdf/pdf.h" |
3 | |
4 | #include <assert.h> |
5 | #include <string.h> |
6 | |
7 | #undef CHECK_SPLAY |
8 | #undef DUMP_SPLAY |
9 | |
10 | /* |
11 | * Allocate, destroy and simple parameters. |
12 | */ |
13 | |
14 | void |
15 | pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_) |
16 | { |
17 | pdf_cmap *cmap = (pdf_cmap *)cmap_; |
18 | pdf_drop_cmap(ctx, cmap->usecmap); |
19 | fz_free(ctx, cmap->ranges); |
20 | fz_free(ctx, cmap->xranges); |
21 | fz_free(ctx, cmap->mranges); |
22 | fz_free(ctx, cmap->dict); |
23 | fz_free(ctx, cmap->tree); |
24 | fz_free(ctx, cmap); |
25 | } |
26 | |
27 | pdf_cmap * |
28 | pdf_new_cmap(fz_context *ctx) |
29 | { |
30 | pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap); |
31 | FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp); |
32 | return cmap; |
33 | } |
34 | |
35 | /* Could be a macro for speed */ |
36 | pdf_cmap * |
37 | pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap) |
38 | { |
39 | return fz_keep_storable(ctx, &cmap->storable); |
40 | } |
41 | |
42 | /* Could be a macro for speed */ |
43 | void |
44 | pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap) |
45 | { |
46 | fz_drop_storable(ctx, &cmap->storable); |
47 | } |
48 | |
49 | void |
50 | pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap) |
51 | { |
52 | int i; |
53 | |
54 | pdf_drop_cmap(ctx, cmap->usecmap); |
55 | cmap->usecmap = pdf_keep_cmap(ctx, usecmap); |
56 | |
57 | if (cmap->codespace_len == 0) |
58 | { |
59 | cmap->codespace_len = usecmap->codespace_len; |
60 | for (i = 0; i < usecmap->codespace_len; i++) |
61 | cmap->codespace[i] = usecmap->codespace[i]; |
62 | } |
63 | } |
64 | |
65 | int |
66 | pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap) |
67 | { |
68 | return cmap->wmode; |
69 | } |
70 | |
71 | void |
72 | pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode) |
73 | { |
74 | cmap->wmode = wmode; |
75 | } |
76 | |
77 | /* |
78 | * Add a codespacerange section. |
79 | * These ranges are used by pdf_decode_cmap to decode |
80 | * multi-byte encoded strings. |
81 | */ |
82 | void |
83 | pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int n) |
84 | { |
85 | if (cmap->codespace_len + 1 == nelem(cmap->codespace)) |
86 | { |
87 | fz_warn(ctx, "assert: too many code space ranges" ); |
88 | return; |
89 | } |
90 | |
91 | cmap->codespace[cmap->codespace_len].n = n; |
92 | cmap->codespace[cmap->codespace_len].low = low; |
93 | cmap->codespace[cmap->codespace_len].high = high; |
94 | cmap->codespace_len ++; |
95 | } |
96 | |
97 | struct cmap_splay_s { |
98 | unsigned int low; |
99 | unsigned int high; |
100 | unsigned int out; |
101 | unsigned int left; |
102 | unsigned int right; |
103 | unsigned int parent : 31; |
104 | unsigned int many : 1; |
105 | }; |
106 | |
107 | #define EMPTY ((unsigned int)0x40000000) |
108 | |
109 | /* |
110 | The splaying steps used: |
111 | |
112 | Case 1: | z x |
113 | | y D => A y |
114 | | x C B z |
115 | | A B C D |
116 | |
117 | Case 2: | z x |
118 | | y D => y z |
119 | | A x A B C D |
120 | | B C |
121 | |
122 | Case 3: | y x |
123 | | x C => A y |
124 | | A B B C |
125 | */ |
126 | |
127 | static void |
128 | move_to_root(cmap_splay *tree, unsigned int x) |
129 | { |
130 | if (x == EMPTY) |
131 | return; |
132 | do |
133 | { |
134 | unsigned int z, zp; |
135 | unsigned int y = tree[x].parent; |
136 | if (y == EMPTY) |
137 | break; |
138 | z = tree[y].parent; |
139 | if (z == EMPTY) |
140 | { |
141 | /* Case 3 */ |
142 | tree[x].parent = EMPTY; |
143 | tree[y].parent = x; |
144 | if (tree[y].left == x) |
145 | { |
146 | /* Case 3 */ |
147 | tree[y].left = tree[x].right; |
148 | if (tree[y].left != EMPTY) |
149 | tree[tree[y].left].parent = y; |
150 | tree[x].right = y; |
151 | } |
152 | else |
153 | { |
154 | /* Case 3 - reflected */ |
155 | assert(tree[y].right == x); |
156 | tree[y].right = tree[x].left; |
157 | if (tree[y].right != EMPTY) |
158 | tree[tree[y].right].parent = y; |
159 | tree[x].left = y; |
160 | } |
161 | break; |
162 | } |
163 | |
164 | zp = tree[z].parent; |
165 | tree[x].parent = zp; |
166 | if (zp != EMPTY) { |
167 | if (tree[zp].left == z) |
168 | tree[zp].left = x; |
169 | else |
170 | { |
171 | assert(tree[zp].right == z); |
172 | tree[zp].right = x; |
173 | } |
174 | } |
175 | tree[y].parent = x; |
176 | if (tree[y].left == x) |
177 | { |
178 | tree[y].left = tree[x].right; |
179 | if (tree[y].left != EMPTY) |
180 | tree[tree[y].left].parent = y; |
181 | tree[x].right = y; |
182 | if (tree[z].left == y) |
183 | { |
184 | /* Case 1 */ |
185 | tree[z].parent = y; |
186 | tree[z].left = tree[y].right; |
187 | if (tree[z].left != EMPTY) |
188 | tree[tree[z].left].parent = z; |
189 | tree[y].right = z; |
190 | } |
191 | else |
192 | { |
193 | /* Case 2 - reflected */ |
194 | assert(tree[z].right == y); |
195 | tree[z].parent = x; |
196 | tree[z].right = tree[x].left; |
197 | if (tree[z].right != EMPTY) |
198 | tree[tree[z].right].parent = z; |
199 | tree[x].left = z; |
200 | } |
201 | } |
202 | else |
203 | { |
204 | assert(tree[y].right == x); |
205 | tree[y].right = tree[x].left; |
206 | if (tree[y].right != EMPTY) |
207 | tree[tree[y].right].parent = y; |
208 | tree[x].left = y; |
209 | if (tree[z].left == y) |
210 | { |
211 | /* Case 2 */ |
212 | tree[z].parent = x; |
213 | tree[z].left = tree[x].right; |
214 | if (tree[z].left != EMPTY) |
215 | tree[tree[z].left].parent = z; |
216 | tree[x].right = z; |
217 | } |
218 | else |
219 | { |
220 | /* Case 1 - reflected */ |
221 | assert(tree[z].right == y); |
222 | tree[z].parent = y; |
223 | tree[z].right = tree[y].left; |
224 | if (tree[z].right != EMPTY) |
225 | tree[tree[z].right].parent = z; |
226 | tree[y].left = z; |
227 | } |
228 | } |
229 | } while (1); |
230 | } |
231 | |
232 | static unsigned int delete_node(pdf_cmap *cmap, unsigned int current) |
233 | { |
234 | cmap_splay *tree = cmap->tree; |
235 | unsigned int parent; |
236 | unsigned int replacement; |
237 | |
238 | assert(current != EMPTY); |
239 | |
240 | parent = tree[current].parent; |
241 | if (tree[current].right == EMPTY) |
242 | { |
243 | if (parent == EMPTY) |
244 | { |
245 | replacement = cmap->ttop = tree[current].left; |
246 | } |
247 | else if (tree[parent].left == current) |
248 | { |
249 | replacement = tree[parent].left = tree[current].left; |
250 | } |
251 | else |
252 | { |
253 | assert(tree[parent].right == current); |
254 | replacement = tree[parent].right = tree[current].left; |
255 | } |
256 | if (replacement != EMPTY) |
257 | tree[replacement].parent = parent; |
258 | else |
259 | replacement = parent; |
260 | } |
261 | else if (tree[current].left == EMPTY) |
262 | { |
263 | if (parent == EMPTY) |
264 | { |
265 | replacement = cmap->ttop = tree[current].right; |
266 | } |
267 | else if (tree[parent].left == current) |
268 | { |
269 | replacement = tree[parent].left = tree[current].right; |
270 | } |
271 | else |
272 | { |
273 | assert(tree[parent].right == current); |
274 | replacement = tree[parent].right = tree[current].right; |
275 | } |
276 | if (replacement != EMPTY) |
277 | tree[replacement].parent = parent; |
278 | else |
279 | replacement = parent; |
280 | } |
281 | else |
282 | { |
283 | /* Hard case, find the in-order predecessor of current */ |
284 | int amputee = current; |
285 | replacement = tree[current].left; |
286 | while (tree[replacement].right != EMPTY) { |
287 | amputee = replacement; |
288 | replacement = tree[replacement].right; |
289 | } |
290 | /* Remove replacement from the tree */ |
291 | if (amputee == current) |
292 | { |
293 | tree[amputee].left = tree[replacement].left; |
294 | if (tree[amputee].left != EMPTY) |
295 | tree[tree[amputee].left].parent = amputee; |
296 | } |
297 | else |
298 | { |
299 | tree[amputee].right = tree[replacement].left; |
300 | if (tree[amputee].right != EMPTY) |
301 | tree[tree[amputee].right].parent = amputee; |
302 | } |
303 | /* Insert replacement in place of current */ |
304 | tree[replacement].parent = parent; |
305 | if (parent == EMPTY) |
306 | { |
307 | tree[replacement].parent = EMPTY; |
308 | cmap->ttop = replacement; |
309 | } |
310 | else if (tree[parent].left == current) |
311 | tree[parent].left = replacement; |
312 | else |
313 | { |
314 | assert(tree[parent].right == current); |
315 | tree[parent].right = replacement; |
316 | } |
317 | tree[replacement].left = tree[current].left; |
318 | if (tree[replacement].left != EMPTY) |
319 | tree[tree[replacement].left].parent = replacement; |
320 | tree[replacement].right = tree[current].right; |
321 | if (tree[replacement].right != EMPTY) |
322 | tree[tree[replacement].right].parent = replacement; |
323 | } |
324 | |
325 | /* current is now unlinked. We need to remove it from our array. */ |
326 | cmap->tlen--; |
327 | if (current != cmap->tlen) |
328 | { |
329 | if (replacement == cmap->tlen) |
330 | replacement = current; |
331 | tree[current] = tree[cmap->tlen]; |
332 | parent = tree[current].parent; |
333 | if (parent == EMPTY) |
334 | cmap->ttop = current; |
335 | else if (tree[parent].left == cmap->tlen) |
336 | tree[parent].left = current; |
337 | else |
338 | { |
339 | assert(tree[parent].right == cmap->tlen); |
340 | tree[parent].right = current; |
341 | } |
342 | if (tree[current].left != EMPTY) |
343 | { |
344 | assert(tree[tree[current].left].parent == cmap->tlen); |
345 | tree[tree[current].left].parent = current; |
346 | } |
347 | if (tree[current].right != EMPTY) |
348 | { |
349 | assert(tree[tree[current].right].parent == cmap->tlen); |
350 | tree[tree[current].right].parent = current; |
351 | } |
352 | } |
353 | |
354 | /* Return the node that we should continue searching from */ |
355 | return replacement; |
356 | } |
357 | |
358 | #ifdef DUMP_SPLAY |
359 | static void |
360 | dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre) |
361 | { |
362 | int i; |
363 | |
364 | if (tree == NULL || node == EMPTY) |
365 | return; |
366 | |
367 | for (i = 0; i < depth; i++) |
368 | fprintf(stderr, " " ); |
369 | fprintf(stderr, "%s%d:" , pre, node); |
370 | if (tree[node].parent == EMPTY) |
371 | fprintf(stderr, "^EMPTY" ); |
372 | else |
373 | fprintf(stderr, "^%d" , tree[node].parent); |
374 | if (tree[node].left == EMPTY) |
375 | fprintf(stderr, "<EMPTY" ); |
376 | else |
377 | fprintf(stderr, "<%d" , tree[node].left); |
378 | if (tree[node].right == EMPTY) |
379 | fprintf(stderr, ">EMPTY" ); |
380 | else |
381 | fprintf(stderr, ">%d" , tree[node].right); |
382 | fprintf(stderr, "(%x,%x,%x,%d)\n" , tree[node].low, tree[node].high, tree[node].out, tree[node].many); |
383 | assert(tree[node].parent == EMPTY || depth); |
384 | assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node); |
385 | assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node); |
386 | dump_splay(tree, tree[node].left, depth+1, "L" ); |
387 | dump_splay(tree, tree[node].right, depth+1, "R" ); |
388 | } |
389 | #endif |
390 | |
391 | enum |
392 | { |
393 | TOP = 0, |
394 | LEFT = 1, |
395 | RIGHT = 2 |
396 | }; |
397 | |
398 | static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg) |
399 | { |
400 | int from = TOP; |
401 | |
402 | while (node != EMPTY) |
403 | { |
404 | switch (from) |
405 | { |
406 | case TOP: |
407 | if (tree[node].left != EMPTY) |
408 | { |
409 | node = tree[node].left; |
410 | from = TOP; |
411 | break; |
412 | } |
413 | /* fallthrough */ |
414 | case LEFT: |
415 | fn(&tree[node], arg); |
416 | if (tree[node].right != EMPTY) |
417 | { |
418 | node = tree[node].right; |
419 | from = TOP; |
420 | break; |
421 | } |
422 | /* fallthrough */ |
423 | case RIGHT: |
424 | { |
425 | unsigned int parent = tree[node].parent; |
426 | if (parent == EMPTY) |
427 | return; |
428 | if (tree[parent].left == node) |
429 | from = LEFT; |
430 | else |
431 | { |
432 | assert(tree[parent].right == node); |
433 | from = RIGHT; |
434 | } |
435 | node = parent; |
436 | } |
437 | } |
438 | } |
439 | } |
440 | |
441 | #ifdef CHECK_SPLAY |
442 | |
443 | static int |
444 | tree_has_overlap(cmap_splay *tree, int node, int low, int high) |
445 | { |
446 | if (tree[node].left != EMPTY) |
447 | if (tree_has_overlap(tree, tree[node].left, low, high)) |
448 | return 1; |
449 | if (tree[node].right != EMPTY) |
450 | if (tree_has_overlap(tree, tree[node].right, low, high)) |
451 | return 1; |
452 | return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high); |
453 | } |
454 | |
455 | static void |
456 | do_check(cmap_splay *node, void *arg) |
457 | { |
458 | cmap_splay *tree = arg; |
459 | unsigned int num = node - tree; |
460 | assert(!node->many || node->low == node->high); |
461 | assert(node->low <= node->high); |
462 | assert((node->left == EMPTY) || (tree[node->left].parent == num && |
463 | tree[node->left].high < node->low)); |
464 | assert(node->right == EMPTY || (tree[node->right].parent == num && |
465 | node->high < tree[node->right].low)); |
466 | assert(!tree_has_overlap(tree, num, node->low, node->high)); |
467 | } |
468 | |
469 | static void |
470 | check_splay(cmap_splay *tree, unsigned int node, int depth) |
471 | { |
472 | if (node == EMPTY) |
473 | return; |
474 | assert(tree[node].parent == EMPTY); |
475 | walk_splay(tree, node, do_check, tree); |
476 | } |
477 | #endif |
478 | |
479 | /* |
480 | * Add a range. |
481 | */ |
482 | static void |
483 | add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many) |
484 | { |
485 | int current; |
486 | cmap_splay *tree; |
487 | int i; |
488 | int inrange = 0; |
489 | unsigned int k, count; |
490 | |
491 | if (low > high) |
492 | { |
493 | fz_warn(ctx, "range limits out of range in cmap %s" , cmap->cmap_name); |
494 | return; |
495 | } |
496 | |
497 | count = high - low + 1; |
498 | for (k = 0; k < count; k++) { |
499 | unsigned int c = low + k; |
500 | |
501 | inrange = 0; |
502 | for (i = 0; i < cmap->codespace_len; i++) { |
503 | if (cmap->codespace[i].low <= c && c <= cmap->codespace[i].high) |
504 | inrange = 1; |
505 | } |
506 | if (!inrange) |
507 | { |
508 | fz_warn(ctx, "ignoring CMap range (%u-%u) that is outside of the codespace" , low, high); |
509 | return; |
510 | } |
511 | } |
512 | |
513 | tree = cmap->tree; |
514 | |
515 | if (cmap->tlen) |
516 | { |
517 | unsigned int move = cmap->ttop; |
518 | unsigned int gt = EMPTY; |
519 | unsigned int lt = EMPTY; |
520 | if (check_for_overlap) |
521 | { |
522 | /* Check for collision with the current node */ |
523 | do |
524 | { |
525 | current = move; |
526 | /* Cases we might meet: |
527 | * tree[i]: <-----> |
528 | * case 0: <-> |
529 | * case 1: <-------> |
530 | * case 2: <-------------> |
531 | * case 3: <-> |
532 | * case 4: <-------> |
533 | * case 5: <-> |
534 | */ |
535 | if (low <= tree[current].low && tree[current].low <= high) |
536 | { |
537 | /* case 1, reduces to case 0 */ |
538 | /* or case 2, deleting the node */ |
539 | tree[current].out += high + 1 - tree[current].low; |
540 | tree[current].low = high + 1; |
541 | if (tree[current].low > tree[current].high) |
542 | { |
543 | /* update lt/gt references that will be moved/stale after deleting current */ |
544 | if (gt == cmap->tlen - 1) |
545 | gt = current; |
546 | if (lt == cmap->tlen - 1) |
547 | lt = current; |
548 | /* delete_node() moves the element at cmap->tlen-1 into current */ |
549 | move = delete_node(cmap, current); |
550 | current = EMPTY; |
551 | continue; |
552 | } |
553 | } |
554 | else if (low <= tree[current].high && tree[current].high <= high) |
555 | { |
556 | /* case 4, reduces to case 5 */ |
557 | tree[current].high = low - 1; |
558 | assert(tree[current].low <= tree[current].high); |
559 | } |
560 | else if (tree[current].low < low && high < tree[current].high) |
561 | { |
562 | /* case 3, reduces to case 5 */ |
563 | int new_high = tree[current].high; |
564 | tree[current].high = low-1; |
565 | add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, tree[current].many); |
566 | tree = cmap->tree; |
567 | } |
568 | /* Now look for where to move to next (left for case 0, right for case 5) */ |
569 | if (tree[current].low > high) { |
570 | move = tree[current].left; |
571 | gt = current; |
572 | } |
573 | else |
574 | { |
575 | move = tree[current].right; |
576 | lt = current; |
577 | } |
578 | } |
579 | while (move != EMPTY); |
580 | } |
581 | else |
582 | { |
583 | do |
584 | { |
585 | current = move; |
586 | if (tree[current].low > high) |
587 | { |
588 | move = tree[current].left; |
589 | gt = current; |
590 | } |
591 | else |
592 | { |
593 | move = tree[current].right; |
594 | lt = current; |
595 | } |
596 | } while (move != EMPTY); |
597 | } |
598 | /* current is now the node to which we would be adding the new node */ |
599 | /* lt is the last node we traversed which is lt the new node. */ |
600 | /* gt is the last node we traversed which is gt the new node. */ |
601 | |
602 | if (!many) |
603 | { |
604 | /* Check for the 'merge' cases. */ |
605 | if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low) |
606 | { |
607 | tree[lt].high = high; |
608 | if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) |
609 | { |
610 | tree[lt].high = tree[gt].high; |
611 | delete_node(cmap, gt); |
612 | } |
613 | goto exit; |
614 | } |
615 | if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) |
616 | { |
617 | tree[gt].low = low; |
618 | tree[gt].out = out; |
619 | goto exit; |
620 | } |
621 | } |
622 | } |
623 | else |
624 | current = EMPTY; |
625 | |
626 | if (cmap->tlen == cmap->tcap) |
627 | { |
628 | int new_cap = cmap->tcap ? cmap->tcap * 2 : 256; |
629 | tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay); |
630 | cmap->tcap = new_cap; |
631 | } |
632 | tree[cmap->tlen].low = low; |
633 | tree[cmap->tlen].high = high; |
634 | tree[cmap->tlen].out = out; |
635 | tree[cmap->tlen].parent = current; |
636 | tree[cmap->tlen].left = EMPTY; |
637 | tree[cmap->tlen].right = EMPTY; |
638 | tree[cmap->tlen].many = many; |
639 | cmap->tlen++; |
640 | if (current == EMPTY) |
641 | cmap->ttop = 0; |
642 | else if (tree[current].low > high) |
643 | tree[current].left = cmap->tlen-1; |
644 | else |
645 | { |
646 | assert(tree[current].high < low); |
647 | tree[current].right = cmap->tlen-1; |
648 | } |
649 | move_to_root(tree, cmap->tlen-1); |
650 | cmap->ttop = cmap->tlen-1; |
651 | exit: |
652 | {} |
653 | #ifdef CHECK_SPLAY |
654 | check_splay(cmap->tree, cmap->ttop, 0); |
655 | #endif |
656 | #ifdef DUMP_SPLAY |
657 | dump_splay(cmap->tree, cmap->ttop, 0, "" ); |
658 | #endif |
659 | } |
660 | |
661 | /* |
662 | * Add a one-to-many mapping. |
663 | */ |
664 | static void |
665 | add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len) |
666 | { |
667 | int out_pos; |
668 | |
669 | if (cmap->dlen + len + 1 > cmap->dcap) |
670 | { |
671 | int new_cap = cmap->dcap ? cmap->dcap * 2 : 256; |
672 | cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int); |
673 | cmap->dcap = new_cap; |
674 | } |
675 | out_pos = cmap->dlen; |
676 | cmap->dict[out_pos] = len; |
677 | memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len); |
678 | cmap->dlen += len + 1; |
679 | |
680 | add_range(ctx, cmap, low, low, out_pos, 1, 1); |
681 | } |
682 | |
683 | /* |
684 | * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25) |
685 | */ |
686 | void |
687 | pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out) |
688 | { |
689 | add_range(ctx, cmap, low, high, out, 1, 0); |
690 | } |
691 | |
692 | /* |
693 | * Add a single one-to-many mapping. |
694 | */ |
695 | void |
696 | pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, int len) |
697 | { |
698 | if (len == 1) |
699 | { |
700 | add_range(ctx, cmap, low, low, values[0], 1, 0); |
701 | return; |
702 | } |
703 | |
704 | /* Decode unicode surrogate pairs. */ |
705 | /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */ |
706 | if (len == 2 && |
707 | values[0] >= 0xD800 && values[0] <= 0xDBFF && |
708 | values[1] >= 0xDC00 && values[1] <= 0xDFFF) |
709 | { |
710 | int rune = ((values[0] - 0xD800) << 10) + (values[1] - 0xDC00) + 0x10000; |
711 | add_range(ctx, cmap, low, low, rune, 1, 0); |
712 | return; |
713 | } |
714 | |
715 | if (len > PDF_MRANGE_CAP) |
716 | { |
717 | fz_warn(ctx, "ignoring one to many mapping in cmap %s" , cmap->cmap_name); |
718 | return; |
719 | } |
720 | |
721 | add_mrange(ctx, cmap, low, values, len); |
722 | } |
723 | |
724 | static void |
725 | count_node_types(cmap_splay *node, void *arg) |
726 | { |
727 | int *counts = (int *)arg; |
728 | |
729 | if (node->many) |
730 | counts[2]++; |
731 | else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) |
732 | counts[0]++; |
733 | else |
734 | counts[1]++; |
735 | } |
736 | |
737 | static void |
738 | copy_node_types(cmap_splay *node, void *arg) |
739 | { |
740 | pdf_cmap *cmap = (pdf_cmap *)arg; |
741 | |
742 | if (node->many) |
743 | { |
744 | assert(node->low == node->high); |
745 | cmap->mranges[cmap->mlen].low = node->low; |
746 | cmap->mranges[cmap->mlen].out = node->out; |
747 | cmap->mlen++; |
748 | } |
749 | else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) |
750 | { |
751 | cmap->ranges[cmap->rlen].low = node->low; |
752 | cmap->ranges[cmap->rlen].high = node->high; |
753 | cmap->ranges[cmap->rlen].out = node->out; |
754 | cmap->rlen++; |
755 | } |
756 | else |
757 | { |
758 | cmap->xranges[cmap->xlen].low = node->low; |
759 | cmap->xranges[cmap->xlen].high = node->high; |
760 | cmap->xranges[cmap->xlen].out = node->out; |
761 | cmap->xlen++; |
762 | } |
763 | } |
764 | |
765 | void |
766 | pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap) |
767 | { |
768 | int counts[3]; |
769 | |
770 | if (cmap->tree == NULL) |
771 | return; |
772 | |
773 | counts[0] = 0; |
774 | counts[1] = 0; |
775 | counts[2] = 0; |
776 | walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts); |
777 | |
778 | cmap->ranges = fz_malloc_array(ctx, counts[0], pdf_range); |
779 | cmap->rcap = counts[0]; |
780 | cmap->xranges = fz_malloc_array(ctx, counts[1], pdf_xrange); |
781 | cmap->xcap = counts[1]; |
782 | cmap->mranges = fz_malloc_array(ctx, counts[2], pdf_mrange); |
783 | cmap->mcap = counts[2]; |
784 | |
785 | walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap); |
786 | |
787 | fz_free(ctx, cmap->tree); |
788 | cmap->tree = NULL; |
789 | } |
790 | |
791 | /* |
792 | * Lookup the mapping of a codepoint. |
793 | */ |
794 | int |
795 | pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt) |
796 | { |
797 | pdf_range *ranges = cmap->ranges; |
798 | pdf_xrange *xranges = cmap->xranges; |
799 | int l, r, m; |
800 | |
801 | l = 0; |
802 | r = cmap->rlen - 1; |
803 | while (l <= r) |
804 | { |
805 | m = (l + r) >> 1; |
806 | if (cpt < ranges[m].low) |
807 | r = m - 1; |
808 | else if (cpt > ranges[m].high) |
809 | l = m + 1; |
810 | else |
811 | return cpt - ranges[m].low + ranges[m].out; |
812 | } |
813 | |
814 | l = 0; |
815 | r = cmap->xlen - 1; |
816 | while (l <= r) |
817 | { |
818 | m = (l + r) >> 1; |
819 | if (cpt < xranges[m].low) |
820 | r = m - 1; |
821 | else if (cpt > xranges[m].high) |
822 | l = m + 1; |
823 | else |
824 | return cpt - xranges[m].low + xranges[m].out; |
825 | } |
826 | |
827 | if (cmap->usecmap) |
828 | return pdf_lookup_cmap(cmap->usecmap, cpt); |
829 | |
830 | return -1; |
831 | } |
832 | |
833 | int |
834 | pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out) |
835 | { |
836 | pdf_range *ranges = cmap->ranges; |
837 | pdf_xrange *xranges = cmap->xranges; |
838 | pdf_mrange *mranges = cmap->mranges; |
839 | unsigned int i; |
840 | int l, r, m; |
841 | |
842 | l = 0; |
843 | r = cmap->rlen - 1; |
844 | while (l <= r) |
845 | { |
846 | m = (l + r) >> 1; |
847 | if (cpt < ranges[m].low) |
848 | r = m - 1; |
849 | else if (cpt > ranges[m].high) |
850 | l = m + 1; |
851 | else |
852 | { |
853 | out[0] = cpt - ranges[m].low + ranges[m].out; |
854 | return 1; |
855 | } |
856 | } |
857 | |
858 | l = 0; |
859 | r = cmap->xlen - 1; |
860 | while (l <= r) |
861 | { |
862 | m = (l + r) >> 1; |
863 | if (cpt < xranges[m].low) |
864 | r = m - 1; |
865 | else if (cpt > xranges[m].high) |
866 | l = m + 1; |
867 | else |
868 | { |
869 | out[0] = cpt - xranges[m].low + xranges[m].out; |
870 | return 1; |
871 | } |
872 | } |
873 | |
874 | l = 0; |
875 | r = cmap->mlen - 1; |
876 | while (l <= r) |
877 | { |
878 | m = (l + r) >> 1; |
879 | if (cpt < mranges[m].low) |
880 | r = m - 1; |
881 | else if (cpt > mranges[m].low) |
882 | l = m + 1; |
883 | else |
884 | { |
885 | int *ptr = &cmap->dict[cmap->mranges[m].out]; |
886 | unsigned int len = (unsigned int)*ptr++; |
887 | for (i = 0; i < len; ++i) |
888 | out[i] = *ptr++; |
889 | return len; |
890 | } |
891 | } |
892 | |
893 | if (cmap->usecmap) |
894 | return pdf_lookup_cmap_full(cmap->usecmap, cpt, out); |
895 | |
896 | return 0; |
897 | } |
898 | |
899 | /* |
900 | * Use the codespace ranges to extract a codepoint from a |
901 | * multi-byte encoded string. |
902 | */ |
903 | int |
904 | pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt) |
905 | { |
906 | unsigned int c; |
907 | int k, n; |
908 | int len = end - buf; |
909 | |
910 | if (len > 4) |
911 | len = 4; |
912 | |
913 | c = 0; |
914 | for (n = 0; n < len; n++) |
915 | { |
916 | c = (c << 8) | buf[n]; |
917 | for (k = 0; k < cmap->codespace_len; k++) |
918 | { |
919 | if (cmap->codespace[k].n == n + 1) |
920 | { |
921 | if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high) |
922 | { |
923 | *cpt = c; |
924 | return n + 1; |
925 | } |
926 | } |
927 | } |
928 | } |
929 | |
930 | *cpt = 0; |
931 | return 1; |
932 | } |
933 | |