1#include "mupdf/fitz.h"
2#include "mupdf/pdf.h"
3
4static pdf_obj *
5pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, pdf_obj *needle)
6{
7 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
8 pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
9
10 if (pdf_is_array(ctx, kids))
11 {
12 int l = 0;
13 int r = pdf_array_len(ctx, kids) - 1;
14
15 while (l <= r)
16 {
17 int m = (l + r) >> 1;
18 pdf_obj *kid = pdf_array_get(ctx, kids, m);
19 pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
20 pdf_obj *first = pdf_array_get(ctx, limits, 0);
21 pdf_obj *last = pdf_array_get(ctx, limits, 1);
22
23 if (pdf_objcmp(ctx, needle, first) < 0)
24 r = m - 1;
25 else if (pdf_objcmp(ctx, needle, last) > 0)
26 l = m + 1;
27 else
28 {
29 pdf_obj *obj;
30
31 if (pdf_mark_obj(ctx, node))
32 break;
33 fz_try(ctx)
34 obj = pdf_lookup_name_imp(ctx, kid, needle);
35 fz_always(ctx)
36 pdf_unmark_obj(ctx, node);
37 fz_catch(ctx)
38 fz_rethrow(ctx);
39 return obj;
40 }
41 }
42 }
43
44 if (pdf_is_array(ctx, names))
45 {
46 int l = 0;
47 int r = (pdf_array_len(ctx, names) / 2) - 1;
48
49 while (l <= r)
50 {
51 int m = (l + r) >> 1;
52 int c;
53 pdf_obj *key = pdf_array_get(ctx, names, m * 2);
54 pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1);
55
56 c = pdf_objcmp(ctx, needle, key);
57 if (c < 0)
58 r = m - 1;
59 else if (c > 0)
60 l = m + 1;
61 else
62 return val;
63 }
64
65 /* Spec says names should be sorted (hence the binary search,
66 * above), but Acrobat copes with non-sorted. Drop back to a
67 * simple search if the binary search fails. */
68 r = pdf_array_len(ctx, names)/2;
69 for (l = 0; l < r; l++)
70 if (!pdf_objcmp(ctx, needle, pdf_array_get(ctx, names, l * 2)))
71 return pdf_array_get(ctx, names, l * 2 + 1);
72 }
73
74 return NULL;
75}
76
77pdf_obj *
78pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle)
79{
80 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
81 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
82 pdf_obj *tree = pdf_dict_get(ctx, names, which);
83 return pdf_lookup_name_imp(ctx, tree, needle);
84}
85
86pdf_obj *
87pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle)
88{
89 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
90 pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests));
91 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
92 pdf_obj *dest = NULL;
93
94 /* PDF 1.1 has destinations in a dictionary */
95 if (dests)
96 {
97 if (pdf_is_name(ctx, needle))
98 return pdf_dict_get(ctx, dests, needle);
99 else
100 return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle));
101 }
102
103 /* PDF 1.2 has destinations in a name tree */
104 if (names && !dest)
105 {
106 pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests));
107 return pdf_lookup_name_imp(ctx, tree, needle);
108 }
109
110 return NULL;
111}
112
113static void
114pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node)
115{
116 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
117 pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
118 int i;
119
120 if (kids && !pdf_mark_obj(ctx, node))
121 {
122 fz_try(ctx)
123 {
124 int len = pdf_array_len(ctx, kids);
125 for (i = 0; i < len; i++)
126 pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i));
127 }
128 fz_always(ctx)
129 pdf_unmark_obj(ctx, node);
130 fz_catch(ctx)
131 fz_rethrow(ctx);
132 }
133
134 if (names)
135 {
136 int len = pdf_array_len(ctx, names);
137 for (i = 0; i + 1 < len; i += 2)
138 {
139 pdf_obj *key = pdf_array_get(ctx, names, i);
140 pdf_obj *val = pdf_array_get(ctx, names, i + 1);
141 if (pdf_is_string(ctx, key))
142 {
143 key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
144 fz_try(ctx)
145 pdf_dict_put(ctx, dict, key, val);
146 fz_always(ctx)
147 pdf_drop_obj(ctx, key);
148 fz_catch(ctx)
149 fz_rethrow(ctx);
150 }
151 else if (pdf_is_name(ctx, key))
152 {
153 pdf_dict_put(ctx, dict, key, val);
154 }
155 }
156 }
157}
158
159pdf_obj *
160pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which)
161{
162 pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
163 pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
164 pdf_obj *tree = pdf_dict_get(ctx, names, which);
165 if (pdf_is_dict(ctx, tree))
166 {
167 pdf_obj *dict = pdf_new_dict(ctx, doc, 100);
168 pdf_load_name_tree_imp(ctx, dict, doc, tree);
169 return dict;
170 }
171 return NULL;
172}
173
174pdf_obj *
175pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle)
176{
177 pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
178 pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
179
180 if (pdf_is_array(ctx, kids))
181 {
182 int l = 0;
183 int r = pdf_array_len(ctx, kids) - 1;
184
185 while (l <= r)
186 {
187 int m = (l + r) >> 1;
188 pdf_obj *kid = pdf_array_get(ctx, kids, m);
189 pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
190 int first = pdf_to_int(ctx, pdf_array_get(ctx, limits, 0));
191 int last = pdf_to_int(ctx, pdf_array_get(ctx, limits, 1));
192
193 if (needle < first)
194 r = m - 1;
195 else if (needle > last)
196 l = m + 1;
197 else
198 {
199 pdf_obj *obj;
200
201 if (pdf_mark_obj(ctx, node))
202 break;
203 fz_try(ctx)
204 obj = pdf_lookup_number(ctx, kid, needle);
205 fz_always(ctx)
206 pdf_unmark_obj(ctx, node);
207 fz_catch(ctx)
208 fz_rethrow(ctx);
209 return obj;
210 }
211 }
212 }
213
214 if (pdf_is_array(ctx, nums))
215 {
216 pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
217 int l = 0;
218 int r = (pdf_array_len(ctx, nums) / 2) - 1;
219
220 while (l <= r)
221 {
222 int m = (l + r) >> 1;
223 int key = pdf_to_int(ctx, pdf_array_get(ctx, nums, m * 2));
224 pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1);
225
226 if (needle < key)
227 r = m - 1;
228 else if (needle > key)
229 l = m + 1;
230 else
231 return val;
232 }
233
234 /* Parallel the nametree lookup above by allowing for non-sorted lists. */
235 r = pdf_array_len(ctx, nums)/2;
236 for (l = 0; l < r; l++)
237 if (needle == pdf_to_int(ctx, pdf_array_get(ctx, nums, l * 2)))
238 return pdf_array_get(ctx, nums, l * 2 + 1);
239 }
240
241 return NULL;
242}
243