pdf-link.c source code [MuPDF/source/pdf/pdf-link.c]

1	#include "mupdf/fitz.h"
2	#include "mupdf/pdf.h"
3
4	#include <string.h>
5
6	static pdf_obj *
7	resolve_dest_rec(fz_context ctx, pdf_document doc, pdf_obj dest, int* depth)
8	{
9	if (depth > `10`) / Arbitrary to avoid infinite recursion /
10	return NULL;
11
12	if (pdf_is_name(ctx, dest) \|\| pdf_is_string(ctx, dest))
13	{
14	dest = pdf_lookup_dest(ctx, doc, dest);
15	dest = resolve_dest_rec(ctx, doc, dest, depth+`1`);
16	return dest;
17	}
18
19	else if (pdf_is_array(ctx, dest))
20	{
21	return dest;
22	}
23
24	else if (pdf_is_dict(ctx, dest))
25	{
26	dest = pdf_dict_get(ctx, dest, PDF_NAME(D));
27	return resolve_dest_rec(ctx, doc, dest, depth+`1`);
28	}
29
30	else if (pdf_is_indirect(ctx, dest))
31	return dest;
32
33	return NULL;
34	}
35
36	static pdf_obj *
37	resolve_dest(fz_context ctx, pdf_document doc, pdf_obj *dest)
38	{
39	return resolve_dest_rec(ctx, doc, dest, `0`);
40	}
41
42	char *
43	pdf_parse_link_dest(fz_context ctx, pdf_document doc, pdf_obj *dest)
44	{
45	pdf_obj obj, pageobj;
46	fz_rect mediabox;
47	fz_matrix pagectm;
48	const char *ld;
49	int page, x, y, h;
50
51	dest = resolve_dest(ctx, doc, dest);
52	if (dest == NULL)
53	{
54	fz_warn(ctx, "undefined link destination");
55	return NULL;
56	}
57
58	if (pdf_is_name(ctx, dest))
59	{
60	ld = pdf_to_name(ctx, dest);
61	return fz_strdup(ctx, ld);
62	}
63	else if (pdf_is_string(ctx, dest))
64	{
65	ld = pdf_to_str_buf(ctx, dest);
66	return fz_strdup(ctx, ld);
67	}
68
69	pageobj = pdf_array_get(ctx, dest, `0`);
70	if (pdf_is_int(ctx, pageobj))
71	{
72	page = pdf_to_int(ctx, pageobj);
73	pageobj = pdf_lookup_page_obj(ctx, doc, page);
74	}
75	else
76	{
77	fz_try(ctx)
78	page = pdf_lookup_page_number(ctx, doc, pageobj);
79	fz_catch(ctx)
80	page = -`1`;
81	}
82
83	if (page < `0`)
84	return NULL;
85
86	obj = pdf_array_get(ctx, dest, `1`);
87	if (obj)
88	{
89	/ Link coords use a coordinate space that does not seem to respect Rotate or UserUnit. /
90	/ All we need to do is figure out the page height to flip the coordinate space. /
91	pdf_page_obj_transform(ctx, pageobj, &mediabox, &pagectm);
92	mediabox = fz_transform_rect(mediabox, pagectm);
93	h = mediabox.y1 - mediabox.y0;
94
95	if (pdf_name_eq(ctx, obj, PDF_NAME(XYZ)))
96	{
97	x = pdf_array_get_int(ctx, dest, `2`);
98	y = h - pdf_array_get_int(ctx, dest, `3`);
99	}
100	else if (pdf_name_eq(ctx, obj, PDF_NAME(FitR)))
101	{
102	x = pdf_array_get_int(ctx, dest, `2`);
103	y = h - pdf_array_get_int(ctx, dest, `5`);
104	}
105	else if (pdf_name_eq(ctx, obj, PDF_NAME(FitH)) \|\| pdf_name_eq(ctx, obj, PDF_NAME(FitBH)))
106	{
107	x = `0`;
108	y = h - pdf_array_get_int(ctx, dest, `2`);
109	}
110	else if (pdf_name_eq(ctx, obj, PDF_NAME(FitV)) \|\| pdf_name_eq(ctx, obj, PDF_NAME(FitBV)))
111	{
112	x = pdf_array_get_int(ctx, dest, `2`);
113	y = `0`;
114	}
115	else
116	{
117	x = `0`;
118	y = `0`;
119	}
120	return fz_asprintf(ctx, "#%d,%d,%d", page + `1`, x, y);
121	}
122
123	return fz_asprintf(ctx, "#%d", page + `1`);
124	}
125
126	char *
127	pdf_parse_file_spec(fz_context ctx, pdf_document doc, pdf_obj file_spec, pdf_obj dest)
128	{
129	pdf_obj *filename = NULL;
130	const char *path;
131	char *uri;
132	char frag[`256`];
133
134	if (pdf_is_string(ctx, file_spec))
135	filename = file_spec;
136
137	if (pdf_is_dict(ctx, file_spec)) {
138	#ifdef _WIN32
139	filename = pdf_dict_get(ctx, file_spec, PDF_NAME(DOS));
140	#else
141	filename = pdf_dict_get(ctx, file_spec, PDF_NAME(Unix));
142	#endif
143	if (!filename)
144	filename = pdf_dict_geta(ctx, file_spec, PDF_NAME(UF), PDF_NAME(F));
145	}
146
147	if (!pdf_is_string(ctx, filename))
148	{
149	fz_warn(ctx, "cannot parse file specification");
150	return NULL;
151	}
152
153	if (pdf_is_array(ctx, dest))
154	fz_snprintf(frag, sizeof frag, "#page=%d", pdf_array_get_int(ctx, dest, `0`) + `1`);
155	else if (pdf_is_name(ctx, dest))
156	fz_snprintf(frag, sizeof frag, "#%s", pdf_to_name(ctx, dest));
157	else if (pdf_is_string(ctx, dest))
158	fz_snprintf(frag, sizeof frag, "#%s", pdf_to_str_buf(ctx, dest));
159	else
160	frag[`0`] = `0`;
161
162	path = pdf_to_text_string(ctx, filename);
163	uri = NULL;
164	#ifdef _WIN32
165	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, file_spec, PDF_NAME(FS)), PDF_NAME(URL)))
166	{
167	/ Fix up the drive letter (change "/C/Documents/Foo" to "C:/Documents/Foo") /
168	if (path[`0`] == `'/'` && ((`'A'` <= path[`1`] && path[`1`] <= `'Z'`) \|\| (`'a'` <= path[`1`] && path[`1`] <= `'z'`)) && path[`2`] == `'/'`)
169	uri = fz_asprintf(ctx, "file://%c:%s%s", path[`1`], path+`2`, frag);
170	}
171	#endif
172	if (!uri)
173	uri = fz_asprintf(ctx, "file://%s%s", path, frag);
174
175	return uri;
176	}
177
178	char *
179	pdf_parse_link_action(fz_context ctx, pdf_document doc, pdf_obj action, int* pagenum)
180	{
181	pdf_obj obj, dest, *file_spec;
182
183	if (!action)
184	return NULL;
185
186	obj = pdf_dict_get(ctx, action, PDF_NAME(S));
187	if (pdf_name_eq(ctx, PDF_NAME(GoTo), obj))
188	{
189	dest = pdf_dict_get(ctx, action, PDF_NAME(D));
190	return pdf_parse_link_dest(ctx, doc, dest);
191	}
192	else if (pdf_name_eq(ctx, PDF_NAME(URI), obj))
193	{
194	/ URI entries are ASCII strings /
195	const char *uri = pdf_dict_get_text_string(ctx, action, PDF_NAME(URI));
196	if (!fz_is_external_link(ctx, uri))
197	{
198	pdf_obj *uri_base_obj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/URI/Base");
199	const char *uri_base = uri_base_obj ? pdf_to_text_string(ctx, uri_base_obj) : "file://";
200	char *new_uri = fz_malloc(ctx, strlen(uri_base) + strlen(uri) + `1`);
201	strcpy(new_uri, uri_base);
202	strcat(new_uri, uri);
203	return new_uri;
204	}
205	return fz_strdup(ctx, uri);
206	}
207	else if (pdf_name_eq(ctx, PDF_NAME(Launch), obj))
208	{
209	file_spec = pdf_dict_get(ctx, action, PDF_NAME(F));
210	return pdf_parse_file_spec(ctx, doc, file_spec, NULL);
211	}
212	else if (pdf_name_eq(ctx, PDF_NAME(GoToR), obj))
213	{
214	dest = pdf_dict_get(ctx, action, PDF_NAME(D));
215	file_spec = pdf_dict_get(ctx, action, PDF_NAME(F));
216	return pdf_parse_file_spec(ctx, doc, file_spec, dest);
217	}
218	else if (pdf_name_eq(ctx, PDF_NAME(Named), obj))
219	{
220	dest = pdf_dict_get(ctx, action, PDF_NAME(N));
221
222	if (pdf_name_eq(ctx, PDF_NAME(FirstPage), dest))
223	pagenum = `0`;
224	else if (pdf_name_eq(ctx, PDF_NAME(LastPage), dest))
225	pagenum = pdf_count_pages(ctx, doc) - `1`;
226	else if (pdf_name_eq(ctx, PDF_NAME(PrevPage), dest) && pagenum >= `0`)
227	{
228	if (pagenum > `0`)
229	pagenum--;
230	}
231	else if (pdf_name_eq(ctx, PDF_NAME(NextPage), dest) && pagenum >= `0`)
232	{
233	if (pagenum < pdf_count_pages(ctx, doc) - `1`)
234	pagenum++;
235	}
236	else
237	return NULL;
238
239	return fz_asprintf(ctx, "#%d", pagenum + `1`);
240	}
241
242	return NULL;
243	}
244
245	static fz_link *
246	pdf_load_link(fz_context ctx, pdf_document doc, pdf_obj dict, int* pagenum, fz_matrix page_ctm)
247	{
248	pdf_obj *action;
249	pdf_obj *obj;
250	fz_rect bbox;
251	char *uri;
252	fz_link *link = NULL;
253
254	obj = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
255	if (!pdf_name_eq(ctx, obj, PDF_NAME(Link)))
256	return NULL;
257
258	obj = pdf_dict_get(ctx, dict, PDF_NAME(Rect));
259	if (!obj)
260	return NULL;
261
262	bbox = pdf_to_rect(ctx, obj);
263	bbox = fz_transform_rect(bbox, page_ctm);
264
265	obj = pdf_dict_get(ctx, dict, PDF_NAME(Dest));
266	if (obj)
267	uri = pdf_parse_link_dest(ctx, doc, obj);
268	else
269	{
270	action = pdf_dict_get(ctx, dict, PDF_NAME(A));
271	/ fall back to additional action button's down/up action /
272	if (!action)
273	action = pdf_dict_geta(ctx, pdf_dict_get(ctx, dict, PDF_NAME(AA)), PDF_NAME(U), PDF_NAME(D));
274	uri = pdf_parse_link_action(ctx, doc, action, pagenum);
275	}
276
277	if (!uri)
278	return NULL;
279
280	fz_try(ctx)
281	link = fz_new_link(ctx, bbox, doc, uri);
282	fz_always(ctx)
283	fz_free(ctx, uri);
284	fz_catch(ctx)
285	fz_rethrow(ctx);
286
287	return link;
288	}
289
290	fz_link *
291	pdf_load_link_annots(fz_context ctx, pdf_document doc, pdf_obj annots, int* pagenum, fz_matrix page_ctm)
292	{
293	fz_link link, head, *tail;
294	pdf_obj *obj;
295	int i, n;
296
297	head = tail = NULL;
298	link = NULL;
299
300	n = pdf_array_len(ctx, annots);
301	for (i = `0`; i < n; i++)
302	{
303	/ FIXME: Move the try/catch out of the loop for performance? /
304	fz_try(ctx)
305	{
306	obj = pdf_array_get(ctx, annots, i);
307	link = pdf_load_link(ctx, doc, obj, pagenum, page_ctm);
308	}
309	fz_catch(ctx)
310	{
311	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
312	link = NULL;
313	}
314
315	if (link)
316	{
317	if (!head)
318	head = tail = link;
319	else
320	{
321	tail->next = link;
322	tail = link;
323	}
324	}
325	}
326
327	return head;
328	}
329
330	int
331	pdf_resolve_link(fz_context ctx, pdf_document doc, const char uri, float* xp, float* *yp)
332	{
333	if (uri && uri[`0`] == `'#'`)
334	{
335	int page = fz_atoi(uri + `1`) - `1`;
336	if (xp \|\| yp)
337	{
338	const char *x = strchr(uri, `','`);
339	const char *y = strrchr(uri, `','`);
340	if (x && y)
341	{
342	if (xp) *xp = x ? fz_atoi(x + `1`) : `0`;
343	if (yp) *yp = y ? fz_atoi(y + `1`) : `0`;
344	}
345	}
346	return page;
347	}
348	fz_warn(ctx, "unknown link uri '%s'", uri);
349	return -`1`;
350	}
351

Browse the source code of MuPDF/source/pdf/pdf-link.c