1#include "mupdf/fitz.h"
2#include "html-imp.h"
3
4#include <string.h>
5#include <math.h>
6
/*
 * Indices into the four-element page_margin[] array of fz_html
 * (presumably top, right, bottom, left -- L and R are added to the page
 * width and T and B to the page height in epub_bound_page).
 */
enum
{
	T,
	R,
	B,
	L
};

/* Short type names for the structs defined below. */
typedef struct epub_document_s epub_document;
typedef struct epub_chapter_s epub_chapter;
typedef struct epub_page_s epub_page;
12
13struct epub_document_s
14{
15 fz_document super;
16 fz_archive *zip;
17 fz_html_font_set *set;
18 int count;
19 epub_chapter *spine;
20 fz_outline *outline;
21 char *dc_title, *dc_creator;
22};
23
24struct epub_chapter_s
25{
26 char *path;
27 int start;
28 fz_html *html;
29 epub_chapter *next;
30};
31
32struct epub_page_s
33{
34 fz_page super;
35 epub_document *doc;
36 int number;
37};
38
39static int count_chapter_pages(epub_chapter *ch)
40{
41 if (ch->html->root->b > 0)
42 return ceilf(ch->html->root->b / ch->html->page_h);
43 return 1;
44}
45
46static int
47epub_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest, float *xp, float *yp)
48{
49 epub_document *doc = (epub_document*)doc_;
50 epub_chapter *ch;
51
52 const char *s = strchr(dest, '#');
53 size_t n = s ? s - dest : strlen(dest);
54 if (s && s[1] == 0)
55 s = NULL;
56
57 for (ch = doc->spine; ch; ch = ch->next)
58 {
59 if (!strncmp(ch->path, dest, n) && ch->path[n] == 0)
60 {
61 if (s)
62 {
63 /* Search for a matching fragment */
64 float y = fz_find_html_target(ctx, ch->html, s+1);
65 if (y >= 0)
66 {
67 int page = y / ch->html->page_h;
68 if (yp) *yp = y - page * ch->html->page_h;
69 return ch->start + page;
70 }
71 return -1;
72 }
73 return ch->start;
74 }
75 }
76
77 return -1;
78}
79
80static void
81epub_update_outline(fz_context *ctx, fz_document *doc, fz_outline *node)
82{
83 while (node)
84 {
85 node->page = epub_resolve_link(ctx, doc, node->uri, &node->x, &node->y);
86 epub_update_outline(ctx, doc, node->down);
87 node = node->next;
88 }
89}
90
91static void
92epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
93{
94 epub_document *doc = (epub_document*)doc_;
95 epub_chapter *ch;
96 int count = 0;
97
98 for (ch = doc->spine; ch; ch = ch->next)
99 {
100 ch->start = count;
101 fz_layout_html(ctx, ch->html, w, h, em);
102 count += count_chapter_pages(ch);
103 }
104
105 epub_update_outline(ctx, doc_, doc->outline);
106}
107
108static int
109epub_count_pages(fz_context *ctx, fz_document *doc_)
110{
111 epub_document *doc = (epub_document*)doc_;
112 epub_chapter *ch;
113 int count = 0;
114 for (ch = doc->spine; ch; ch = ch->next)
115 count += count_chapter_pages(ch);
116 return count;
117}
118
119static void
120epub_drop_page(fz_context *ctx, fz_page *page_)
121{
122}
123
124static fz_rect
125epub_bound_page(fz_context *ctx, fz_page *page_)
126{
127 epub_page *page = (epub_page*)page_;
128 epub_document *doc = page->doc;
129 epub_chapter *ch;
130 int n = page->number;
131 int count = 0;
132 fz_rect bbox;
133
134 for (ch = doc->spine; ch; ch = ch->next)
135 {
136 int cn = count_chapter_pages(ch);
137 if (n < count + cn)
138 {
139 bbox.x0 = 0;
140 bbox.y0 = 0;
141 bbox.x1 = ch->html->page_w + ch->html->page_margin[L] + ch->html->page_margin[R];
142 bbox.y1 = ch->html->page_h + ch->html->page_margin[T] + ch->html->page_margin[B];
143 return bbox;
144 }
145 count += cn;
146 }
147
148 return fz_unit_rect;
149}
150
151static void
152epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
153{
154 epub_page *page = (epub_page*)page_;
155 epub_document *doc = page->doc;
156 epub_chapter *ch;
157 int n = page->number;
158 int count = 0;
159
160 for (ch = doc->spine; ch; ch = ch->next)
161 {
162 int cn = count_chapter_pages(ch);
163 if (n < count + cn)
164 {
165 fz_draw_html(ctx, dev, ctm, ch->html, n-count);
166 break;
167 }
168 count += cn;
169 }
170}
171
172static fz_link *
173epub_load_links(fz_context *ctx, fz_page *page_)
174{
175 epub_page *page = (epub_page*)page_;
176 epub_document *doc = page->doc;
177 epub_chapter *ch;
178 int n = page->number;
179 int count = 0;
180
181 for (ch = doc->spine; ch; ch = ch->next)
182 {
183 int cn = count_chapter_pages(ch);
184 if (n < count + cn)
185 return fz_load_html_links(ctx, ch->html, n - count, ch->path, doc);
186 count += cn;
187 }
188
189 return NULL;
190}
191
192static fz_bookmark
193epub_make_bookmark(fz_context *ctx, fz_document *doc_, int n)
194{
195 epub_document *doc = (epub_document*)doc_;
196 epub_chapter *ch;
197 int count = 0;
198
199 for (ch = doc->spine; ch; ch = ch->next)
200 {
201 int cn = count_chapter_pages(ch);
202 if (n < count + cn)
203 return fz_make_html_bookmark(ctx, ch->html, n - count);
204 count += cn;
205 }
206
207 return 0;
208}
209
210static int
211epub_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark)
212{
213 epub_document *doc = (epub_document*)doc_;
214 epub_chapter *ch;
215
216 for (ch = doc->spine; ch; ch = ch->next)
217 {
218 int p = fz_lookup_html_bookmark(ctx, ch->html, mark);
219 if (p != -1)
220 return ch->start + p;
221 }
222 return -1;
223}
224
225static fz_page *
226epub_load_page(fz_context *ctx, fz_document *doc_, int number)
227{
228 epub_document *doc = (epub_document*)doc_;
229 epub_page *page = fz_new_derived_page(ctx, epub_page);
230 page->super.bound_page = epub_bound_page;
231 page->super.run_page_contents = epub_run_page;
232 page->super.load_links = epub_load_links;
233 page->super.drop_page = epub_drop_page;
234 page->doc = doc;
235 page->number = number;
236 return (fz_page*)page;
237}
238
239static void
240epub_drop_document(fz_context *ctx, fz_document *doc_)
241{
242 epub_document *doc = (epub_document*)doc_;
243 epub_chapter *ch, *next;
244 ch = doc->spine;
245 while (ch)
246 {
247 next = ch->next;
248 fz_drop_html(ctx, ch->html);
249 fz_free(ctx, ch->path);
250 fz_free(ctx, ch);
251 ch = next;
252 }
253 fz_drop_archive(ctx, doc->zip);
254 fz_drop_html_font_set(ctx, doc->set);
255 fz_drop_outline(ctx, doc->outline);
256 fz_free(ctx, doc->dc_title);
257 fz_free(ctx, doc->dc_creator);
258}
259
260static const char *
261rel_path_from_idref(fz_xml *manifest, const char *idref)
262{
263 fz_xml *item;
264 if (!idref)
265 return NULL;
266 item = fz_xml_find_down(manifest, "item");
267 while (item)
268 {
269 const char *id = fz_xml_att(item, "id");
270 if (id && !strcmp(id, idref))
271 return fz_xml_att(item, "href");
272 item = fz_xml_find_next(item, "item");
273 }
274 return NULL;
275}
276
277static const char *
278path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n)
279{
280 const char *rel_path = rel_path_from_idref(manifest, idref);
281 if (!rel_path)
282 {
283 path[0] = 0;
284 return NULL;
285 }
286 fz_strlcpy(path, base_uri, n);
287 fz_strlcat(path, "/", n);
288 fz_strlcat(path, rel_path, n);
289 return fz_cleanname(fz_urldecode(path));
290}
291
292static epub_chapter *
293epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path)
294{
295 fz_archive *zip = doc->zip;
296 fz_buffer *buf = NULL;
297 epub_chapter *ch;
298 char base_uri[2048];
299
300 fz_dirname(base_uri, path, sizeof base_uri);
301
302 ch = fz_malloc_struct(ctx, epub_chapter);
303 ch->path = NULL;
304 ch->html = NULL;
305 ch->next = NULL;
306
307 fz_var(buf);
308
309 fz_try(ctx)
310 {
311 buf = fz_read_archive_entry(ctx, zip, path);
312 ch->path = fz_strdup(ctx, path);
313 ch->html = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx));
314 }
315 fz_always(ctx)
316 fz_drop_buffer(ctx, buf);
317 fz_catch(ctx)
318 {
319 fz_drop_html(ctx, ch->html);
320 fz_free(ctx, ch->path);
321 fz_free(ctx, ch);
322 fz_rethrow(ctx);
323 }
324
325 return ch;
326}
327
328static fz_outline *
329epub_parse_ncx_imp(fz_context *ctx, epub_document *doc, fz_xml *node, char *base_uri)
330{
331 char path[2048];
332 fz_outline *outline, *head, **tailp;
333
334 head = NULL;
335 tailp = &head;
336
337 node = fz_xml_find_down(node, "navPoint");
338 while (node)
339 {
340 char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(fz_xml_find_down(node, "navLabel"), "text")));
341 char *content = fz_xml_att(fz_xml_find_down(node, "content"), "src");
342 if (text && content)
343 {
344 fz_strlcpy(path, base_uri, sizeof path);
345 fz_strlcat(path, "/", sizeof path);
346 fz_strlcat(path, content, sizeof path);
347 fz_urldecode(path);
348 fz_cleanname(path);
349
350 fz_try(ctx)
351 {
352 *tailp = outline = fz_new_outline(ctx);
353 tailp = &(*tailp)->next;
354 outline->title = fz_strdup(ctx, text);
355 outline->uri = fz_strdup(ctx, path);
356 outline->page = -1;
357 outline->down = epub_parse_ncx_imp(ctx, doc, node, base_uri);
358 outline->is_open = 1;
359 }
360 fz_catch(ctx)
361 {
362 fz_drop_outline(ctx, head);
363 fz_rethrow(ctx);
364 }
365 }
366 node = fz_xml_find_next(node, "navPoint");
367 }
368
369 return head;
370}
371
372static void
373epub_parse_ncx(fz_context *ctx, epub_document *doc, const char *path)
374{
375 fz_archive *zip = doc->zip;
376 fz_buffer *buf = NULL;
377 fz_xml_doc *ncx = NULL;
378 char base_uri[2048];
379
380 fz_var(buf);
381 fz_var(ncx);
382
383 fz_try(ctx)
384 {
385 fz_dirname(base_uri, path, sizeof base_uri);
386 buf = fz_read_archive_entry(ctx, zip, path);
387 ncx = fz_parse_xml(ctx, buf, 0);
388 doc->outline = epub_parse_ncx_imp(ctx, doc, fz_xml_find_down(fz_xml_root(ncx), "navMap"), base_uri);
389 }
390 fz_always(ctx)
391 {
392 fz_drop_buffer(ctx, buf);
393 fz_drop_xml(ctx, ncx);
394 }
395 fz_catch(ctx)
396 fz_rethrow(ctx);
397}
398
399static char *
400find_metadata(fz_context *ctx, fz_xml *metadata, char *key)
401{
402 char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(metadata, key)));
403 if (text)
404 return fz_strdup(ctx, text);
405 return NULL;
406}
407
408static void
409epub_parse_header(fz_context *ctx, epub_document *doc)
410{
411 fz_archive *zip = doc->zip;
412 fz_buffer *buf = NULL;
413 fz_xml_doc *container_xml = NULL;
414 fz_xml_doc *content_opf = NULL;
415 fz_xml *container, *rootfiles, *rootfile;
416 fz_xml *package, *manifest, *spine, *itemref, *metadata;
417 char base_uri[2048];
418 const char *full_path;
419 const char *version;
420 char ncx[2048], s[2048];
421 epub_chapter **tailp;
422
423 if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml"))
424 fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM");
425 if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml"))
426 fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM");
427
428 fz_var(buf);
429 fz_var(container_xml);
430 fz_var(content_opf);
431
432 fz_try(ctx)
433 {
434 /* parse META-INF/container.xml to find OPF */
435
436 buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml");
437 container_xml = fz_parse_xml(ctx, buf, 0);
438 fz_drop_buffer(ctx, buf);
439 buf = NULL;
440
441 container = fz_xml_find(fz_xml_root(container_xml), "container");
442 rootfiles = fz_xml_find_down(container, "rootfiles");
443 rootfile = fz_xml_find_down(rootfiles, "rootfile");
444 full_path = fz_xml_att(rootfile, "full-path");
445 if (!full_path)
446 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB");
447
448 fz_dirname(base_uri, full_path, sizeof base_uri);
449
450 /* parse OPF to find NCX and spine */
451
452 buf = fz_read_archive_entry(ctx, zip, full_path);
453 content_opf = fz_parse_xml(ctx, buf, 0);
454 fz_drop_buffer(ctx, buf);
455 buf = NULL;
456
457 package = fz_xml_find(fz_xml_root(content_opf), "package");
458 version = fz_xml_att(package, "version");
459 if (!version || strcmp(version, "2.0"))
460 fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>");
461
462 metadata = fz_xml_find_down(package, "metadata");
463 if (metadata)
464 {
465 doc->dc_title = find_metadata(ctx, metadata, "title");
466 doc->dc_creator = find_metadata(ctx, metadata, "creator");
467 }
468
469 manifest = fz_xml_find_down(package, "manifest");
470 spine = fz_xml_find_down(package, "spine");
471
472 if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx))
473 {
474 epub_parse_ncx(ctx, doc, ncx);
475 }
476
477 doc->spine = NULL;
478 tailp = &doc->spine;
479 itemref = fz_xml_find_down(spine, "itemref");
480 while (itemref)
481 {
482 if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s))
483 {
484 fz_try(ctx)
485 {
486 *tailp = epub_parse_chapter(ctx, doc, s);
487 tailp = &(*tailp)->next;
488 }
489 fz_catch(ctx)
490 {
491 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
492 fz_warn(ctx, "ignoring chapter %s", s);
493 }
494 }
495 itemref = fz_xml_find_next(itemref, "itemref");
496 }
497 }
498 fz_always(ctx)
499 {
500 fz_drop_xml(ctx, content_opf);
501 fz_drop_xml(ctx, container_xml);
502 fz_drop_buffer(ctx, buf);
503 }
504 fz_catch(ctx)
505 fz_rethrow(ctx);
506}
507
508static fz_outline *
509epub_load_outline(fz_context *ctx, fz_document *doc_)
510{
511 epub_document *doc = (epub_document*)doc_;
512 return fz_keep_outline(ctx, doc->outline);
513}
514
515static int
516epub_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, int size)
517{
518 epub_document *doc = (epub_document*)doc_;
519 if (!strcmp(key, FZ_META_FORMAT))
520 return (int)fz_strlcpy(buf, "EPUB", size);
521 if (!strcmp(key, FZ_META_INFO_TITLE) && doc->dc_title)
522 return (int)fz_strlcpy(buf, doc->dc_title, size);
523 if (!strcmp(key, FZ_META_INFO_AUTHOR) && doc->dc_creator)
524 return (int)fz_strlcpy(buf, doc->dc_creator, size);
525 return -1;
526}
527
528static fz_document *
529epub_init(fz_context *ctx, fz_archive *zip)
530{
531 epub_document *doc;
532
533 doc = fz_new_derived_document(ctx, epub_document);
534 doc->zip = zip;
535 doc->set = fz_new_html_font_set(ctx);
536
537 doc->super.drop_document = epub_drop_document;
538 doc->super.layout = epub_layout;
539 doc->super.load_outline = epub_load_outline;
540 doc->super.resolve_link = epub_resolve_link;
541 doc->super.make_bookmark = epub_make_bookmark;
542 doc->super.lookup_bookmark = epub_lookup_bookmark;
543 doc->super.count_pages = epub_count_pages;
544 doc->super.load_page = epub_load_page;
545 doc->super.lookup_metadata = epub_lookup_metadata;
546 doc->super.is_reflowable = 1;
547
548 fz_try(ctx)
549 {
550 epub_parse_header(ctx, doc);
551 }
552 fz_catch(ctx)
553 {
554 fz_drop_document(ctx, &doc->super);
555 fz_rethrow(ctx);
556 }
557
558 return (fz_document*)doc;
559}
560
561static fz_document *
562epub_open_document_with_stream(fz_context *ctx, fz_stream *file)
563{
564 return epub_init(ctx, fz_open_zip_archive_with_stream(ctx, file));
565}
566
567static fz_document *
568epub_open_document(fz_context *ctx, const char *filename)
569{
570 if (strstr(filename, "META-INF/container.xml") || strstr(filename, "META-INF\\container.xml"))
571 {
572 char dirname[2048], *p;
573 fz_strlcpy(dirname, filename, sizeof dirname);
574 p = strstr(dirname, "META-INF");
575 *p = 0;
576 if (!dirname[0])
577 fz_strlcpy(dirname, ".", sizeof dirname);
578 return epub_init(ctx, fz_open_directory(ctx, dirname));
579 }
580
581 return epub_init(ctx, fz_open_zip_archive(ctx, filename));
582}
583
584static int
585epub_recognize(fz_context *doc, const char *magic)
586{
587 if (strstr(magic, "META-INF/container.xml") || strstr(magic, "META-INF\\container.xml"))
588 return 200;
589 return 0;
590}
591
/* NULL-terminated list of file extensions registered with the handler. */
static const char *epub_extensions[] = { "epub", NULL };
597
/* NULL-terminated list of MIME types registered with the handler. */
static const char *epub_mimetypes[] = { "application/epub+zip", NULL };
603
604fz_document_handler epub_document_handler =
605{
606 epub_recognize,
607 epub_open_document,
608 epub_open_document_with_stream,
609 epub_extensions,
610 epub_mimetypes
611};
612