1 | #include "mupdf/fitz.h" |
2 | #include "html-imp.h" |
3 | |
4 | #include <string.h> |
5 | #include <math.h> |
6 | |
/* Indices into a page_margin[] array: top, right, bottom, left. */
enum
{
	T = 0,
	R = 1,
	B = 2,
	L = 3
};

/* Forward declarations for the EPUB document, chapter and page records. */
typedef struct epub_page_s epub_page;
typedef struct epub_chapter_s epub_chapter;
typedef struct epub_document_s epub_document;
12 | |
13 | struct epub_document_s |
14 | { |
15 | fz_document super; |
16 | fz_archive *zip; |
17 | fz_html_font_set *set; |
18 | int count; |
19 | epub_chapter *spine; |
20 | fz_outline *outline; |
21 | char *dc_title, *dc_creator; |
22 | }; |
23 | |
24 | struct epub_chapter_s |
25 | { |
26 | char *path; |
27 | int start; |
28 | fz_html *html; |
29 | epub_chapter *next; |
30 | }; |
31 | |
32 | struct epub_page_s |
33 | { |
34 | fz_page super; |
35 | epub_document *doc; |
36 | int number; |
37 | }; |
38 | |
39 | static int count_chapter_pages(epub_chapter *ch) |
40 | { |
41 | if (ch->html->root->b > 0) |
42 | return ceilf(ch->html->root->b / ch->html->page_h); |
43 | return 1; |
44 | } |
45 | |
46 | static int |
47 | epub_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest, float *xp, float *yp) |
48 | { |
49 | epub_document *doc = (epub_document*)doc_; |
50 | epub_chapter *ch; |
51 | |
52 | const char *s = strchr(dest, '#'); |
53 | size_t n = s ? s - dest : strlen(dest); |
54 | if (s && s[1] == 0) |
55 | s = NULL; |
56 | |
57 | for (ch = doc->spine; ch; ch = ch->next) |
58 | { |
59 | if (!strncmp(ch->path, dest, n) && ch->path[n] == 0) |
60 | { |
61 | if (s) |
62 | { |
63 | /* Search for a matching fragment */ |
64 | float y = fz_find_html_target(ctx, ch->html, s+1); |
65 | if (y >= 0) |
66 | { |
67 | int page = y / ch->html->page_h; |
68 | if (yp) *yp = y - page * ch->html->page_h; |
69 | return ch->start + page; |
70 | } |
71 | return -1; |
72 | } |
73 | return ch->start; |
74 | } |
75 | } |
76 | |
77 | return -1; |
78 | } |
79 | |
80 | static void |
81 | epub_update_outline(fz_context *ctx, fz_document *doc, fz_outline *node) |
82 | { |
83 | while (node) |
84 | { |
85 | node->page = epub_resolve_link(ctx, doc, node->uri, &node->x, &node->y); |
86 | epub_update_outline(ctx, doc, node->down); |
87 | node = node->next; |
88 | } |
89 | } |
90 | |
91 | static void |
92 | epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em) |
93 | { |
94 | epub_document *doc = (epub_document*)doc_; |
95 | epub_chapter *ch; |
96 | int count = 0; |
97 | |
98 | for (ch = doc->spine; ch; ch = ch->next) |
99 | { |
100 | ch->start = count; |
101 | fz_layout_html(ctx, ch->html, w, h, em); |
102 | count += count_chapter_pages(ch); |
103 | } |
104 | |
105 | epub_update_outline(ctx, doc_, doc->outline); |
106 | } |
107 | |
108 | static int |
109 | epub_count_pages(fz_context *ctx, fz_document *doc_) |
110 | { |
111 | epub_document *doc = (epub_document*)doc_; |
112 | epub_chapter *ch; |
113 | int count = 0; |
114 | for (ch = doc->spine; ch; ch = ch->next) |
115 | count += count_chapter_pages(ch); |
116 | return count; |
117 | } |
118 | |
119 | static void |
120 | epub_drop_page(fz_context *ctx, fz_page *page_) |
121 | { |
122 | } |
123 | |
124 | static fz_rect |
125 | epub_bound_page(fz_context *ctx, fz_page *page_) |
126 | { |
127 | epub_page *page = (epub_page*)page_; |
128 | epub_document *doc = page->doc; |
129 | epub_chapter *ch; |
130 | int n = page->number; |
131 | int count = 0; |
132 | fz_rect bbox; |
133 | |
134 | for (ch = doc->spine; ch; ch = ch->next) |
135 | { |
136 | int cn = count_chapter_pages(ch); |
137 | if (n < count + cn) |
138 | { |
139 | bbox.x0 = 0; |
140 | bbox.y0 = 0; |
141 | bbox.x1 = ch->html->page_w + ch->html->page_margin[L] + ch->html->page_margin[R]; |
142 | bbox.y1 = ch->html->page_h + ch->html->page_margin[T] + ch->html->page_margin[B]; |
143 | return bbox; |
144 | } |
145 | count += cn; |
146 | } |
147 | |
148 | return fz_unit_rect; |
149 | } |
150 | |
151 | static void |
152 | epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie) |
153 | { |
154 | epub_page *page = (epub_page*)page_; |
155 | epub_document *doc = page->doc; |
156 | epub_chapter *ch; |
157 | int n = page->number; |
158 | int count = 0; |
159 | |
160 | for (ch = doc->spine; ch; ch = ch->next) |
161 | { |
162 | int cn = count_chapter_pages(ch); |
163 | if (n < count + cn) |
164 | { |
165 | fz_draw_html(ctx, dev, ctm, ch->html, n-count); |
166 | break; |
167 | } |
168 | count += cn; |
169 | } |
170 | } |
171 | |
172 | static fz_link * |
173 | epub_load_links(fz_context *ctx, fz_page *page_) |
174 | { |
175 | epub_page *page = (epub_page*)page_; |
176 | epub_document *doc = page->doc; |
177 | epub_chapter *ch; |
178 | int n = page->number; |
179 | int count = 0; |
180 | |
181 | for (ch = doc->spine; ch; ch = ch->next) |
182 | { |
183 | int cn = count_chapter_pages(ch); |
184 | if (n < count + cn) |
185 | return fz_load_html_links(ctx, ch->html, n - count, ch->path, doc); |
186 | count += cn; |
187 | } |
188 | |
189 | return NULL; |
190 | } |
191 | |
192 | static fz_bookmark |
193 | epub_make_bookmark(fz_context *ctx, fz_document *doc_, int n) |
194 | { |
195 | epub_document *doc = (epub_document*)doc_; |
196 | epub_chapter *ch; |
197 | int count = 0; |
198 | |
199 | for (ch = doc->spine; ch; ch = ch->next) |
200 | { |
201 | int cn = count_chapter_pages(ch); |
202 | if (n < count + cn) |
203 | return fz_make_html_bookmark(ctx, ch->html, n - count); |
204 | count += cn; |
205 | } |
206 | |
207 | return 0; |
208 | } |
209 | |
210 | static int |
211 | epub_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark) |
212 | { |
213 | epub_document *doc = (epub_document*)doc_; |
214 | epub_chapter *ch; |
215 | |
216 | for (ch = doc->spine; ch; ch = ch->next) |
217 | { |
218 | int p = fz_lookup_html_bookmark(ctx, ch->html, mark); |
219 | if (p != -1) |
220 | return ch->start + p; |
221 | } |
222 | return -1; |
223 | } |
224 | |
225 | static fz_page * |
226 | epub_load_page(fz_context *ctx, fz_document *doc_, int number) |
227 | { |
228 | epub_document *doc = (epub_document*)doc_; |
229 | epub_page *page = fz_new_derived_page(ctx, epub_page); |
230 | page->super.bound_page = epub_bound_page; |
231 | page->super.run_page_contents = epub_run_page; |
232 | page->super.load_links = epub_load_links; |
233 | page->super.drop_page = epub_drop_page; |
234 | page->doc = doc; |
235 | page->number = number; |
236 | return (fz_page*)page; |
237 | } |
238 | |
239 | static void |
240 | epub_drop_document(fz_context *ctx, fz_document *doc_) |
241 | { |
242 | epub_document *doc = (epub_document*)doc_; |
243 | epub_chapter *ch, *next; |
244 | ch = doc->spine; |
245 | while (ch) |
246 | { |
247 | next = ch->next; |
248 | fz_drop_html(ctx, ch->html); |
249 | fz_free(ctx, ch->path); |
250 | fz_free(ctx, ch); |
251 | ch = next; |
252 | } |
253 | fz_drop_archive(ctx, doc->zip); |
254 | fz_drop_html_font_set(ctx, doc->set); |
255 | fz_drop_outline(ctx, doc->outline); |
256 | fz_free(ctx, doc->dc_title); |
257 | fz_free(ctx, doc->dc_creator); |
258 | } |
259 | |
260 | static const char * |
261 | rel_path_from_idref(fz_xml *manifest, const char *idref) |
262 | { |
263 | fz_xml *item; |
264 | if (!idref) |
265 | return NULL; |
266 | item = fz_xml_find_down(manifest, "item" ); |
267 | while (item) |
268 | { |
269 | const char *id = fz_xml_att(item, "id" ); |
270 | if (id && !strcmp(id, idref)) |
271 | return fz_xml_att(item, "href" ); |
272 | item = fz_xml_find_next(item, "item" ); |
273 | } |
274 | return NULL; |
275 | } |
276 | |
277 | static const char * |
278 | path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n) |
279 | { |
280 | const char *rel_path = rel_path_from_idref(manifest, idref); |
281 | if (!rel_path) |
282 | { |
283 | path[0] = 0; |
284 | return NULL; |
285 | } |
286 | fz_strlcpy(path, base_uri, n); |
287 | fz_strlcat(path, "/" , n); |
288 | fz_strlcat(path, rel_path, n); |
289 | return fz_cleanname(fz_urldecode(path)); |
290 | } |
291 | |
292 | static epub_chapter * |
293 | epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path) |
294 | { |
295 | fz_archive *zip = doc->zip; |
296 | fz_buffer *buf = NULL; |
297 | epub_chapter *ch; |
298 | char base_uri[2048]; |
299 | |
300 | fz_dirname(base_uri, path, sizeof base_uri); |
301 | |
302 | ch = fz_malloc_struct(ctx, epub_chapter); |
303 | ch->path = NULL; |
304 | ch->html = NULL; |
305 | ch->next = NULL; |
306 | |
307 | fz_var(buf); |
308 | |
309 | fz_try(ctx) |
310 | { |
311 | buf = fz_read_archive_entry(ctx, zip, path); |
312 | ch->path = fz_strdup(ctx, path); |
313 | ch->html = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx)); |
314 | } |
315 | fz_always(ctx) |
316 | fz_drop_buffer(ctx, buf); |
317 | fz_catch(ctx) |
318 | { |
319 | fz_drop_html(ctx, ch->html); |
320 | fz_free(ctx, ch->path); |
321 | fz_free(ctx, ch); |
322 | fz_rethrow(ctx); |
323 | } |
324 | |
325 | return ch; |
326 | } |
327 | |
328 | static fz_outline * |
329 | epub_parse_ncx_imp(fz_context *ctx, epub_document *doc, fz_xml *node, char *base_uri) |
330 | { |
331 | char path[2048]; |
332 | fz_outline *outline, *head, **tailp; |
333 | |
334 | head = NULL; |
335 | tailp = &head; |
336 | |
337 | node = fz_xml_find_down(node, "navPoint" ); |
338 | while (node) |
339 | { |
340 | char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(fz_xml_find_down(node, "navLabel" ), "text" ))); |
341 | char *content = fz_xml_att(fz_xml_find_down(node, "content" ), "src" ); |
342 | if (text && content) |
343 | { |
344 | fz_strlcpy(path, base_uri, sizeof path); |
345 | fz_strlcat(path, "/" , sizeof path); |
346 | fz_strlcat(path, content, sizeof path); |
347 | fz_urldecode(path); |
348 | fz_cleanname(path); |
349 | |
350 | fz_try(ctx) |
351 | { |
352 | *tailp = outline = fz_new_outline(ctx); |
353 | tailp = &(*tailp)->next; |
354 | outline->title = fz_strdup(ctx, text); |
355 | outline->uri = fz_strdup(ctx, path); |
356 | outline->page = -1; |
357 | outline->down = epub_parse_ncx_imp(ctx, doc, node, base_uri); |
358 | outline->is_open = 1; |
359 | } |
360 | fz_catch(ctx) |
361 | { |
362 | fz_drop_outline(ctx, head); |
363 | fz_rethrow(ctx); |
364 | } |
365 | } |
366 | node = fz_xml_find_next(node, "navPoint" ); |
367 | } |
368 | |
369 | return head; |
370 | } |
371 | |
372 | static void |
373 | epub_parse_ncx(fz_context *ctx, epub_document *doc, const char *path) |
374 | { |
375 | fz_archive *zip = doc->zip; |
376 | fz_buffer *buf = NULL; |
377 | fz_xml_doc *ncx = NULL; |
378 | char base_uri[2048]; |
379 | |
380 | fz_var(buf); |
381 | fz_var(ncx); |
382 | |
383 | fz_try(ctx) |
384 | { |
385 | fz_dirname(base_uri, path, sizeof base_uri); |
386 | buf = fz_read_archive_entry(ctx, zip, path); |
387 | ncx = fz_parse_xml(ctx, buf, 0); |
388 | doc->outline = epub_parse_ncx_imp(ctx, doc, fz_xml_find_down(fz_xml_root(ncx), "navMap" ), base_uri); |
389 | } |
390 | fz_always(ctx) |
391 | { |
392 | fz_drop_buffer(ctx, buf); |
393 | fz_drop_xml(ctx, ncx); |
394 | } |
395 | fz_catch(ctx) |
396 | fz_rethrow(ctx); |
397 | } |
398 | |
399 | static char * |
400 | find_metadata(fz_context *ctx, fz_xml *metadata, char *key) |
401 | { |
402 | char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(metadata, key))); |
403 | if (text) |
404 | return fz_strdup(ctx, text); |
405 | return NULL; |
406 | } |
407 | |
408 | static void |
409 | (fz_context *ctx, epub_document *doc) |
410 | { |
411 | fz_archive *zip = doc->zip; |
412 | fz_buffer *buf = NULL; |
413 | fz_xml_doc *container_xml = NULL; |
414 | fz_xml_doc *content_opf = NULL; |
415 | fz_xml *container, *rootfiles, *rootfile; |
416 | fz_xml *package, *manifest, *spine, *itemref, *metadata; |
417 | char base_uri[2048]; |
418 | const char *full_path; |
419 | const char *version; |
420 | char ncx[2048], s[2048]; |
421 | epub_chapter **tailp; |
422 | |
423 | if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml" )) |
424 | fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM" ); |
425 | if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml" )) |
426 | fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM" ); |
427 | |
428 | fz_var(buf); |
429 | fz_var(container_xml); |
430 | fz_var(content_opf); |
431 | |
432 | fz_try(ctx) |
433 | { |
434 | /* parse META-INF/container.xml to find OPF */ |
435 | |
436 | buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml" ); |
437 | container_xml = fz_parse_xml(ctx, buf, 0); |
438 | fz_drop_buffer(ctx, buf); |
439 | buf = NULL; |
440 | |
441 | container = fz_xml_find(fz_xml_root(container_xml), "container" ); |
442 | rootfiles = fz_xml_find_down(container, "rootfiles" ); |
443 | rootfile = fz_xml_find_down(rootfiles, "rootfile" ); |
444 | full_path = fz_xml_att(rootfile, "full-path" ); |
445 | if (!full_path) |
446 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB" ); |
447 | |
448 | fz_dirname(base_uri, full_path, sizeof base_uri); |
449 | |
450 | /* parse OPF to find NCX and spine */ |
451 | |
452 | buf = fz_read_archive_entry(ctx, zip, full_path); |
453 | content_opf = fz_parse_xml(ctx, buf, 0); |
454 | fz_drop_buffer(ctx, buf); |
455 | buf = NULL; |
456 | |
457 | package = fz_xml_find(fz_xml_root(content_opf), "package" ); |
458 | version = fz_xml_att(package, "version" ); |
459 | if (!version || strcmp(version, "2.0" )) |
460 | fz_warn(ctx, "unknown epub version: %s" , version ? version : "<none>" ); |
461 | |
462 | metadata = fz_xml_find_down(package, "metadata" ); |
463 | if (metadata) |
464 | { |
465 | doc->dc_title = find_metadata(ctx, metadata, "title" ); |
466 | doc->dc_creator = find_metadata(ctx, metadata, "creator" ); |
467 | } |
468 | |
469 | manifest = fz_xml_find_down(package, "manifest" ); |
470 | spine = fz_xml_find_down(package, "spine" ); |
471 | |
472 | if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc" ), sizeof ncx)) |
473 | { |
474 | epub_parse_ncx(ctx, doc, ncx); |
475 | } |
476 | |
477 | doc->spine = NULL; |
478 | tailp = &doc->spine; |
479 | itemref = fz_xml_find_down(spine, "itemref" ); |
480 | while (itemref) |
481 | { |
482 | if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref" ), sizeof s)) |
483 | { |
484 | fz_try(ctx) |
485 | { |
486 | *tailp = epub_parse_chapter(ctx, doc, s); |
487 | tailp = &(*tailp)->next; |
488 | } |
489 | fz_catch(ctx) |
490 | { |
491 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
492 | fz_warn(ctx, "ignoring chapter %s" , s); |
493 | } |
494 | } |
495 | itemref = fz_xml_find_next(itemref, "itemref" ); |
496 | } |
497 | } |
498 | fz_always(ctx) |
499 | { |
500 | fz_drop_xml(ctx, content_opf); |
501 | fz_drop_xml(ctx, container_xml); |
502 | fz_drop_buffer(ctx, buf); |
503 | } |
504 | fz_catch(ctx) |
505 | fz_rethrow(ctx); |
506 | } |
507 | |
508 | static fz_outline * |
509 | epub_load_outline(fz_context *ctx, fz_document *doc_) |
510 | { |
511 | epub_document *doc = (epub_document*)doc_; |
512 | return fz_keep_outline(ctx, doc->outline); |
513 | } |
514 | |
515 | static int |
516 | epub_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, int size) |
517 | { |
518 | epub_document *doc = (epub_document*)doc_; |
519 | if (!strcmp(key, FZ_META_FORMAT)) |
520 | return (int)fz_strlcpy(buf, "EPUB" , size); |
521 | if (!strcmp(key, FZ_META_INFO_TITLE) && doc->dc_title) |
522 | return (int)fz_strlcpy(buf, doc->dc_title, size); |
523 | if (!strcmp(key, FZ_META_INFO_AUTHOR) && doc->dc_creator) |
524 | return (int)fz_strlcpy(buf, doc->dc_creator, size); |
525 | return -1; |
526 | } |
527 | |
528 | static fz_document * |
529 | epub_init(fz_context *ctx, fz_archive *zip) |
530 | { |
531 | epub_document *doc; |
532 | |
533 | doc = fz_new_derived_document(ctx, epub_document); |
534 | doc->zip = zip; |
535 | doc->set = fz_new_html_font_set(ctx); |
536 | |
537 | doc->super.drop_document = epub_drop_document; |
538 | doc->super.layout = epub_layout; |
539 | doc->super.load_outline = epub_load_outline; |
540 | doc->super.resolve_link = epub_resolve_link; |
541 | doc->super.make_bookmark = epub_make_bookmark; |
542 | doc->super.lookup_bookmark = epub_lookup_bookmark; |
543 | doc->super.count_pages = epub_count_pages; |
544 | doc->super.load_page = epub_load_page; |
545 | doc->super.lookup_metadata = epub_lookup_metadata; |
546 | doc->super.is_reflowable = 1; |
547 | |
548 | fz_try(ctx) |
549 | { |
550 | epub_parse_header(ctx, doc); |
551 | } |
552 | fz_catch(ctx) |
553 | { |
554 | fz_drop_document(ctx, &doc->super); |
555 | fz_rethrow(ctx); |
556 | } |
557 | |
558 | return (fz_document*)doc; |
559 | } |
560 | |
561 | static fz_document * |
562 | epub_open_document_with_stream(fz_context *ctx, fz_stream *file) |
563 | { |
564 | return epub_init(ctx, fz_open_zip_archive_with_stream(ctx, file)); |
565 | } |
566 | |
567 | static fz_document * |
568 | epub_open_document(fz_context *ctx, const char *filename) |
569 | { |
570 | if (strstr(filename, "META-INF/container.xml" ) || strstr(filename, "META-INF\\container.xml" )) |
571 | { |
572 | char dirname[2048], *p; |
573 | fz_strlcpy(dirname, filename, sizeof dirname); |
574 | p = strstr(dirname, "META-INF" ); |
575 | *p = 0; |
576 | if (!dirname[0]) |
577 | fz_strlcpy(dirname, "." , sizeof dirname); |
578 | return epub_init(ctx, fz_open_directory(ctx, dirname)); |
579 | } |
580 | |
581 | return epub_init(ctx, fz_open_zip_archive(ctx, filename)); |
582 | } |
583 | |
584 | static int |
585 | epub_recognize(fz_context *doc, const char *magic) |
586 | { |
587 | if (strstr(magic, "META-INF/container.xml" ) || strstr(magic, "META-INF\\container.xml" )) |
588 | return 200; |
589 | return 0; |
590 | } |
591 | |
/* NULL-terminated list of file extensions handled by this handler. */
static const char *epub_extensions[] = { "epub", NULL };
597 | |
/* NULL-terminated list of MIME types handled by this handler. */
static const char *epub_mimetypes[] = { "application/epub+zip", NULL };
603 | |
604 | fz_document_handler epub_document_handler = |
605 | { |
606 | epub_recognize, |
607 | epub_open_document, |
608 | epub_open_document_with_stream, |
609 | epub_extensions, |
610 | epub_mimetypes |
611 | }; |
612 | |