1#include "mupdf/fitz.h"
2#include "mupdf/ucdn.h"
3#include "html-imp.h"
4
5#include <string.h>
6#include <stdio.h>
7
/* Side indices; presumably Top, Right, Bottom, Left for per-side arrays
 * declared elsewhere -- TODO confirm against html-imp.h. */
8enum { T, R, B, L };
9
/* Text direction used when a "dir" attribute holds an unrecognised value. */
10#define DEFAULT_DIR FZ_BIDI_LTR
11
/*
 * Built-in style sheet for HTML input. fz_parse_html feeds it to
 * fz_parse_css under the name "<default:html>" when the document root is
 * not a FictionBook element. The adjacent string literals are concatenated
 * by the compiler into one CSS text; rule order is significant, so do not
 * reorder the lines.
 */
12static const char *html_default_css =
13"@page{margin:3em 2em}"
14"a{color:#06C;text-decoration:underline}"
15"address{display:block;font-style:italic}"
16"b{font-weight:bold}"
17"bdo{direction:rtl;unicode-bidi:bidi-override}"
18"blockquote{display:block;margin:1em 40px}"
19"body{display:block;margin:1em}"
20"cite{font-style:italic}"
21"code{font-family:monospace}"
22"dd{display:block;margin:0 0 0 40px}"
23"del{text-decoration:line-through}"
24"div{display:block}"
25"dl{display:block;margin:1em 0}"
26"dt{display:block}"
27"em{font-style:italic}"
28"h1{display:block;font-size:2em;font-weight:bold;margin:0.67em 0;page-break-after:avoid}"
29"h2{display:block;font-size:1.5em;font-weight:bold;margin:0.83em 0;page-break-after:avoid}"
30"h3{display:block;font-size:1.17em;font-weight:bold;margin:1em 0;page-break-after:avoid}"
31"h4{display:block;font-size:1em;font-weight:bold;margin:1.33em 0;page-break-after:avoid}"
32"h5{display:block;font-size:0.83em;font-weight:bold;margin:1.67em 0;page-break-after:avoid}"
33"h6{display:block;font-size:0.67em;font-weight:bold;margin:2.33em 0;page-break-after:avoid}"
34"head{display:none}"
35"hr{border-style:solid;border-width:1px;display:block;margin-bottom:0.5em;margin-top:0.5em;text-align:center}"
36"html{display:block}"
37"i{font-style:italic}"
38"ins{text-decoration:underline}"
39"kbd{font-family:monospace}"
40"li{display:list-item}"
41"menu{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
42"ol{display:block;list-style-type:decimal;margin:1em 0;padding:0 0 0 30pt}"
43"p{display:block;margin:1em 0}"
44"pre{display:block;font-family:monospace;margin:1em 0;white-space:pre}"
45"samp{font-family:monospace}"
46"script{display:none}"
47"small{font-size:0.83em}"
48"strong{font-weight:bold}"
49"style{display:none}"
50"sub{font-size:0.83em;vertical-align:sub}"
51"sup{font-size:0.83em;vertical-align:super}"
52"table{display:table}"
53"tbody{display:table-row-group}"
54"td{display:table-cell;padding:1px}"
55"tfoot{display:table-footer-group}"
56"th{display:table-cell;font-weight:bold;padding:1px;text-align:center}"
57"thead{display:table-header-group}"
58"tr{display:table-row}"
59"ul{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
60"ul ul{list-style-type:circle}"
61"ul ul ul{list-style-type:square}"
62"var{font-style:italic}"
63"svg{display:none}"
64;
65
/*
 * Built-in style sheet for FictionBook (FB2) input. fz_parse_html feeds it
 * to fz_parse_css under the name "<default:fb2>" when the document root
 * contains a FictionBook element. The adjacent string literals form one CSS
 * text; later rules refine earlier ones (e.g. "p+p" after "p"), so the
 * order of the lines matters.
 */
66static const char *fb2_default_css =
67"@page{margin:3em 2em}"
68"FictionBook{display:block;margin:1em}"
69"stylesheet,binary{display:none}"
70"description>*{display:none}"
71"description>title-info{display:block}"
72"description>title-info>*{display:none}"
73"description>title-info>coverpage{display:block;page-break-before:always;page-break-after:always}"
74"body,section,title,subtitle,p,cite,epigraph,text-author,date,poem,stanza,v,empty-line{display:block}"
75"image{display:block}"
76"p>image{display:inline}"
77"table{display:table}"
78"tr{display:table-row}"
79"th,td{display:table-cell}"
80"a{color:#06C;text-decoration:underline}"
81"a[type=note]{font-size:small;vertical-align:super}"
82"code{white-space:pre;font-family:monospace}"
83"emphasis{font-style:italic}"
84"strikethrough{text-decoration:line-through}"
85"strong{font-weight:bold}"
86"sub{font-size:small;vertical-align:sub}"
87"sup{font-size:small;vertical-align:super}"
88"image{margin:1em 0;text-align:center}"
89"cite,poem{margin:1em 2em}"
90"subtitle,epigraph,stanza{margin:1em 0}"
91"title>p{text-align:center;font-size:x-large}"
92"subtitle{text-align:center;font-size:large}"
93"p{margin-top:1em;text-align:justify}"
94"empty-line{padding-top:1em}"
95"p+p{margin-top:0;text-indent:1.5em}"
96"empty-line+p{margin-top:0}"
97"section>title{page-break-before:always}"
98;
99
/*
 * State shared by the box/flow generation functions while one document is
 * being converted from XML into a box tree.
 */
100struct genstate
101{
102 fz_pool *pool; /* pool that every box, flow node and copied string is allocated from */
103 fz_html_font_set *set; /* font set handed to fz_apply_css_style */
104 fz_archive *zip; /* archive that images and linked style sheets are read from */
105 fz_tree *images; /* FB2 only: embedded images, looked up by <binary> id */
106 int is_fb2; /* non-zero when the document root is a FictionBook element */
107 const char *base_uri; /* prefix prepended to relative resource paths */
108 fz_css *css; /* parsed style rules matched against each element */
109 int at_bol; /* non-zero right after a line break or at the start of a new flow; pending white space is then discarded */
110 int emit_white; /* non-zero when collapsed white space is waiting to be emitted before the next word or image */
111 int last_brk_cls; /* line break class of the previous character; row index into pairbrk */
112};
113
/* Return 1 when c is an ASCII space, tab, carriage return or line feed,
 * and 0 for every other value. */
static int iswhite(int c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
118
/* Return 1 when the NUL-terminated string s consists only of characters
 * accepted by iswhite (this includes the empty string), otherwise 0. */
static int is_all_white(const char *s)
{
	const char *p;

	for (p = s; *p != 0; ++p)
	{
		if (!iswhite(*p))
			return 0;
	}
	return 1;
}
129
130/* TODO: pool allocator for flow nodes */
131/* TODO: store text by pointing to a giant buffer */
132
133static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow)
134{
135 while (flow)
136 {
137 fz_html_flow *next = flow->next;
138 if (flow->type == FLOW_IMAGE)
139 fz_drop_image(ctx, flow->content.image);
140 flow = next;
141 }
142}
143
144static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, int type)
145{
146 fz_html_flow *flow = fz_pool_alloc(ctx, pool, sizeof *flow);
147 flow->type = type;
148 flow->expand = 0;
149 flow->bidi_level = 0;
150 flow->markup_lang = 0;
151 flow->breaks_line = 0;
152 flow->box = inline_box;
153 *top->flow_tail = flow;
154 top->flow_tail = &flow->next;
155 return flow;
156}
157
158static void add_flow_space(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
159{
160 fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_SPACE);
161 flow->expand = 1;
162}
163
164static void add_flow_break(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
165{
166 (void)add_flow(ctx, pool, top, inline_box, FLOW_BREAK);
167}
168
169static void add_flow_sbreak(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
170{
171 (void)add_flow(ctx, pool, top, inline_box, FLOW_SBREAK);
172}
173
174static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
175{
176 (void)add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN);
177}
178
179static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, const char *a, const char *b, int lang)
180{
181 fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_WORD);
182 flow->content.text = fz_pool_alloc(ctx, pool, b - a + 1);
183 memcpy(flow->content.text, a, b - a);
184 flow->content.text[b - a] = 0;
185 flow->markup_lang = lang;
186}
187
188static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, fz_image *img)
189{
190 fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_IMAGE);
191 flow->content.image = fz_keep_image(ctx, img);
192}
193
194static void add_flow_anchor(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
195{
196 (void)add_flow(ctx, pool, top, inline_box, FLOW_ANCHOR);
197}
198
199static fz_html_flow *split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset)
200{
201 fz_html_flow *new_flow;
202 char *text;
203 size_t len;
204
205 if (offset == 0)
206 return flow;
207 new_flow = fz_pool_alloc(ctx, pool, sizeof *flow);
208 *new_flow = *flow;
209 new_flow->next = flow->next;
210 flow->next = new_flow;
211
212 text = flow->content.text;
213 while (*text && offset)
214 {
215 int rune;
216 text += fz_chartorune(&rune, text);
217 offset--;
218 }
219 len = strlen(text);
220 new_flow->content.text = fz_pool_alloc(ctx, pool, len+1);
221 strcpy(new_flow->content.text, text);
222 *text = 0;
223 return new_flow;
224}
225
226static void flush_space(fz_context *ctx, fz_html_box *flow, fz_html_box *inline_box, int lang, struct genstate *g)
227{
228 static const char *space = " ";
229 int bsp = inline_box->style.white_space & WS_ALLOW_BREAK_SPACE;
230 fz_pool *pool = g->pool;
231 if (g->emit_white)
232 {
233 if (!g->at_bol)
234 {
235 if (bsp)
236 add_flow_space(ctx, pool, flow, inline_box);
237 else
238 add_flow_word(ctx, pool, flow, inline_box, space, space+1, lang);
239 }
240 g->emit_white = 0;
241 }
242}
243
/*
 * Line break pair table. generate_text indexes it as
 * pairbrk[class of previous character][class of current character].
 * An entry of '_' makes generate_text insert a soft break between the two
 * characters; '^' means no break. generate_text also maps '@', '#' and '%'
 * to '^' because spaces are handled separately there.
 * The two comment rows below spell the column class abbreviations
 * vertically (OP, CL, CP, ...), in the same order as the rows.
 * Only classes below UCDN_LINEBREAK_CLASS_RI are looked up as columns.
 */
244/* pair-wise lookup table for UAX#14 linebreaks */
245static const char *pairbrk[29] =
246{
247/* -OCCQGNESIPPNAHIIHBBBZCWHHJJJR- */
248/* -PLPULSXYSROULLDNYAB2WMJ23LVTI- */
249 "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^", /* OP open punctuation */
250 "_^^%%^^^^%%_____%%__^^^______", /* CL close punctuation */
251 "_^^%%^^^^%%%%%__%%__^^^______", /* CP close parenthesis */
252 "^^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* QU quotation */
253 "%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* GL non-breaking glue */
254 "_^^%%%^^^_______%%__^^^______", /* NS nonstarters */
255 "_^^%%%^^^______%%%__^^^______", /* EX exclamation/interrogation */
256 "_^^%%%^^^__%_%__%%__^^^______", /* SY symbols allowing break after */
257 "_^^%%%^^^__%%%__%%__^^^______", /* IS infix numeric separator */
258 "%^^%%%^^^__%%%%_%%__^^^%%%%%_", /* PR prefix numeric */
259 "%^^%%%^^^__%%%__%%__^^^______", /* PO postfix numeric */
260 "%^^%%%^^^%%%%%_%%%__^^^______", /* NU numeric */
261 "%^^%%%^^^__%%%_%%%__^^^______", /* AL ordinary alphabetic and symbol characters */
262 "%^^%%%^^^__%%%_%%%__^^^______", /* HL hebrew letter */
263 "_^^%%%^^^_%____%%%__^^^______", /* ID ideographic */
264 "_^^%%%^^^______%%%__^^^______", /* IN inseparable characters */
265 "_^^%_%^^^__%____%%__^^^______", /* HY hyphens */
266 "_^^%_%^^^_______%%__^^^______", /* BA break after */
267 "%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* BB break before */
268 "_^^%%%^^^_______%%_^^^^______", /* B2 break opportunity before and after */
269 "____________________^________", /* ZW zero width space */
270 "%^^%%%^^^__%%%_%%%__^^^______", /* CM combining mark */
271 "%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* WJ word joiner */
272 "_^^%%%^^^_%____%%%__^^^___%%_", /* H2 hangul leading/vowel syllable */
273 "_^^%%%^^^_%____%%%__^^^____%_", /* H3 hangul leading/vowel/trailing syllable */
274 "_^^%%%^^^_%____%%%__^^^%%%%__", /* JL hangul leading jamo */
275 "_^^%%%^^^_%____%%%__^^^___%%_", /* JV hangul vowel jamo */
276 "_^^%%%^^^_%____%%%__^^^____%_", /* JT hangul trailing jamo */
277 "_^^%%%^^^_______%%__^^^_____%", /* RI regional indicator */
278};
279
280static void generate_text(fz_context *ctx, fz_html_box *box, const char *text, int lang, struct genstate *g)
281{
282 fz_html_box *flow;
283 fz_pool *pool = g->pool;
284 int collapse = box->style.white_space & WS_COLLAPSE;
285 int bsp = box->style.white_space & WS_ALLOW_BREAK_SPACE;
286 int bnl = box->style.white_space & WS_FORCE_BREAK_NEWLINE;
287
288 static const char *space = " ";
289
290 flow = box;
291 while (flow->type != BOX_FLOW)
292 flow = flow->up;
293
294 while (*text)
295 {
296 if (bnl && (*text == '\n' || *text == '\r'))
297 {
298 if (text[0] == '\r' && text[1] == '\n')
299 text += 2;
300 else
301 text += 1;
302 add_flow_break(ctx, pool, flow, box);
303 g->at_bol = 1;
304 }
305 else if (iswhite(*text))
306 {
307 if (collapse)
308 {
309 if (bnl)
310 while (*text == ' ' || *text == '\t')
311 ++text;
312 else
313 while (iswhite(*text))
314 ++text;
315 g->emit_white = 1;
316 }
317 else
318 {
319 // TODO: tabs
320 if (bsp)
321 add_flow_space(ctx, pool, flow, box);
322 else
323 add_flow_word(ctx, pool, flow, box, space, space+1, lang);
324 ++text;
325 }
326 g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */
327 }
328 else
329 {
330 const char *prev, *mark = text;
331 int c;
332
333 flush_space(ctx, flow, box, lang, g);
334
335 if (g->at_bol)
336 g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ;
337
338 while (*text && !iswhite(*text))
339 {
340 prev = text;
341 text += fz_chartorune(&c, text);
342 if (c == 0xAD) /* soft hyphen */
343 {
344 if (mark != prev)
345 add_flow_word(ctx, pool, flow, box, mark, prev, lang);
346 add_flow_shyphen(ctx, pool, flow, box);
347 mark = text;
348 g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */
349 }
350 else if (bsp) /* allow soft breaks */
351 {
352 int this_brk_cls = ucdn_get_resolved_linebreak_class(c);
353 if (this_brk_cls < UCDN_LINEBREAK_CLASS_RI)
354 {
355 int brk = pairbrk[g->last_brk_cls][this_brk_cls];
356
357 /* we handle spaces elsewhere, so ignore these classes */
358 if (brk == '@') brk = '^';
359 if (brk == '#') brk = '^';
360 if (brk == '%') brk = '^';
361
362 if (brk == '_')
363 {
364 if (mark != prev)
365 add_flow_word(ctx, pool, flow, box, mark, prev, lang);
366 add_flow_sbreak(ctx, pool, flow, box);
367 mark = prev;
368 }
369
370 g->last_brk_cls = this_brk_cls;
371 }
372 }
373 }
374 if (mark != text)
375 add_flow_word(ctx, pool, flow, box, mark, text, lang);
376
377 g->at_bol = 0;
378 }
379 }
380}
381
382static fz_image *load_html_image(fz_context *ctx, fz_archive *zip, const char *base_uri, const char *src)
383{
384 char path[2048];
385 fz_image *img = NULL;
386 fz_buffer *buf = NULL;
387
388 fz_var(img);
389 fz_var(buf);
390
391 fz_try(ctx)
392 {
393 if (!strncmp(src, "data:image/jpeg;base64,", 23))
394 buf = fz_new_buffer_from_base64(ctx, src+23, 0);
395 else if (!strncmp(src, "data:image/png;base64,", 22))
396 buf = fz_new_buffer_from_base64(ctx, src+22, 0);
397 else
398 {
399 fz_strlcpy(path, base_uri, sizeof path);
400 fz_strlcat(path, "/", sizeof path);
401 fz_strlcat(path, src, sizeof path);
402 fz_urldecode(path);
403 fz_cleanname(path);
404 buf = fz_read_archive_entry(ctx, zip, path);
405 }
406#if FZ_ENABLE_SVG
407 if (strstr(src, ".svg"))
408 img = fz_new_image_from_svg(ctx, buf, base_uri, zip);
409 else
410#endif
411 img = fz_new_image_from_buffer(ctx, buf);
412 }
413 fz_always(ctx)
414 fz_drop_buffer(ctx, buf);
415 fz_catch(ctx)
416 fz_warn(ctx, "html: cannot load image src='%s'", src);
417
418 return img;
419}
420
421static fz_image *load_svg_image(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_xml *xml)
422{
423 fz_image *img = NULL;
424 fz_try(ctx)
425 img = fz_new_image_from_svg_xml(ctx, xml, base_uri, zip);
426 fz_catch(ctx)
427 fz_warn(ctx, "html: cannot load embedded svg document");
428 return img;
429}
430
431static void generate_anchor(fz_context *ctx, fz_html_box *box, struct genstate *g)
432{
433 fz_pool *pool = g->pool;
434 fz_html_box *flow = box;
435 while (flow->type != BOX_FLOW)
436 flow = flow->up;
437 add_flow_anchor(ctx, pool, flow, box);
438}
439
440static void generate_image(fz_context *ctx, fz_html_box *box, fz_image *img, struct genstate *g)
441{
442 fz_html_box *flow = box;
443 fz_pool *pool = g->pool;
444 while (flow->type != BOX_FLOW)
445 flow = flow->up;
446
447 flush_space(ctx, flow, box, 0, g);
448
449 if (!img)
450 {
451 const char *alt = "[image]";
452 add_flow_word(ctx, pool, flow, box, alt, alt + 7, 0);
453 }
454 else
455 {
456 fz_try(ctx)
457 {
458 add_flow_sbreak(ctx, pool, flow, box);
459 add_flow_image(ctx, pool, flow, box, img);
460 add_flow_sbreak(ctx, pool, flow, box);
461 }
462 fz_always(ctx)
463 {
464 fz_drop_image(ctx, img);
465 }
466 fz_catch(ctx)
467 fz_rethrow(ctx);
468 }
469
470 g->at_bol = 0;
471}
472
473static void init_box(fz_context *ctx, fz_html_box *box, fz_bidi_direction markup_dir)
474{
475 box->type = BOX_BLOCK;
476 box->x = box->y = 0;
477 box->w = box->b = 0;
478
479 box->up = NULL;
480 box->last = NULL;
481 box->down = NULL;
482 box->next = NULL;
483
484 box->flow_head = NULL;
485 box->flow_tail = &box->flow_head;
486 box->markup_dir = markup_dir;
487
488 fz_default_css_style(ctx, &box->style);
489}
490
491static void fz_drop_html_box(fz_context *ctx, fz_html_box *box)
492{
493 while (box)
494 {
495 fz_html_box *next = box->next;
496 fz_drop_html_flow(ctx, box->flow_head);
497 fz_drop_html_box(ctx, box->down);
498 box = next;
499 }
500}
501
502void fz_drop_html(fz_context *ctx, fz_html *html)
503{
504 if (html)
505 {
506 fz_drop_html_box(ctx, html->root);
507 fz_drop_pool(ctx, html->pool);
508 }
509}
510
511static fz_html_box *new_box(fz_context *ctx, fz_pool *pool, fz_bidi_direction markup_dir)
512{
513 fz_html_box *box = fz_pool_alloc(ctx, pool, sizeof *box);
514 init_box(ctx, box, markup_dir);
515 return box;
516}
517
518static void insert_box(fz_context *ctx, fz_html_box *box, int type, fz_html_box *top)
519{
520 box->type = type;
521
522 box->up = top;
523
524 if (top)
525 {
526 if (!top->last)
527 {
528 top->down = top->last = box;
529 }
530 else
531 {
532 top->last->next = box;
533 top->last = box;
534 }
535 }
536}
537
538static fz_html_box *insert_block_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
539{
540 if (top->type == BOX_BLOCK)
541 {
542 insert_box(ctx, box, BOX_BLOCK, top);
543 }
544 else if (top->type == BOX_FLOW)
545 {
546 while (top->type != BOX_BLOCK)
547 top = top->up;
548 insert_box(ctx, box, BOX_BLOCK, top);
549 }
550 else if (top->type == BOX_INLINE)
551 {
552 while (top->type != BOX_BLOCK)
553 top = top->up;
554 insert_box(ctx, box, BOX_BLOCK, top);
555 }
556 return top;
557}
558
559static fz_html_box *insert_table_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
560{
561 top = insert_block_box(ctx, box, top);
562 box->type = BOX_TABLE;
563 return top;
564}
565
566static fz_html_box *insert_table_row_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
567{
568 fz_html_box *table = top;
569 while (table && table->type != BOX_TABLE)
570 table = table->up;
571 if (table)
572 {
573 insert_box(ctx, box, BOX_TABLE_ROW, table);
574 return table;
575 }
576 fz_warn(ctx, "table-row not inside table element");
577 insert_block_box(ctx, box, top);
578 return top;
579}
580
581static fz_html_box *insert_table_cell_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
582{
583 fz_html_box *tr = top;
584 while (tr && tr->type != BOX_TABLE_ROW)
585 tr = tr->up;
586 if (tr)
587 {
588 insert_box(ctx, box, BOX_TABLE_CELL, tr);
589 return tr;
590 }
591 fz_warn(ctx, "table-cell not inside table-row element");
592 insert_block_box(ctx, box, top);
593 return top;
594}
595
596static fz_html_box *insert_break_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
597{
598 if (top->type == BOX_BLOCK)
599 {
600 insert_box(ctx, box, BOX_BREAK, top);
601 }
602 else if (top->type == BOX_FLOW)
603 {
604 while (top->type != BOX_BLOCK)
605 top = top->up;
606 insert_box(ctx, box, BOX_BREAK, top);
607 }
608 else if (top->type == BOX_INLINE)
609 {
610 while (top->type != BOX_BLOCK)
611 top = top->up;
612 insert_box(ctx, box, BOX_BREAK, top);
613 }
614 return top;
615}
616
617static void insert_inline_box(fz_context *ctx, fz_html_box *box, fz_html_box *top, int markup_dir, struct genstate *g)
618{
619 if (top->type == BOX_FLOW || top->type == BOX_INLINE)
620 {
621 insert_box(ctx, box, BOX_INLINE, top);
622 }
623 else
624 {
625 while (top->type != BOX_BLOCK && top->type != BOX_TABLE_CELL)
626 top = top->up;
627
628 if (top->last && top->last->type == BOX_FLOW)
629 {
630 insert_box(ctx, box, BOX_INLINE, top->last);
631 }
632 else
633 {
634 fz_html_box *flow = new_box(ctx, g->pool, markup_dir);
635 flow->is_first_flow = !top->last;
636 insert_box(ctx, flow, BOX_FLOW, top);
637 insert_box(ctx, box, BOX_INLINE, flow);
638 g->at_bol = 1;
639 }
640 }
641}
642
643static fz_html_box *
644generate_boxes(fz_context *ctx,
645 fz_xml *node,
646 fz_html_box *top,
647 fz_css_match *up_match,
648 int list_counter,
649 int section_depth,
650 int markup_dir,
651 int markup_lang,
652 struct genstate *g)
653{
654 fz_css_match match;
655 fz_html_box *box, *last_top;
656 const char *tag;
657 int display;
658
659 while (node)
660 {
661 match.up = up_match;
662 match.count = 0;
663
664 tag = fz_xml_tag(node);
665 if (tag)
666 {
667 fz_match_css(ctx, &match, g->css, node);
668
669 display = fz_get_css_match_display(&match);
670
671 if (tag[0]=='b' && tag[1]=='r' && tag[2]==0)
672 {
673 if (top->type == BOX_INLINE)
674 {
675 fz_html_box *flow = top;
676 while (flow->type != BOX_FLOW)
677 flow = flow->up;
678 add_flow_break(ctx, g->pool, flow, top);
679 }
680 else
681 {
682 box = new_box(ctx, g->pool, markup_dir);
683 fz_apply_css_style(ctx, g->set, &box->style, &match);
684 top = insert_break_box(ctx, box, top);
685 }
686 g->at_bol = 1;
687 }
688
689 else if (tag[0]=='i' && tag[1]=='m' && tag[2]=='g' && tag[3]==0)
690 {
691 const char *src = fz_xml_att(node, "src");
692 if (src)
693 {
694 int w, h;
695 const char *w_att = fz_xml_att(node, "width");
696 const char *h_att = fz_xml_att(node, "height");
697 box = new_box(ctx, g->pool, markup_dir);
698 fz_apply_css_style(ctx, g->set, &box->style, &match);
699 if (w_att && (w = fz_atoi(w_att)) > 0)
700 {
701 box->style.width.value = w;
702 box->style.width.unit = strchr(w_att, '%') ? N_PERCENT : N_LENGTH;
703 }
704 if (h_att && (h = fz_atoi(h_att)) > 0)
705 {
706 box->style.height.value = h;
707 box->style.height.unit = strchr(h_att, '%') ? N_PERCENT : N_LENGTH;
708 }
709 insert_inline_box(ctx, box, top, markup_dir, g);
710 generate_image(ctx, box, load_html_image(ctx, g->zip, g->base_uri, src), g);
711 }
712 }
713
714 else if (tag[0]=='s' && tag[1]=='v' && tag[2]=='g' && tag[3]==0)
715 {
716 box = new_box(ctx, g->pool, markup_dir);
717 fz_apply_css_style(ctx, g->set, &box->style, &match);
718 insert_inline_box(ctx, box, top, markup_dir, g);
719 generate_image(ctx, box, load_svg_image(ctx, g->zip, g->base_uri, node), g);
720 }
721
722 else if (g->is_fb2 && tag[0]=='i' && tag[1]=='m' && tag[2]=='a' && tag[3]=='g' && tag[4]=='e' && tag[5]==0)
723 {
724 const char *src = fz_xml_att(node, "l:href");
725 if (!src)
726 src = fz_xml_att(node, "xlink:href");
727 if (src && src[0] == '#')
728 {
729 fz_image *img = fz_tree_lookup(ctx, g->images, src+1);
730 if (display == DIS_BLOCK)
731 {
732 fz_html_box *imgbox;
733 box = new_box(ctx, g->pool, markup_dir);
734 fz_apply_css_style(ctx, g->set, &box->style, &match);
735 top = insert_block_box(ctx, box, top);
736 imgbox = new_box(ctx, g->pool, markup_dir);
737 fz_apply_css_style(ctx, g->set, &imgbox->style, &match);
738 insert_inline_box(ctx, imgbox, box, markup_dir, g);
739 generate_image(ctx, imgbox, fz_keep_image(ctx, img), g);
740 }
741 else if (display == DIS_INLINE)
742 {
743 box = new_box(ctx, g->pool, markup_dir);
744 fz_apply_css_style(ctx, g->set, &box->style, &match);
745 insert_inline_box(ctx, box, top, markup_dir, g);
746 generate_image(ctx, box, fz_keep_image(ctx, img), g);
747 }
748 }
749 }
750
751 else if (display != DIS_NONE)
752 {
753 const char *dir, *lang, *id, *href;
754 int child_dir = markup_dir;
755 int child_lang = markup_lang;
756
757 dir = fz_xml_att(node, "dir");
758 if (dir)
759 {
760 if (!strcmp(dir, "auto"))
761 child_dir = FZ_BIDI_NEUTRAL;
762 else if (!strcmp(dir, "rtl"))
763 child_dir = FZ_BIDI_RTL;
764 else if (!strcmp(dir, "ltr"))
765 child_dir = FZ_BIDI_LTR;
766 else
767 child_dir = DEFAULT_DIR;
768 }
769
770 lang = fz_xml_att(node, "lang");
771 if (lang)
772 child_lang = fz_text_language_from_string(lang);
773
774 box = new_box(ctx, g->pool, child_dir);
775 fz_apply_css_style(ctx, g->set, &box->style, &match);
776
777 id = fz_xml_att(node, "id");
778 if (id)
779 box->id = fz_pool_strdup(ctx, g->pool, id);
780
781 if (display == DIS_BLOCK || display == DIS_INLINE_BLOCK)
782 {
783 top = insert_block_box(ctx, box, top);
784 if (g->is_fb2)
785 {
786 if (!strcmp(tag, "title") || !strcmp(tag, "subtitle"))
787 box->heading = fz_mini(section_depth, 6);
788 }
789 else
790 {
791 if (tag[0]=='h' && tag[1]>='1' && tag[1]<='6' && tag[2]==0)
792 box->heading = tag[1] - '0';
793 }
794 }
795 else if (display == DIS_LIST_ITEM)
796 {
797 top = insert_block_box(ctx, box, top);
798 box->list_item = ++list_counter;
799 }
800 else if (display == DIS_INLINE)
801 {
802 insert_inline_box(ctx, box, top, child_dir, g);
803 if (id)
804 generate_anchor(ctx, box, g);
805 if (tag[0]=='a' && tag[1]==0)
806 {
807 if (g->is_fb2)
808 {
809 href = fz_xml_att(node, "l:href");
810 if (!href)
811 href = fz_xml_att(node, "xlink:href");
812 }
813 else
814 href = fz_xml_att(node, g->is_fb2 ? "l:href" : "href");
815 if (href)
816 box->href = fz_pool_strdup(ctx, g->pool, href);
817 }
818 }
819 else if (display == DIS_TABLE)
820 {
821 top = insert_table_box(ctx, box, top);
822 }
823 else if (display == DIS_TABLE_ROW)
824 {
825 top = insert_table_row_box(ctx, box, top);
826 }
827 else if (display == DIS_TABLE_CELL)
828 {
829 top = insert_table_cell_box(ctx, box, top);
830 }
831 else
832 {
833 fz_warn(ctx, "unknown box display type");
834 insert_box(ctx, box, BOX_BLOCK, top);
835 }
836
837 if (fz_xml_down(node))
838 {
839 int child_counter = list_counter;
840 int child_section = section_depth;
841 if (!strcmp(tag, "ul") || !strcmp(tag, "ol"))
842 child_counter = 0;
843 if (!strcmp(tag, "section"))
844 ++child_section;
845 last_top = generate_boxes(ctx,
846 fz_xml_down(node),
847 box,
848 &match,
849 child_counter,
850 child_section,
851 child_dir,
852 child_lang,
853 g);
854 if (last_top != box)
855 top = last_top;
856 }
857 }
858 }
859 else
860 {
861 const char *text = fz_xml_text(node);
862 int collapse = top->style.white_space & WS_COLLAPSE;
863 if (collapse && is_all_white(text))
864 {
865 g->emit_white = 1;
866 }
867 else
868 {
869 if (top->type != BOX_INLINE)
870 {
871 /* Create anonymous inline box, with the same style as the top block box. */
872 box = new_box(ctx, g->pool, markup_dir);
873 insert_inline_box(ctx, box, top, markup_dir, g);
874 box->style = top->style;
875 /* Make sure not to recursively multiply font sizes. */
876 box->style.font_size.value = 1;
877 box->style.font_size.unit = N_SCALE;
878 generate_text(ctx, box, text, markup_lang, g);
879 }
880 else
881 {
882 generate_text(ctx, top, text, markup_lang, g);
883 }
884 }
885 }
886
887 node = fz_xml_next(node);
888 }
889
890 return top;
891}
892
893static char *concat_text(fz_context *ctx, fz_xml *root)
894{
895 fz_xml *node;
896 size_t i = 0, n = 1;
897 char *s;
898 for (node = fz_xml_down(root); node; node = fz_xml_next(node))
899 {
900 const char *text = fz_xml_text(node);
901 n += text ? strlen(text) : 0;
902 }
903 s = fz_malloc(ctx, n);
904 for (node = fz_xml_down(root); node; node = fz_xml_next(node))
905 {
906 const char *text = fz_xml_text(node);
907 if (text)
908 {
909 n = strlen(text);
910 memcpy(s+i, text, n);
911 i += n;
912 }
913 }
914 s[i] = 0;
915 return s;
916}
917
918static void
919html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
920{
921 fz_xml *html, *head, *node;
922 fz_buffer *buf;
923 char path[2048];
924
925 fz_var(buf);
926
927 html = fz_xml_find(root, "html");
928 head = fz_xml_find_down(html, "head");
929 for (node = fz_xml_down(head); node; node = fz_xml_next(node))
930 {
931 if (fz_xml_is_tag(node, "link"))
932 {
933 char *rel = fz_xml_att(node, "rel");
934 if (rel && !fz_strcasecmp(rel, "stylesheet"))
935 {
936 char *type = fz_xml_att(node, "type");
937 if ((type && !strcmp(type, "text/css")) || !type)
938 {
939 char *href = fz_xml_att(node, "href");
940 if (href)
941 {
942 fz_strlcpy(path, base_uri, sizeof path);
943 fz_strlcat(path, "/", sizeof path);
944 fz_strlcat(path, href, sizeof path);
945 fz_urldecode(path);
946 fz_cleanname(path);
947
948 buf = NULL;
949 fz_try(ctx)
950 {
951 buf = fz_read_archive_entry(ctx, zip, path);
952 fz_parse_css(ctx, css, fz_string_from_buffer(ctx, buf), path);
953 }
954 fz_always(ctx)
955 fz_drop_buffer(ctx, buf);
956 fz_catch(ctx)
957 fz_warn(ctx, "ignoring stylesheet %s", path);
958 }
959 }
960 }
961 }
962 else if (fz_xml_is_tag(node, "style"))
963 {
964 char *s = concat_text(ctx, node);
965 fz_try(ctx)
966 fz_parse_css(ctx, css, s, "<style>");
967 fz_catch(ctx)
968 fz_warn(ctx, "ignoring inline stylesheet");
969 fz_free(ctx, s);
970 }
971 }
972}
973
974static void
975fb2_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
976{
977 fz_xml *fictionbook, *stylesheet;
978
979 fictionbook = fz_xml_find(root, "FictionBook");
980 stylesheet = fz_xml_find_down(fictionbook, "stylesheet");
981 if (stylesheet)
982 {
983 char *s = concat_text(ctx, stylesheet);
984 fz_try(ctx)
985 fz_parse_css(ctx, css, s, "<stylesheet>");
986 fz_catch(ctx)
987 fz_warn(ctx, "ignoring inline stylesheet");
988 fz_free(ctx, s);
989 }
990}
991
992static fz_tree *
993load_fb2_images(fz_context *ctx, fz_xml *root)
994{
995 fz_xml *fictionbook, *binary;
996 fz_tree *images = NULL;
997
998 fictionbook = fz_xml_find(root, "FictionBook");
999 for (binary = fz_xml_find_down(fictionbook, "binary"); binary; binary = fz_xml_find_next(binary, "binary"))
1000 {
1001 const char *id = fz_xml_att(binary, "id");
1002 char *b64 = NULL;
1003 fz_buffer *buf = NULL;
1004 fz_image *img = NULL;
1005
1006 fz_var(b64);
1007 fz_var(buf);
1008
1009 fz_try(ctx)
1010 {
1011 b64 = concat_text(ctx, binary);
1012 buf = fz_new_buffer_from_base64(ctx, b64, strlen(b64));
1013 img = fz_new_image_from_buffer(ctx, buf);
1014 }
1015 fz_always(ctx)
1016 {
1017 fz_drop_buffer(ctx, buf);
1018 fz_free(ctx, b64);
1019 }
1020 fz_catch(ctx)
1021 fz_rethrow(ctx);
1022
1023 images = fz_tree_insert(ctx, images, id, img);
1024 }
1025
1026 return images;
1027}
1028
/* Growable buffer of Unicode code points used while gathering flow text
 * for bidirectional analysis. */
1029typedef struct
1030{
1031 uint32_t *data; /* code points; grown with fz_realloc_array, freed by detect_directionality */
1032 size_t cap; /* number of uint32_t elements allocated in data */
1033 size_t len; /* number of elements currently in use */
1034} uni_buf;
1035
/* Context passed through fz_bidi_fragment_text to fragment_cb. */
1036typedef struct
1037{
1038 fz_context *ctx; /* context used when a flow node has to be split */
1039 fz_pool *pool; /* pool that split-off flow nodes are allocated from */
1040 fz_html_flow *flow; /* next flow node to receive a bidi level and script; advanced by fragment_cb */
1041 uni_buf *buffer; /* the code point buffer that is being fragmented */
1042} bidi_data;
1043
1044static void fragment_cb(const uint32_t *fragment,
1045 size_t fragment_len,
1046 int bidi_level,
1047 int script,
1048 void *arg)
1049{
1050 bidi_data *data = (bidi_data *)arg;
1051 size_t fragment_offset = fragment - data->buffer->data;
1052
1053 /* We are guaranteed that fragmentOffset will be at the beginning
1054 * of flow. */
1055 while (fragment_len > 0)
1056 {
1057 size_t len;
1058
1059 if (data->flow->type == FLOW_SPACE)
1060 {
1061 len = 1;
1062 }
1063 else if (data->flow->type == FLOW_BREAK || data->flow->type == FLOW_SBREAK ||
1064 data->flow->type == FLOW_SHYPHEN || data->flow->type == FLOW_ANCHOR)
1065 {
1066 len = 0;
1067 }
1068 else
1069 {
1070 /* Must be text */
1071 len = fz_utflen(data->flow->content.text);
1072 if (len > fragment_len)
1073 {
1074 /* We need to split this flow box */
1075 (void)split_flow(data->ctx, data->pool, data->flow, fragment_len);
1076 len = fz_utflen(data->flow->content.text);
1077 }
1078 }
1079
1080 /* This flow box is entirely contained within this fragment. */
1081 data->flow->bidi_level = bidi_level;
1082 data->flow->script = script;
1083 data->flow = data->flow->next;
1084 fragment_offset += len;
1085 fragment_len -= len;
1086 }
1087}
1088
1089static fz_bidi_direction
1090detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction bidi_dir, fz_html_flow *flow)
1091{
1092 fz_html_flow *end = flow;
1093 bidi_data data;
1094
1095 while (end)
1096 {
1097 int level = end->bidi_level;
1098
1099 /* Gather the text from the flow up into a single buffer (at
1100 * least, as much of it as has the same direction markup). */
1101 buffer->len = 0;
1102 while (end && (level & 1) == (end->bidi_level & 1))
1103 {
1104 size_t len = 0;
1105 const char *text = "";
1106 int broken = 0;
1107
1108 switch (end->type)
1109 {
1110 case FLOW_WORD:
1111 len = fz_utflen(end->content.text);
1112 text = end->content.text;
1113 break;
1114 case FLOW_SPACE:
1115 len = 1;
1116 text = " ";
1117 break;
1118 case FLOW_SHYPHEN:
1119 case FLOW_SBREAK:
1120 break;
1121 case FLOW_BREAK:
1122 case FLOW_IMAGE:
1123 broken = 1;
1124 break;
1125 }
1126
1127 end = end->next;
1128
1129 if (broken)
1130 break;
1131
1132 /* Make sure the buffer is large enough */
1133 if (buffer->len + len > buffer->cap)
1134 {
1135 size_t newcap = buffer->cap;
1136 if (newcap < 128)
1137 newcap = 128; /* Sensible small default */
1138
1139 while (newcap < buffer->len + len)
1140 newcap = (newcap * 3) / 2;
1141
1142 buffer->data = fz_realloc_array(ctx, buffer->data, newcap, uint32_t);
1143 buffer->cap = newcap;
1144 }
1145
1146 /* Expand the utf8 text into Unicode and store it in the buffer */
1147 while (*text)
1148 {
1149 int rune;
1150 text += fz_chartorune(&rune, text);
1151 buffer->data[buffer->len++] = rune;
1152 }
1153 }
1154
1155 /* Detect directionality for the buffer */
1156 data.ctx = ctx;
1157 data.pool = pool;
1158 data.flow = flow;
1159 data.buffer = buffer;
1160 fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, fragment_cb, &data, 0 /* Flags */);
1161 }
1162 return bidi_dir;
1163}
1164
1165static void
1166detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_html_box *box)
1167{
1168 while (box)
1169 {
1170 if (box->flow_head)
1171 box->markup_dir = detect_flow_directionality(ctx, pool, buffer, box->markup_dir, box->flow_head);
1172 detect_box_directionality(ctx, pool, buffer, box->down);
1173 box = box->next;
1174 }
1175}
1176
1177static void
1178detect_directionality(fz_context *ctx, fz_pool *pool, fz_html_box *box)
1179{
1180 uni_buf buffer = { NULL };
1181
1182 fz_try(ctx)
1183 detect_box_directionality(ctx, pool, &buffer, box);
1184 fz_always(ctx)
1185 fz_free(ctx, buffer.data);
1186 fz_catch(ctx)
1187 fz_rethrow(ctx);
1188}
1189
1190fz_html *
1191fz_parse_html(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css)
1192{
1193 fz_xml_doc *xml;
1194 fz_xml *root, *node;
1195 fz_html *html = NULL;
1196 char *title;
1197
1198 fz_css_match match;
1199 struct genstate g;
1200
1201 g.pool = NULL;
1202 g.set = set;
1203 g.zip = zip;
1204 g.images = NULL;
1205 g.base_uri = base_uri;
1206 g.css = NULL;
1207 g.at_bol = 0;
1208 g.emit_white = 0;
1209 g.last_brk_cls = UCDN_LINEBREAK_CLASS_OP;
1210
1211 xml = fz_parse_xml(ctx, buf, 1);
1212 root = fz_xml_root(xml);
1213
1214 fz_try(ctx)
1215 g.css = fz_new_css(ctx);
1216 fz_catch(ctx)
1217 {
1218 fz_drop_xml(ctx, xml);
1219 fz_rethrow(ctx);
1220 }
1221
1222#ifndef NDEBUG
1223 if (fz_atoi(getenv("FZ_DEBUG_XML")))
1224 fz_debug_xml(root, 0);
1225#endif
1226
1227 fz_try(ctx)
1228 {
1229 if (fz_xml_find(root, "FictionBook"))
1230 {
1231 g.is_fb2 = 1;
1232 fz_parse_css(ctx, g.css, fb2_default_css, "<default:fb2>");
1233 if (fz_use_document_css(ctx))
1234 fb2_load_css(ctx, g.zip, g.base_uri, g.css, root);
1235 g.images = load_fb2_images(ctx, root);
1236 }
1237 else
1238 {
1239 g.is_fb2 = 0;
1240 fz_parse_css(ctx, g.css, html_default_css, "<default:html>");
1241 if (fz_use_document_css(ctx))
1242 html_load_css(ctx, g.zip, g.base_uri, g.css, root);
1243 g.images = NULL;
1244 }
1245
1246 if (user_css)
1247 fz_parse_css(ctx, g.css, user_css, "<user>");
1248
1249 fz_add_css_font_faces(ctx, g.set, g.zip, g.base_uri, g.css); /* load @font-face fonts into font set */
1250 }
1251 fz_catch(ctx)
1252 {
1253 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1254 fz_warn(ctx, "ignoring styles due to errors: %s", fz_caught_message(ctx));
1255 }
1256
1257#ifndef NDEBUG
1258 if (fz_atoi(getenv("FZ_DEBUG_CSS")))
1259 fz_debug_css(ctx, g.css);
1260#endif
1261
1262 fz_try(ctx)
1263 {
1264 g.pool = fz_new_pool(ctx);
1265 html = fz_pool_alloc(ctx, g.pool, sizeof *html);
1266 html->pool = g.pool;
1267 html->root = new_box(ctx, g.pool, DEFAULT_DIR);
1268
1269 match.up = NULL;
1270 match.count = 0;
1271 fz_match_css_at_page(ctx, &match, g.css);
1272 fz_apply_css_style(ctx, g.set, &html->root->style, &match);
1273 // TODO: transfer page margins out of this hacky box
1274
1275 generate_boxes(ctx, root, html->root, &match, 0, 0, DEFAULT_DIR, FZ_LANG_UNSET, &g);
1276
1277 detect_directionality(ctx, g.pool, html->root);
1278
1279 if (g.is_fb2)
1280 {
1281 node = fz_xml_find(root, "FictionBook");
1282 node = fz_xml_find_down(node, "description");
1283 node = fz_xml_find_down(node, "title-info");
1284 node = fz_xml_find_down(node, "book-title");
1285 title = fz_xml_text(fz_xml_down(node));
1286 if (title)
1287 html->title = fz_pool_strdup(ctx, g.pool, title);
1288 }
1289 else
1290 {
1291 node = fz_xml_find(root, "html");
1292 node = fz_xml_find_down(node, "head");
1293 node = fz_xml_find_down(node, "title");
1294 title = fz_xml_text(fz_xml_down(node));
1295 if (title)
1296 html->title = fz_pool_strdup(ctx, g.pool, title);
1297 }
1298 }
1299 fz_always(ctx)
1300 {
1301 fz_drop_tree(ctx, g.images, (void(*)(fz_context*,void*))fz_drop_image);
1302 fz_drop_css(ctx, g.css);
1303 fz_drop_xml(ctx, xml);
1304 }
1305 fz_catch(ctx)
1306 {
1307 fz_drop_html(ctx, html);
1308 fz_rethrow(ctx);
1309 }
1310
1311 return html;
1312}
1313
/* Write 'level' tab characters to stdout; nothing is written for level <= 0. */
static void indent(int level)
{
	int i;

	for (i = 0; i < level; i++)
		putchar('\t');
}
1319
1320static void
1321fz_debug_html_flow(fz_context *ctx, fz_html_flow *flow, int level)
1322{
1323 fz_html_box *sbox = NULL;
1324 while (flow)
1325 {
1326 if (flow->box != sbox) {
1327 if (sbox) {
1328 indent(level);
1329 printf("}\n");
1330 }
1331 sbox = flow->box;
1332 indent(level);
1333 printf("span em=%g font='%s'", sbox->em, fz_font_name(ctx, sbox->style.font));
1334 if (fz_font_is_serif(ctx, sbox->style.font))
1335 printf(" serif");
1336 else
1337 printf(" sans");
1338 if (fz_font_is_monospaced(ctx, sbox->style.font))
1339 printf(" monospaced");
1340 if (fz_font_is_bold(ctx, sbox->style.font))
1341 printf(" bold");
1342 if (fz_font_is_italic(ctx, sbox->style.font))
1343 printf(" italic");
1344 if (sbox->style.small_caps)
1345 printf(" small-caps");
1346 printf("\n");
1347 indent(level);
1348 printf("{\n");
1349 }
1350
1351 indent(level+1);
1352 switch (flow->type) {
1353 case FLOW_WORD: printf("word "); break;
1354 case FLOW_SPACE: printf("space"); break;
1355 case FLOW_SBREAK: printf("sbrk "); break;
1356 case FLOW_SHYPHEN: printf("shy "); break;
1357 case FLOW_BREAK: printf("break"); break;
1358 case FLOW_IMAGE: printf("image"); break;
1359 case FLOW_ANCHOR: printf("anchor"); break;
1360 }
1361 printf(" y=%g x=%g w=%g", flow->y, flow->x, flow->w);
1362 if (flow->type == FLOW_IMAGE)
1363 printf(" h=%g", flow->h);
1364 if (flow->type == FLOW_WORD)
1365 printf(" text='%s'", flow->content.text);
1366 printf("\n");
1367 if (flow->breaks_line) {
1368 indent(level+1);
1369 printf("*\n");
1370 }
1371
1372 flow = flow->next;
1373 }
1374 indent(level);
1375 printf("}\n");
1376}
1377
1378static void
1379fz_debug_html_box(fz_context *ctx, fz_html_box *box, int level)
1380{
1381 while (box)
1382 {
1383 indent(level);
1384 switch (box->type) {
1385 case BOX_BLOCK: printf("block"); break;
1386 case BOX_BREAK: printf("break"); break;
1387 case BOX_FLOW: printf("flow"); break;
1388 case BOX_INLINE: printf("inline"); break;
1389 case BOX_TABLE: printf("table"); break;
1390 case BOX_TABLE_ROW: printf("table-row"); break;
1391 case BOX_TABLE_CELL: printf("table-cell"); break;
1392 }
1393
1394 printf(" em=%g x=%g y=%g w=%g b=%g\n", box->em, box->x, box->y, box->w, box->b);
1395
1396 indent(level);
1397 printf("{\n");
1398 if (box->type == BOX_BLOCK) {
1399 indent(level+1);
1400 printf("margin=%g %g %g %g\n", box->margin[0], box->margin[1], box->margin[2], box->margin[3]);
1401 }
1402 if (box->is_first_flow) {
1403 indent(level+1);
1404 printf("is-first-flow\n");
1405 }
1406 if (box->list_item) {
1407 indent(level+1);
1408 printf("list=%d\n", box->list_item);
1409 }
1410 if (box->id) {
1411 indent(level+1);
1412 printf("id=%s\n", box->id);
1413 }
1414 if (box->href) {
1415 indent(level+1);
1416 printf("href=%s\n", box->href);
1417 }
1418
1419 if (box->down)
1420 fz_debug_html_box(ctx, box->down, level + 1);
1421 if (box->flow_head)
1422 fz_debug_html_flow(ctx, box->flow_head, level + 1);
1423
1424 indent(level);
1425 printf("}\n");
1426
1427 box = box->next;
1428 }
1429}
1430
1431void
1432fz_debug_html(fz_context *ctx, fz_html_box *box)
1433{
1434 fz_debug_html_box(ctx, box, 0);
1435}
1436