| 1 | #include <stdlib.h> |
| 2 | #include <stdio.h> |
| 3 | #include <string.h> |
| 4 | #include <assert.h> |
| 5 | #include "cmark_ctype.h" |
| 6 | #include "config.h" |
| 7 | #include "cmark.h" |
| 8 | #include "node.h" |
| 9 | #include "buffer.h" |
| 10 | #include "houdini.h" |
| 11 | #include "scanners.h" |
| 12 | |
// Capacity, in bytes, of the stack buffers used for snprintf-formatted
// attribute text in this file.
enum { BUFFER_SIZE = 100 };
| 14 | |
| 15 | // Functions to convert cmark_nodes to HTML strings. |
| 16 | |
| 17 | static void escape_html(cmark_strbuf *dest, const unsigned char *source, |
| 18 | bufsize_t length) { |
| 19 | houdini_escape_html0(dest, source, length, 0); |
| 20 | } |
| 21 | |
| 22 | static CMARK_INLINE void cr(cmark_strbuf *html) { |
| 23 | if (html->size && html->ptr[html->size - 1] != '\n') |
| 24 | cmark_strbuf_putc(html, '\n'); |
| 25 | } |
| 26 | |
| 27 | struct render_state { |
| 28 | cmark_strbuf *html; |
| 29 | cmark_node *plain; |
| 30 | }; |
| 31 | |
| 32 | static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, |
| 33 | int options) { |
| 34 | char buffer[BUFFER_SIZE]; |
| 35 | if (CMARK_OPT_SOURCEPOS & options) { |
| 36 | snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"" , |
| 37 | cmark_node_get_start_line(node), cmark_node_get_start_column(node), |
| 38 | cmark_node_get_end_line(node), cmark_node_get_end_column(node)); |
| 39 | cmark_strbuf_puts(html, buffer); |
| 40 | } |
| 41 | } |
| 42 | |
| 43 | static int S_render_node(cmark_node *node, cmark_event_type ev_type, |
| 44 | struct render_state *state, int options) { |
| 45 | cmark_node *parent; |
| 46 | cmark_node *grandparent; |
| 47 | cmark_strbuf *html = state->html; |
| 48 | char start_heading[] = "<h0" ; |
| 49 | char end_heading[] = "</h0" ; |
| 50 | bool tight; |
| 51 | char buffer[BUFFER_SIZE]; |
| 52 | |
| 53 | bool entering = (ev_type == CMARK_EVENT_ENTER); |
| 54 | |
| 55 | if (state->plain == node) { // back at original node |
| 56 | state->plain = NULL; |
| 57 | } |
| 58 | |
| 59 | if (state->plain != NULL) { |
| 60 | switch (node->type) { |
| 61 | case CMARK_NODE_TEXT: |
| 62 | case CMARK_NODE_CODE: |
| 63 | case CMARK_NODE_HTML_INLINE: |
| 64 | escape_html(html, node->data, node->len); |
| 65 | break; |
| 66 | |
| 67 | case CMARK_NODE_LINEBREAK: |
| 68 | case CMARK_NODE_SOFTBREAK: |
| 69 | cmark_strbuf_putc(html, ' '); |
| 70 | break; |
| 71 | |
| 72 | default: |
| 73 | break; |
| 74 | } |
| 75 | return 1; |
| 76 | } |
| 77 | |
| 78 | switch (node->type) { |
| 79 | case CMARK_NODE_DOCUMENT: |
| 80 | break; |
| 81 | |
| 82 | case CMARK_NODE_BLOCK_QUOTE: |
| 83 | if (entering) { |
| 84 | cr(html); |
| 85 | cmark_strbuf_puts(html, "<blockquote" ); |
| 86 | S_render_sourcepos(node, html, options); |
| 87 | cmark_strbuf_puts(html, ">\n" ); |
| 88 | } else { |
| 89 | cr(html); |
| 90 | cmark_strbuf_puts(html, "</blockquote>\n" ); |
| 91 | } |
| 92 | break; |
| 93 | |
| 94 | case CMARK_NODE_LIST: { |
| 95 | cmark_list_type list_type = (cmark_list_type)node->as.list.list_type; |
| 96 | int start = node->as.list.start; |
| 97 | |
| 98 | if (entering) { |
| 99 | cr(html); |
| 100 | if (list_type == CMARK_BULLET_LIST) { |
| 101 | cmark_strbuf_puts(html, "<ul" ); |
| 102 | S_render_sourcepos(node, html, options); |
| 103 | cmark_strbuf_puts(html, ">\n" ); |
| 104 | } else if (start == 1) { |
| 105 | cmark_strbuf_puts(html, "<ol" ); |
| 106 | S_render_sourcepos(node, html, options); |
| 107 | cmark_strbuf_puts(html, ">\n" ); |
| 108 | } else { |
| 109 | snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"" , start); |
| 110 | cmark_strbuf_puts(html, buffer); |
| 111 | S_render_sourcepos(node, html, options); |
| 112 | cmark_strbuf_puts(html, ">\n" ); |
| 113 | } |
| 114 | } else { |
| 115 | cmark_strbuf_puts(html, |
| 116 | list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n" ); |
| 117 | } |
| 118 | break; |
| 119 | } |
| 120 | |
| 121 | case CMARK_NODE_ITEM: |
| 122 | if (entering) { |
| 123 | cr(html); |
| 124 | cmark_strbuf_puts(html, "<li" ); |
| 125 | S_render_sourcepos(node, html, options); |
| 126 | cmark_strbuf_putc(html, '>'); |
| 127 | } else { |
| 128 | cmark_strbuf_puts(html, "</li>\n" ); |
| 129 | } |
| 130 | break; |
| 131 | |
| 132 | case CMARK_NODE_HEADING: |
| 133 | if (entering) { |
| 134 | cr(html); |
| 135 | start_heading[2] = (char)('0' + node->as.heading.level); |
| 136 | cmark_strbuf_puts(html, start_heading); |
| 137 | S_render_sourcepos(node, html, options); |
| 138 | cmark_strbuf_putc(html, '>'); |
| 139 | } else { |
| 140 | end_heading[3] = (char)('0' + node->as.heading.level); |
| 141 | cmark_strbuf_puts(html, end_heading); |
| 142 | cmark_strbuf_puts(html, ">\n" ); |
| 143 | } |
| 144 | break; |
| 145 | |
| 146 | case CMARK_NODE_CODE_BLOCK: |
| 147 | cr(html); |
| 148 | |
| 149 | if (node->as.code.info == NULL || node->as.code.info[0] == 0) { |
| 150 | cmark_strbuf_puts(html, "<pre" ); |
| 151 | S_render_sourcepos(node, html, options); |
| 152 | cmark_strbuf_puts(html, "><code>" ); |
| 153 | } else { |
| 154 | bufsize_t first_tag = 0; |
| 155 | while (node->as.code.info[first_tag] && |
| 156 | !cmark_isspace(node->as.code.info[first_tag])) { |
| 157 | first_tag += 1; |
| 158 | } |
| 159 | |
| 160 | cmark_strbuf_puts(html, "<pre" ); |
| 161 | S_render_sourcepos(node, html, options); |
| 162 | cmark_strbuf_puts(html, "><code class=\"language-" ); |
| 163 | escape_html(html, node->as.code.info, first_tag); |
| 164 | cmark_strbuf_puts(html, "\">" ); |
| 165 | } |
| 166 | |
| 167 | escape_html(html, node->data, node->len); |
| 168 | cmark_strbuf_puts(html, "</code></pre>\n" ); |
| 169 | break; |
| 170 | |
| 171 | case CMARK_NODE_HTML_BLOCK: |
| 172 | cr(html); |
| 173 | if (!(options & CMARK_OPT_UNSAFE)) { |
| 174 | cmark_strbuf_puts(html, "<!-- raw HTML omitted -->" ); |
| 175 | } else { |
| 176 | cmark_strbuf_put(html, node->data, node->len); |
| 177 | } |
| 178 | cr(html); |
| 179 | break; |
| 180 | |
| 181 | case CMARK_NODE_CUSTOM_BLOCK: { |
| 182 | unsigned char *block = entering ? node->as.custom.on_enter : |
| 183 | node->as.custom.on_exit; |
| 184 | cr(html); |
| 185 | if (block) { |
| 186 | cmark_strbuf_puts(html, (char *)block); |
| 187 | } |
| 188 | cr(html); |
| 189 | break; |
| 190 | } |
| 191 | |
| 192 | case CMARK_NODE_THEMATIC_BREAK: |
| 193 | cr(html); |
| 194 | cmark_strbuf_puts(html, "<hr" ); |
| 195 | S_render_sourcepos(node, html, options); |
| 196 | cmark_strbuf_puts(html, " />\n" ); |
| 197 | break; |
| 198 | |
| 199 | case CMARK_NODE_PARAGRAPH: |
| 200 | parent = cmark_node_parent(node); |
| 201 | grandparent = cmark_node_parent(parent); |
| 202 | if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) { |
| 203 | tight = grandparent->as.list.tight; |
| 204 | } else { |
| 205 | tight = false; |
| 206 | } |
| 207 | if (!tight) { |
| 208 | if (entering) { |
| 209 | cr(html); |
| 210 | cmark_strbuf_puts(html, "<p" ); |
| 211 | S_render_sourcepos(node, html, options); |
| 212 | cmark_strbuf_putc(html, '>'); |
| 213 | } else { |
| 214 | cmark_strbuf_puts(html, "</p>\n" ); |
| 215 | } |
| 216 | } |
| 217 | break; |
| 218 | |
| 219 | case CMARK_NODE_TEXT: |
| 220 | escape_html(html, node->data, node->len); |
| 221 | break; |
| 222 | |
| 223 | case CMARK_NODE_LINEBREAK: |
| 224 | cmark_strbuf_puts(html, "<br />\n" ); |
| 225 | break; |
| 226 | |
| 227 | case CMARK_NODE_SOFTBREAK: |
| 228 | if (options & CMARK_OPT_HARDBREAKS) { |
| 229 | cmark_strbuf_puts(html, "<br />\n" ); |
| 230 | } else if (options & CMARK_OPT_NOBREAKS) { |
| 231 | cmark_strbuf_putc(html, ' '); |
| 232 | } else { |
| 233 | cmark_strbuf_putc(html, '\n'); |
| 234 | } |
| 235 | break; |
| 236 | |
| 237 | case CMARK_NODE_CODE: |
| 238 | cmark_strbuf_puts(html, "<code>" ); |
| 239 | escape_html(html, node->data, node->len); |
| 240 | cmark_strbuf_puts(html, "</code>" ); |
| 241 | break; |
| 242 | |
| 243 | case CMARK_NODE_HTML_INLINE: |
| 244 | if (!(options & CMARK_OPT_UNSAFE)) { |
| 245 | cmark_strbuf_puts(html, "<!-- raw HTML omitted -->" ); |
| 246 | } else { |
| 247 | cmark_strbuf_put(html, node->data, node->len); |
| 248 | } |
| 249 | break; |
| 250 | |
| 251 | case CMARK_NODE_CUSTOM_INLINE: { |
| 252 | unsigned char *block = entering ? node->as.custom.on_enter : |
| 253 | node->as.custom.on_exit; |
| 254 | if (block) { |
| 255 | cmark_strbuf_puts(html, (char *)block); |
| 256 | } |
| 257 | break; |
| 258 | } |
| 259 | |
| 260 | case CMARK_NODE_STRONG: |
| 261 | if (entering) { |
| 262 | cmark_strbuf_puts(html, "<strong>" ); |
| 263 | } else { |
| 264 | cmark_strbuf_puts(html, "</strong>" ); |
| 265 | } |
| 266 | break; |
| 267 | |
| 268 | case CMARK_NODE_EMPH: |
| 269 | if (entering) { |
| 270 | cmark_strbuf_puts(html, "<em>" ); |
| 271 | } else { |
| 272 | cmark_strbuf_puts(html, "</em>" ); |
| 273 | } |
| 274 | break; |
| 275 | |
| 276 | case CMARK_NODE_LINK: |
| 277 | if (entering) { |
| 278 | cmark_strbuf_puts(html, "<a href=\"" ); |
| 279 | if (node->as.link.url && ((options & CMARK_OPT_UNSAFE) || |
| 280 | !(_scan_dangerous_url(node->as.link.url)))) { |
| 281 | houdini_escape_href(html, node->as.link.url, |
| 282 | strlen((char *)node->as.link.url)); |
| 283 | } |
| 284 | if (node->as.link.title) { |
| 285 | cmark_strbuf_puts(html, "\" title=\"" ); |
| 286 | escape_html(html, node->as.link.title, |
| 287 | strlen((char *)node->as.link.title)); |
| 288 | } |
| 289 | cmark_strbuf_puts(html, "\">" ); |
| 290 | } else { |
| 291 | cmark_strbuf_puts(html, "</a>" ); |
| 292 | } |
| 293 | break; |
| 294 | |
| 295 | case CMARK_NODE_IMAGE: |
| 296 | if (entering) { |
| 297 | cmark_strbuf_puts(html, "<img src=\"" ); |
| 298 | if (node->as.link.url && ((options & CMARK_OPT_UNSAFE) || |
| 299 | !(_scan_dangerous_url(node->as.link.url)))) { |
| 300 | houdini_escape_href(html, node->as.link.url, |
| 301 | strlen((char *)node->as.link.url)); |
| 302 | } |
| 303 | cmark_strbuf_puts(html, "\" alt=\"" ); |
| 304 | state->plain = node; |
| 305 | } else { |
| 306 | if (node->as.link.title) { |
| 307 | cmark_strbuf_puts(html, "\" title=\"" ); |
| 308 | escape_html(html, node->as.link.title, |
| 309 | strlen((char *)node->as.link.title)); |
| 310 | } |
| 311 | |
| 312 | cmark_strbuf_puts(html, "\" />" ); |
| 313 | } |
| 314 | break; |
| 315 | |
| 316 | default: |
| 317 | assert(false); |
| 318 | break; |
| 319 | } |
| 320 | |
| 321 | // cmark_strbuf_putc(html, 'x'); |
| 322 | return 1; |
| 323 | } |
| 324 | |
| 325 | char *cmark_render_html(cmark_node *root, int options) { |
| 326 | char *result; |
| 327 | cmark_strbuf html = CMARK_BUF_INIT(root->mem); |
| 328 | cmark_event_type ev_type; |
| 329 | cmark_node *cur; |
| 330 | struct render_state state = {&html, NULL}; |
| 331 | cmark_iter *iter = cmark_iter_new(root); |
| 332 | |
| 333 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { |
| 334 | cur = cmark_iter_get_node(iter); |
| 335 | S_render_node(cur, ev_type, &state, options); |
| 336 | } |
| 337 | result = (char *)cmark_strbuf_detach(&html); |
| 338 | |
| 339 | cmark_iter_free(iter); |
| 340 | return result; |
| 341 | } |
| 342 | |