| 1 | #include <stdlib.h> |
| 2 | #include <stdio.h> |
| 3 | #include <string.h> |
| 4 | #include <stdint.h> |
| 5 | #include <assert.h> |
| 6 | |
| 7 | #include "config.h" |
| 8 | #include "cmark.h" |
| 9 | #include "node.h" |
| 10 | #include "buffer.h" |
| 11 | #include "utf8.h" |
| 12 | #include "scanners.h" |
| 13 | #include "render.h" |
| 14 | |
// Convenience wrappers around the renderer callbacks. Every one of them
// expects a variable named `renderer` to be in scope at the point of use.

// Emit string `s` through renderer->out with the given wrap flag and
// escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
// Emit string `s` with wrapping disabled and LITERAL escaping.
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
// Invoke the renderer's cr callback.
#define CR() renderer->cr(renderer)
// Invoke the renderer's blankline callback.
#define BLANKLINE() renderer->blankline(renderer)
// Size of the scratch buffer outc() uses for percent/entity encodings.
#define ENCODED_SIZE 20
// Size of the scratch buffer S_render_node() uses for ordered-list markers.
#define LISTMARKER_SIZE 20
| 21 | |
| 22 | // Functions to convert cmark_nodes to commonmark strings. |
| 23 | |
| 24 | static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, |
| 25 | int32_t c, unsigned char nextc) { |
| 26 | bool needs_escaping = false; |
| 27 | bool follows_digit = |
| 28 | renderer->buffer->size > 0 && |
| 29 | cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); |
| 30 | char encoded[ENCODED_SIZE]; |
| 31 | int options = renderer->options; |
| 32 | |
| 33 | needs_escaping = |
| 34 | c < 0x80 && escape != LITERAL && |
| 35 | ((escape == NORMAL && |
| 36 | (c < 0x20 || |
| 37 | c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || |
| 38 | c == '>' || c == '\\' || c == '`' || c == '!' || |
| 39 | (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || |
| 40 | ((CMARK_OPT_SMART & options) && |
| 41 | ((c == '-' && nextc == '-') || |
| 42 | (c == '.' && nextc == '.') || |
| 43 | c == '"' || c == '\'')) || |
| 44 | (renderer->begin_content && (c == '-' || c == '+' || c == '=') && |
| 45 | // begin_content doesn't get set to false til we've passed digits |
| 46 | // at the beginning of line, so... |
| 47 | !follows_digit) || |
| 48 | (renderer->begin_content && (c == '.' || c == ')') && follows_digit && |
| 49 | (nextc == 0 || cmark_isspace(nextc))))) || |
| 50 | (escape == URL && |
| 51 | (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || |
| 52 | c == ')' || c == '(')) || |
| 53 | (escape == TITLE && |
| 54 | (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); |
| 55 | |
| 56 | if (needs_escaping) { |
| 57 | if (escape == URL && cmark_isspace(c)) { |
| 58 | // use percent encoding for spaces |
| 59 | snprintf(encoded, ENCODED_SIZE, "%%%2X" , c); |
| 60 | cmark_strbuf_puts(renderer->buffer, encoded); |
| 61 | renderer->column += 3; |
| 62 | } else if (cmark_ispunct(c)) { |
| 63 | cmark_render_ascii(renderer, "\\" ); |
| 64 | cmark_render_code_point(renderer, c); |
| 65 | } else { // render as entity |
| 66 | snprintf(encoded, ENCODED_SIZE, "&#%d;" , c); |
| 67 | cmark_strbuf_puts(renderer->buffer, encoded); |
| 68 | renderer->column += strlen(encoded); |
| 69 | } |
| 70 | } else { |
| 71 | cmark_render_code_point(renderer, c); |
| 72 | } |
| 73 | } |
| 74 | |
// Returns the length of the longest run of consecutive '`' characters in
// the NUL-terminated string `code`, or 0 when it contains none.
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p;

  for (p = code; *p != '\0'; p++) {
    if (*p != '`') {
      run = 0;
      continue;
    }
    run++;
    if (run > best) {
      best = run;
    }
  }
  return best;
}
| 93 | |
// Returns the smallest length n >= 1 such that `code` contains no run of
// exactly n consecutive '`' characters. Run lengths are tracked in a 32-bit
// set, so runs of 32 or more are not recorded and the result is capped at 32.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of exactly k backticks occurs". Bit 0 starts out
  // set so that a zero-length delimiter is never selected.
  uint32_t seen = 1;
  int run = 0;
  int len;
  const char *p = code;

  for (;;) {
    char ch = *p++;
    if (ch == '`') {
      run++;
      continue;
    }
    // Any other character, including the terminating NUL, closes a run.
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
    if (ch == '\0') {
      break;
    }
  }

  // Index of the lowest clear bit, or 32 if every bit is set.
  for (len = 0; len < 32; len++) {
    if ((seen & (1U << len)) == 0) {
      break;
    }
  }
  return len;
}
| 120 | |
| 121 | static bool is_autolink(cmark_node *node) { |
| 122 | const unsigned char *title; |
| 123 | const unsigned char *url; |
| 124 | cmark_node *link_text; |
| 125 | |
| 126 | if (node->type != CMARK_NODE_LINK) { |
| 127 | return false; |
| 128 | } |
| 129 | |
| 130 | url = node->as.link.url; |
| 131 | if (url == NULL || _scan_scheme(url) == 0) { |
| 132 | return false; |
| 133 | } |
| 134 | |
| 135 | title = node->as.link.title; |
| 136 | // if it has a title, we can't treat it as an autolink: |
| 137 | if (title && title[0]) { |
| 138 | return false; |
| 139 | } |
| 140 | |
| 141 | link_text = node->first_child; |
| 142 | if (link_text == NULL) { |
| 143 | return false; |
| 144 | } |
| 145 | cmark_consolidate_text_nodes(link_text); |
| 146 | if (strncmp((const char *)url, "mailto:" , 7) == 0) { |
| 147 | url += 7; |
| 148 | } |
| 149 | return link_text->data != NULL && |
| 150 | strcmp((const char *)url, (char *)link_text->data) == 0; |
| 151 | } |
| 152 | |
| 153 | // if node is a block node, returns node. |
| 154 | // otherwise returns first block-level node that is an ancestor of node. |
| 155 | // if there is no block-level ancestor, returns NULL. |
| 156 | static cmark_node *get_containing_block(cmark_node *node) { |
| 157 | while (node) { |
| 158 | if (node->type >= CMARK_NODE_FIRST_BLOCK && |
| 159 | node->type <= CMARK_NODE_LAST_BLOCK) { |
| 160 | return node; |
| 161 | } else { |
| 162 | node = node->parent; |
| 163 | } |
| 164 | } |
| 165 | return NULL; |
| 166 | } |
| 167 | |
| 168 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, |
| 169 | cmark_event_type ev_type, int options) { |
| 170 | cmark_node *tmp; |
| 171 | int list_number; |
| 172 | cmark_delim_type list_delim; |
| 173 | size_t numticks; |
| 174 | bool ; |
| 175 | size_t i; |
| 176 | bool entering = (ev_type == CMARK_EVENT_ENTER); |
| 177 | const char *info, *code, *title; |
| 178 | char fencechar[2] = {'\0', '\0'}; |
| 179 | size_t code_len; |
| 180 | char listmarker[LISTMARKER_SIZE]; |
| 181 | const char *emph_delim; |
| 182 | bool first_in_list_item; |
| 183 | bufsize_t marker_width; |
| 184 | bool has_nonspace; |
| 185 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && |
| 186 | !(CMARK_OPT_HARDBREAKS & options); |
| 187 | |
| 188 | // Don't adjust tight list status til we've started the list. |
| 189 | // Otherwise we loose the blank line between a paragraph and |
| 190 | // a following list. |
| 191 | if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { |
| 192 | tmp = get_containing_block(node); |
| 193 | renderer->in_tight_list_item = |
| 194 | tmp && // tmp might be NULL if there is no containing block |
| 195 | ((tmp->type == CMARK_NODE_ITEM && |
| 196 | cmark_node_get_list_tight(tmp->parent)) || |
| 197 | (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && |
| 198 | cmark_node_get_list_tight(tmp->parent->parent))); |
| 199 | } |
| 200 | |
| 201 | switch (node->type) { |
| 202 | case CMARK_NODE_DOCUMENT: |
| 203 | break; |
| 204 | |
| 205 | case CMARK_NODE_BLOCK_QUOTE: |
| 206 | if (entering) { |
| 207 | LIT("> " ); |
| 208 | renderer->begin_content = true; |
| 209 | cmark_strbuf_puts(renderer->prefix, "> " ); |
| 210 | } else { |
| 211 | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); |
| 212 | BLANKLINE(); |
| 213 | } |
| 214 | break; |
| 215 | |
| 216 | case CMARK_NODE_LIST: |
| 217 | if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { |
| 218 | // this ensures that a following indented code block or list will be |
| 219 | // inteprereted correctly. |
| 220 | CR(); |
| 221 | LIT("<!-- end list -->" ); |
| 222 | BLANKLINE(); |
| 223 | } |
| 224 | break; |
| 225 | |
| 226 | case CMARK_NODE_ITEM: |
| 227 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
| 228 | marker_width = 4; |
| 229 | } else { |
| 230 | list_number = cmark_node_get_list_start(node->parent); |
| 231 | list_delim = cmark_node_get_list_delim(node->parent); |
| 232 | tmp = node; |
| 233 | while (tmp->prev) { |
| 234 | tmp = tmp->prev; |
| 235 | list_number += 1; |
| 236 | } |
| 237 | // we ensure a width of at least 4 so |
| 238 | // we get nice transition from single digits |
| 239 | // to double |
| 240 | snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s" , list_number, |
| 241 | list_delim == CMARK_PAREN_DELIM ? ")" : "." , |
| 242 | list_number < 10 ? " " : " " ); |
| 243 | marker_width = strlen(listmarker); |
| 244 | } |
| 245 | if (entering) { |
| 246 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
| 247 | LIT(" - " ); |
| 248 | renderer->begin_content = true; |
| 249 | } else { |
| 250 | LIT(listmarker); |
| 251 | renderer->begin_content = true; |
| 252 | } |
| 253 | for (i = marker_width; i--;) { |
| 254 | cmark_strbuf_putc(renderer->prefix, ' '); |
| 255 | } |
| 256 | } else { |
| 257 | cmark_strbuf_truncate(renderer->prefix, |
| 258 | renderer->prefix->size - marker_width); |
| 259 | CR(); |
| 260 | } |
| 261 | break; |
| 262 | |
| 263 | case CMARK_NODE_HEADING: |
| 264 | if (entering) { |
| 265 | for (i = cmark_node_get_heading_level(node); i > 0; i--) { |
| 266 | LIT("#" ); |
| 267 | } |
| 268 | LIT(" " ); |
| 269 | renderer->begin_content = true; |
| 270 | renderer->no_linebreaks = true; |
| 271 | } else { |
| 272 | renderer->no_linebreaks = false; |
| 273 | BLANKLINE(); |
| 274 | } |
| 275 | break; |
| 276 | |
| 277 | case CMARK_NODE_CODE_BLOCK: |
| 278 | |
| 279 | first_in_list_item = node->prev == NULL && node->parent && |
| 280 | node->parent->type == CMARK_NODE_ITEM; |
| 281 | |
| 282 | if (!first_in_list_item) { |
| 283 | BLANKLINE(); |
| 284 | } |
| 285 | info = cmark_node_get_fence_info(node); |
| 286 | fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; |
| 287 | code = cmark_node_get_literal(node); |
| 288 | |
| 289 | numticks = longest_backtick_sequence(code) + 1; |
| 290 | if (numticks < 3) { |
| 291 | numticks = 3; |
| 292 | } |
| 293 | for (i = 0; i < numticks; i++) { |
| 294 | LIT(fencechar); |
| 295 | } |
| 296 | LIT(" " ); |
| 297 | OUT(info, false, LITERAL); |
| 298 | CR(); |
| 299 | OUT(cmark_node_get_literal(node), false, LITERAL); |
| 300 | CR(); |
| 301 | for (i = 0; i < numticks; i++) { |
| 302 | LIT(fencechar); |
| 303 | } |
| 304 | |
| 305 | BLANKLINE(); |
| 306 | break; |
| 307 | |
| 308 | case CMARK_NODE_HTML_BLOCK: |
| 309 | BLANKLINE(); |
| 310 | OUT(cmark_node_get_literal(node), false, LITERAL); |
| 311 | BLANKLINE(); |
| 312 | break; |
| 313 | |
| 314 | case CMARK_NODE_CUSTOM_BLOCK: |
| 315 | BLANKLINE(); |
| 316 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
| 317 | false, LITERAL); |
| 318 | BLANKLINE(); |
| 319 | break; |
| 320 | |
| 321 | case CMARK_NODE_THEMATIC_BREAK: |
| 322 | BLANKLINE(); |
| 323 | LIT("-----" ); |
| 324 | BLANKLINE(); |
| 325 | break; |
| 326 | |
| 327 | case CMARK_NODE_PARAGRAPH: |
| 328 | if (!entering) { |
| 329 | BLANKLINE(); |
| 330 | } |
| 331 | break; |
| 332 | |
| 333 | case CMARK_NODE_TEXT: |
| 334 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); |
| 335 | break; |
| 336 | |
| 337 | case CMARK_NODE_LINEBREAK: |
| 338 | if (!(CMARK_OPT_HARDBREAKS & options)) { |
| 339 | LIT(" " ); |
| 340 | } |
| 341 | CR(); |
| 342 | break; |
| 343 | |
| 344 | case CMARK_NODE_SOFTBREAK: |
| 345 | if (CMARK_OPT_HARDBREAKS & options) { |
| 346 | LIT(" " ); |
| 347 | CR(); |
| 348 | } else if (!renderer->no_linebreaks && renderer->width == 0 && |
| 349 | !(CMARK_OPT_HARDBREAKS & options) && |
| 350 | !(CMARK_OPT_NOBREAKS & options)) { |
| 351 | CR(); |
| 352 | } else { |
| 353 | OUT(" " , allow_wrap, LITERAL); |
| 354 | } |
| 355 | break; |
| 356 | |
| 357 | case CMARK_NODE_CODE: |
| 358 | code = cmark_node_get_literal(node); |
| 359 | code_len = strlen(code); |
| 360 | numticks = shortest_unused_backtick_sequence(code); |
| 361 | has_nonspace = false; |
| 362 | for (i=0; i < code_len; i++) { |
| 363 | if (code[i] != ' ') { |
| 364 | has_nonspace = true; |
| 365 | break; |
| 366 | } |
| 367 | } |
| 368 | extra_spaces = code_len == 0 || |
| 369 | code[0] == '`' || code[code_len - 1] == '`' || |
| 370 | (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); |
| 371 | for (i = 0; i < numticks; i++) { |
| 372 | LIT("`" ); |
| 373 | } |
| 374 | if (extra_spaces) { |
| 375 | LIT(" " ); |
| 376 | } |
| 377 | OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); |
| 378 | if (extra_spaces) { |
| 379 | LIT(" " ); |
| 380 | } |
| 381 | for (i = 0; i < numticks; i++) { |
| 382 | LIT("`" ); |
| 383 | } |
| 384 | break; |
| 385 | |
| 386 | case CMARK_NODE_HTML_INLINE: |
| 387 | OUT(cmark_node_get_literal(node), false, LITERAL); |
| 388 | break; |
| 389 | |
| 390 | case CMARK_NODE_CUSTOM_INLINE: |
| 391 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
| 392 | false, LITERAL); |
| 393 | break; |
| 394 | |
| 395 | case CMARK_NODE_STRONG: |
| 396 | if (entering) { |
| 397 | LIT("**" ); |
| 398 | } else { |
| 399 | LIT("**" ); |
| 400 | } |
| 401 | break; |
| 402 | |
| 403 | case CMARK_NODE_EMPH: |
| 404 | // If we have EMPH(EMPH(x)), we need to use *_x_* |
| 405 | // because **x** is STRONG(x): |
| 406 | if (node->parent && node->parent->type == CMARK_NODE_EMPH && |
| 407 | node->next == NULL && node->prev == NULL) { |
| 408 | emph_delim = "_" ; |
| 409 | } else { |
| 410 | emph_delim = "*" ; |
| 411 | } |
| 412 | if (entering) { |
| 413 | LIT(emph_delim); |
| 414 | } else { |
| 415 | LIT(emph_delim); |
| 416 | } |
| 417 | break; |
| 418 | |
| 419 | case CMARK_NODE_LINK: |
| 420 | if (is_autolink(node)) { |
| 421 | if (entering) { |
| 422 | LIT("<" ); |
| 423 | if (strncmp(cmark_node_get_url(node), "mailto:" , 7) == 0) { |
| 424 | LIT((const char *)cmark_node_get_url(node) + 7); |
| 425 | } else { |
| 426 | LIT((const char *)cmark_node_get_url(node)); |
| 427 | } |
| 428 | LIT(">" ); |
| 429 | // return signal to skip contents of node... |
| 430 | return 0; |
| 431 | } |
| 432 | } else { |
| 433 | if (entering) { |
| 434 | LIT("[" ); |
| 435 | } else { |
| 436 | LIT("](" ); |
| 437 | OUT(cmark_node_get_url(node), false, URL); |
| 438 | title = cmark_node_get_title(node); |
| 439 | if (strlen(title) > 0) { |
| 440 | LIT(" \"" ); |
| 441 | OUT(title, false, TITLE); |
| 442 | LIT("\"" ); |
| 443 | } |
| 444 | LIT(")" ); |
| 445 | } |
| 446 | } |
| 447 | break; |
| 448 | |
| 449 | case CMARK_NODE_IMAGE: |
| 450 | if (entering) { |
| 451 | LIT("; |
| 454 | OUT(cmark_node_get_url(node), false, URL); |
| 455 | title = cmark_node_get_title(node); |
| 456 | if (strlen(title) > 0) { |
| 457 | OUT(" \"" , allow_wrap, LITERAL); |
| 458 | OUT(title, false, TITLE); |
| 459 | LIT("\"" ); |
| 460 | } |
| 461 | LIT(")" ); |
| 462 | } |
| 463 | break; |
| 464 | |
| 465 | default: |
| 466 | assert(false); |
| 467 | break; |
| 468 | } |
| 469 | |
| 470 | return 1; |
| 471 | } |
| 472 | |
| 473 | char *cmark_render_commonmark(cmark_node *root, int options, int width) { |
| 474 | if (options & CMARK_OPT_HARDBREAKS) { |
| 475 | // disable breaking on width, since it has |
| 476 | // a different meaning with OPT_HARDBREAKS |
| 477 | width = 0; |
| 478 | } |
| 479 | return cmark_render(root, options, width, outc, S_render_node); |
| 480 | } |
| 481 | |