| 1 | #include <stdlib.h> |
| 2 | #include <stdio.h> |
| 3 | #include <string.h> |
| 4 | #include <assert.h> |
| 5 | |
| 6 | #include "config.h" |
| 7 | #include "cmark.h" |
| 8 | #include "node.h" |
| 9 | #include "buffer.h" |
| 10 | #include "utf8.h" |
| 11 | #include "scanners.h" |
| 12 | #include "render.h" |
| 13 | |
// Shorthands for the renderer callbacks. Each one expands to a call through
// a variable named `renderer`, which must be in scope at the point of use.

// Emit string s with the given wrap flag and escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
// Emit string s with wrapping disabled and LITERAL escaping.
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
// Invoke the renderer's cr callback.
#define CR() renderer->cr(renderer)
// Invoke the renderer's blankline callback.
#define BLANKLINE() renderer->blankline(renderer)
// Size of the buffer that holds a list start number formatted with "%d".
#define LIST_NUMBER_STRING_SIZE 20
| 19 | |
| 20 | static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, |
| 21 | int32_t c, unsigned char nextc) { |
| 22 | if (escape == LITERAL) { |
| 23 | cmark_render_code_point(renderer, c); |
| 24 | return; |
| 25 | } |
| 26 | |
| 27 | switch (c) { |
| 28 | case 123: // '{' |
| 29 | case 125: // '}' |
| 30 | case 35: // '#' |
| 31 | case 37: // '%' |
| 32 | case 38: // '&' |
| 33 | cmark_render_ascii(renderer, "\\" ); |
| 34 | cmark_render_code_point(renderer, c); |
| 35 | break; |
| 36 | case 36: // '$' |
| 37 | case 95: // '_' |
| 38 | if (escape == NORMAL) { |
| 39 | cmark_render_ascii(renderer, "\\" ); |
| 40 | } |
| 41 | cmark_render_code_point(renderer, c); |
| 42 | break; |
| 43 | case 45: // '-' |
| 44 | if (nextc == 45) { // prevent ligature |
| 45 | cmark_render_ascii(renderer, "-{}" ); |
| 46 | } else { |
| 47 | cmark_render_ascii(renderer, "-" ); |
| 48 | } |
| 49 | break; |
| 50 | case 126: // '~' |
| 51 | if (escape == NORMAL) { |
| 52 | cmark_render_ascii(renderer, "\\textasciitilde{}" ); |
| 53 | } else { |
| 54 | cmark_render_code_point(renderer, c); |
| 55 | } |
| 56 | break; |
| 57 | case 94: // '^' |
| 58 | cmark_render_ascii(renderer, "\\^{}" ); |
| 59 | break; |
| 60 | case 92: // '\\' |
| 61 | if (escape == URL) { |
| 62 | // / acts as path sep even on windows: |
| 63 | cmark_render_ascii(renderer, "/" ); |
| 64 | } else { |
| 65 | cmark_render_ascii(renderer, "\\textbackslash{}" ); |
| 66 | } |
| 67 | break; |
| 68 | case 124: // '|' |
| 69 | cmark_render_ascii(renderer, "\\textbar{}" ); |
| 70 | break; |
| 71 | case 60: // '<' |
| 72 | cmark_render_ascii(renderer, "\\textless{}" ); |
| 73 | break; |
| 74 | case 62: // '>' |
| 75 | cmark_render_ascii(renderer, "\\textgreater{}" ); |
| 76 | break; |
| 77 | case 91: // '[' |
| 78 | case 93: // ']' |
| 79 | cmark_render_ascii(renderer, "{" ); |
| 80 | cmark_render_code_point(renderer, c); |
| 81 | cmark_render_ascii(renderer, "}" ); |
| 82 | break; |
| 83 | case 34: // '"' |
| 84 | cmark_render_ascii(renderer, "\\textquotedbl{}" ); |
| 85 | // requires \usepackage[T1]{fontenc} |
| 86 | break; |
| 87 | case 39: // '\'' |
| 88 | cmark_render_ascii(renderer, "\\textquotesingle{}" ); |
| 89 | // requires \usepackage{textcomp} |
| 90 | break; |
| 91 | case 160: // nbsp |
| 92 | cmark_render_ascii(renderer, "~" ); |
| 93 | break; |
| 94 | case 8230: // hellip |
| 95 | cmark_render_ascii(renderer, "\\ldots{}" ); |
| 96 | break; |
| 97 | case 8216: // lsquo |
| 98 | if (escape == NORMAL) { |
| 99 | cmark_render_ascii(renderer, "`" ); |
| 100 | } else { |
| 101 | cmark_render_code_point(renderer, c); |
| 102 | } |
| 103 | break; |
| 104 | case 8217: // rsquo |
| 105 | if (escape == NORMAL) { |
| 106 | cmark_render_ascii(renderer, "\'" ); |
| 107 | } else { |
| 108 | cmark_render_code_point(renderer, c); |
| 109 | } |
| 110 | break; |
| 111 | case 8220: // ldquo |
| 112 | if (escape == NORMAL) { |
| 113 | cmark_render_ascii(renderer, "``" ); |
| 114 | } else { |
| 115 | cmark_render_code_point(renderer, c); |
| 116 | } |
| 117 | break; |
| 118 | case 8221: // rdquo |
| 119 | if (escape == NORMAL) { |
| 120 | cmark_render_ascii(renderer, "''" ); |
| 121 | } else { |
| 122 | cmark_render_code_point(renderer, c); |
| 123 | } |
| 124 | break; |
| 125 | case 8212: // emdash |
| 126 | if (escape == NORMAL) { |
| 127 | cmark_render_ascii(renderer, "---" ); |
| 128 | } else { |
| 129 | cmark_render_code_point(renderer, c); |
| 130 | } |
| 131 | break; |
| 132 | case 8211: // endash |
| 133 | if (escape == NORMAL) { |
| 134 | cmark_render_ascii(renderer, "--" ); |
| 135 | } else { |
| 136 | cmark_render_code_point(renderer, c); |
| 137 | } |
| 138 | break; |
| 139 | default: |
| 140 | cmark_render_code_point(renderer, c); |
| 141 | } |
| 142 | } |
| 143 | |
// Classification of a node as computed by get_link_type(); the renderer
// switches on it to choose the LaTeX command for a link.
typedef enum {
  NO_LINK,        // not a link node, empty or scheme-less URL, or no link text
  URL_AUTOLINK,   // untitled link whose text is identical to its URL
  EMAIL_AUTOLINK, // untitled "mailto:" link whose text is the address itself
  NORMAL_LINK,    // any other link whose URL has a scheme
  INTERNAL_LINK   // link whose URL starts with '#'
} link_type;
| 151 | |
| 152 | static link_type get_link_type(cmark_node *node) { |
| 153 | size_t title_len, url_len; |
| 154 | cmark_node *link_text; |
| 155 | char *realurl; |
| 156 | int realurllen; |
| 157 | bool isemail = false; |
| 158 | |
| 159 | if (node->type != CMARK_NODE_LINK) { |
| 160 | return NO_LINK; |
| 161 | } |
| 162 | |
| 163 | const char *url = cmark_node_get_url(node); |
| 164 | cmark_chunk url_chunk = cmark_chunk_literal(url); |
| 165 | |
| 166 | if (url && *url == '#') { |
| 167 | return INTERNAL_LINK; |
| 168 | } |
| 169 | |
| 170 | url_len = strlen(url); |
| 171 | if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) { |
| 172 | return NO_LINK; |
| 173 | } |
| 174 | |
| 175 | const char *title = cmark_node_get_title(node); |
| 176 | title_len = strlen(title); |
| 177 | // if it has a title, we can't treat it as an autolink: |
| 178 | if (title_len == 0) { |
| 179 | |
| 180 | link_text = node->first_child; |
| 181 | cmark_consolidate_text_nodes(link_text); |
| 182 | |
| 183 | if (!link_text) |
| 184 | return NO_LINK; |
| 185 | |
| 186 | realurl = (char *)url; |
| 187 | realurllen = (int)url_len; |
| 188 | if (strncmp(realurl, "mailto:" , 7) == 0) { |
| 189 | realurl += 7; |
| 190 | realurllen -= 7; |
| 191 | isemail = true; |
| 192 | } |
| 193 | if (realurllen == link_text->len && |
| 194 | strncmp(realurl, (char *)link_text->data, |
| 195 | link_text->len) == 0) { |
| 196 | if (isemail) { |
| 197 | return EMAIL_AUTOLINK; |
| 198 | } else { |
| 199 | return URL_AUTOLINK; |
| 200 | } |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | return NORMAL_LINK; |
| 205 | } |
| 206 | |
| 207 | static int S_get_enumlevel(cmark_node *node) { |
| 208 | int enumlevel = 0; |
| 209 | cmark_node *tmp = node; |
| 210 | while (tmp) { |
| 211 | if (tmp->type == CMARK_NODE_LIST && |
| 212 | cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) { |
| 213 | enumlevel++; |
| 214 | } |
| 215 | tmp = tmp->parent; |
| 216 | } |
| 217 | return enumlevel; |
| 218 | } |
| 219 | |
| 220 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, |
| 221 | cmark_event_type ev_type, int options) { |
| 222 | int list_number; |
| 223 | int enumlevel; |
| 224 | char list_number_string[LIST_NUMBER_STRING_SIZE]; |
| 225 | bool entering = (ev_type == CMARK_EVENT_ENTER); |
| 226 | cmark_list_type list_type; |
| 227 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); |
| 228 | |
| 229 | // avoid warning about unused parameter: |
| 230 | (void)(options); |
| 231 | |
| 232 | switch (node->type) { |
| 233 | case CMARK_NODE_DOCUMENT: |
| 234 | break; |
| 235 | |
| 236 | case CMARK_NODE_BLOCK_QUOTE: |
| 237 | if (entering) { |
| 238 | LIT("\\begin{quote}" ); |
| 239 | CR(); |
| 240 | } else { |
| 241 | LIT("\\end{quote}" ); |
| 242 | BLANKLINE(); |
| 243 | } |
| 244 | break; |
| 245 | |
| 246 | case CMARK_NODE_LIST: |
| 247 | list_type = cmark_node_get_list_type(node); |
| 248 | if (entering) { |
| 249 | LIT("\\begin{" ); |
| 250 | LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize" ); |
| 251 | LIT("}" ); |
| 252 | CR(); |
| 253 | list_number = cmark_node_get_list_start(node); |
| 254 | if (list_number > 1) { |
| 255 | enumlevel = S_get_enumlevel(node); |
| 256 | // latex normally supports only five levels |
| 257 | if (enumlevel >= 1 && enumlevel <= 5) { |
| 258 | snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d" , |
| 259 | list_number); |
| 260 | LIT("\\setcounter{enum" ); |
| 261 | switch (enumlevel) { |
| 262 | case 1: LIT("i" ); break; |
| 263 | case 2: LIT("ii" ); break; |
| 264 | case 3: LIT("iii" ); break; |
| 265 | case 4: LIT("iv" ); break; |
| 266 | case 5: LIT("v" ); break; |
| 267 | default: LIT("i" ); break; |
| 268 | } |
| 269 | LIT("}{" ); |
| 270 | OUT(list_number_string, false, NORMAL); |
| 271 | LIT("}" ); |
| 272 | } |
| 273 | CR(); |
| 274 | } |
| 275 | } else { |
| 276 | LIT("\\end{" ); |
| 277 | LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize" ); |
| 278 | LIT("}" ); |
| 279 | BLANKLINE(); |
| 280 | } |
| 281 | break; |
| 282 | |
| 283 | case CMARK_NODE_ITEM: |
| 284 | if (entering) { |
| 285 | LIT("\\item " ); |
| 286 | } else { |
| 287 | CR(); |
| 288 | } |
| 289 | break; |
| 290 | |
| 291 | case CMARK_NODE_HEADING: |
| 292 | if (entering) { |
| 293 | switch (cmark_node_get_heading_level(node)) { |
| 294 | case 1: |
| 295 | LIT("\\section" ); |
| 296 | break; |
| 297 | case 2: |
| 298 | LIT("\\subsection" ); |
| 299 | break; |
| 300 | case 3: |
| 301 | LIT("\\subsubsection" ); |
| 302 | break; |
| 303 | case 4: |
| 304 | LIT("\\paragraph" ); |
| 305 | break; |
| 306 | case 5: |
| 307 | LIT("\\subparagraph" ); |
| 308 | break; |
| 309 | } |
| 310 | LIT("{" ); |
| 311 | } else { |
| 312 | LIT("}" ); |
| 313 | BLANKLINE(); |
| 314 | } |
| 315 | break; |
| 316 | |
| 317 | case CMARK_NODE_CODE_BLOCK: |
| 318 | CR(); |
| 319 | LIT("\\begin{verbatim}" ); |
| 320 | CR(); |
| 321 | OUT(cmark_node_get_literal(node), false, LITERAL); |
| 322 | CR(); |
| 323 | LIT("\\end{verbatim}" ); |
| 324 | BLANKLINE(); |
| 325 | break; |
| 326 | |
| 327 | case CMARK_NODE_HTML_BLOCK: |
| 328 | break; |
| 329 | |
| 330 | case CMARK_NODE_CUSTOM_BLOCK: |
| 331 | CR(); |
| 332 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
| 333 | false, LITERAL); |
| 334 | CR(); |
| 335 | break; |
| 336 | |
| 337 | case CMARK_NODE_THEMATIC_BREAK: |
| 338 | BLANKLINE(); |
| 339 | LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}" ); |
| 340 | BLANKLINE(); |
| 341 | break; |
| 342 | |
| 343 | case CMARK_NODE_PARAGRAPH: |
| 344 | if (!entering) { |
| 345 | BLANKLINE(); |
| 346 | } |
| 347 | break; |
| 348 | |
| 349 | case CMARK_NODE_TEXT: |
| 350 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); |
| 351 | break; |
| 352 | |
| 353 | case CMARK_NODE_LINEBREAK: |
| 354 | LIT("\\\\" ); |
| 355 | CR(); |
| 356 | break; |
| 357 | |
| 358 | case CMARK_NODE_SOFTBREAK: |
| 359 | if (options & CMARK_OPT_HARDBREAKS) { |
| 360 | LIT("\\\\" ); |
| 361 | CR(); |
| 362 | } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { |
| 363 | CR(); |
| 364 | } else { |
| 365 | OUT(" " , allow_wrap, NORMAL); |
| 366 | } |
| 367 | break; |
| 368 | |
| 369 | case CMARK_NODE_CODE: |
| 370 | LIT("\\texttt{" ); |
| 371 | OUT(cmark_node_get_literal(node), false, NORMAL); |
| 372 | LIT("}" ); |
| 373 | break; |
| 374 | |
| 375 | case CMARK_NODE_HTML_INLINE: |
| 376 | break; |
| 377 | |
| 378 | case CMARK_NODE_CUSTOM_INLINE: |
| 379 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
| 380 | false, LITERAL); |
| 381 | break; |
| 382 | |
| 383 | case CMARK_NODE_STRONG: |
| 384 | if (entering) { |
| 385 | LIT("\\textbf{" ); |
| 386 | } else { |
| 387 | LIT("}" ); |
| 388 | } |
| 389 | break; |
| 390 | |
| 391 | case CMARK_NODE_EMPH: |
| 392 | if (entering) { |
| 393 | LIT("\\emph{" ); |
| 394 | } else { |
| 395 | LIT("}" ); |
| 396 | } |
| 397 | break; |
| 398 | |
| 399 | case CMARK_NODE_LINK: |
| 400 | if (entering) { |
| 401 | const char *url = cmark_node_get_url(node); |
| 402 | // requires \usepackage{hyperref} |
| 403 | switch (get_link_type(node)) { |
| 404 | case URL_AUTOLINK: |
| 405 | LIT("\\url{" ); |
| 406 | OUT(url, false, URL); |
| 407 | LIT("}" ); |
| 408 | return 0; // Don't process further nodes to avoid double-rendering artefacts |
| 409 | case EMAIL_AUTOLINK: |
| 410 | LIT("\\href{" ); |
| 411 | OUT(url, false, URL); |
| 412 | LIT("}\\nolinkurl{" ); |
| 413 | break; |
| 414 | case NORMAL_LINK: |
| 415 | LIT("\\href{" ); |
| 416 | OUT(url, false, URL); |
| 417 | LIT("}{" ); |
| 418 | break; |
| 419 | case INTERNAL_LINK: |
| 420 | LIT("\\protect\\hyperlink{" ); |
| 421 | OUT(url + 1, false, URL); |
| 422 | LIT("}{" ); |
| 423 | break; |
| 424 | case NO_LINK: |
| 425 | LIT("{" ); // error? |
| 426 | } |
| 427 | } else { |
| 428 | LIT("}" ); |
| 429 | } |
| 430 | |
| 431 | break; |
| 432 | |
| 433 | case CMARK_NODE_IMAGE: |
| 434 | if (entering) { |
| 435 | LIT("\\protect\\includegraphics{" ); |
| 436 | // requires \include{graphicx} |
| 437 | OUT(cmark_node_get_url(node), false, URL); |
| 438 | LIT("}" ); |
| 439 | return 0; |
| 440 | } |
| 441 | break; |
| 442 | |
| 443 | default: |
| 444 | assert(false); |
| 445 | break; |
| 446 | } |
| 447 | |
| 448 | return 1; |
| 449 | } |
| 450 | |
| 451 | char *cmark_render_latex(cmark_node *root, int options, int width) { |
| 452 | return cmark_render(root, options, width, outc, S_render_node); |
| 453 | } |
| 454 | |