1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4#include <assert.h>
5
6#include "config.h"
7#include "cmark.h"
8#include "node.h"
9#include "buffer.h"
10#include "utf8.h"
11#include "scanners.h"
12#include "render.h"
13
// Shorthand for the renderer callbacks. Every macro below expands to a call
// through a variable named `renderer`, so one must be in scope at the use site.

// Emit `text` with the given wrap flag and escaping mode.
#define OUT(text, wrap_ok, esc_mode)                                           \
  renderer->out(renderer, text, wrap_ok, esc_mode)
// Emit `text` with wrapping disabled and LITERAL escaping.
#define LIT(text) renderer->out(renderer, text, false, LITERAL)
// Invoke the renderer's `cr` callback.
#define CR() renderer->cr(renderer)
// Invoke the renderer's `blankline` callback.
#define BLANKLINE() renderer->blankline(renderer)
// Size of the scratch buffer used to format a list start number.
#define LIST_NUMBER_STRING_SIZE 20
19
20static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
21 int32_t c, unsigned char nextc) {
22 if (escape == LITERAL) {
23 cmark_render_code_point(renderer, c);
24 return;
25 }
26
27 switch (c) {
28 case 123: // '{'
29 case 125: // '}'
30 case 35: // '#'
31 case 37: // '%'
32 case 38: // '&'
33 cmark_render_ascii(renderer, "\\");
34 cmark_render_code_point(renderer, c);
35 break;
36 case 36: // '$'
37 case 95: // '_'
38 if (escape == NORMAL) {
39 cmark_render_ascii(renderer, "\\");
40 }
41 cmark_render_code_point(renderer, c);
42 break;
43 case 45: // '-'
44 if (nextc == 45) { // prevent ligature
45 cmark_render_ascii(renderer, "-{}");
46 } else {
47 cmark_render_ascii(renderer, "-");
48 }
49 break;
50 case 126: // '~'
51 if (escape == NORMAL) {
52 cmark_render_ascii(renderer, "\\textasciitilde{}");
53 } else {
54 cmark_render_code_point(renderer, c);
55 }
56 break;
57 case 94: // '^'
58 cmark_render_ascii(renderer, "\\^{}");
59 break;
60 case 92: // '\\'
61 if (escape == URL) {
62 // / acts as path sep even on windows:
63 cmark_render_ascii(renderer, "/");
64 } else {
65 cmark_render_ascii(renderer, "\\textbackslash{}");
66 }
67 break;
68 case 124: // '|'
69 cmark_render_ascii(renderer, "\\textbar{}");
70 break;
71 case 60: // '<'
72 cmark_render_ascii(renderer, "\\textless{}");
73 break;
74 case 62: // '>'
75 cmark_render_ascii(renderer, "\\textgreater{}");
76 break;
77 case 91: // '['
78 case 93: // ']'
79 cmark_render_ascii(renderer, "{");
80 cmark_render_code_point(renderer, c);
81 cmark_render_ascii(renderer, "}");
82 break;
83 case 34: // '"'
84 cmark_render_ascii(renderer, "\\textquotedbl{}");
85 // requires \usepackage[T1]{fontenc}
86 break;
87 case 39: // '\''
88 cmark_render_ascii(renderer, "\\textquotesingle{}");
89 // requires \usepackage{textcomp}
90 break;
91 case 160: // nbsp
92 cmark_render_ascii(renderer, "~");
93 break;
94 case 8230: // hellip
95 cmark_render_ascii(renderer, "\\ldots{}");
96 break;
97 case 8216: // lsquo
98 if (escape == NORMAL) {
99 cmark_render_ascii(renderer, "`");
100 } else {
101 cmark_render_code_point(renderer, c);
102 }
103 break;
104 case 8217: // rsquo
105 if (escape == NORMAL) {
106 cmark_render_ascii(renderer, "\'");
107 } else {
108 cmark_render_code_point(renderer, c);
109 }
110 break;
111 case 8220: // ldquo
112 if (escape == NORMAL) {
113 cmark_render_ascii(renderer, "``");
114 } else {
115 cmark_render_code_point(renderer, c);
116 }
117 break;
118 case 8221: // rdquo
119 if (escape == NORMAL) {
120 cmark_render_ascii(renderer, "''");
121 } else {
122 cmark_render_code_point(renderer, c);
123 }
124 break;
125 case 8212: // emdash
126 if (escape == NORMAL) {
127 cmark_render_ascii(renderer, "---");
128 } else {
129 cmark_render_code_point(renderer, c);
130 }
131 break;
132 case 8211: // endash
133 if (escape == NORMAL) {
134 cmark_render_ascii(renderer, "--");
135 } else {
136 cmark_render_code_point(renderer, c);
137 }
138 break;
139 default:
140 cmark_render_code_point(renderer, c);
141 }
142}
143
// Classification of a node for the purpose of choosing a LaTeX link command.
typedef enum {
  NO_LINK = 0,        // not a link, or a link that cannot be rendered as one
  URL_AUTOLINK = 1,   // link text is identical to the URL
  EMAIL_AUTOLINK = 2, // URL is "mailto:" followed by the link text
  NORMAL_LINK = 3,    // any other link with a scheme
  INTERNAL_LINK = 4   // URL begins with '#'
} link_type;
151
152static link_type get_link_type(cmark_node *node) {
153 size_t title_len, url_len;
154 cmark_node *link_text;
155 char *realurl;
156 int realurllen;
157 bool isemail = false;
158
159 if (node->type != CMARK_NODE_LINK) {
160 return NO_LINK;
161 }
162
163 const char *url = cmark_node_get_url(node);
164 cmark_chunk url_chunk = cmark_chunk_literal(url);
165
166 if (url && *url == '#') {
167 return INTERNAL_LINK;
168 }
169
170 url_len = strlen(url);
171 if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
172 return NO_LINK;
173 }
174
175 const char *title = cmark_node_get_title(node);
176 title_len = strlen(title);
177 // if it has a title, we can't treat it as an autolink:
178 if (title_len == 0) {
179
180 link_text = node->first_child;
181 cmark_consolidate_text_nodes(link_text);
182
183 if (!link_text)
184 return NO_LINK;
185
186 realurl = (char *)url;
187 realurllen = (int)url_len;
188 if (strncmp(realurl, "mailto:", 7) == 0) {
189 realurl += 7;
190 realurllen -= 7;
191 isemail = true;
192 }
193 if (realurllen == link_text->len &&
194 strncmp(realurl, (char *)link_text->data,
195 link_text->len) == 0) {
196 if (isemail) {
197 return EMAIL_AUTOLINK;
198 } else {
199 return URL_AUTOLINK;
200 }
201 }
202 }
203
204 return NORMAL_LINK;
205}
206
207static int S_get_enumlevel(cmark_node *node) {
208 int enumlevel = 0;
209 cmark_node *tmp = node;
210 while (tmp) {
211 if (tmp->type == CMARK_NODE_LIST &&
212 cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) {
213 enumlevel++;
214 }
215 tmp = tmp->parent;
216 }
217 return enumlevel;
218}
219
220static int S_render_node(cmark_renderer *renderer, cmark_node *node,
221 cmark_event_type ev_type, int options) {
222 int list_number;
223 int enumlevel;
224 char list_number_string[LIST_NUMBER_STRING_SIZE];
225 bool entering = (ev_type == CMARK_EVENT_ENTER);
226 cmark_list_type list_type;
227 bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
228
229 // avoid warning about unused parameter:
230 (void)(options);
231
232 switch (node->type) {
233 case CMARK_NODE_DOCUMENT:
234 break;
235
236 case CMARK_NODE_BLOCK_QUOTE:
237 if (entering) {
238 LIT("\\begin{quote}");
239 CR();
240 } else {
241 LIT("\\end{quote}");
242 BLANKLINE();
243 }
244 break;
245
246 case CMARK_NODE_LIST:
247 list_type = cmark_node_get_list_type(node);
248 if (entering) {
249 LIT("\\begin{");
250 LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
251 LIT("}");
252 CR();
253 list_number = cmark_node_get_list_start(node);
254 if (list_number > 1) {
255 enumlevel = S_get_enumlevel(node);
256 // latex normally supports only five levels
257 if (enumlevel >= 1 && enumlevel <= 5) {
258 snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
259 list_number);
260 LIT("\\setcounter{enum");
261 switch (enumlevel) {
262 case 1: LIT("i"); break;
263 case 2: LIT("ii"); break;
264 case 3: LIT("iii"); break;
265 case 4: LIT("iv"); break;
266 case 5: LIT("v"); break;
267 default: LIT("i"); break;
268 }
269 LIT("}{");
270 OUT(list_number_string, false, NORMAL);
271 LIT("}");
272 }
273 CR();
274 }
275 } else {
276 LIT("\\end{");
277 LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
278 LIT("}");
279 BLANKLINE();
280 }
281 break;
282
283 case CMARK_NODE_ITEM:
284 if (entering) {
285 LIT("\\item ");
286 } else {
287 CR();
288 }
289 break;
290
291 case CMARK_NODE_HEADING:
292 if (entering) {
293 switch (cmark_node_get_heading_level(node)) {
294 case 1:
295 LIT("\\section");
296 break;
297 case 2:
298 LIT("\\subsection");
299 break;
300 case 3:
301 LIT("\\subsubsection");
302 break;
303 case 4:
304 LIT("\\paragraph");
305 break;
306 case 5:
307 LIT("\\subparagraph");
308 break;
309 }
310 LIT("{");
311 } else {
312 LIT("}");
313 BLANKLINE();
314 }
315 break;
316
317 case CMARK_NODE_CODE_BLOCK:
318 CR();
319 LIT("\\begin{verbatim}");
320 CR();
321 OUT(cmark_node_get_literal(node), false, LITERAL);
322 CR();
323 LIT("\\end{verbatim}");
324 BLANKLINE();
325 break;
326
327 case CMARK_NODE_HTML_BLOCK:
328 break;
329
330 case CMARK_NODE_CUSTOM_BLOCK:
331 CR();
332 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
333 false, LITERAL);
334 CR();
335 break;
336
337 case CMARK_NODE_THEMATIC_BREAK:
338 BLANKLINE();
339 LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
340 BLANKLINE();
341 break;
342
343 case CMARK_NODE_PARAGRAPH:
344 if (!entering) {
345 BLANKLINE();
346 }
347 break;
348
349 case CMARK_NODE_TEXT:
350 OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
351 break;
352
353 case CMARK_NODE_LINEBREAK:
354 LIT("\\\\");
355 CR();
356 break;
357
358 case CMARK_NODE_SOFTBREAK:
359 if (options & CMARK_OPT_HARDBREAKS) {
360 LIT("\\\\");
361 CR();
362 } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
363 CR();
364 } else {
365 OUT(" ", allow_wrap, NORMAL);
366 }
367 break;
368
369 case CMARK_NODE_CODE:
370 LIT("\\texttt{");
371 OUT(cmark_node_get_literal(node), false, NORMAL);
372 LIT("}");
373 break;
374
375 case CMARK_NODE_HTML_INLINE:
376 break;
377
378 case CMARK_NODE_CUSTOM_INLINE:
379 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
380 false, LITERAL);
381 break;
382
383 case CMARK_NODE_STRONG:
384 if (entering) {
385 LIT("\\textbf{");
386 } else {
387 LIT("}");
388 }
389 break;
390
391 case CMARK_NODE_EMPH:
392 if (entering) {
393 LIT("\\emph{");
394 } else {
395 LIT("}");
396 }
397 break;
398
399 case CMARK_NODE_LINK:
400 if (entering) {
401 const char *url = cmark_node_get_url(node);
402 // requires \usepackage{hyperref}
403 switch (get_link_type(node)) {
404 case URL_AUTOLINK:
405 LIT("\\url{");
406 OUT(url, false, URL);
407 LIT("}");
408 return 0; // Don't process further nodes to avoid double-rendering artefacts
409 case EMAIL_AUTOLINK:
410 LIT("\\href{");
411 OUT(url, false, URL);
412 LIT("}\\nolinkurl{");
413 break;
414 case NORMAL_LINK:
415 LIT("\\href{");
416 OUT(url, false, URL);
417 LIT("}{");
418 break;
419 case INTERNAL_LINK:
420 LIT("\\protect\\hyperlink{");
421 OUT(url + 1, false, URL);
422 LIT("}{");
423 break;
424 case NO_LINK:
425 LIT("{"); // error?
426 }
427 } else {
428 LIT("}");
429 }
430
431 break;
432
433 case CMARK_NODE_IMAGE:
434 if (entering) {
435 LIT("\\protect\\includegraphics{");
436 // requires \include{graphicx}
437 OUT(cmark_node_get_url(node), false, URL);
438 LIT("}");
439 return 0;
440 }
441 break;
442
443 default:
444 assert(false);
445 break;
446 }
447
448 return 1;
449}
450
451char *cmark_render_latex(cmark_node *root, int options, int width) {
452 return cmark_render(root, options, width, outc, S_render_node);
453}
454