1 | #include <stdlib.h> |
2 | #include <stdio.h> |
3 | #include <string.h> |
4 | #include <stdint.h> |
5 | #include <assert.h> |
6 | |
7 | #include "config.h" |
8 | #include "cmark.h" |
9 | #include "node.h" |
10 | #include "buffer.h" |
11 | #include "utf8.h" |
12 | #include "scanners.h" |
13 | #include "render.h" |
14 | |
// Convenience wrappers around the renderer callbacks. Each of the first four
// macros expands to a call through a variable named `renderer`, so they can
// only be used where such a variable is in scope.

// Emit string `s` through renderer->out with the given wrap flag and
// escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
// Emit string `s` through renderer->out with wrap == false and LITERAL
// escaping.
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
// Invoke the renderer's cr callback.
#define CR() renderer->cr(renderer)
// Invoke the renderer's blankline callback.
#define BLANKLINE() renderer->blankline(renderer)
// Size in bytes of the scratch buffer used by outc() for percent-encoded
// and numeric-entity output.
#define ENCODED_SIZE 20
// Size in bytes of the buffer that holds a formatted ordered-list marker.
#define LISTMARKER_SIZE 20
21 | |
22 | // Functions to convert cmark_nodes to commonmark strings. |
23 | |
24 | static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, |
25 | int32_t c, unsigned char nextc) { |
26 | bool needs_escaping = false; |
27 | bool follows_digit = |
28 | renderer->buffer->size > 0 && |
29 | cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); |
30 | char encoded[ENCODED_SIZE]; |
31 | int options = renderer->options; |
32 | |
33 | needs_escaping = |
34 | c < 0x80 && escape != LITERAL && |
35 | ((escape == NORMAL && |
36 | (c < 0x20 || |
37 | c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || |
38 | c == '>' || c == '\\' || c == '`' || c == '!' || |
39 | (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || |
40 | ((CMARK_OPT_SMART & options) && |
41 | ((c == '-' && nextc == '-') || |
42 | (c == '.' && nextc == '.') || |
43 | c == '"' || c == '\'')) || |
44 | (renderer->begin_content && (c == '-' || c == '+' || c == '=') && |
45 | // begin_content doesn't get set to false til we've passed digits |
46 | // at the beginning of line, so... |
47 | !follows_digit) || |
48 | (renderer->begin_content && (c == '.' || c == ')') && follows_digit && |
49 | (nextc == 0 || cmark_isspace(nextc))))) || |
50 | (escape == URL && |
51 | (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || |
52 | c == ')' || c == '(')) || |
53 | (escape == TITLE && |
54 | (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); |
55 | |
56 | if (needs_escaping) { |
57 | if (escape == URL && cmark_isspace(c)) { |
58 | // use percent encoding for spaces |
59 | snprintf(encoded, ENCODED_SIZE, "%%%2X" , c); |
60 | cmark_strbuf_puts(renderer->buffer, encoded); |
61 | renderer->column += 3; |
62 | } else if (cmark_ispunct(c)) { |
63 | cmark_render_ascii(renderer, "\\" ); |
64 | cmark_render_code_point(renderer, c); |
65 | } else { // render as entity |
66 | snprintf(encoded, ENCODED_SIZE, "&#%d;" , c); |
67 | cmark_strbuf_puts(renderer->buffer, encoded); |
68 | renderer->column += strlen(encoded); |
69 | } |
70 | } else { |
71 | cmark_render_code_point(renderer, c); |
72 | } |
73 | } |
74 | |
// Returns the length of the longest run of consecutive '`' characters in
// the NUL-terminated string `code` (0 if it contains no backtick).
static int longest_backtick_sequence(const char *code) {
  const char *p = code;
  int best = 0;
  int run = 0;

  for (;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    // Any other byte, including the terminating NUL, closes the current
    // run; the NUL is what lets a trailing run be counted.
    if (run > best) {
      best = run;
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }
  return best;
}
93 | |
// Returns the smallest n >= 1 such that `code` contains no run of exactly
// n consecutive backticks. Run lengths are tracked in a 32-bit set, so
// only lengths 1..31 are recorded and the result is capped at 32.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of length k occurs"; bit 0 is pre-set so that
  // a length of zero is never returned.
  uint32_t seen = 1;
  const char *p = code;
  int run = 0;
  int n;

  for (;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    // A non-backtick byte (or the terminating NUL) ends the current run.
    if (run > 0 && run < 32) {
      seen |= (uint32_t)1 << run;
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }

  // Index of the lowest clear bit, or 32 when every bit is set.
  for (n = 0; n < 32; n++) {
    if (((seen >> n) & 1U) == 0) {
      break;
    }
  }
  return n;
}
120 | |
121 | static bool is_autolink(cmark_node *node) { |
122 | const unsigned char *title; |
123 | const unsigned char *url; |
124 | cmark_node *link_text; |
125 | |
126 | if (node->type != CMARK_NODE_LINK) { |
127 | return false; |
128 | } |
129 | |
130 | url = node->as.link.url; |
131 | if (url == NULL || _scan_scheme(url) == 0) { |
132 | return false; |
133 | } |
134 | |
135 | title = node->as.link.title; |
136 | // if it has a title, we can't treat it as an autolink: |
137 | if (title && title[0]) { |
138 | return false; |
139 | } |
140 | |
141 | link_text = node->first_child; |
142 | if (link_text == NULL) { |
143 | return false; |
144 | } |
145 | cmark_consolidate_text_nodes(link_text); |
146 | if (strncmp((const char *)url, "mailto:" , 7) == 0) { |
147 | url += 7; |
148 | } |
149 | return link_text->data != NULL && |
150 | strcmp((const char *)url, (char *)link_text->data) == 0; |
151 | } |
152 | |
153 | // if node is a block node, returns node. |
154 | // otherwise returns first block-level node that is an ancestor of node. |
155 | // if there is no block-level ancestor, returns NULL. |
156 | static cmark_node *get_containing_block(cmark_node *node) { |
157 | while (node) { |
158 | if (node->type >= CMARK_NODE_FIRST_BLOCK && |
159 | node->type <= CMARK_NODE_LAST_BLOCK) { |
160 | return node; |
161 | } else { |
162 | node = node->parent; |
163 | } |
164 | } |
165 | return NULL; |
166 | } |
167 | |
168 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, |
169 | cmark_event_type ev_type, int options) { |
170 | cmark_node *tmp; |
171 | int list_number; |
172 | cmark_delim_type list_delim; |
173 | size_t numticks; |
174 | bool ; |
175 | size_t i; |
176 | bool entering = (ev_type == CMARK_EVENT_ENTER); |
177 | const char *info, *code, *title; |
178 | char fencechar[2] = {'\0', '\0'}; |
179 | size_t code_len; |
180 | char listmarker[LISTMARKER_SIZE]; |
181 | const char *emph_delim; |
182 | bool first_in_list_item; |
183 | bufsize_t marker_width; |
184 | bool has_nonspace; |
185 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && |
186 | !(CMARK_OPT_HARDBREAKS & options); |
187 | |
188 | // Don't adjust tight list status til we've started the list. |
189 | // Otherwise we loose the blank line between a paragraph and |
190 | // a following list. |
191 | if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { |
192 | tmp = get_containing_block(node); |
193 | renderer->in_tight_list_item = |
194 | tmp && // tmp might be NULL if there is no containing block |
195 | ((tmp->type == CMARK_NODE_ITEM && |
196 | cmark_node_get_list_tight(tmp->parent)) || |
197 | (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && |
198 | cmark_node_get_list_tight(tmp->parent->parent))); |
199 | } |
200 | |
201 | switch (node->type) { |
202 | case CMARK_NODE_DOCUMENT: |
203 | break; |
204 | |
205 | case CMARK_NODE_BLOCK_QUOTE: |
206 | if (entering) { |
207 | LIT("> " ); |
208 | renderer->begin_content = true; |
209 | cmark_strbuf_puts(renderer->prefix, "> " ); |
210 | } else { |
211 | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); |
212 | BLANKLINE(); |
213 | } |
214 | break; |
215 | |
216 | case CMARK_NODE_LIST: |
217 | if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { |
218 | // this ensures that a following indented code block or list will be |
219 | // inteprereted correctly. |
220 | CR(); |
221 | LIT("<!-- end list -->" ); |
222 | BLANKLINE(); |
223 | } |
224 | break; |
225 | |
226 | case CMARK_NODE_ITEM: |
227 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
228 | marker_width = 4; |
229 | } else { |
230 | list_number = cmark_node_get_list_start(node->parent); |
231 | list_delim = cmark_node_get_list_delim(node->parent); |
232 | tmp = node; |
233 | while (tmp->prev) { |
234 | tmp = tmp->prev; |
235 | list_number += 1; |
236 | } |
237 | // we ensure a width of at least 4 so |
238 | // we get nice transition from single digits |
239 | // to double |
240 | snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s" , list_number, |
241 | list_delim == CMARK_PAREN_DELIM ? ")" : "." , |
242 | list_number < 10 ? " " : " " ); |
243 | marker_width = strlen(listmarker); |
244 | } |
245 | if (entering) { |
246 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
247 | LIT(" - " ); |
248 | renderer->begin_content = true; |
249 | } else { |
250 | LIT(listmarker); |
251 | renderer->begin_content = true; |
252 | } |
253 | for (i = marker_width; i--;) { |
254 | cmark_strbuf_putc(renderer->prefix, ' '); |
255 | } |
256 | } else { |
257 | cmark_strbuf_truncate(renderer->prefix, |
258 | renderer->prefix->size - marker_width); |
259 | CR(); |
260 | } |
261 | break; |
262 | |
263 | case CMARK_NODE_HEADING: |
264 | if (entering) { |
265 | for (i = cmark_node_get_heading_level(node); i > 0; i--) { |
266 | LIT("#" ); |
267 | } |
268 | LIT(" " ); |
269 | renderer->begin_content = true; |
270 | renderer->no_linebreaks = true; |
271 | } else { |
272 | renderer->no_linebreaks = false; |
273 | BLANKLINE(); |
274 | } |
275 | break; |
276 | |
277 | case CMARK_NODE_CODE_BLOCK: |
278 | |
279 | first_in_list_item = node->prev == NULL && node->parent && |
280 | node->parent->type == CMARK_NODE_ITEM; |
281 | |
282 | if (!first_in_list_item) { |
283 | BLANKLINE(); |
284 | } |
285 | info = cmark_node_get_fence_info(node); |
286 | fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; |
287 | code = cmark_node_get_literal(node); |
288 | |
289 | numticks = longest_backtick_sequence(code) + 1; |
290 | if (numticks < 3) { |
291 | numticks = 3; |
292 | } |
293 | for (i = 0; i < numticks; i++) { |
294 | LIT(fencechar); |
295 | } |
296 | LIT(" " ); |
297 | OUT(info, false, LITERAL); |
298 | CR(); |
299 | OUT(cmark_node_get_literal(node), false, LITERAL); |
300 | CR(); |
301 | for (i = 0; i < numticks; i++) { |
302 | LIT(fencechar); |
303 | } |
304 | |
305 | BLANKLINE(); |
306 | break; |
307 | |
308 | case CMARK_NODE_HTML_BLOCK: |
309 | BLANKLINE(); |
310 | OUT(cmark_node_get_literal(node), false, LITERAL); |
311 | BLANKLINE(); |
312 | break; |
313 | |
314 | case CMARK_NODE_CUSTOM_BLOCK: |
315 | BLANKLINE(); |
316 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
317 | false, LITERAL); |
318 | BLANKLINE(); |
319 | break; |
320 | |
321 | case CMARK_NODE_THEMATIC_BREAK: |
322 | BLANKLINE(); |
323 | LIT("-----" ); |
324 | BLANKLINE(); |
325 | break; |
326 | |
327 | case CMARK_NODE_PARAGRAPH: |
328 | if (!entering) { |
329 | BLANKLINE(); |
330 | } |
331 | break; |
332 | |
333 | case CMARK_NODE_TEXT: |
334 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); |
335 | break; |
336 | |
337 | case CMARK_NODE_LINEBREAK: |
338 | if (!(CMARK_OPT_HARDBREAKS & options)) { |
339 | LIT(" " ); |
340 | } |
341 | CR(); |
342 | break; |
343 | |
344 | case CMARK_NODE_SOFTBREAK: |
345 | if (CMARK_OPT_HARDBREAKS & options) { |
346 | LIT(" " ); |
347 | CR(); |
348 | } else if (!renderer->no_linebreaks && renderer->width == 0 && |
349 | !(CMARK_OPT_HARDBREAKS & options) && |
350 | !(CMARK_OPT_NOBREAKS & options)) { |
351 | CR(); |
352 | } else { |
353 | OUT(" " , allow_wrap, LITERAL); |
354 | } |
355 | break; |
356 | |
357 | case CMARK_NODE_CODE: |
358 | code = cmark_node_get_literal(node); |
359 | code_len = strlen(code); |
360 | numticks = shortest_unused_backtick_sequence(code); |
361 | has_nonspace = false; |
362 | for (i=0; i < code_len; i++) { |
363 | if (code[i] != ' ') { |
364 | has_nonspace = true; |
365 | break; |
366 | } |
367 | } |
368 | extra_spaces = code_len == 0 || |
369 | code[0] == '`' || code[code_len - 1] == '`' || |
370 | (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); |
371 | for (i = 0; i < numticks; i++) { |
372 | LIT("`" ); |
373 | } |
374 | if (extra_spaces) { |
375 | LIT(" " ); |
376 | } |
377 | OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); |
378 | if (extra_spaces) { |
379 | LIT(" " ); |
380 | } |
381 | for (i = 0; i < numticks; i++) { |
382 | LIT("`" ); |
383 | } |
384 | break; |
385 | |
386 | case CMARK_NODE_HTML_INLINE: |
387 | OUT(cmark_node_get_literal(node), false, LITERAL); |
388 | break; |
389 | |
390 | case CMARK_NODE_CUSTOM_INLINE: |
391 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
392 | false, LITERAL); |
393 | break; |
394 | |
395 | case CMARK_NODE_STRONG: |
396 | if (entering) { |
397 | LIT("**" ); |
398 | } else { |
399 | LIT("**" ); |
400 | } |
401 | break; |
402 | |
403 | case CMARK_NODE_EMPH: |
404 | // If we have EMPH(EMPH(x)), we need to use *_x_* |
405 | // because **x** is STRONG(x): |
406 | if (node->parent && node->parent->type == CMARK_NODE_EMPH && |
407 | node->next == NULL && node->prev == NULL) { |
408 | emph_delim = "_" ; |
409 | } else { |
410 | emph_delim = "*" ; |
411 | } |
412 | if (entering) { |
413 | LIT(emph_delim); |
414 | } else { |
415 | LIT(emph_delim); |
416 | } |
417 | break; |
418 | |
419 | case CMARK_NODE_LINK: |
420 | if (is_autolink(node)) { |
421 | if (entering) { |
422 | LIT("<" ); |
423 | if (strncmp(cmark_node_get_url(node), "mailto:" , 7) == 0) { |
424 | LIT((const char *)cmark_node_get_url(node) + 7); |
425 | } else { |
426 | LIT((const char *)cmark_node_get_url(node)); |
427 | } |
428 | LIT(">" ); |
429 | // return signal to skip contents of node... |
430 | return 0; |
431 | } |
432 | } else { |
433 | if (entering) { |
434 | LIT("[" ); |
435 | } else { |
436 | LIT("](" ); |
437 | OUT(cmark_node_get_url(node), false, URL); |
438 | title = cmark_node_get_title(node); |
439 | if (strlen(title) > 0) { |
440 | LIT(" \"" ); |
441 | OUT(title, false, TITLE); |
442 | LIT("\"" ); |
443 | } |
444 | LIT(")" ); |
445 | } |
446 | } |
447 | break; |
448 | |
449 | case CMARK_NODE_IMAGE: |
450 | if (entering) { |
451 | LIT("![" ); |
452 | } else { |
453 | LIT("](" ); |
454 | OUT(cmark_node_get_url(node), false, URL); |
455 | title = cmark_node_get_title(node); |
456 | if (strlen(title) > 0) { |
457 | OUT(" \"" , allow_wrap, LITERAL); |
458 | OUT(title, false, TITLE); |
459 | LIT("\"" ); |
460 | } |
461 | LIT(")" ); |
462 | } |
463 | break; |
464 | |
465 | default: |
466 | assert(false); |
467 | break; |
468 | } |
469 | |
470 | return 1; |
471 | } |
472 | |
473 | char *cmark_render_commonmark(cmark_node *root, int options, int width) { |
474 | if (options & CMARK_OPT_HARDBREAKS) { |
475 | // disable breaking on width, since it has |
476 | // a different meaning with OPT_HARDBREAKS |
477 | width = 0; |
478 | } |
479 | return cmark_render(root, options, width, outc, S_render_node); |
480 | } |
481 | |