1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4#include <stdint.h>
5#include <assert.h>
6
7#include "config.h"
8#include "cmark.h"
9#include "node.h"
10#include "buffer.h"
11#include "utf8.h"
12#include "scanners.h"
13#include "render.h"
14
15#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
16#define LIT(s) renderer->out(renderer, s, false, LITERAL)
17#define CR() renderer->cr(renderer)
18#define BLANKLINE() renderer->blankline(renderer)
19#define ENCODED_SIZE 20
20#define LISTMARKER_SIZE 20
21
22// Functions to convert cmark_nodes to commonmark strings.
23
24static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
25 int32_t c, unsigned char nextc) {
26 bool needs_escaping = false;
27 bool follows_digit =
28 renderer->buffer->size > 0 &&
29 cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
30 char encoded[ENCODED_SIZE];
31 int options = renderer->options;
32
33 needs_escaping =
34 c < 0x80 && escape != LITERAL &&
35 ((escape == NORMAL &&
36 (c < 0x20 ||
37 c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
38 c == '>' || c == '\\' || c == '`' || c == '!' ||
39 (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
40 ((CMARK_OPT_SMART & options) &&
41 ((c == '-' && nextc == '-') ||
42 (c == '.' && nextc == '.') ||
43 c == '"' || c == '\'')) ||
44 (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
45 // begin_content doesn't get set to false til we've passed digits
46 // at the beginning of line, so...
47 !follows_digit) ||
48 (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
49 (nextc == 0 || cmark_isspace(nextc))))) ||
50 (escape == URL &&
51 (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' ||
52 c == ')' || c == '(')) ||
53 (escape == TITLE &&
54 (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
55
56 if (needs_escaping) {
57 if (escape == URL && cmark_isspace(c)) {
58 // use percent encoding for spaces
59 snprintf(encoded, ENCODED_SIZE, "%%%2X", c);
60 cmark_strbuf_puts(renderer->buffer, encoded);
61 renderer->column += 3;
62 } else if (cmark_ispunct(c)) {
63 cmark_render_ascii(renderer, "\\");
64 cmark_render_code_point(renderer, c);
65 } else { // render as entity
66 snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
67 cmark_strbuf_puts(renderer->buffer, encoded);
68 renderer->column += strlen(encoded);
69 }
70 } else {
71 cmark_render_code_point(renderer, c);
72 }
73}
74
// Returns the length of the longest run of consecutive backticks in the
// NUL-terminated string `code` (0 if it contains none).
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p;

  // The terminating NUL is visited too, so that a run reaching the end of
  // the string is still compared against the best one found so far.
  for (p = code;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    if (run > best) {
      best = run;
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }
  return best;
}
93
// Returns the smallest n >= 1 such that `code` contains no run of exactly
// n consecutive backticks. Only run lengths 1..31 are tracked (one bit
// each); if all of those occur, 32 is returned.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of exactly k backticks was seen". Bit 0 starts
  // set so that length 0 is never chosen.
  uint32_t seen = 1;
  int run = 0;
  int candidate;
  const char *p = code;

  // Scan up to and including the terminating NUL, which closes a run that
  // reaches the end of the string.
  do {
    if (*p == '`') {
      run++;
    } else {
      if (run > 0 && run < 32) {
        seen |= (1U << run);
      }
      run = 0;
    }
  } while (*p++ != '\0');

  // Pick the lowest bit that is still clear.
  for (candidate = 0; candidate < 32; candidate++) {
    if (!(seen & (1U << candidate))) {
      break;
    }
  }
  return candidate;
}
120
121static bool is_autolink(cmark_node *node) {
122 const unsigned char *title;
123 const unsigned char *url;
124 cmark_node *link_text;
125
126 if (node->type != CMARK_NODE_LINK) {
127 return false;
128 }
129
130 url = node->as.link.url;
131 if (url == NULL || _scan_scheme(url) == 0) {
132 return false;
133 }
134
135 title = node->as.link.title;
136 // if it has a title, we can't treat it as an autolink:
137 if (title && title[0]) {
138 return false;
139 }
140
141 link_text = node->first_child;
142 if (link_text == NULL) {
143 return false;
144 }
145 cmark_consolidate_text_nodes(link_text);
146 if (strncmp((const char *)url, "mailto:", 7) == 0) {
147 url += 7;
148 }
149 return link_text->data != NULL &&
150 strcmp((const char *)url, (char *)link_text->data) == 0;
151}
152
153// if node is a block node, returns node.
154// otherwise returns first block-level node that is an ancestor of node.
155// if there is no block-level ancestor, returns NULL.
156static cmark_node *get_containing_block(cmark_node *node) {
157 while (node) {
158 if (node->type >= CMARK_NODE_FIRST_BLOCK &&
159 node->type <= CMARK_NODE_LAST_BLOCK) {
160 return node;
161 } else {
162 node = node->parent;
163 }
164 }
165 return NULL;
166}
167
168static int S_render_node(cmark_renderer *renderer, cmark_node *node,
169 cmark_event_type ev_type, int options) {
170 cmark_node *tmp;
171 int list_number;
172 cmark_delim_type list_delim;
173 size_t numticks;
174 bool extra_spaces;
175 size_t i;
176 bool entering = (ev_type == CMARK_EVENT_ENTER);
177 const char *info, *code, *title;
178 char fencechar[2] = {'\0', '\0'};
179 size_t code_len;
180 char listmarker[LISTMARKER_SIZE];
181 const char *emph_delim;
182 bool first_in_list_item;
183 bufsize_t marker_width;
184 bool has_nonspace;
185 bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
186 !(CMARK_OPT_HARDBREAKS & options);
187
188 // Don't adjust tight list status til we've started the list.
189 // Otherwise we loose the blank line between a paragraph and
190 // a following list.
191 if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
192 tmp = get_containing_block(node);
193 renderer->in_tight_list_item =
194 tmp && // tmp might be NULL if there is no containing block
195 ((tmp->type == CMARK_NODE_ITEM &&
196 cmark_node_get_list_tight(tmp->parent)) ||
197 (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
198 cmark_node_get_list_tight(tmp->parent->parent)));
199 }
200
201 switch (node->type) {
202 case CMARK_NODE_DOCUMENT:
203 break;
204
205 case CMARK_NODE_BLOCK_QUOTE:
206 if (entering) {
207 LIT("> ");
208 renderer->begin_content = true;
209 cmark_strbuf_puts(renderer->prefix, "> ");
210 } else {
211 cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
212 BLANKLINE();
213 }
214 break;
215
216 case CMARK_NODE_LIST:
217 if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) {
218 // this ensures that a following indented code block or list will be
219 // inteprereted correctly.
220 CR();
221 LIT("<!-- end list -->");
222 BLANKLINE();
223 }
224 break;
225
226 case CMARK_NODE_ITEM:
227 if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
228 marker_width = 4;
229 } else {
230 list_number = cmark_node_get_list_start(node->parent);
231 list_delim = cmark_node_get_list_delim(node->parent);
232 tmp = node;
233 while (tmp->prev) {
234 tmp = tmp->prev;
235 list_number += 1;
236 }
237 // we ensure a width of at least 4 so
238 // we get nice transition from single digits
239 // to double
240 snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
241 list_delim == CMARK_PAREN_DELIM ? ")" : ".",
242 list_number < 10 ? " " : " ");
243 marker_width = strlen(listmarker);
244 }
245 if (entering) {
246 if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
247 LIT(" - ");
248 renderer->begin_content = true;
249 } else {
250 LIT(listmarker);
251 renderer->begin_content = true;
252 }
253 for (i = marker_width; i--;) {
254 cmark_strbuf_putc(renderer->prefix, ' ');
255 }
256 } else {
257 cmark_strbuf_truncate(renderer->prefix,
258 renderer->prefix->size - marker_width);
259 CR();
260 }
261 break;
262
263 case CMARK_NODE_HEADING:
264 if (entering) {
265 for (i = cmark_node_get_heading_level(node); i > 0; i--) {
266 LIT("#");
267 }
268 LIT(" ");
269 renderer->begin_content = true;
270 renderer->no_linebreaks = true;
271 } else {
272 renderer->no_linebreaks = false;
273 BLANKLINE();
274 }
275 break;
276
277 case CMARK_NODE_CODE_BLOCK:
278
279 first_in_list_item = node->prev == NULL && node->parent &&
280 node->parent->type == CMARK_NODE_ITEM;
281
282 if (!first_in_list_item) {
283 BLANKLINE();
284 }
285 info = cmark_node_get_fence_info(node);
286 fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
287 code = cmark_node_get_literal(node);
288
289 numticks = longest_backtick_sequence(code) + 1;
290 if (numticks < 3) {
291 numticks = 3;
292 }
293 for (i = 0; i < numticks; i++) {
294 LIT(fencechar);
295 }
296 LIT(" ");
297 OUT(info, false, LITERAL);
298 CR();
299 OUT(cmark_node_get_literal(node), false, LITERAL);
300 CR();
301 for (i = 0; i < numticks; i++) {
302 LIT(fencechar);
303 }
304
305 BLANKLINE();
306 break;
307
308 case CMARK_NODE_HTML_BLOCK:
309 BLANKLINE();
310 OUT(cmark_node_get_literal(node), false, LITERAL);
311 BLANKLINE();
312 break;
313
314 case CMARK_NODE_CUSTOM_BLOCK:
315 BLANKLINE();
316 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
317 false, LITERAL);
318 BLANKLINE();
319 break;
320
321 case CMARK_NODE_THEMATIC_BREAK:
322 BLANKLINE();
323 LIT("-----");
324 BLANKLINE();
325 break;
326
327 case CMARK_NODE_PARAGRAPH:
328 if (!entering) {
329 BLANKLINE();
330 }
331 break;
332
333 case CMARK_NODE_TEXT:
334 OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
335 break;
336
337 case CMARK_NODE_LINEBREAK:
338 if (!(CMARK_OPT_HARDBREAKS & options)) {
339 LIT(" ");
340 }
341 CR();
342 break;
343
344 case CMARK_NODE_SOFTBREAK:
345 if (CMARK_OPT_HARDBREAKS & options) {
346 LIT(" ");
347 CR();
348 } else if (!renderer->no_linebreaks && renderer->width == 0 &&
349 !(CMARK_OPT_HARDBREAKS & options) &&
350 !(CMARK_OPT_NOBREAKS & options)) {
351 CR();
352 } else {
353 OUT(" ", allow_wrap, LITERAL);
354 }
355 break;
356
357 case CMARK_NODE_CODE:
358 code = cmark_node_get_literal(node);
359 code_len = strlen(code);
360 numticks = shortest_unused_backtick_sequence(code);
361 has_nonspace = false;
362 for (i=0; i < code_len; i++) {
363 if (code[i] != ' ') {
364 has_nonspace = true;
365 break;
366 }
367 }
368 extra_spaces = code_len == 0 ||
369 code[0] == '`' || code[code_len - 1] == '`' ||
370 (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' ');
371 for (i = 0; i < numticks; i++) {
372 LIT("`");
373 }
374 if (extra_spaces) {
375 LIT(" ");
376 }
377 OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
378 if (extra_spaces) {
379 LIT(" ");
380 }
381 for (i = 0; i < numticks; i++) {
382 LIT("`");
383 }
384 break;
385
386 case CMARK_NODE_HTML_INLINE:
387 OUT(cmark_node_get_literal(node), false, LITERAL);
388 break;
389
390 case CMARK_NODE_CUSTOM_INLINE:
391 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
392 false, LITERAL);
393 break;
394
395 case CMARK_NODE_STRONG:
396 if (entering) {
397 LIT("**");
398 } else {
399 LIT("**");
400 }
401 break;
402
403 case CMARK_NODE_EMPH:
404 // If we have EMPH(EMPH(x)), we need to use *_x_*
405 // because **x** is STRONG(x):
406 if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
407 node->next == NULL && node->prev == NULL) {
408 emph_delim = "_";
409 } else {
410 emph_delim = "*";
411 }
412 if (entering) {
413 LIT(emph_delim);
414 } else {
415 LIT(emph_delim);
416 }
417 break;
418
419 case CMARK_NODE_LINK:
420 if (is_autolink(node)) {
421 if (entering) {
422 LIT("<");
423 if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
424 LIT((const char *)cmark_node_get_url(node) + 7);
425 } else {
426 LIT((const char *)cmark_node_get_url(node));
427 }
428 LIT(">");
429 // return signal to skip contents of node...
430 return 0;
431 }
432 } else {
433 if (entering) {
434 LIT("[");
435 } else {
436 LIT("](");
437 OUT(cmark_node_get_url(node), false, URL);
438 title = cmark_node_get_title(node);
439 if (strlen(title) > 0) {
440 LIT(" \"");
441 OUT(title, false, TITLE);
442 LIT("\"");
443 }
444 LIT(")");
445 }
446 }
447 break;
448
449 case CMARK_NODE_IMAGE:
450 if (entering) {
451 LIT("![");
452 } else {
453 LIT("](");
454 OUT(cmark_node_get_url(node), false, URL);
455 title = cmark_node_get_title(node);
456 if (strlen(title) > 0) {
457 OUT(" \"", allow_wrap, LITERAL);
458 OUT(title, false, TITLE);
459 LIT("\"");
460 }
461 LIT(")");
462 }
463 break;
464
465 default:
466 assert(false);
467 break;
468 }
469
470 return 1;
471}
472
473char *cmark_render_commonmark(cmark_node *root, int options, int width) {
474 if (options & CMARK_OPT_HARDBREAKS) {
475 // disable breaking on width, since it has
476 // a different meaning with OPT_HARDBREAKS
477 width = 0;
478 }
479 return cmark_render(root, options, width, outc, S_render_node);
480}
481