1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4#include <assert.h>
5
6#include "config.h"
7#include "cmark.h"
8#include "node.h"
9#include "buffer.h"
10
// Size in bytes of the stack scratch buffer that S_render_node() formats
// attribute text into with snprintf() before appending it to the output.
#define BUFFER_SIZE 100
// Upper bound on the number of spaces indent() emits for a line; nesting
// deeper than this is rendered at this same depth.
#define MAX_INDENT 40
13
14// Functions to convert cmark_nodes to XML strings.
15
// Escape class of every possible byte value, consulted by escape_xml().
// C0 control characters, U+FFFE and U+FFFF aren't allowed in XML.
//
//   0    - byte is copied through unchanged (every entry not listed below)
//   1    - C0 control character other than TAB, LF and CR
//   2..5 - '"', '&', '<', '>'
//   9    - 0xBE or 0xBF, which may be the last byte of the UTF-8 encoding
//          of U+FFFE or U+FFFF
static const char XML_ESCAPE_TABLE[256] = {
    /* C0 controls below TAB */
    [0x00] = 1, [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1,
    [0x05] = 1, [0x06] = 1, [0x07] = 1, [0x08] = 1,
    /* 0x09 (TAB) and 0x0A (LF) are allowed */
    [0x0B] = 1, [0x0C] = 1,
    /* 0x0D (CR) is allowed */
    [0x0E] = 1, [0x0F] = 1,
    [0x10] = 1, [0x11] = 1, [0x12] = 1, [0x13] = 1,
    [0x14] = 1, [0x15] = 1, [0x16] = 1, [0x17] = 1,
    [0x18] = 1, [0x19] = 1, [0x1A] = 1, [0x1B] = 1,
    [0x1C] = 1, [0x1D] = 1, [0x1E] = 1, [0x1F] = 1,

    /* Markup characters */
    [0x22] = 2, /* '"' */
    [0x26] = 3, /* '&' */
    [0x3C] = 4, /* '<' */
    [0x3E] = 5, /* '>' */

    /* Candidate final bytes of U+FFFE / U+FFFF */
    [0xBE] = 9,
    [0xBF] = 9,
};
35
// UTF-8 byte sequence of U+FFFD REPLACEMENT CHARACTER.
#define UTF8_REPL "\xEF\xBF\xBD"

// Replacement text for escape classes 0 through 5 of XML_ESCAPE_TABLE,
// indexed by class.
static const char *XML_ESCAPES[] = {
    [0] = "",
    [1] = UTF8_REPL,
    [2] = "&quot;",
    [3] = "&amp;",
    [4] = "&lt;",
    [5] = "&gt;",
};
42
43static void escape_xml(cmark_strbuf *ob, const unsigned char *src,
44 bufsize_t size) {
45 bufsize_t i = 0, org, esc = 0;
46
47 while (i < size) {
48 org = i;
49 while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0)
50 i++;
51
52 if (i > org)
53 cmark_strbuf_put(ob, src + org, i - org);
54
55 if (i >= size)
56 break;
57
58 if (esc == 9) {
59 // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to
60 // be changed.
61 // We know that src[i] is 0xBE or 0xBF.
62 if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) {
63 cmark_strbuf_putc(ob, 0xBD);
64 } else {
65 cmark_strbuf_putc(ob, src[i]);
66 }
67 } else {
68 cmark_strbuf_puts(ob, XML_ESCAPES[esc]);
69 }
70
71 i++;
72 }
73}
74
75static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) {
76 if (source)
77 escape_xml(dest, source, strlen((char *)source));
78}
79
// State threaded through S_render_node() for one rendering pass.
// Field order matters: cmark_render_xml() initializes it positionally.
struct render_state {
  // Buffer the XML output is appended to.
  cmark_strbuf *xml;
  // Current indentation in spaces; S_render_node() raises it by 2 when it
  // enters a node that has children and lowers it by 2 when it leaves one.
  int indent;
};
84
85static CMARK_INLINE void indent(struct render_state *state) {
86 int i;
87 for (i = 0; i < state->indent && i < MAX_INDENT; i++) {
88 cmark_strbuf_putc(state->xml, ' ');
89 }
90}
91
92static int S_render_node(cmark_node *node, cmark_event_type ev_type,
93 struct render_state *state, int options) {
94 cmark_strbuf *xml = state->xml;
95 bool literal = false;
96 cmark_delim_type delim;
97 bool entering = (ev_type == CMARK_EVENT_ENTER);
98 char buffer[BUFFER_SIZE];
99
100 if (entering) {
101 indent(state);
102 cmark_strbuf_putc(xml, '<');
103 cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
104
105 if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
106 snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
107 node->start_line, node->start_column, node->end_line,
108 node->end_column);
109 cmark_strbuf_puts(xml, buffer);
110 }
111
112 literal = false;
113
114 switch (node->type) {
115 case CMARK_NODE_DOCUMENT:
116 cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
117 break;
118 case CMARK_NODE_TEXT:
119 case CMARK_NODE_CODE:
120 case CMARK_NODE_HTML_BLOCK:
121 case CMARK_NODE_HTML_INLINE:
122 cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
123 escape_xml(xml, node->data, node->len);
124 cmark_strbuf_puts(xml, "</");
125 cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
126 literal = true;
127 break;
128 case CMARK_NODE_LIST:
129 switch (cmark_node_get_list_type(node)) {
130 case CMARK_ORDERED_LIST:
131 cmark_strbuf_puts(xml, " type=\"ordered\"");
132 snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
133 cmark_node_get_list_start(node));
134 cmark_strbuf_puts(xml, buffer);
135 delim = cmark_node_get_list_delim(node);
136 if (delim == CMARK_PAREN_DELIM) {
137 cmark_strbuf_puts(xml, " delim=\"paren\"");
138 } else if (delim == CMARK_PERIOD_DELIM) {
139 cmark_strbuf_puts(xml, " delim=\"period\"");
140 }
141 break;
142 case CMARK_BULLET_LIST:
143 cmark_strbuf_puts(xml, " type=\"bullet\"");
144 break;
145 default:
146 break;
147 }
148 snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
149 (cmark_node_get_list_tight(node) ? "true" : "false"));
150 cmark_strbuf_puts(xml, buffer);
151 break;
152 case CMARK_NODE_HEADING:
153 snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
154 cmark_strbuf_puts(xml, buffer);
155 break;
156 case CMARK_NODE_CODE_BLOCK:
157 if (node->as.code.info) {
158 cmark_strbuf_puts(xml, " info=\"");
159 escape_xml_str(xml, node->as.code.info);
160 cmark_strbuf_putc(xml, '"');
161 }
162 cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
163 escape_xml(xml, node->data, node->len);
164 cmark_strbuf_puts(xml, "</");
165 cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
166 literal = true;
167 break;
168 case CMARK_NODE_CUSTOM_BLOCK:
169 case CMARK_NODE_CUSTOM_INLINE:
170 cmark_strbuf_puts(xml, " on_enter=\"");
171 escape_xml_str(xml, node->as.custom.on_enter);
172 cmark_strbuf_putc(xml, '"');
173 cmark_strbuf_puts(xml, " on_exit=\"");
174 escape_xml_str(xml, node->as.custom.on_exit);
175 cmark_strbuf_putc(xml, '"');
176 break;
177 case CMARK_NODE_LINK:
178 case CMARK_NODE_IMAGE:
179 cmark_strbuf_puts(xml, " destination=\"");
180 escape_xml_str(xml, node->as.link.url);
181 cmark_strbuf_putc(xml, '"');
182 if (node->as.link.title) {
183 cmark_strbuf_puts(xml, " title=\"");
184 escape_xml_str(xml, node->as.link.title);
185 cmark_strbuf_putc(xml, '"');
186 }
187 break;
188 default:
189 break;
190 }
191 if (node->first_child) {
192 state->indent += 2;
193 } else if (!literal) {
194 cmark_strbuf_puts(xml, " /");
195 }
196 cmark_strbuf_puts(xml, ">\n");
197
198 } else if (node->first_child) {
199 state->indent -= 2;
200 indent(state);
201 cmark_strbuf_puts(xml, "</");
202 cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
203 cmark_strbuf_puts(xml, ">\n");
204 }
205
206 return 1;
207}
208
209char *cmark_render_xml(cmark_node *root, int options) {
210 char *result;
211 cmark_strbuf xml = CMARK_BUF_INIT(root->mem);
212 cmark_event_type ev_type;
213 cmark_node *cur;
214 struct render_state state = {&xml, 0};
215
216 cmark_iter *iter = cmark_iter_new(root);
217
218 cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
219 cmark_strbuf_puts(state.xml,
220 "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");
221 while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
222 cur = cmark_iter_get_node(iter);
223 S_render_node(cur, ev_type, &state, options);
224 }
225 result = (char *)cmark_strbuf_detach(&xml);
226
227 cmark_iter_free(iter);
228 return result;
229}
230