1#ifndef CMARK_H
2#define CMARK_H
3
4#include <stdio.h>
5#include <cmark_export.h>
6#include <cmark_version.h>
7
8#ifdef __cplusplus
9extern "C" {
10#endif
11
/** # NAME
 *
 * **cmark** - CommonMark parsing, manipulating, and rendering
 */

/** # DESCRIPTION
 *
 * ## Simple Interface
 */
21
22/** Convert 'text' (assumed to be a UTF-8 encoded string with length
23 * 'len') from CommonMark Markdown to HTML, returning a null-terminated,
24 * UTF-8-encoded string. It is the caller's responsibility
25 * to free the returned buffer.
26 */
27CMARK_EXPORT
28char *cmark_markdown_to_html(const char *text, size_t len, int options);
29
/** ## Node Structure
 */

typedef enum {
  /* Error status: the value 'cmark_node_get_type' reports on error. */
  CMARK_NODE_NONE,

  /* Block */
  CMARK_NODE_DOCUMENT,
  CMARK_NODE_BLOCK_QUOTE,
  CMARK_NODE_LIST,
  CMARK_NODE_ITEM,
  CMARK_NODE_CODE_BLOCK,
  CMARK_NODE_HTML_BLOCK,
  CMARK_NODE_CUSTOM_BLOCK,
  CMARK_NODE_PARAGRAPH,
  CMARK_NODE_HEADING,
  CMARK_NODE_THEMATIC_BREAK,

  /* Aliases marking the first and last block-level enumerators. */
  CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
  CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK,

  /* Inline */
  CMARK_NODE_TEXT,
  CMARK_NODE_SOFTBREAK,
  CMARK_NODE_LINEBREAK,
  CMARK_NODE_CODE,
  CMARK_NODE_HTML_INLINE,
  CMARK_NODE_CUSTOM_INLINE,
  CMARK_NODE_EMPH,
  CMARK_NODE_STRONG,
  CMARK_NODE_LINK,
  CMARK_NODE_IMAGE,

  /* Aliases marking the first and last inline-level enumerators.
   * No trailing comma after the final enumerator, matching the other
   * enums in this header and keeping it valid for C89/C++98 consumers. */
  CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
  CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE
} cmark_node_type;
67
/* For backwards compatibility: alternative names that expand to the
 * current node type enumerators. */
#define CMARK_NODE_HEADER CMARK_NODE_HEADING
#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK
#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE
73
/* Kind of list a node represents; CMARK_NO_LIST is what
 * 'cmark_node_get_list_type' reports for a node that is not a list. */
typedef enum {
  CMARK_NO_LIST,
  CMARK_BULLET_LIST,
  CMARK_ORDERED_LIST
} cmark_list_type;
79
/* List delimiter kind; CMARK_NO_DELIM is what
 * 'cmark_node_get_list_delim' reports for a node that is not a list. */
typedef enum {
  CMARK_NO_DELIM,
  CMARK_PERIOD_DELIM,
  CMARK_PAREN_DELIM
} cmark_delim_type;
85
/* Forward declarations: this header only ever passes these types by
 * pointer and does not define their layout. */
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
89
/**
 * ## Custom memory allocator support
 */

/** Defines the memory allocation functions to be used by CMark
 * when parsing and allocating a document tree
 */
typedef struct cmark_mem {
  /* The three members carry the signatures of the standard calloc,
   * realloc and free functions. */
  void *(*calloc)(size_t nmemb, size_t size);
  void *(*realloc)(void *ptr, size_t size);
  void (*free)(void *ptr);
} cmark_mem;
102
103/** Returns a pointer to the default memory allocator.
104 */
105CMARK_EXPORT cmark_mem *cmark_get_default_mem_allocator();
106
107/**
108 * ## Creating and Destroying Nodes
109 */
110
111/** Creates a new node of type 'type'. Note that the node may have
112 * other required properties, which it is the caller's responsibility
113 * to assign.
114 */
115CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type);
116
117/** Same as `cmark_node_new`, but explicitly listing the memory
118 * allocator used to allocate the node. Note: be sure to use the same
119 * allocator for every node in a tree, or bad things can happen.
120 */
121CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type,
122 cmark_mem *mem);
123
124/** Frees the memory allocated for a node and any children.
125 */
126CMARK_EXPORT void cmark_node_free(cmark_node *node);
127
128/**
129 * ## Tree Traversal
130 */
131
132/** Returns the next node in the sequence after 'node', or NULL if
133 * there is none.
134 */
135CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node);
136
137/** Returns the previous node in the sequence after 'node', or NULL if
138 * there is none.
139 */
140CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node);
141
142/** Returns the parent of 'node', or NULL if there is none.
143 */
144CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node);
145
146/** Returns the first child of 'node', or NULL if 'node' has no children.
147 */
148CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
149
150/** Returns the last child of 'node', or NULL if 'node' has no children.
151 */
152CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
153
/**
 * ## Iterator
 *
 * An iterator will walk through a tree of nodes, starting from a root
 * node, returning one node at a time, together with information about
 * whether the node is being entered or exited.  The iterator will
 * first descend to a child node, if there is one.  When there is no
 * child, the iterator will go to the next sibling.  When there is no
 * next sibling, the iterator will return to the parent (but with
 * a 'cmark_event_type' of `CMARK_EVENT_EXIT`).  The iterator will
 * return `CMARK_EVENT_DONE` when it reaches the root node again.
 * One natural application is an HTML renderer, where an `ENTER` event
 * outputs an open tag and an `EXIT` event outputs a close tag.
 * An iterator might also be used to transform an AST in some systematic
 * way, for example, turning all level-3 headings into regular paragraphs.
 *
 *     void
 *     usage_example(cmark_node *root) {
 *         cmark_event_type ev_type;
 *         cmark_iter *iter = cmark_iter_new(root);
 *
 *         while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
 *             cmark_node *cur = cmark_iter_get_node(iter);
 *             // Do something with `cur` and `ev_type`
 *         }
 *
 *         cmark_iter_free(iter);
 *     }
 *
 * Iterators will never return `EXIT` events for leaf nodes, which are nodes
 * of type:
 *
 * * CMARK_NODE_HTML_BLOCK
 * * CMARK_NODE_THEMATIC_BREAK
 * * CMARK_NODE_CODE_BLOCK
 * * CMARK_NODE_TEXT
 * * CMARK_NODE_SOFTBREAK
 * * CMARK_NODE_LINEBREAK
 * * CMARK_NODE_CODE
 * * CMARK_NODE_HTML_INLINE
 *
 * Nodes must only be modified after an `EXIT` event, or an `ENTER` event for
 * leaf nodes.
 */
198
/* Event kinds reported by an iterator: ENTER/EXIT as a node is entered
 * or exited, DONE once the walk has returned to the root node. */
typedef enum {
  CMARK_EVENT_NONE,
  CMARK_EVENT_DONE,
  CMARK_EVENT_ENTER,
  CMARK_EVENT_EXIT
} cmark_event_type;
205
206/** Creates a new iterator starting at 'root'. The current node and event
207 * type are undefined until 'cmark_iter_next' is called for the first time.
208 * The memory allocated for the iterator should be released using
209 * 'cmark_iter_free' when it is no longer needed.
210 */
211CMARK_EXPORT
212cmark_iter *cmark_iter_new(cmark_node *root);
213
214/** Frees the memory allocated for an iterator.
215 */
216CMARK_EXPORT
217void cmark_iter_free(cmark_iter *iter);
218
219/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`,
220 * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`).
221 */
222CMARK_EXPORT
223cmark_event_type cmark_iter_next(cmark_iter *iter);
224
225/** Returns the current node.
226 */
227CMARK_EXPORT
228cmark_node *cmark_iter_get_node(cmark_iter *iter);
229
230/** Returns the current event type.
231 */
232CMARK_EXPORT
233cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);
234
235/** Returns the root node.
236 */
237CMARK_EXPORT
238cmark_node *cmark_iter_get_root(cmark_iter *iter);
239
240/** Resets the iterator so that the current node is 'current' and
241 * the event type is 'event_type'. The new current node must be a
242 * descendant of the root node or the root node itself.
243 */
244CMARK_EXPORT
245void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
246 cmark_event_type event_type);
247
248/**
249 * ## Accessors
250 */
251
252/** Returns the user data of 'node'.
253 */
254CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node);
255
256/** Sets arbitrary user data for 'node'. Returns 1 on success,
257 * 0 on failure.
258 */
259CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data);
260
261/** Returns the type of 'node', or `CMARK_NODE_NONE` on error.
262 */
263CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node);
264
265/** Like 'cmark_node_get_type', but returns a string representation
266 of the type, or `"<unknown>"`.
267 */
268CMARK_EXPORT
269const char *cmark_node_get_type_string(cmark_node *node);
270
271/** Returns the string contents of 'node', or an empty
272 string if none is set. Returns NULL if called on a
273 node that does not have string content.
274 */
275CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node);
276
277/** Sets the string contents of 'node'. Returns 1 on success,
278 * 0 on failure.
279 */
280CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content);
281
282/** Returns the heading level of 'node', or 0 if 'node' is not a heading.
283 */
284CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node);
285
286/* For backwards compatibility */
287#define cmark_node_get_header_level cmark_node_get_heading_level
288#define cmark_node_set_header_level cmark_node_set_heading_level
289
290/** Sets the heading level of 'node', returning 1 on success and 0 on error.
291 */
292CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level);
293
294/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node'
295 * is not a list.
296 */
297CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node);
298
299/** Sets the list type of 'node', returning 1 on success and 0 on error.
300 */
301CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node,
302 cmark_list_type type);
303
304/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node'
305 * is not a list.
306 */
307CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node);
308
309/** Sets the list delimiter type of 'node', returning 1 on success and 0
310 * on error.
311 */
312CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node,
313 cmark_delim_type delim);
314
315/** Returns starting number of 'node', if it is an ordered list, otherwise 0.
316 */
317CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node);
318
319/** Sets starting number of 'node', if it is an ordered list. Returns 1
320 * on success, 0 on failure.
321 */
322CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start);
323
324/** Returns 1 if 'node' is a tight list, 0 otherwise.
325 */
326CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node);
327
328/** Sets the "tightness" of a list. Returns 1 on success, 0 on failure.
329 */
330CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
331
332/** Returns the info string from a fenced code block.
333 */
334CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
335
336/** Sets the info string in a fenced code block, returning 1 on
337 * success and 0 on failure.
338 */
339CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info);
340
341/** Returns the URL of a link or image 'node', or an empty string
342 if no URL is set. Returns NULL if called on a node that is
343 not a link or image.
344 */
345CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node);
346
347/** Sets the URL of a link or image 'node'. Returns 1 on success,
348 * 0 on failure.
349 */
350CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url);
351
352/** Returns the title of a link or image 'node', or an empty
353 string if no title is set. Returns NULL if called on a node
354 that is not a link or image.
355 */
356CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node);
357
358/** Sets the title of a link or image 'node'. Returns 1 on success,
359 * 0 on failure.
360 */
361CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title);
362
363/** Returns the literal "on enter" text for a custom 'node', or
364 an empty string if no on_enter is set. Returns NULL if called
365 on a non-custom node.
366 */
367CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node);
368
369/** Sets the literal text to render "on enter" for a custom 'node'.
370 Any children of the node will be rendered after this text.
371 Returns 1 on success 0 on failure.
372 */
373CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node,
374 const char *on_enter);
375
376/** Returns the literal "on exit" text for a custom 'node', or
377 an empty string if no on_exit is set. Returns NULL if
378 called on a non-custom node.
379 */
380CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node);
381
382/** Sets the literal text to render "on exit" for a custom 'node'.
383 Any children of the node will be rendered before this text.
384 Returns 1 on success 0 on failure.
385 */
386CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit);
387
388/** Returns the line on which 'node' begins.
389 */
390CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node);
391
392/** Returns the column at which 'node' begins.
393 */
394CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node);
395
396/** Returns the line on which 'node' ends.
397 */
398CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node);
399
400/** Returns the column at which 'node' ends.
401 */
402CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node);
403
404/**
405 * ## Tree Manipulation
406 */
407
408/** Unlinks a 'node', removing it from the tree, but not freeing its
409 * memory. (Use 'cmark_node_free' for that.)
410 */
411CMARK_EXPORT void cmark_node_unlink(cmark_node *node);
412
413/** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure.
414 */
415CMARK_EXPORT int cmark_node_insert_before(cmark_node *node,
416 cmark_node *sibling);
417
418/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure.
419 */
420CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
421
422/** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does
423 * not free its memory).
424 * Returns 1 on success, 0 on failure.
425 */
426CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode);
427
428/** Adds 'child' to the beginning of the children of 'node'.
429 * Returns 1 on success, 0 on failure.
430 */
431CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child);
432
433/** Adds 'child' to the end of the children of 'node'.
434 * Returns 1 on success, 0 on failure.
435 */
436CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child);
437
438/** Consolidates adjacent text nodes.
439 */
440CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);
441
/**
 * ## Parsing
 *
 * Simple interface:
 *
 *     cmark_node *document = cmark_parse_document("Hello *world*", 13,
 *                                                 CMARK_OPT_DEFAULT);
 *
 * Streaming interface:
 *
 *     cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
 *     FILE *fp = fopen("myfile.md", "rb");
 *     while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
 *         cmark_parser_feed(parser, buffer, bytes);
 *         if (bytes < sizeof(buffer)) {
 *             break;
 *         }
 *     }
 *     document = cmark_parser_finish(parser);
 *     cmark_parser_free(parser);
 */
463
464/** Creates a new parser object.
465 */
466CMARK_EXPORT
467cmark_parser *cmark_parser_new(int options);
468
469/** Creates a new parser object with the given memory allocator
470 */
471CMARK_EXPORT
472cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
473
474/** Frees memory allocated for a parser object.
475 */
476CMARK_EXPORT
477void cmark_parser_free(cmark_parser *parser);
478
479/** Feeds a string of length 'len' to 'parser'.
480 */
481CMARK_EXPORT
482void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
483
484/** Finish parsing and return a pointer to a tree of nodes.
485 */
486CMARK_EXPORT
487cmark_node *cmark_parser_finish(cmark_parser *parser);
488
489/** Parse a CommonMark document in 'buffer' of length 'len'.
490 * Returns a pointer to a tree of nodes. The memory allocated for
491 * the node tree should be released using 'cmark_node_free'
492 * when it is no longer needed.
493 */
494CMARK_EXPORT
495cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
496
497/** Parse a CommonMark document in file 'f', returning a pointer to
498 * a tree of nodes. The memory allocated for the node tree should be
499 * released using 'cmark_node_free' when it is no longer needed.
500 */
501CMARK_EXPORT
502cmark_node *cmark_parse_file(FILE *f, int options);
503
504/**
505 * ## Rendering
506 */
507
508/** Render a 'node' tree as XML. It is the caller's responsibility
509 * to free the returned buffer.
510 */
511CMARK_EXPORT
512char *cmark_render_xml(cmark_node *root, int options);
513
514/** Render a 'node' tree as an HTML fragment. It is up to the user
515 * to add an appropriate header and footer. It is the caller's
516 * responsibility to free the returned buffer.
517 */
518CMARK_EXPORT
519char *cmark_render_html(cmark_node *root, int options);
520
521/** Render a 'node' tree as a groff man page, without the header.
522 * It is the caller's responsibility to free the returned buffer.
523 */
524CMARK_EXPORT
525char *cmark_render_man(cmark_node *root, int options, int width);
526
527/** Render a 'node' tree as a commonmark document.
528 * It is the caller's responsibility to free the returned buffer.
529 */
530CMARK_EXPORT
531char *cmark_render_commonmark(cmark_node *root, int options, int width);
532
533/** Render a 'node' tree as a LaTeX document.
534 * It is the caller's responsibility to free the returned buffer.
535 */
536CMARK_EXPORT
537char *cmark_render_latex(cmark_node *root, int options, int width);
538
/**
 * ## Options
 */

/** Default options.
 */
#define CMARK_OPT_DEFAULT 0

/**
 * ### Options affecting rendering
 */

/** Include a `data-sourcepos` attribute on all block elements.
 */
#define CMARK_OPT_SOURCEPOS (1 << 1)

/** Render `softbreak` elements as hard line breaks.
 */
#define CMARK_OPT_HARDBREAKS (1 << 2)

/** `CMARK_OPT_SAFE` is defined here for API compatibility,
 * but it no longer has any effect. "Safe" mode is now the default:
 * set `CMARK_OPT_UNSAFE` to disable it.
 */
#define CMARK_OPT_SAFE (1 << 3)

/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
 * `file:`, and `data:`, except for `image/png`, `image/gif`,
 * `image/jpeg`, or `image/webp` mime types).  By default,
 * raw HTML is replaced by a placeholder HTML comment. Unsafe
 * links are replaced by empty strings.
 */
#define CMARK_OPT_UNSAFE (1 << 17)

/** Render `softbreak` elements as spaces.
 */
#define CMARK_OPT_NOBREAKS (1 << 4)

/**
 * ### Options affecting parsing
 */

/** Legacy option (no effect).
 */
#define CMARK_OPT_NORMALIZE (1 << 8)

/** Validate UTF-8 in the input before parsing, replacing illegal
 * sequences with the replacement character U+FFFD.
 */
#define CMARK_OPT_VALIDATE_UTF8 (1 << 9)

/** Convert straight quotes to curly, --- to em dashes, -- to en dashes.
 */
#define CMARK_OPT_SMART (1 << 10)
593
594/**
595 * ## Version information
596 */
597
598/** The library version as integer for runtime checks. Also available as
599 * macro CMARK_VERSION for compile time checks.
600 *
601 * * Bits 16-23 contain the major version.
602 * * Bits 8-15 contain the minor version.
603 * * Bits 0-7 contain the patchlevel.
604 *
605 * In hexadecimal format, the number 0x010203 represents version 1.2.3.
606 */
607CMARK_EXPORT
608int cmark_version(void);
609
610/** The library version string for runtime checks. Also available as
611 * macro CMARK_VERSION_STRING for compile time checks.
612 */
613CMARK_EXPORT
614const char *cmark_version_string(void);
615
/** # AUTHORS
 *
 * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
 */
620
/* Unprefixed aliases for the node, list and delimiter constants.
 * Define CMARK_NO_SHORT_NAMES before including this header to leave
 * these names undefined. */
#ifndef CMARK_NO_SHORT_NAMES
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
#define NODE_LIST CMARK_NODE_LIST
#define NODE_ITEM CMARK_NODE_ITEM
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
#define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK
#define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
#define NODE_HEADING CMARK_NODE_HEADING
#define NODE_HEADER CMARK_NODE_HEADER
#define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK
#define NODE_HRULE CMARK_NODE_HRULE
#define NODE_TEXT CMARK_NODE_TEXT
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
#define NODE_CODE CMARK_NODE_CODE
#define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE
#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE
#define NODE_EMPH CMARK_NODE_EMPH
#define NODE_STRONG CMARK_NODE_STRONG
#define NODE_LINK CMARK_NODE_LINK
#define NODE_IMAGE CMARK_NODE_IMAGE
#define BULLET_LIST CMARK_BULLET_LIST
#define ORDERED_LIST CMARK_ORDERED_LIST
#define PERIOD_DELIM CMARK_PERIOD_DELIM
#define PAREN_DELIM CMARK_PAREN_DELIM
#endif
649
650#ifdef __cplusplus
651}
652#endif
653
654#endif
655