inlines.c source code [Aseprite/third_party/cmark/src/inlines.c]

1	#include <stdlib.h>
2	#include <string.h>
3	#include <stdio.h>
4
5	#include "cmark_ctype.h"
6	#include "config.h"
7	#include "node.h"
8	#include "parser.h"
9	#include "references.h"
10	#include "cmark.h"
11	#include "houdini.h"
12	#include "utf8.h"
13	#include "scanners.h"
14	#include "inlines.h"
15
// UTF-8 encodings of the typographic characters substituted by the "smart"
// punctuation handlers in this file.
static const char *EMDASH = "\xE2\x80\x94";           // U+2014
static const char *ENDASH = "\xE2\x80\x93";           // U+2013
static const char *ELLIPSES = "\xE2\x80\xA6";         // U+2026
static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C
static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D
static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019
23
// Macros for creating the inline node kinds that carry no literal value.
// Each one expands to a make_simple() call with the matching node type.
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)
29
// Longest backtick run that can open a code span. It bounds the size of the
// `backticks` position table in `subject`.
#define MAXBACKTICKS 1000
31
32	typedef struct delimiter {
33	struct delimiter *previous;
34	struct delimiter *next;
35	cmark_node *inl_text;
36	bufsize_t length;
37	unsigned char delim_char;
38	bool can_open;
39	bool can_close;
40	} delimiter;
41
42	typedef struct bracket {
43	struct bracket *previous;
44	struct delimiter *previous_delimiter;
45	cmark_node *inl_text;
46	bufsize_t position;
47	bool image;
48	bool active;
49	bool bracket_after;
50	} bracket;
51
// Bits for `subject.flags`. handle_pointy_brace() sets a bit once a scan for
// the corresponding construct ran past the end of the input, so later `<`
// characters do not repeat the scan (avoids quadratic behavior).
#define FLAG_SKIP_HTML_CDATA (1u << 0)
#define FLAG_SKIP_HTML_DECLARATION (1u << 1)
#define FLAG_SKIP_HTML_PI (1u << 2)
55
56	typedef struct {
57	cmark_mem *mem;
58	cmark_chunk input;
59	unsigned flags;
60	int line;
61	bufsize_t pos;
62	int block_offset;
63	int column_offset;
64	cmark_reference_map *refmap;
65	delimiter *last_delim;
66	bracket *last_bracket;
67	bufsize_t backticks[MAXBACKTICKS + `1`];
68	bool scanned_for_backticks;
69	} subject;
70
// Return true if `c` is a line feed or a carriage return.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  return (c == '\n' || c == '\r');
}
74
75	static delimiter S_insert_emph(subject subj, delimiter *opener,
76	delimiter *closer);
77
78	static int parse_inline(subject subj, cmark_node parent, int options);
79
80	static void subject_from_buf(cmark_mem mem, int* line_number, int block_offset, subject *e,
81	cmark_chunk chunk, cmark_reference_map refmap);
82	static bufsize_t subject_find_special_char(subject subj, int* options);
83
84	// Create an inline with a literal string value.
85	static CMARK_INLINE cmark_node make_literal(subject subj, cmark_node_type t,
86	int start_column, int end_column) {
87	cmark_node e = (cmark_node )subj->mem->calloc(`1`, sizeof(*e));
88	e->mem = subj->mem;
89	e->type = (uint16_t)t;
90	e->start_line = e->end_line = subj->line;
91	// columns are 1 based.
92	e->start_column = start_column + `1` + subj->column_offset + subj->block_offset;
93	e->end_column = end_column + `1` + subj->column_offset + subj->block_offset;
94	return e;
95	}
96
97	// Create an inline with no value.
98	static CMARK_INLINE cmark_node make_simple(cmark_mem mem, cmark_node_type t) {
99	cmark_node e = (cmark_node )mem->calloc(`1`, sizeof(*e));
100	e->mem = mem;
101	e->type = t;
102	return e;
103	}
104
105	static cmark_node make_str(subject subj, int sc, int ec, cmark_chunk s) {
106	cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec);
107	e->data = (unsigned char *)subj->mem->realloc(NULL, s.len + `1`);
108	if (s.data != NULL) {
109	memcpy(e->data, s.data, s.len);
110	}
111	e->data[s.len] = `0`;
112	e->len = s.len;
113	return e;
114	}
115
116	static cmark_node make_str_from_buf(subject subj, int sc, int ec,
117	cmark_strbuf *buf) {
118	cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec);
119	e->len = buf->size;
120	e->data = cmark_strbuf_detach(buf);
121	return e;
122	}
123
124	// Like make_str, but parses entities.
125	static cmark_node make_str_with_entities(subject subj,
126	int start_column, int end_column,
127	cmark_chunk *content) {
128	cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
129
130	if (houdini_unescape_html(&unescaped, content->data, content->len)) {
131	return make_str_from_buf(subj, start_column, end_column, &unescaped);
132	} else {
133	return make_str(subj, start_column, end_column, *content);
134	}
135	}
136
137	// Like cmark_node_append_child but without costly sanity checks.
138	// Assumes that child was newly created.
139	static void append_child(cmark_node node, cmark_node child) {
140	cmark_node *old_last_child = node->last_child;
141
142	child->next = NULL;
143	child->prev = old_last_child;
144	child->parent = node;
145	node->last_child = child;
146
147	if (old_last_child) {
148	old_last_child->next = child;
149	} else {
150	// Also set first_child if node previously had no children.
151	node->first_child = child;
152	}
153	}
154
155	// Duplicate a chunk by creating a copy of the buffer not by reusing the
156	// buffer like cmark_chunk_dup does.
157	static unsigned char cmark_strdup(cmark_mem mem, unsigned char *src) {
158	if (src == NULL) {
159	return NULL;
160	}
161	size_t len = strlen((char *)src);
162	unsigned char data = (unsigned* char *)mem->realloc(NULL, len + `1`);
163	memcpy(data, src, len + `1`);
164	return data;
165	}
166
167	static unsigned char cmark_clean_autolink(cmark_mem mem, cmark_chunk *url,
168	int is_email) {
169	cmark_strbuf buf = CMARK_BUF_INIT(mem);
170
171	cmark_chunk_trim(url);
172
173	if (is_email)
174	cmark_strbuf_puts(&buf, "mailto:");
175
176	houdini_unescape_html_f(&buf, url->data, url->len);
177	return cmark_strbuf_detach(&buf);
178	}
179
180	static CMARK_INLINE cmark_node make_autolink(subject subj,
181	int start_column, int end_column,
182	cmark_chunk url, int is_email) {
183	cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
184	link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
185	link->as.link.title = NULL;
186	link->start_line = link->end_line = subj->line;
187	link->start_column = start_column + `1`;
188	link->end_column = end_column + `1`;
189	append_child(link, make_str_with_entities(subj, start_column + `1`, end_column - `1`, &url));
190	return link;
191	}
192
193	static void subject_from_buf(cmark_mem mem, int* line_number, int block_offset, subject *e,
194	cmark_chunk chunk, cmark_reference_map refmap) {
195	int i;
196	e->mem = mem;
197	e->input = *chunk;
198	e->flags = `0`;
199	e->line = line_number;
200	e->pos = `0`;
201	e->block_offset = block_offset;
202	e->column_offset = `0`;
203	e->refmap = refmap;
204	e->last_delim = NULL;
205	e->last_bracket = NULL;
206	for (i = `0`; i <= MAXBACKTICKS; i++) {
207	e->backticks[i] = `0`;
208	}
209	e->scanned_for_backticks = false;
210	}
211
// Predicate for take_while(): non-zero if `c` is a backtick.
static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
213
214	static CMARK_INLINE unsigned char peek_char(subject *subj) {
215	// NULL bytes should have been stripped out by now. If they're
216	// present, it's a programming error:
217	assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == `0`));
218	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : `0`;
219	}
220
221	static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) {
222	return subj->input.data[pos];
223	}
224
225	// Return true if there are more characters in the subject.
226	static CMARK_INLINE int is_eof(subject *subj) {
227	return (subj->pos >= subj->input.len);
228	}
229
// Advance the subject by one byte.  Doesn't check for eof.
// The expansion is fully parenthesized so it is safe inside expressions.
#define advance(subj) ((subj)->pos += 1)
232
233	static CMARK_INLINE bool skip_spaces(subject *subj) {
234	bool skipped = false;
235	while (peek_char(subj) == `' '` \|\| peek_char(subj) == `'\t'`) {
236	advance(subj);
237	skipped = true;
238	}
239	return skipped;
240	}
241
242	static CMARK_INLINE bool skip_line_end(subject *subj) {
243	bool seen_line_end_char = false;
244	if (peek_char(subj) == `'\r'`) {
245	advance(subj);
246	seen_line_end_char = true;
247	}
248	if (peek_char(subj) == `'\n'`) {
249	advance(subj);
250	seen_line_end_char = true;
251	}
252	return seen_line_end_char \|\| is_eof(subj);
253	}
254
255	// Take characters while a predicate holds, and return a string.
256	static CMARK_INLINE cmark_chunk take_while(subject subj, int* (f)(int*)) {
257	unsigned char c;
258	bufsize_t startpos = subj->pos;
259	bufsize_t len = `0`;
260
261	while ((c = peek_char(subj)) && (*f)(c)) {
262	advance(subj);
263	len++;
264	}
265
266	return cmark_chunk_dup(&subj->input, startpos, len);
267	}
268
269	// Return the number of newlines in a given span of text in a subject. If
270	// the number is greater than zero, also return the number of characters
271	// between the last newline and the end of the span in `since_newline`.
272	static int count_newlines(subject subj, bufsize_t from, bufsize_t len, int* *since_newline) {
273	int nls = `0`;
274	int since_nl = `0`;
275
276	while (len--) {
277	if (subj->input.data[from++] == `'\n'`) {
278	++nls;
279	since_nl = `0`;
280	} else {
281	++since_nl;
282	}
283	}
284
285	if (!nls)
286	return `0`;
287
288	*since_newline = since_nl;
289	return nls;
290	}
291
292	// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
293	// `column_offset` according to the number of newlines in a just-matched span
294	// of text in `subj`.
295	static void adjust_subj_node_newlines(subject subj, cmark_node node, int matchlen, int extra, int options) {
296	if (!(options & CMARK_OPT_SOURCEPOS)) {
297	return;
298	}
299
300	int since_newline;
301	int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
302	if (newlines) {
303	subj->line += newlines;
304	node->end_line += newlines;
305	node->end_column = since_newline;
306	subj->column_offset = -subj->pos + since_newline + extra;
307	}
308	}
309
310	// Try to process a backtick code span that began with a
311	// span of ticks of length openticklength length (already
312	// parsed). Return 0 if you don't find matching closing
313	// backticks, otherwise return the position in the subject
314	// after the closing backticks.
315	static bufsize_t scan_to_closing_backticks(subject *subj,
316	bufsize_t openticklength) {
317
318	bool found = false;
319	if (openticklength > MAXBACKTICKS) {
320	// we limit backtick string length because of the array subj->backticks:
321	return `0`;
322	}
323	if (subj->scanned_for_backticks &&
324	subj->backticks[openticklength] <= subj->pos) {
325	// return if we already know there's no closer
326	return `0`;
327	}
328	while (!found) {
329	// read non backticks
330	unsigned char c;
331	while ((c = peek_char(subj)) && c != '`') {
332	advance(subj);
333	}
334	if (is_eof(subj)) {
335	break;
336	}
337	bufsize_t numticks = `0`;
338	while (peek_char(subj) == '`') {
339	advance(subj);
340	numticks++;
341	}
342	// store position of ender
343	if (numticks <= MAXBACKTICKS) {
344	subj->backticks[numticks] = subj->pos - numticks;
345	}
346	if (numticks == openticklength) {
347	return (subj->pos);
348	}
349	}
350	// got through whole input without finding closer
351	subj->scanned_for_backticks = true;
352	return `0`;
353	}
354
355	// Destructively modify string, converting newlines to
356	// spaces, then removing a single leading + trailing space,
357	// unless the code span consists entirely of space characters.
358	static void S_normalize_code(cmark_strbuf *s) {
359	bufsize_t r, w;
360	bool contains_nonspace = false;
361
362	for (r = `0`, w = `0`; r < s->size; ++r) {
363	switch (s->ptr[r]) {
364	case `'\r'`:
365	if (s->ptr[r + `1`] != `'\n'`) {
366	s->ptr[w++] = `' '`;
367	}
368	break;
369	case `'\n'`:
370	s->ptr[w++] = `' '`;
371	break;
372	default:
373	s->ptr[w++] = s->ptr[r];
374	}
375	if (s->ptr[r] != `' '`) {
376	contains_nonspace = true;
377	}
378	}
379
380	// begins and ends with space?
381	if (contains_nonspace &&
382	s->ptr[`0`] == `' '` && s->ptr[w - `1`] == `' '`) {
383	cmark_strbuf_drop(s, `1`);
384	cmark_strbuf_truncate(s, w - `2`);
385	} else {
386	cmark_strbuf_truncate(s, w);
387	}
388
389	}
390
391
392	// Parse backtick code section or raw backticks, return an inline.
393	// Assumes that the subject has a backtick at the current position.
394	static cmark_node handle_backticks(subject subj, int options) {
395	cmark_chunk openticks = take_while(subj, isbacktick);
396	bufsize_t startpos = subj->pos;
397	bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
398
399	if (endpos == `0`) { // not found
400	subj->pos = startpos; // rewind
401	return make_str(subj, subj->pos, subj->pos, openticks);
402	} else {
403	cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
404
405	cmark_strbuf_set(&buf, subj->input.data + startpos,
406	endpos - startpos - openticks.len);
407	S_normalize_code(&buf);
408
409	cmark_node *node = make_literal(subj, CMARK_NODE_CODE, startpos,
410	endpos - openticks.len - `1`);
411	node->len = buf.size;
412	node->data = cmark_strbuf_detach(&buf);
413	adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
414	return node;
415	}
416	}
417
418
419	// Scan , , or * and return number scanned, or 0.*
420	// Advances position.
421	static int scan_delims(subject subj, unsigned* char c, bool *can_open,
422	bool *can_close) {
423	int numdelims = `0`;
424	bufsize_t before_char_pos;
425	int32_t after_char = `0`;
426	int32_t before_char = `0`;
427	int len;
428	bool left_flanking, right_flanking;
429
430	if (subj->pos == `0`) {
431	before_char = `10`;
432	} else {
433	before_char_pos = subj->pos - `1`;
434	// walk back to the beginning of the UTF_8 sequence:
435	while (peek_at(subj, before_char_pos) >> `6` == `2` && before_char_pos > `0`) {
436	before_char_pos -= `1`;
437	}
438	len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
439	subj->pos - before_char_pos, &before_char);
440	if (len == -`1`) {
441	before_char = `10`;
442	}
443	}
444
445	if (c == `'\''` \|\| c == `'"'`) {
446	numdelims++;
447	advance(subj); // limit to 1 delim for quotes
448	} else {
449	while (peek_char(subj) == c) {
450	numdelims++;
451	advance(subj);
452	}
453	}
454
455	len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
456	subj->input.len - subj->pos, &after_char);
457	if (len == -`1`) {
458	after_char = `10`;
459	}
460	left_flanking = numdelims > `0` && !cmark_utf8proc_is_space(after_char) &&
461	(!cmark_utf8proc_is_punctuation(after_char) \|\|
462	cmark_utf8proc_is_space(before_char) \|\|
463	cmark_utf8proc_is_punctuation(before_char));
464	right_flanking = numdelims > `0` && !cmark_utf8proc_is_space(before_char) &&
465	(!cmark_utf8proc_is_punctuation(before_char) \|\|
466	cmark_utf8proc_is_space(after_char) \|\|
467	cmark_utf8proc_is_punctuation(after_char));
468	if (c == `'_'`) {
469	*can_open = left_flanking &&
470	(!right_flanking \|\| cmark_utf8proc_is_punctuation(before_char));
471	*can_close = right_flanking &&
472	(!left_flanking \|\| cmark_utf8proc_is_punctuation(after_char));
473	} else if (c == `'\''` \|\| c == `'"'`) {
474	*can_open = left_flanking &&
475	(!right_flanking \|\| before_char == `'('` \|\| before_char == `'['`) &&
476	before_char != `']'` && before_char != `')'`;
477	*can_close = right_flanking;
478	} else {
479	*can_open = left_flanking;
480	*can_close = right_flanking;
481	}
482	return numdelims;
483	}
484
/*
static void print_delimiters(subject *subj)
{
  delimiter *delim;
  delim = subj->last_delim;
  while (delim != NULL) {
    printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
           (void*)delim, delim->delim_char,
           delim->can_open, delim->can_close,
           (void*)delim->next, (void*)delim->previous);
    delim = delim->previous;
  }
}
*/
499
500	static void remove_delimiter(subject subj, delimiter delim) {
501	if (delim == NULL)
502	return;
503	if (delim->next == NULL) {
504	// end of list:
505	assert(delim == subj->last_delim);
506	subj->last_delim = delim->previous;
507	} else {
508	delim->next->previous = delim->previous;
509	}
510	if (delim->previous != NULL) {
511	delim->previous->next = delim->next;
512	}
513	subj->mem->free(delim);
514	}
515
516	static void pop_bracket(subject *subj) {
517	bracket *b;
518	if (subj->last_bracket == NULL)
519	return;
520	b = subj->last_bracket;
521	subj->last_bracket = subj->last_bracket->previous;
522	subj->mem->free(b);
523	}
524
525	static void push_delimiter(subject subj, unsigned* char c, bool can_open,
526	bool can_close, cmark_node *inl_text) {
527	delimiter delim = (delimiter )subj->mem->calloc(`1`, sizeof(delimiter));
528	delim->delim_char = c;
529	delim->can_open = can_open;
530	delim->can_close = can_close;
531	delim->inl_text = inl_text;
532	delim->length = inl_text->len;
533	delim->previous = subj->last_delim;
534	delim->next = NULL;
535	if (delim->previous != NULL) {
536	delim->previous->next = delim;
537	}
538	subj->last_delim = delim;
539	}
540
541	static void push_bracket(subject subj, bool image, cmark_node inl_text) {
542	bracket b = (bracket )subj->mem->calloc(`1`, sizeof(bracket));
543	if (subj->last_bracket != NULL) {
544	subj->last_bracket->bracket_after = true;
545	}
546	b->image = image;
547	b->active = true;
548	b->inl_text = inl_text;
549	b->previous = subj->last_bracket;
550	b->previous_delimiter = subj->last_delim;
551	b->position = subj->pos;
552	b->bracket_after = false;
553	subj->last_bracket = b;
554	}
555
556	// Assumes the subject has a c at the current position.
557	static cmark_node handle_delim(subject subj, unsigned char c, bool smart) {
558	bufsize_t numdelims;
559	cmark_node *inl_text;
560	bool can_open, can_close;
561	cmark_chunk contents;
562
563	numdelims = scan_delims(subj, c, &can_open, &can_close);
564
565	if (c == `'\''` && smart) {
566	contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
567	} else if (c == `'"'` && smart) {
568	contents =
569	cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
570	} else {
571	contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
572	}
573
574	inl_text = make_str(subj, subj->pos - numdelims, subj->pos - `1`, contents);
575
576	if ((can_open \|\| can_close) && (!(c == `'\''` \|\| c == `'"'`) \|\| smart)) {
577	push_delimiter(subj, c, can_open, can_close, inl_text);
578	}
579
580	return inl_text;
581	}
582
583	// Assumes we have a hyphen at the current position.
584	static cmark_node handle_hyphen(subject subj, bool smart) {
585	int startpos = subj->pos;
586
587	advance(subj);
588
589	if (!smart \|\| peek_char(subj) != `'-'`) {
590	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("-"));
591	}
592
593	while (smart && peek_char(subj) == `'-'`) {
594	advance(subj);
595	}
596
597	int numhyphens = subj->pos - startpos;
598	int en_count = `0`;
599	int em_count = `0`;
600	int i;
601	cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
602
603	if (numhyphens % `3` == `0`) { // if divisible by 3, use all em dashes
604	em_count = numhyphens / `3`;
605	} else if (numhyphens % `2` == `0`) { // if divisible by 2, use all en dashes
606	en_count = numhyphens / `2`;
607	} else if (numhyphens % `3` == `2`) { // use one en dash at end
608	en_count = `1`;
609	em_count = (numhyphens - `2`) / `3`;
610	} else { // use two en dashes at the end
611	en_count = `2`;
612	em_count = (numhyphens - `4`) / `3`;
613	}
614
615	for (i = em_count; i > `0`; i--) {
616	cmark_strbuf_puts(&buf, EMDASH);
617	}
618
619	for (i = en_count; i > `0`; i--) {
620	cmark_strbuf_puts(&buf, ENDASH);
621	}
622
623	return make_str_from_buf(subj, startpos, subj->pos - `1`, &buf);
624	}
625
626	// Assumes we have a period at the current position.
627	static cmark_node handle_period(subject subj, bool smart) {
628	advance(subj);
629	if (smart && peek_char(subj) == `'.'`) {
630	advance(subj);
631	if (peek_char(subj) == `'.'`) {
632	advance(subj);
633	return make_str(subj, subj->pos - `3`, subj->pos - `1`, cmark_chunk_literal(ELLIPSES));
634	} else {
635	return make_str(subj, subj->pos - `2`, subj->pos - `1`, cmark_chunk_literal(".."));
636	}
637	} else {
638	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("."));
639	}
640	}
641
642	static void process_emphasis(subject subj, delimiter stack_bottom) {
643	delimiter *closer = subj->last_delim;
644	delimiter *opener;
645	delimiter *old_closer;
646	delimiter *new_stack_bottom;
647	bool opener_found;
648
649	int openers_bottom_index = `0`;
650	delimiter *openers_bottom[`6`] = {stack_bottom, stack_bottom, stack_bottom,
651	stack_bottom};
652
653	// move back to first relevant delim.
654	while (closer != NULL && closer->previous != stack_bottom) {
655	closer = closer->previous;
656	}
657
658	// now move forward, looking for closers, and handling each
659	while (closer != NULL) {
660	if (closer->can_close) {
661	switch (closer->delim_char) {
662	case `'"'`:
663	openers_bottom_index = `0`;
664	break;
665	case `'\''`:
666	openers_bottom_index = `1`;
667	break;
668	case `'_'`:
669	openers_bottom_index = `2`;
670	break;
671	case `'*'`:
672	openers_bottom_index = `3`;
673	break;
674	default:
675	assert(false);
676	}
677
678	// Now look backwards for first matching opener:
679	opener = closer->previous;
680	opener_found = false;
681	new_stack_bottom = closer->previous;
682
683	while (opener != NULL && opener != openers_bottom[openers_bottom_index]) {
684	if (opener->can_open && opener->delim_char == closer->delim_char) {
685	// interior closer of size 2 can't match opener of size 1
686	// or of size 1 can't match 2
687	if (!(closer->can_open \|\| opener->can_close) \|\|
688	closer->length % `3` == `0` \|\|
689	(opener->length + closer->length) % `3` != `0`) {
690	opener_found = true;
691	break;
692	} else {
693	// If we failed to match because of the mod-3 rule,
694	// then we want to make sure the stack bottom extends
695	// back to here at least, since a later closer might
696	// match this same opener... (see #383)
697	new_stack_bottom = opener->previous;
698	}
699	}
700	opener = opener->previous;
701	}
702	old_closer = closer;
703	if (closer->delim_char == `'*'` \|\| closer->delim_char == `'_'`) {
704	if (opener_found) {
705	closer = S_insert_emph(subj, opener, closer);
706	} else {
707	closer = closer->next;
708	}
709	} else if (closer->delim_char == `'\''`) {
710	cmark_node_set_literal(closer->inl_text, RIGHTSINGLEQUOTE);
711	if (opener_found) {
712	cmark_node_set_literal(opener->inl_text, LEFTSINGLEQUOTE);
713	}
714	closer = closer->next;
715	} else if (closer->delim_char == `'"'`) {
716	cmark_node_set_literal(closer->inl_text, RIGHTDOUBLEQUOTE);
717	if (opener_found) {
718	cmark_node_set_literal(opener->inl_text, LEFTDOUBLEQUOTE);
719	}
720	closer = closer->next;
721	}
722	if (!opener_found) {
723	// set lower bound for future searches for openers (see #383).
724	openers_bottom[openers_bottom_index] = new_stack_bottom;
725	if (!old_closer->can_open) {
726	// we can remove a closer that can't be an
727	// opener, once we've seen there's no
728	// matching opener:
729	remove_delimiter(subj, old_closer);
730	}
731	}
732	} else {
733	closer = closer->next;
734	}
735	}
736	// free all delimiters in list until stack_bottom:
737	while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
738	remove_delimiter(subj, subj->last_delim);
739	}
740	}
741
742	static delimiter S_insert_emph(subject subj, delimiter *opener,
743	delimiter *closer) {
744	delimiter delim, tmp_delim;
745	bufsize_t use_delims;
746	cmark_node *opener_inl = opener->inl_text;
747	cmark_node *closer_inl = closer->inl_text;
748	bufsize_t opener_num_chars = opener_inl->len;
749	bufsize_t closer_num_chars = closer_inl->len;
750	cmark_node tmp, tmpnext, *emph;
751
752	// calculate the actual number of characters used from this closer
753	use_delims = (closer_num_chars >= `2` && opener_num_chars >= `2`) ? `2` : `1`;
754
755	// remove used characters from associated inlines.
756	opener_num_chars -= use_delims;
757	closer_num_chars -= use_delims;
758	opener_inl->len = opener_num_chars;
759	opener_inl->data[opener_num_chars] = `0`;
760	closer_inl->len = closer_num_chars;
761	closer_inl->data[closer_num_chars] = `0`;
762
763	// free delimiters between opener and closer
764	delim = closer->previous;
765	while (delim != NULL && delim != opener) {
766	tmp_delim = delim->previous;
767	remove_delimiter(subj, delim);
768	delim = tmp_delim;
769	}
770
771	// create new emph or strong, and splice it in to our inlines
772	// between the opener and closer
773	emph = use_delims == `1` ? make_emph(subj->mem) : make_strong(subj->mem);
774
775	tmp = opener_inl->next;
776	while (tmp && tmp != closer_inl) {
777	tmpnext = tmp->next;
778	cmark_node_unlink(tmp);
779	append_child(emph, tmp);
780	tmp = tmpnext;
781	}
782	cmark_node_insert_after(opener_inl, emph);
783
784	emph->start_line = opener_inl->start_line;
785	emph->end_line = closer_inl->end_line;
786	emph->start_column = opener_inl->start_column;
787	emph->end_column = closer_inl->end_column;
788
789	// if opener has 0 characters, remove it and its associated inline
790	if (opener_num_chars == `0`) {
791	cmark_node_free(opener_inl);
792	remove_delimiter(subj, opener);
793	}
794
795	// if closer has 0 characters, remove it and its associated inline
796	if (closer_num_chars == `0`) {
797	// remove empty closer inline
798	cmark_node_free(closer_inl);
799	// remove closer from list
800	tmp_delim = closer->next;
801	remove_delimiter(subj, closer);
802	closer = tmp_delim;
803	}
804
805	return closer;
806	}
807
808	// Parse backslash-escape or just a backslash, returning an inline.
809	static cmark_node handle_backslash(subject subj) {
810	advance(subj);
811	unsigned char nextchar = peek_char(subj);
812	if (cmark_ispunct(
813	nextchar)) { // only ascii symbols and newline can be escaped
814	advance(subj);
815	return make_str(subj, subj->pos - `2`, subj->pos - `1`, cmark_chunk_dup(&subj->input, subj->pos - `1`, `1`));
816	} else if (!is_eof(subj) && skip_line_end(subj)) {
817	return make_linebreak(subj->mem);
818	} else {
819	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("\\"));
820	}
821	}
822
823	// Parse an entity or a regular "&" string.
824	// Assumes the subject has an '&' character at the current position.
825	static cmark_node handle_entity(subject subj) {
826	cmark_strbuf ent = CMARK_BUF_INIT(subj->mem);
827	bufsize_t len;
828
829	advance(subj);
830
831	len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
832	subj->input.len - subj->pos);
833
834	if (len <= `0`)
835	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("&"));
836
837	subj->pos += len;
838	return make_str_from_buf(subj, subj->pos - `1` - len, subj->pos - `1`, &ent);
839	}
840
841	// Clean a URL: remove surrounding whitespace, and remove \ that escape
842	// punctuation.
843	unsigned char cmark_clean_url(cmark_mem mem, cmark_chunk *url) {
844	cmark_strbuf buf = CMARK_BUF_INIT(mem);
845
846	cmark_chunk_trim(url);
847
848	houdini_unescape_html_f(&buf, url->data, url->len);
849
850	cmark_strbuf_unescape(&buf);
851	return cmark_strbuf_detach(&buf);
852	}
853
854	unsigned char cmark_clean_title(cmark_mem mem, cmark_chunk *title) {
855	cmark_strbuf buf = CMARK_BUF_INIT(mem);
856	unsigned char first, last;
857
858	if (title->len == `0`) {
859	return NULL;
860	}
861
862	first = title->data[`0`];
863	last = title->data[title->len - `1`];
864
865	// remove surrounding quotes if any:
866	if ((first == `'\''` && last == `'\''`) \|\| (first == `'('` && last == `')'`) \|\|
867	(first == `'"'` && last == `'"'`)) {
868	houdini_unescape_html_f(&buf, title->data + `1`, title->len - `2`);
869	} else {
870	houdini_unescape_html_f(&buf, title->data, title->len);
871	}
872
873	cmark_strbuf_unescape(&buf);
874	return cmark_strbuf_detach(&buf);
875	}
876
877	// Parse an autolink or HTML tag.
878	// Assumes the subject has a '<' character at the current position.
879	static cmark_node handle_pointy_brace(subject subj, int options) {
880	bufsize_t matchlen = `0`;
881	cmark_chunk contents;
882
883	advance(subj); // advance past first <
884
885	// first try to match a URL autolink
886	matchlen = scan_autolink_uri(&subj->input, subj->pos);
887	if (matchlen > `0`) {
888	contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - `1`);
889	subj->pos += matchlen;
890
891	return make_autolink(subj, subj->pos - `1` - matchlen, subj->pos - `1`, contents, `0`);
892	}
893
894	// next try to match an email autolink
895	matchlen = scan_autolink_email(&subj->input, subj->pos);
896	if (matchlen > `0`) {
897	contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - `1`);
898	subj->pos += matchlen;
899
900	return make_autolink(subj, subj->pos - `1` - matchlen, subj->pos - `1`, contents, `1`);
901	}
902
903	// finally, try to match an html tag
904	if (subj->pos + `2` <= subj->input.len) {
905	int c = subj->input.data[subj->pos];
906	if (c == `'!'`) {
907	c = subj->input.data[subj->pos+`1`];
908	if (c == `'-'`) {
909	matchlen = scan_html_comment(&subj->input, subj->pos + `2`);
910	if (matchlen > `0`)
911	matchlen += `2`; // prefix "<-"
912	} else if (c == `'['`) {
913	if ((subj->flags & FLAG_SKIP_HTML_CDATA) == `0`) {
914	matchlen = scan_html_cdata(&subj->input, subj->pos + `2`);
915	if (matchlen > `0`) {
916	// The regex doesn't require the final "]]>". But if we're not at
917	// the end of input, it must come after the match. Otherwise,
918	// disable subsequent scans to avoid quadratic behavior.
919	matchlen += `5`; // prefix "![", suffix "]]>"
920	if (subj->pos + matchlen > subj->input.len) {
921	subj->flags \|= FLAG_SKIP_HTML_CDATA;
922	matchlen = `0`;
923	}
924	}
925	}
926	} else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == `0`) {
927	matchlen = scan_html_declaration(&subj->input, subj->pos + `1`);
928	if (matchlen > `0`) {
929	matchlen += `2`; // prefix "!", suffix ">"
930	if (subj->pos + matchlen > subj->input.len) {
931	subj->flags \|= FLAG_SKIP_HTML_DECLARATION;
932	matchlen = `0`;
933	}
934	}
935	}
936	} else if (c == `'?'`) {
937	if ((subj->flags & FLAG_SKIP_HTML_PI) == `0`) {
938	// Note that we allow an empty match.
939	matchlen = scan_html_pi(&subj->input, subj->pos + `1`);
940	matchlen += `3`; // prefix "?", suffix "?>"
941	if (subj->pos + matchlen > subj->input.len) {
942	subj->flags \|= FLAG_SKIP_HTML_PI;
943	matchlen = `0`;
944	}
945	}
946	} else {
947	matchlen = scan_html_tag(&subj->input, subj->pos);
948	}
949	}
950	if (matchlen > `0`) {
951	const unsigned char *src = subj->input.data + subj->pos - `1`;
952	bufsize_t len = matchlen + `1`;
953	subj->pos += matchlen;
954	cmark_node *node = make_literal(subj, CMARK_NODE_HTML_INLINE,
955	subj->pos - matchlen - `1`, subj->pos - `1`);
956	node->data = (unsigned char *)subj->mem->realloc(NULL, len + `1`);
957	memcpy(node->data, src, len);
958	node->data[len] = `0`;
959	node->len = len;
960	adjust_subj_node_newlines(subj, node, matchlen, `1`, options);
961	return node;
962	}
963
964	// if nothing matches, just return the opening <:
965	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("<"));
966	}
967
968	// Parse a link label. Returns 1 if successful.
969	// Note: unescaped brackets are not allowed in labels.
970	// The label begins with `[` and ends with the first `]` character
971	// encountered. Backticks in labels do not start code spans.
972	static int link_label(subject subj, cmark_chunk raw_label) {
973	bufsize_t startpos = subj->pos;
974	int length = `0`;
975	unsigned char c;
976
977	// advance past [
978	if (peek_char(subj) == `'['`) {
979	advance(subj);
980	} else {
981	return `0`;
982	}
983
984	while ((c = peek_char(subj)) && c != `'['` && c != `']'`) {
985	if (c == `'\\'`) {
986	advance(subj);
987	length++;
988	if (cmark_ispunct(peek_char(subj))) {
989	advance(subj);
990	length++;
991	}
992	} else {
993	advance(subj);
994	length++;
995	}
996	if (length > MAX_LINK_LABEL_LENGTH) {
997	goto noMatch;
998	}
999	}
1000
1001	if (c == `']'`) { // match found
1002	*raw_label =
1003	cmark_chunk_dup(&subj->input, startpos + `1`, subj->pos - (startpos + `1`));
1004	cmark_chunk_trim(raw_label);
1005	advance(subj); // advance past ]
1006	return `1`;
1007	}
1008
1009	noMatch:
1010	subj->pos = startpos; // rewind
1011	return `0`;
1012	}
1013
1014	static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
1015	cmark_chunk *output) {
1016	bufsize_t i = offset;
1017	size_t nb_p = `0`;
1018
1019	while (i < input->len) {
1020	if (input->data[i] == `'\\'` &&
1021	i + `1` < input-> len &&
1022	cmark_ispunct(input->data[i+`1`]))
1023	i += `2`;
1024	else if (input->data[i] == `'('`) {
1025	++nb_p;
1026	++i;
1027	if (nb_p > `32`)
1028	return -`1`;
1029	} else if (input->data[i] == `')'`) {
1030	if (nb_p == `0`)
1031	break;
1032	--nb_p;
1033	++i;
1034	} else if (cmark_isspace(input->data[i])) {
1035	if (i == offset) {
1036	return -`1`;
1037	}
1038	break;
1039	} else {
1040	++i;
1041	}
1042	}
1043
1044	if (i >= input->len \|\| nb_p != `0`)
1045	return -`1`;
1046
1047	{
1048	cmark_chunk result = {input->data + offset, i - offset};
1049	*output = result;
1050	}
1051	return i - offset;
1052	}
1053
1054	static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
1055	cmark_chunk *output) {
1056	bufsize_t i = offset;
1057
1058	if (i < input->len && input->data[i] == `'<'`) {
1059	++i;
1060	while (i < input->len) {
1061	if (input->data[i] == `'>'`) {
1062	++i;
1063	break;
1064	} else if (input->data[i] == `'\\'`)
1065	i += `2`;
1066	else if (input->data[i] == `'\n'` \|\| input->data[i] == `'<'`)
1067	return -`1`;
1068	else
1069	++i;
1070	}
1071	} else {
1072	return manual_scan_link_url_2(input, offset, output);
1073	}
1074
1075	if (i >= input->len)
1076	return -`1`;
1077
1078	{
1079	cmark_chunk result = {input->data + offset + `1`, i - `2` - offset};
1080	*output = result;
1081	}
1082	return i - offset;
1083	}
1084
1085	// Return a link, an image, or a literal close bracket.
1086	static cmark_node handle_close_bracket(subject subj) {
1087	bufsize_t initial_pos, after_link_text_pos;
1088	bufsize_t endurl, starttitle, endtitle, endall;
1089	bufsize_t sps, n;
1090	cmark_reference *ref = NULL;
1091	cmark_chunk url_chunk, title_chunk;
1092	unsigned char url, title;
1093	bracket *opener;
1094	cmark_node *inl;
1095	cmark_chunk raw_label;
1096	int found_label;
1097	cmark_node tmp, tmpnext;
1098	bool is_image;
1099
1100	advance(subj); // advance past ]
1101	initial_pos = subj->pos;
1102
1103	// get last [ or ![
1104	opener = subj->last_bracket;
1105
1106	if (opener == NULL) {
1107	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("]"));
1108	}
1109
1110	if (!opener->active) {
1111	// take delimiter off stack
1112	pop_bracket(subj);
1113	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("]"));
1114	}
1115
1116	// If we got here, we matched a potential link/image text.
1117	// Now we check to see if it's a link/image.
1118	is_image = opener->image;
1119
1120	after_link_text_pos = subj->pos;
1121
1122	// First, look for an inline link.
1123	if (peek_char(subj) == `'('` &&
1124	((sps = scan_spacechars(&subj->input, subj->pos + `1`)) > -`1`) &&
1125	((n = manual_scan_link_url(&subj->input, subj->pos + `1` + sps,
1126	&url_chunk)) > -`1`)) {
1127
1128	// try to parse an explicit link:
1129	endurl = subj->pos + `1` + sps + n;
1130	starttitle = endurl + scan_spacechars(&subj->input, endurl);
1131
1132	// ensure there are spaces btw url and title
1133	endtitle = (starttitle == endurl)
1134	? starttitle
1135	: starttitle + scan_link_title(&subj->input, starttitle);
1136
1137	endall = endtitle + scan_spacechars(&subj->input, endtitle);
1138
1139	if (peek_at(subj, endall) == `')'`) {
1140	subj->pos = endall + `1`;
1141
1142	title_chunk =
1143	cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
1144	url = cmark_clean_url(subj->mem, &url_chunk);
1145	title = cmark_clean_title(subj->mem, &title_chunk);
1146	cmark_chunk_free(&url_chunk);
1147	cmark_chunk_free(&title_chunk);
1148	goto match;
1149
1150	} else {
1151	// it could still be a shortcut reference link
1152	subj->pos = after_link_text_pos;
1153	}
1154	}
1155
1156	// Next, look for a following [link label] that matches in refmap.
1157	// skip spaces
1158	raw_label = cmark_chunk_literal("");
1159	found_label = link_label(subj, &raw_label);
1160	if (!found_label) {
1161	// If we have a shortcut reference link, back up
1162	// to before the spacse we skipped.
1163	subj->pos = initial_pos;
1164	}
1165
1166	if ((!found_label \|\| raw_label.len == `0`) && !opener->bracket_after) {
1167	cmark_chunk_free(&raw_label);
1168	raw_label = cmark_chunk_dup(&subj->input, opener->position,
1169	initial_pos - opener->position - `1`);
1170	found_label = true;
1171	}
1172
1173	if (found_label) {
1174	ref = cmark_reference_lookup(subj->refmap, &raw_label);
1175	cmark_chunk_free(&raw_label);
1176	}
1177
1178	if (ref != NULL) { // found
1179	url = cmark_strdup(subj->mem, ref->url);
1180	title = cmark_strdup(subj->mem, ref->title);
1181	goto match;
1182	} else {
1183	goto noMatch;
1184	}
1185
1186	noMatch:
1187	// If we fall through to here, it means we didn't match a link:
1188	pop_bracket(subj); // remove this opener from delimiter list
1189	subj->pos = initial_pos;
1190	return make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("]"));
1191
1192	match:
1193	inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
1194	inl->as.link.url = url;
1195	inl->as.link.title = title;
1196	inl->start_line = inl->end_line = subj->line;
1197	inl->start_column = opener->inl_text->start_column;
1198	inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1199	cmark_node_insert_before(opener->inl_text, inl);
1200	// Add link text:
1201	tmp = opener->inl_text->next;
1202	while (tmp) {
1203	tmpnext = tmp->next;
1204	cmark_node_unlink(tmp);
1205	append_child(inl, tmp);
1206	tmp = tmpnext;
1207	}
1208
1209	// Free the bracket [:
1210	cmark_node_free(opener->inl_text);
1211
1212	process_emphasis(subj, opener->previous_delimiter);
1213	pop_bracket(subj);
1214
1215	// Now, if we have a link, we also want to deactivate earlier link
1216	// delimiters. (This code can be removed if we decide to allow links
1217	// inside links.)
1218	if (!is_image) {
1219	opener = subj->last_bracket;
1220	while (opener != NULL) {
1221	if (!opener->image) {
1222	if (!opener->active) {
1223	break;
1224	} else {
1225	opener->active = false;
1226	}
1227	}
1228	opener = opener->previous;
1229	}
1230	}
1231
1232	return NULL;
1233	}
1234
1235	// Parse a hard or soft linebreak, returning an inline.
1236	// Assumes the subject has a cr or newline at the current position.
1237	static cmark_node handle_newline(subject subj) {
1238	bufsize_t nlpos = subj->pos;
1239	// skip over cr, crlf, or lf:
1240	if (peek_at(subj, subj->pos) == `'\r'`) {
1241	advance(subj);
1242	}
1243	if (peek_at(subj, subj->pos) == `'\n'`) {
1244	advance(subj);
1245	}
1246	++subj->line;
1247	subj->column_offset = -subj->pos;
1248	// skip spaces at beginning of line
1249	skip_spaces(subj);
1250	if (nlpos > `1` && peek_at(subj, nlpos - `1`) == `' '` &&
1251	peek_at(subj, nlpos - `2`) == `' '`) {
1252	return make_linebreak(subj->mem);
1253	} else {
1254	return make_softbreak(subj->mem);
1255	}
1256	}
1257
1258	static bufsize_t subject_find_special_char(subject subj, int* options) {
1259	// "\r\n\\`&_[]<!"*
1260	static const int8_t SPECIAL_CHARS[`256`] = {
1261	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1262	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`,
1263	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1264	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `1`, `1`, `0`, `1`,
1265	`1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1266	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1267	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1268	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1269	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1270	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1271	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`};
1272
1273	// " ' . -
1274	static const char SMART_PUNCT_CHARS[] = {
1275	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1276	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `1`, `1`, `0`,
1277	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1278	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1279	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1280	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1281	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1282	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1283	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1284	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1285	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
1286	};
1287
1288	bufsize_t n = subj->pos + `1`;
1289
1290	while (n < subj->input.len) {
1291	if (SPECIAL_CHARS[subj->input.data[n]])
1292	return n;
1293	if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
1294	return n;
1295	n++;
1296	}
1297
1298	return subj->input.len;
1299	}
1300
1301	// Parse an inline, advancing subject, and add it as a child of parent.
1302	// Return 0 if no inline can be parsed, 1 otherwise.
1303	static int parse_inline(subject subj, cmark_node parent, int options) {
1304	cmark_node *new_inl = NULL;
1305	cmark_chunk contents;
1306	unsigned char c;
1307	bufsize_t startpos, endpos;
1308	c = peek_char(subj);
1309	if (c == `0`) {
1310	return `0`;
1311	}
1312	switch (c) {
1313	case `'\r'`:
1314	case `'\n'`:
1315	new_inl = handle_newline(subj);
1316	break;
1317	case '`':
1318	new_inl = handle_backticks(subj, options);
1319	break;
1320	case `'\\'`:
1321	new_inl = handle_backslash(subj);
1322	break;
1323	case `'&'`:
1324	new_inl = handle_entity(subj);
1325	break;
1326	case `'<'`:
1327	new_inl = handle_pointy_brace(subj, options);
1328	break;
1329	case `'*'`:
1330	case `'_'`:
1331	case `'\''`:
1332	case `'"'`:
1333	new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != `0`);
1334	break;
1335	case `'-'`:
1336	new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != `0`);
1337	break;
1338	case `'.'`:
1339	new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != `0`);
1340	break;
1341	case `'['`:
1342	advance(subj);
1343	new_inl = make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("["));
1344	push_bracket(subj, false, new_inl);
1345	break;
1346	case `']'`:
1347	new_inl = handle_close_bracket(subj);
1348	break;
1349	case `'!'`:
1350	advance(subj);
1351	if (peek_char(subj) == `'['`) {
1352	advance(subj);
1353	new_inl = make_str(subj, subj->pos - `2`, subj->pos - `1`, cmark_chunk_literal("!["));
1354	push_bracket(subj, true, new_inl);
1355	} else {
1356	new_inl = make_str(subj, subj->pos - `1`, subj->pos - `1`, cmark_chunk_literal("!"));
1357	}
1358	break;
1359	default:
1360	endpos = subject_find_special_char(subj, options);
1361	contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1362	startpos = subj->pos;
1363	subj->pos = endpos;
1364
1365	// if we're at a newline, strip trailing spaces.
1366	if (S_is_line_end_char(peek_char(subj))) {
1367	cmark_chunk_rtrim(&contents);
1368	}
1369
1370	new_inl = make_str(subj, startpos, endpos - `1`, contents);
1371	}
1372	if (new_inl != NULL) {
1373	append_child(parent, new_inl);
1374	}
1375
1376	return `1`;
1377	}
1378
1379	// Parse inlines from parent's string_content, adding as children of parent.
1380	void cmark_parse_inlines(cmark_mem mem, cmark_node parent,
1381	cmark_reference_map refmap, int* options) {
1382	subject subj;
1383	cmark_chunk content = {parent->data, parent->len};
1384	subject_from_buf(mem, parent->start_line, parent->start_column - `1` + parent->internal_offset, &subj, &content, refmap);
1385	cmark_chunk_rtrim(&subj.input);
1386
1387	while (!is_eof(&subj) && parse_inline(&subj, parent, options))
1388	;
1389
1390	process_emphasis(&subj, NULL);
1391	// free bracket and delim stack
1392	while (subj.last_delim) {
1393	remove_delimiter(&subj, subj.last_delim);
1394	}
1395	while (subj.last_bracket) {
1396	pop_bracket(&subj);
1397	}
1398	}
1399
1400	// Parse zero or more space characters, including at most one newline.
1401	static void spnl(subject *subj) {
1402	skip_spaces(subj);
1403	if (skip_line_end(subj)) {
1404	skip_spaces(subj);
1405	}
1406	}
1407
1408	// Parse reference. Assumes string begins with '[' character.
1409	// Modify refmap if a reference is encountered.
1410	// Return 0 if no reference found, otherwise position of subject
1411	// after reference is parsed.
1412	bufsize_t cmark_parse_reference_inline(cmark_mem mem, cmark_chunk input,
1413	cmark_reference_map *refmap) {
1414	subject subj;
1415
1416	cmark_chunk lab;
1417	cmark_chunk url;
1418	cmark_chunk title;
1419
1420	bufsize_t matchlen = `0`;
1421	bufsize_t beforetitle;
1422
1423	subject_from_buf(mem, -`1`, `0`, &subj, input, NULL);
1424
1425	// parse label:
1426	if (!link_label(&subj, &lab) \|\| lab.len == `0`)
1427	return `0`;
1428
1429	// colon:
1430	if (peek_char(&subj) == `':'`) {
1431	advance(&subj);
1432	} else {
1433	return `0`;
1434	}
1435
1436	// parse link url:
1437	spnl(&subj);
1438	if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -`1`) {
1439	subj.pos += matchlen;
1440	} else {
1441	return `0`;
1442	}
1443
1444	// parse optional link_title
1445	beforetitle = subj.pos;
1446	spnl(&subj);
1447	matchlen = subj.pos == beforetitle ? `0` : scan_link_title(&subj.input, subj.pos);
1448	if (matchlen) {
1449	title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1450	subj.pos += matchlen;
1451	} else {
1452	subj.pos = beforetitle;
1453	title = cmark_chunk_literal("");
1454	}
1455
1456	// parse final spaces and newline:
1457	skip_spaces(&subj);
1458	if (!skip_line_end(&subj)) {
1459	if (matchlen) { // try rewinding before title
1460	subj.pos = beforetitle;
1461	skip_spaces(&subj);
1462	if (!skip_line_end(&subj)) {
1463	return `0`;
1464	}
1465	} else {
1466	return `0`;
1467	}
1468	}
1469	// insert reference into refmap
1470	cmark_reference_create(refmap, &lab, &url, &title);
1471	return subj.pos;
1472	}
1473

Browse the source code of Aseprite/third_party/cmark/src/inlines.c