1#include <stdlib.h>
2#include <string.h>
3#include <stdio.h>
4
5#include "cmark_ctype.h"
6#include "config.h"
7#include "node.h"
8#include "parser.h"
9#include "references.h"
10#include "cmark.h"
11#include "houdini.h"
12#include "utf8.h"
13#include "scanners.h"
14#include "inlines.h"
15
// UTF-8 byte sequences of the typographic characters emitted by the "smart"
// punctuation handling in this file.
static const char *EMDASH = "\xE2\x80\x94";           // U+2014 em dash
static const char *ENDASH = "\xE2\x80\x93";           // U+2013 en dash
static const char *ELLIPSES = "\xE2\x80\xA6";         // U+2026 horizontal ellipsis
static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C
static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D
static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019
23
// Macros for creating the various kinds of simple (value-less) inline nodes.
// Each one forwards to make_simple() with the matching node type.
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)
29
// Longest backtick run whose position is remembered in subject.backticks.
// scan_to_closing_backticks() refuses to match opening runs longer than this.
#define MAXBACKTICKS 1000
31
// One entry of the delimiter stack, kept as a doubly linked list whose newest
// entry is subject.last_delim.
typedef struct delimiter {
  struct delimiter *previous; // older entry, or NULL at the bottom of the stack
  struct delimiter *next;     // newer entry, or NULL for the newest entry
  cmark_node *inl_text;       // text node holding the delimiter characters
  bufsize_t length;           // inl_text->len at the time the entry was pushed
  unsigned char delim_char;   // the character the delimiter run consists of
  bool can_open;              // run may open, as computed by scan_delims()
  bool can_close;             // run may close, as computed by scan_delims()
} delimiter;
41
// One entry of the bracket stack (singly linked, newest entry is
// subject.last_bracket). Entries are created by push_bracket().
typedef struct bracket {
  struct bracket *previous;             // older entry, or NULL
  struct delimiter *previous_delimiter; // subject.last_delim when pushed
  cmark_node *inl_text;                 // text node passed to push_bracket()
  bufsize_t position;                   // subject.pos when pushed
  bool image;                           // `image` argument of push_bracket()
  bool active;                          // set to true on push
  bool bracket_after;                   // set once a newer bracket is pushed
} bracket;
51
// Bits for subject.flags. handle_pointy_brace() sets a bit when a scan for the
// corresponding construct would run past the end of the input, and skips that
// kind of scan for the rest of the subject once the bit is set.
#define FLAG_SKIP_HTML_CDATA (1u << 0)
#define FLAG_SKIP_HTML_DECLARATION (1u << 1)
#define FLAG_SKIP_HTML_PI (1u << 2)
55
// Parsing state for one run of inline content.
typedef struct {
  cmark_mem *mem;              // allocator used for nodes and stack entries
  cmark_chunk input;           // the text being parsed
  unsigned flags;              // FLAG_SKIP_* bits
  int line;                    // line number stamped onto created nodes
  bufsize_t pos;               // current byte offset into `input`
  int block_offset;            // added to positions when computing columns
  int column_offset;           // added to positions when computing columns
  cmark_reference_map *refmap; // reference map supplied by the caller
  delimiter *last_delim;       // newest delimiter stack entry, or NULL
  bracket *last_bracket;       // newest bracket stack entry, or NULL
  // backticks[n] is the start position of the most recently scanned run of
  // exactly n backticks (see scan_to_closing_backticks).
  bufsize_t backticks[MAXBACKTICKS + 1];
  // true once a backtick scan has reached the end of the input
  bool scanned_for_backticks;
} subject;
70
71static CMARK_INLINE bool S_is_line_end_char(char c) {
72 return (c == '\n' || c == '\r');
73}
74
// Forward declarations for functions that are referenced before their
// definitions in this file.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer);

static int parse_inline(subject *subj, cmark_node *parent, int options);

static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
                             cmark_chunk *chunk, cmark_reference_map *refmap);
static bufsize_t subject_find_special_char(subject *subj, int options);
83
84// Create an inline with a literal string value.
85static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
86 int start_column, int end_column) {
87 cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
88 e->mem = subj->mem;
89 e->type = (uint16_t)t;
90 e->start_line = e->end_line = subj->line;
91 // columns are 1 based.
92 e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
93 e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
94 return e;
95}
96
97// Create an inline with no value.
98static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
99 cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
100 e->mem = mem;
101 e->type = t;
102 return e;
103}
104
105static cmark_node *make_str(subject *subj, int sc, int ec, cmark_chunk s) {
106 cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec);
107 e->data = (unsigned char *)subj->mem->realloc(NULL, s.len + 1);
108 if (s.data != NULL) {
109 memcpy(e->data, s.data, s.len);
110 }
111 e->data[s.len] = 0;
112 e->len = s.len;
113 return e;
114}
115
116static cmark_node *make_str_from_buf(subject *subj, int sc, int ec,
117 cmark_strbuf *buf) {
118 cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec);
119 e->len = buf->size;
120 e->data = cmark_strbuf_detach(buf);
121 return e;
122}
123
124// Like make_str, but parses entities.
125static cmark_node *make_str_with_entities(subject *subj,
126 int start_column, int end_column,
127 cmark_chunk *content) {
128 cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
129
130 if (houdini_unescape_html(&unescaped, content->data, content->len)) {
131 return make_str_from_buf(subj, start_column, end_column, &unescaped);
132 } else {
133 return make_str(subj, start_column, end_column, *content);
134 }
135}
136
137// Like cmark_node_append_child but without costly sanity checks.
138// Assumes that child was newly created.
139static void append_child(cmark_node *node, cmark_node *child) {
140 cmark_node *old_last_child = node->last_child;
141
142 child->next = NULL;
143 child->prev = old_last_child;
144 child->parent = node;
145 node->last_child = child;
146
147 if (old_last_child) {
148 old_last_child->next = child;
149 } else {
150 // Also set first_child if node previously had no children.
151 node->first_child = child;
152 }
153}
154
155// Duplicate a chunk by creating a copy of the buffer not by reusing the
156// buffer like cmark_chunk_dup does.
157static unsigned char *cmark_strdup(cmark_mem *mem, unsigned char *src) {
158 if (src == NULL) {
159 return NULL;
160 }
161 size_t len = strlen((char *)src);
162 unsigned char *data = (unsigned char *)mem->realloc(NULL, len + 1);
163 memcpy(data, src, len + 1);
164 return data;
165}
166
167static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
168 int is_email) {
169 cmark_strbuf buf = CMARK_BUF_INIT(mem);
170
171 cmark_chunk_trim(url);
172
173 if (is_email)
174 cmark_strbuf_puts(&buf, "mailto:");
175
176 houdini_unescape_html_f(&buf, url->data, url->len);
177 return cmark_strbuf_detach(&buf);
178}
179
180static CMARK_INLINE cmark_node *make_autolink(subject *subj,
181 int start_column, int end_column,
182 cmark_chunk url, int is_email) {
183 cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
184 link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
185 link->as.link.title = NULL;
186 link->start_line = link->end_line = subj->line;
187 link->start_column = start_column + 1;
188 link->end_column = end_column + 1;
189 append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
190 return link;
191}
192
193static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
194 cmark_chunk *chunk, cmark_reference_map *refmap) {
195 int i;
196 e->mem = mem;
197 e->input = *chunk;
198 e->flags = 0;
199 e->line = line_number;
200 e->pos = 0;
201 e->block_offset = block_offset;
202 e->column_offset = 0;
203 e->refmap = refmap;
204 e->last_delim = NULL;
205 e->last_bracket = NULL;
206 for (i = 0; i <= MAXBACKTICKS; i++) {
207 e->backticks[i] = 0;
208 }
209 e->scanned_for_backticks = false;
210}
211
212static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
213
214static CMARK_INLINE unsigned char peek_char(subject *subj) {
215 // NULL bytes should have been stripped out by now. If they're
216 // present, it's a programming error:
217 assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
218 return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
219}
220
221static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) {
222 return subj->input.data[pos];
223}
224
225// Return true if there are more characters in the subject.
226static CMARK_INLINE int is_eof(subject *subj) {
227 return (subj->pos >= subj->input.len);
228}
229
// Advance the subject by one byte. Doesn't check for eof.
// The expansion is fully parenthesised so the macro stays a single
// expression wherever it is used; its value is the new position.
#define advance(subj) ((subj)->pos += 1)
232
233static CMARK_INLINE bool skip_spaces(subject *subj) {
234 bool skipped = false;
235 while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
236 advance(subj);
237 skipped = true;
238 }
239 return skipped;
240}
241
242static CMARK_INLINE bool skip_line_end(subject *subj) {
243 bool seen_line_end_char = false;
244 if (peek_char(subj) == '\r') {
245 advance(subj);
246 seen_line_end_char = true;
247 }
248 if (peek_char(subj) == '\n') {
249 advance(subj);
250 seen_line_end_char = true;
251 }
252 return seen_line_end_char || is_eof(subj);
253}
254
255// Take characters while a predicate holds, and return a string.
256static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
257 unsigned char c;
258 bufsize_t startpos = subj->pos;
259 bufsize_t len = 0;
260
261 while ((c = peek_char(subj)) && (*f)(c)) {
262 advance(subj);
263 len++;
264 }
265
266 return cmark_chunk_dup(&subj->input, startpos, len);
267}
268
269// Return the number of newlines in a given span of text in a subject. If
270// the number is greater than zero, also return the number of characters
271// between the last newline and the end of the span in `since_newline`.
272static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
273 int nls = 0;
274 int since_nl = 0;
275
276 while (len--) {
277 if (subj->input.data[from++] == '\n') {
278 ++nls;
279 since_nl = 0;
280 } else {
281 ++since_nl;
282 }
283 }
284
285 if (!nls)
286 return 0;
287
288 *since_newline = since_nl;
289 return nls;
290}
291
292// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
293// `column_offset` according to the number of newlines in a just-matched span
294// of text in `subj`.
295static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
296 if (!(options & CMARK_OPT_SOURCEPOS)) {
297 return;
298 }
299
300 int since_newline;
301 int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
302 if (newlines) {
303 subj->line += newlines;
304 node->end_line += newlines;
305 node->end_column = since_newline;
306 subj->column_offset = -subj->pos + since_newline + extra;
307 }
308}
309
310// Try to process a backtick code span that began with a
311// span of ticks of length openticklength length (already
312// parsed). Return 0 if you don't find matching closing
313// backticks, otherwise return the position in the subject
314// after the closing backticks.
315static bufsize_t scan_to_closing_backticks(subject *subj,
316 bufsize_t openticklength) {
317
318 bool found = false;
319 if (openticklength > MAXBACKTICKS) {
320 // we limit backtick string length because of the array subj->backticks:
321 return 0;
322 }
323 if (subj->scanned_for_backticks &&
324 subj->backticks[openticklength] <= subj->pos) {
325 // return if we already know there's no closer
326 return 0;
327 }
328 while (!found) {
329 // read non backticks
330 unsigned char c;
331 while ((c = peek_char(subj)) && c != '`') {
332 advance(subj);
333 }
334 if (is_eof(subj)) {
335 break;
336 }
337 bufsize_t numticks = 0;
338 while (peek_char(subj) == '`') {
339 advance(subj);
340 numticks++;
341 }
342 // store position of ender
343 if (numticks <= MAXBACKTICKS) {
344 subj->backticks[numticks] = subj->pos - numticks;
345 }
346 if (numticks == openticklength) {
347 return (subj->pos);
348 }
349 }
350 // got through whole input without finding closer
351 subj->scanned_for_backticks = true;
352 return 0;
353}
354
355// Destructively modify string, converting newlines to
356// spaces, then removing a single leading + trailing space,
357// unless the code span consists entirely of space characters.
358static void S_normalize_code(cmark_strbuf *s) {
359 bufsize_t r, w;
360 bool contains_nonspace = false;
361
362 for (r = 0, w = 0; r < s->size; ++r) {
363 switch (s->ptr[r]) {
364 case '\r':
365 if (s->ptr[r + 1] != '\n') {
366 s->ptr[w++] = ' ';
367 }
368 break;
369 case '\n':
370 s->ptr[w++] = ' ';
371 break;
372 default:
373 s->ptr[w++] = s->ptr[r];
374 }
375 if (s->ptr[r] != ' ') {
376 contains_nonspace = true;
377 }
378 }
379
380 // begins and ends with space?
381 if (contains_nonspace &&
382 s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
383 cmark_strbuf_drop(s, 1);
384 cmark_strbuf_truncate(s, w - 2);
385 } else {
386 cmark_strbuf_truncate(s, w);
387 }
388
389}
390
391
392// Parse backtick code section or raw backticks, return an inline.
393// Assumes that the subject has a backtick at the current position.
394static cmark_node *handle_backticks(subject *subj, int options) {
395 cmark_chunk openticks = take_while(subj, isbacktick);
396 bufsize_t startpos = subj->pos;
397 bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
398
399 if (endpos == 0) { // not found
400 subj->pos = startpos; // rewind
401 return make_str(subj, subj->pos, subj->pos, openticks);
402 } else {
403 cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
404
405 cmark_strbuf_set(&buf, subj->input.data + startpos,
406 endpos - startpos - openticks.len);
407 S_normalize_code(&buf);
408
409 cmark_node *node = make_literal(subj, CMARK_NODE_CODE, startpos,
410 endpos - openticks.len - 1);
411 node->len = buf.size;
412 node->data = cmark_strbuf_detach(&buf);
413 adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
414 return node;
415 }
416}
417
418
// Scan a run of the delimiter character `c` at the current position (for
// example ***, ** or *) and return the number of characters scanned.
// Advances the position past the run. For the quote characters ' and "
// exactly one character is consumed.
//
// `*can_open` and `*can_close` are set from the characters immediately
// before and after the run (left-/right-flanking tests), with extra
// conditions for '_' and for the quote characters.
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
                       bool *can_close) {
  int numdelims = 0;
  bufsize_t before_char_pos;
  int32_t after_char = 0;
  int32_t before_char = 0;
  int len;
  bool left_flanking, right_flanking;

  if (subj->pos == 0) {
    // start of input: behave as if preceded by a line feed (code point 10)
    before_char = 10;
  } else {
    before_char_pos = subj->pos - 1;
    // walk back to the beginning of the UTF_8 sequence:
    // (bytes of the form 10xxxxxx are continuation bytes)
    while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
      before_char_pos -= 1;
    }
    len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
                                 subj->pos - before_char_pos, &before_char);
    if (len == -1) {
      // undecodable sequence: fall back to a line feed
      before_char = 10;
    }
  }

  if (c == '\'' || c == '"') {
    numdelims++;
    advance(subj); // limit to 1 delim for quotes
  } else {
    while (peek_char(subj) == c) {
      numdelims++;
      advance(subj);
    }
  }

  len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
                               subj->input.len - subj->pos, &after_char);
  if (len == -1) {
    // nothing decodable follows the run: fall back to a line feed
    after_char = 10;
  }
  // left-flanking: not followed by whitespace, and either not followed by
  // punctuation or preceded by whitespace/punctuation
  left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
                  (!cmark_utf8proc_is_punctuation(after_char) ||
                   cmark_utf8proc_is_space(before_char) ||
                   cmark_utf8proc_is_punctuation(before_char));
  // right-flanking: the mirror image of the test above
  right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
                   (!cmark_utf8proc_is_punctuation(before_char) ||
                    cmark_utf8proc_is_space(after_char) ||
                    cmark_utf8proc_is_punctuation(after_char));
  if (c == '_') {
    // a run that is both left- and right-flanking only opens when preceded
    // by punctuation and only closes when followed by punctuation
    *can_open = left_flanking &&
                (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
    *can_close = right_flanking &&
                 (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
  } else if (c == '\'' || c == '"') {
    // quotes: never open directly after ']' or ')'
    *can_open = left_flanking &&
         (!right_flanking || before_char == '(' || before_char == '[') &&
         before_char != ']' && before_char != ')';
    *can_close = right_flanking;
  } else {
    *can_open = left_flanking;
    *can_close = right_flanking;
  }
  return numdelims;
}
484
485/*
486static void print_delimiters(subject *subj)
487{
488 delimiter *delim;
489 delim = subj->last_delim;
490 while (delim != NULL) {
491 printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
492 (void*)delim, delim->delim_char,
493 delim->can_open, delim->can_close,
494 (void*)delim->next, (void*)delim->previous);
495 delim = delim->previous;
496 }
497}
498*/
499
500static void remove_delimiter(subject *subj, delimiter *delim) {
501 if (delim == NULL)
502 return;
503 if (delim->next == NULL) {
504 // end of list:
505 assert(delim == subj->last_delim);
506 subj->last_delim = delim->previous;
507 } else {
508 delim->next->previous = delim->previous;
509 }
510 if (delim->previous != NULL) {
511 delim->previous->next = delim->next;
512 }
513 subj->mem->free(delim);
514}
515
516static void pop_bracket(subject *subj) {
517 bracket *b;
518 if (subj->last_bracket == NULL)
519 return;
520 b = subj->last_bracket;
521 subj->last_bracket = subj->last_bracket->previous;
522 subj->mem->free(b);
523}
524
525static void push_delimiter(subject *subj, unsigned char c, bool can_open,
526 bool can_close, cmark_node *inl_text) {
527 delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
528 delim->delim_char = c;
529 delim->can_open = can_open;
530 delim->can_close = can_close;
531 delim->inl_text = inl_text;
532 delim->length = inl_text->len;
533 delim->previous = subj->last_delim;
534 delim->next = NULL;
535 if (delim->previous != NULL) {
536 delim->previous->next = delim;
537 }
538 subj->last_delim = delim;
539}
540
541static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
542 bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket));
543 if (subj->last_bracket != NULL) {
544 subj->last_bracket->bracket_after = true;
545 }
546 b->image = image;
547 b->active = true;
548 b->inl_text = inl_text;
549 b->previous = subj->last_bracket;
550 b->previous_delimiter = subj->last_delim;
551 b->position = subj->pos;
552 b->bracket_after = false;
553 subj->last_bracket = b;
554}
555
556// Assumes the subject has a c at the current position.
557static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
558 bufsize_t numdelims;
559 cmark_node *inl_text;
560 bool can_open, can_close;
561 cmark_chunk contents;
562
563 numdelims = scan_delims(subj, c, &can_open, &can_close);
564
565 if (c == '\'' && smart) {
566 contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
567 } else if (c == '"' && smart) {
568 contents =
569 cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
570 } else {
571 contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
572 }
573
574 inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
575
576 if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
577 push_delimiter(subj, c, can_open, can_close, inl_text);
578 }
579
580 return inl_text;
581}
582
583// Assumes we have a hyphen at the current position.
584static cmark_node *handle_hyphen(subject *subj, bool smart) {
585 int startpos = subj->pos;
586
587 advance(subj);
588
589 if (!smart || peek_char(subj) != '-') {
590 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
591 }
592
593 while (smart && peek_char(subj) == '-') {
594 advance(subj);
595 }
596
597 int numhyphens = subj->pos - startpos;
598 int en_count = 0;
599 int em_count = 0;
600 int i;
601 cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
602
603 if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
604 em_count = numhyphens / 3;
605 } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
606 en_count = numhyphens / 2;
607 } else if (numhyphens % 3 == 2) { // use one en dash at end
608 en_count = 1;
609 em_count = (numhyphens - 2) / 3;
610 } else { // use two en dashes at the end
611 en_count = 2;
612 em_count = (numhyphens - 4) / 3;
613 }
614
615 for (i = em_count; i > 0; i--) {
616 cmark_strbuf_puts(&buf, EMDASH);
617 }
618
619 for (i = en_count; i > 0; i--) {
620 cmark_strbuf_puts(&buf, ENDASH);
621 }
622
623 return make_str_from_buf(subj, startpos, subj->pos - 1, &buf);
624}
625
626// Assumes we have a period at the current position.
627static cmark_node *handle_period(subject *subj, bool smart) {
628 advance(subj);
629 if (smart && peek_char(subj) == '.') {
630 advance(subj);
631 if (peek_char(subj) == '.') {
632 advance(subj);
633 return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
634 } else {
635 return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
636 }
637 } else {
638 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
639 }
640}
641
// Resolve the delimiters above `stack_bottom` on the delimiter stack:
// emphasis/strong delimiters are paired up and turned into nodes via
// S_insert_emph(), and smart-quote delimiters get their final literal.
// On return every delimiter above `stack_bottom` has been removed from the
// stack (a NULL `stack_bottom` processes and empties the whole stack).
static void process_emphasis(subject *subj, delimiter *stack_bottom) {
  delimiter *closer = subj->last_delim;
  delimiter *opener;
  delimiter *old_closer;
  delimiter *new_stack_bottom;
  bool opener_found;

  // Per delimiter character, the entry below which no opener needs to be
  // looked for any more. Only indexes 0..3 are used (", ', _, *); the
  // remaining elements are zero-initialized and never read.
  int openers_bottom_index = 0;
  delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom,
                                  stack_bottom};

  // move back to first relevant delim.
  while (closer != NULL && closer->previous != stack_bottom) {
    closer = closer->previous;
  }

  // now move forward, looking for closers, and handling each
  while (closer != NULL) {
    if (closer->can_close) {
      switch (closer->delim_char) {
      case '"':
        openers_bottom_index = 0;
        break;
      case '\'':
        openers_bottom_index = 1;
        break;
      case '_':
        openers_bottom_index = 2;
        break;
      case '*':
        openers_bottom_index = 3;
        break;
      default:
        assert(false);
      }

      // Now look backwards for first matching opener:
      opener = closer->previous;
      opener_found = false;
      new_stack_bottom = closer->previous;

      while (opener != NULL && opener != openers_bottom[openers_bottom_index]) {
        if (opener->can_open && opener->delim_char == closer->delim_char) {
          // interior closer of size 2 can't match opener of size 1
          // or of size 1 can't match 2
          if (!(closer->can_open || opener->can_close) ||
              closer->length % 3 == 0 ||
              (opener->length + closer->length) % 3 != 0) {
            opener_found = true;
            break;
          } else {
            // If we failed to match because of the mod-3 rule,
            // then we want to make sure the stack bottom extends
            // back to here at least, since a later closer might
            // match this same opener... (see #383)
            new_stack_bottom = opener->previous;
          }
        }
        opener = opener->previous;
      }
      // remember the closer: `closer` itself is advanced below
      old_closer = closer;
      if (closer->delim_char == '*' || closer->delim_char == '_') {
        if (opener_found) {
          // returns the delimiter to continue with
          closer = S_insert_emph(subj, opener, closer);
        } else {
          closer = closer->next;
        }
      } else if (closer->delim_char == '\'') {
        cmark_node_set_literal(closer->inl_text, RIGHTSINGLEQUOTE);
        if (opener_found) {
          cmark_node_set_literal(opener->inl_text, LEFTSINGLEQUOTE);
        }
        closer = closer->next;
      } else if (closer->delim_char == '"') {
        cmark_node_set_literal(closer->inl_text, RIGHTDOUBLEQUOTE);
        if (opener_found) {
          cmark_node_set_literal(opener->inl_text, LEFTDOUBLEQUOTE);
        }
        closer = closer->next;
      }
      if (!opener_found) {
        // set lower bound for future searches for openers (see #383).
        openers_bottom[openers_bottom_index] = new_stack_bottom;
        if (!old_closer->can_open) {
          // we can remove a closer that can't be an
          // opener, once we've seen there's no
          // matching opener:
          remove_delimiter(subj, old_closer);
        }
      }
    } else {
      closer = closer->next;
    }
  }
  // free all delimiters in list until stack_bottom:
  while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
    remove_delimiter(subj, subj->last_delim);
  }
}
741
// Turn the inlines between `opener` and `closer` into an emphasis or strong
// node.
//
// One delimiter character is used from each side for emphasis; two are used
// (producing strong) when both runs have at least two characters left. The
// used characters are cut off the delimiters' text nodes, all delimiters
// strictly between opener and closer are dropped, and the nodes between the
// two text nodes become children of the new node, which is inserted right
// after the opener's text node.
//
// Returns the delimiter to continue processing with: `closer` itself if it
// still has characters left, otherwise the delimiter that followed it.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer) {
  delimiter *delim, *tmp_delim;
  bufsize_t use_delims;
  cmark_node *opener_inl = opener->inl_text;
  cmark_node *closer_inl = closer->inl_text;
  bufsize_t opener_num_chars = opener_inl->len;
  bufsize_t closer_num_chars = closer_inl->len;
  cmark_node *tmp, *tmpnext, *emph;

  // calculate the actual number of characters used from this closer
  use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;

  // remove used characters from associated inlines.
  opener_num_chars -= use_delims;
  closer_num_chars -= use_delims;
  opener_inl->len = opener_num_chars;
  opener_inl->data[opener_num_chars] = 0;
  closer_inl->len = closer_num_chars;
  closer_inl->data[closer_num_chars] = 0;

  // free delimiters between opener and closer
  delim = closer->previous;
  while (delim != NULL && delim != opener) {
    // fetch the predecessor first: remove_delimiter frees `delim`
    tmp_delim = delim->previous;
    remove_delimiter(subj, delim);
    delim = tmp_delim;
  }

  // create new emph or strong, and splice it in to our inlines
  // between the opener and closer
  emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem);

  tmp = opener_inl->next;
  while (tmp && tmp != closer_inl) {
    // fetch the successor first: unlinking detaches `tmp` from its siblings
    tmpnext = tmp->next;
    cmark_node_unlink(tmp);
    append_child(emph, tmp);
    tmp = tmpnext;
  }
  cmark_node_insert_after(opener_inl, emph);

  // the new node spans from the opener's text node to the closer's
  emph->start_line = opener_inl->start_line;
  emph->end_line = closer_inl->end_line;
  emph->start_column = opener_inl->start_column;
  emph->end_column = closer_inl->end_column;

  // if opener has 0 characters, remove it and its associated inline
  if (opener_num_chars == 0) {
    cmark_node_free(opener_inl);
    remove_delimiter(subj, opener);
  }

  // if closer has 0 characters, remove it and its associated inline
  if (closer_num_chars == 0) {
    // remove empty closer inline
    cmark_node_free(closer_inl);
    // remove closer from list
    tmp_delim = closer->next;
    remove_delimiter(subj, closer);
    closer = tmp_delim;
  }

  return closer;
}
807
808// Parse backslash-escape or just a backslash, returning an inline.
809static cmark_node *handle_backslash(subject *subj) {
810 advance(subj);
811 unsigned char nextchar = peek_char(subj);
812 if (cmark_ispunct(
813 nextchar)) { // only ascii symbols and newline can be escaped
814 advance(subj);
815 return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
816 } else if (!is_eof(subj) && skip_line_end(subj)) {
817 return make_linebreak(subj->mem);
818 } else {
819 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
820 }
821}
822
823// Parse an entity or a regular "&" string.
824// Assumes the subject has an '&' character at the current position.
825static cmark_node *handle_entity(subject *subj) {
826 cmark_strbuf ent = CMARK_BUF_INIT(subj->mem);
827 bufsize_t len;
828
829 advance(subj);
830
831 len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
832 subj->input.len - subj->pos);
833
834 if (len <= 0)
835 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
836
837 subj->pos += len;
838 return make_str_from_buf(subj, subj->pos - 1 - len, subj->pos - 1, &ent);
839}
840
841// Clean a URL: remove surrounding whitespace, and remove \ that escape
842// punctuation.
843unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
844 cmark_strbuf buf = CMARK_BUF_INIT(mem);
845
846 cmark_chunk_trim(url);
847
848 houdini_unescape_html_f(&buf, url->data, url->len);
849
850 cmark_strbuf_unescape(&buf);
851 return cmark_strbuf_detach(&buf);
852}
853
854unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
855 cmark_strbuf buf = CMARK_BUF_INIT(mem);
856 unsigned char first, last;
857
858 if (title->len == 0) {
859 return NULL;
860 }
861
862 first = title->data[0];
863 last = title->data[title->len - 1];
864
865 // remove surrounding quotes if any:
866 if ((first == '\'' && last == '\'') || (first == '(' && last == ')') ||
867 (first == '"' && last == '"')) {
868 houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
869 } else {
870 houdini_unescape_html_f(&buf, title->data, title->len);
871 }
872
873 cmark_strbuf_unescape(&buf);
874 return cmark_strbuf_detach(&buf);
875}
876
// Parse an autolink or HTML tag.
// Assumes the subject has a '<' character at the current position.
//
// Tries, in order: a URI autolink, an e-mail autolink, and the raw HTML
// constructs (comment, CDATA, declaration, processing instruction, tag).
// Returns a link node, an HTML_INLINE node holding the matched source text,
// or -- when nothing matches -- a text node for the '<' alone.
static cmark_node *handle_pointy_brace(subject *subj, int options) {
  bufsize_t matchlen = 0;
  cmark_chunk contents;

  advance(subj); // advance past first <

  // first try to match a URL autolink
  matchlen = scan_autolink_uri(&subj->input, subj->pos);
  if (matchlen > 0) {
    // matchlen includes the closing '>', which is not part of the URL
    contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
    subj->pos += matchlen;

    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
  }

  // next try to match an email autolink
  matchlen = scan_autolink_email(&subj->input, subj->pos);
  if (matchlen > 0) {
    contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
    subj->pos += matchlen;

    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
  }

  // finally, try to match an html tag
  // (needs at least two more bytes; the branches below dispatch on them)
  if (subj->pos + 2 <= subj->input.len) {
    int c = subj->input.data[subj->pos];
    if (c == '!') {
      c = subj->input.data[subj->pos+1];
      if (c == '-') {
        matchlen = scan_html_comment(&subj->input, subj->pos + 2);
        if (matchlen > 0)
          matchlen += 2; // prefix "!-"
      } else if (c == '[') {
        if ((subj->flags & FLAG_SKIP_HTML_CDATA) == 0) {
          matchlen = scan_html_cdata(&subj->input, subj->pos + 2);
          if (matchlen > 0) {
            // The regex doesn't require the final "]]>". But if we're not at
            // the end of input, it must come after the match. Otherwise,
            // disable subsequent scans to avoid quadratic behavior.
            matchlen += 5; // prefix "![", suffix "]]>"
            if (subj->pos + matchlen > subj->input.len) {
              subj->flags |= FLAG_SKIP_HTML_CDATA;
              matchlen = 0;
            }
          }
        }
      } else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == 0) {
        matchlen = scan_html_declaration(&subj->input, subj->pos + 1);
        if (matchlen > 0) {
          matchlen += 2; // prefix "!", suffix ">"
          // same end-of-input guard as for CDATA above
          if (subj->pos + matchlen > subj->input.len) {
            subj->flags |= FLAG_SKIP_HTML_DECLARATION;
            matchlen = 0;
          }
        }
      }
    } else if (c == '?') {
      if ((subj->flags & FLAG_SKIP_HTML_PI) == 0) {
        // Note that we allow an empty match.
        matchlen = scan_html_pi(&subj->input, subj->pos + 1);
        matchlen += 3; // prefix "?", suffix "?>"
        // same end-of-input guard as for CDATA above
        if (subj->pos + matchlen > subj->input.len) {
          subj->flags |= FLAG_SKIP_HTML_PI;
          matchlen = 0;
        }
      }
    } else {
      matchlen = scan_html_tag(&subj->input, subj->pos);
    }
  }
  if (matchlen > 0) {
    // copy the matched text including the leading '<' (one byte before pos)
    const unsigned char *src = subj->input.data + subj->pos - 1;
    bufsize_t len = matchlen + 1;
    subj->pos += matchlen;
    cmark_node *node = make_literal(subj, CMARK_NODE_HTML_INLINE,
                                    subj->pos - matchlen - 1, subj->pos - 1);
    node->data = (unsigned char *)subj->mem->realloc(NULL, len + 1);
    memcpy(node->data, src, len);
    node->data[len] = 0;
    node->len = len;
    adjust_subj_node_newlines(subj, node, matchlen, 1, options);
    return node;
  }

  // if nothing matches, just return the opening <:
  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
}
967
968// Parse a link label. Returns 1 if successful.
969// Note: unescaped brackets are not allowed in labels.
970// The label begins with `[` and ends with the first `]` character
971// encountered. Backticks in labels do not start code spans.
972static int link_label(subject *subj, cmark_chunk *raw_label) {
973 bufsize_t startpos = subj->pos;
974 int length = 0;
975 unsigned char c;
976
977 // advance past [
978 if (peek_char(subj) == '[') {
979 advance(subj);
980 } else {
981 return 0;
982 }
983
984 while ((c = peek_char(subj)) && c != '[' && c != ']') {
985 if (c == '\\') {
986 advance(subj);
987 length++;
988 if (cmark_ispunct(peek_char(subj))) {
989 advance(subj);
990 length++;
991 }
992 } else {
993 advance(subj);
994 length++;
995 }
996 if (length > MAX_LINK_LABEL_LENGTH) {
997 goto noMatch;
998 }
999 }
1000
1001 if (c == ']') { // match found
1002 *raw_label =
1003 cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
1004 cmark_chunk_trim(raw_label);
1005 advance(subj); // advance past ]
1006 return 1;
1007 }
1008
1009noMatch:
1010 subj->pos = startpos; // rewind
1011 return 0;
1012}
1013
1014static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
1015 cmark_chunk *output) {
1016 bufsize_t i = offset;
1017 size_t nb_p = 0;
1018
1019 while (i < input->len) {
1020 if (input->data[i] == '\\' &&
1021 i + 1 < input-> len &&
1022 cmark_ispunct(input->data[i+1]))
1023 i += 2;
1024 else if (input->data[i] == '(') {
1025 ++nb_p;
1026 ++i;
1027 if (nb_p > 32)
1028 return -1;
1029 } else if (input->data[i] == ')') {
1030 if (nb_p == 0)
1031 break;
1032 --nb_p;
1033 ++i;
1034 } else if (cmark_isspace(input->data[i])) {
1035 if (i == offset) {
1036 return -1;
1037 }
1038 break;
1039 } else {
1040 ++i;
1041 }
1042 }
1043
1044 if (i >= input->len || nb_p != 0)
1045 return -1;
1046
1047 {
1048 cmark_chunk result = {input->data + offset, i - offset};
1049 *output = result;
1050 }
1051 return i - offset;
1052}
1053
1054static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
1055 cmark_chunk *output) {
1056 bufsize_t i = offset;
1057
1058 if (i < input->len && input->data[i] == '<') {
1059 ++i;
1060 while (i < input->len) {
1061 if (input->data[i] == '>') {
1062 ++i;
1063 break;
1064 } else if (input->data[i] == '\\')
1065 i += 2;
1066 else if (input->data[i] == '\n' || input->data[i] == '<')
1067 return -1;
1068 else
1069 ++i;
1070 }
1071 } else {
1072 return manual_scan_link_url_2(input, offset, output);
1073 }
1074
1075 if (i >= input->len)
1076 return -1;
1077
1078 {
1079 cmark_chunk result = {input->data + offset + 1, i - 2 - offset};
1080 *output = result;
1081 }
1082 return i - offset;
1083}
1084
1085// Return a link, an image, or a literal close bracket.
1086static cmark_node *handle_close_bracket(subject *subj) {
1087 bufsize_t initial_pos, after_link_text_pos;
1088 bufsize_t endurl, starttitle, endtitle, endall;
1089 bufsize_t sps, n;
1090 cmark_reference *ref = NULL;
1091 cmark_chunk url_chunk, title_chunk;
1092 unsigned char *url, *title;
1093 bracket *opener;
1094 cmark_node *inl;
1095 cmark_chunk raw_label;
1096 int found_label;
1097 cmark_node *tmp, *tmpnext;
1098 bool is_image;
1099
1100 advance(subj); // advance past ]
1101 initial_pos = subj->pos;
1102
1103 // get last [ or ![
1104 opener = subj->last_bracket;
1105
1106 if (opener == NULL) {
1107 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1108 }
1109
1110 if (!opener->active) {
1111 // take delimiter off stack
1112 pop_bracket(subj);
1113 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1114 }
1115
1116 // If we got here, we matched a potential link/image text.
1117 // Now we check to see if it's a link/image.
1118 is_image = opener->image;
1119
1120 after_link_text_pos = subj->pos;
1121
1122 // First, look for an inline link.
1123 if (peek_char(subj) == '(' &&
1124 ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
1125 ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
1126 &url_chunk)) > -1)) {
1127
1128 // try to parse an explicit link:
1129 endurl = subj->pos + 1 + sps + n;
1130 starttitle = endurl + scan_spacechars(&subj->input, endurl);
1131
1132 // ensure there are spaces btw url and title
1133 endtitle = (starttitle == endurl)
1134 ? starttitle
1135 : starttitle + scan_link_title(&subj->input, starttitle);
1136
1137 endall = endtitle + scan_spacechars(&subj->input, endtitle);
1138
1139 if (peek_at(subj, endall) == ')') {
1140 subj->pos = endall + 1;
1141
1142 title_chunk =
1143 cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
1144 url = cmark_clean_url(subj->mem, &url_chunk);
1145 title = cmark_clean_title(subj->mem, &title_chunk);
1146 cmark_chunk_free(&url_chunk);
1147 cmark_chunk_free(&title_chunk);
1148 goto match;
1149
1150 } else {
1151 // it could still be a shortcut reference link
1152 subj->pos = after_link_text_pos;
1153 }
1154 }
1155
1156 // Next, look for a following [link label] that matches in refmap.
1157 // skip spaces
1158 raw_label = cmark_chunk_literal("");
1159 found_label = link_label(subj, &raw_label);
1160 if (!found_label) {
1161 // If we have a shortcut reference link, back up
1162 // to before the spacse we skipped.
1163 subj->pos = initial_pos;
1164 }
1165
1166 if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
1167 cmark_chunk_free(&raw_label);
1168 raw_label = cmark_chunk_dup(&subj->input, opener->position,
1169 initial_pos - opener->position - 1);
1170 found_label = true;
1171 }
1172
1173 if (found_label) {
1174 ref = cmark_reference_lookup(subj->refmap, &raw_label);
1175 cmark_chunk_free(&raw_label);
1176 }
1177
1178 if (ref != NULL) { // found
1179 url = cmark_strdup(subj->mem, ref->url);
1180 title = cmark_strdup(subj->mem, ref->title);
1181 goto match;
1182 } else {
1183 goto noMatch;
1184 }
1185
1186noMatch:
1187 // If we fall through to here, it means we didn't match a link:
1188 pop_bracket(subj); // remove this opener from delimiter list
1189 subj->pos = initial_pos;
1190 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1191
1192match:
1193 inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
1194 inl->as.link.url = url;
1195 inl->as.link.title = title;
1196 inl->start_line = inl->end_line = subj->line;
1197 inl->start_column = opener->inl_text->start_column;
1198 inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1199 cmark_node_insert_before(opener->inl_text, inl);
1200 // Add link text:
1201 tmp = opener->inl_text->next;
1202 while (tmp) {
1203 tmpnext = tmp->next;
1204 cmark_node_unlink(tmp);
1205 append_child(inl, tmp);
1206 tmp = tmpnext;
1207 }
1208
1209 // Free the bracket [:
1210 cmark_node_free(opener->inl_text);
1211
1212 process_emphasis(subj, opener->previous_delimiter);
1213 pop_bracket(subj);
1214
1215 // Now, if we have a link, we also want to deactivate earlier link
1216 // delimiters. (This code can be removed if we decide to allow links
1217 // inside links.)
1218 if (!is_image) {
1219 opener = subj->last_bracket;
1220 while (opener != NULL) {
1221 if (!opener->image) {
1222 if (!opener->active) {
1223 break;
1224 } else {
1225 opener->active = false;
1226 }
1227 }
1228 opener = opener->previous;
1229 }
1230 }
1231
1232 return NULL;
1233}
1234
1235// Parse a hard or soft linebreak, returning an inline.
1236// Assumes the subject has a cr or newline at the current position.
1237static cmark_node *handle_newline(subject *subj) {
1238 bufsize_t nlpos = subj->pos;
1239 // skip over cr, crlf, or lf:
1240 if (peek_at(subj, subj->pos) == '\r') {
1241 advance(subj);
1242 }
1243 if (peek_at(subj, subj->pos) == '\n') {
1244 advance(subj);
1245 }
1246 ++subj->line;
1247 subj->column_offset = -subj->pos;
1248 // skip spaces at beginning of line
1249 skip_spaces(subj);
1250 if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
1251 peek_at(subj, nlpos - 2) == ' ') {
1252 return make_linebreak(subj->mem);
1253 } else {
1254 return make_softbreak(subj->mem);
1255 }
1256}
1257
1258static bufsize_t subject_find_special_char(subject *subj, int options) {
1259 // "\r\n\\`&_*[]<!"
1260 static const int8_t SPECIAL_CHARS[256] = {
1261 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1262 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1263 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1264 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
1265 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1266 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1267 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1268 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1269 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1270 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1271 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1272
1273 // " ' . -
1274 static const char SMART_PUNCT_CHARS[] = {
1275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1276 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
1277 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1278 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1280 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1281 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1282 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1283 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1284 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1285 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1286 };
1287
1288 bufsize_t n = subj->pos + 1;
1289
1290 while (n < subj->input.len) {
1291 if (SPECIAL_CHARS[subj->input.data[n]])
1292 return n;
1293 if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
1294 return n;
1295 n++;
1296 }
1297
1298 return subj->input.len;
1299}
1300
1301// Parse an inline, advancing subject, and add it as a child of parent.
1302// Return 0 if no inline can be parsed, 1 otherwise.
1303static int parse_inline(subject *subj, cmark_node *parent, int options) {
1304 cmark_node *new_inl = NULL;
1305 cmark_chunk contents;
1306 unsigned char c;
1307 bufsize_t startpos, endpos;
1308 c = peek_char(subj);
1309 if (c == 0) {
1310 return 0;
1311 }
1312 switch (c) {
1313 case '\r':
1314 case '\n':
1315 new_inl = handle_newline(subj);
1316 break;
1317 case '`':
1318 new_inl = handle_backticks(subj, options);
1319 break;
1320 case '\\':
1321 new_inl = handle_backslash(subj);
1322 break;
1323 case '&':
1324 new_inl = handle_entity(subj);
1325 break;
1326 case '<':
1327 new_inl = handle_pointy_brace(subj, options);
1328 break;
1329 case '*':
1330 case '_':
1331 case '\'':
1332 case '"':
1333 new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
1334 break;
1335 case '-':
1336 new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
1337 break;
1338 case '.':
1339 new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0);
1340 break;
1341 case '[':
1342 advance(subj);
1343 new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
1344 push_bracket(subj, false, new_inl);
1345 break;
1346 case ']':
1347 new_inl = handle_close_bracket(subj);
1348 break;
1349 case '!':
1350 advance(subj);
1351 if (peek_char(subj) == '[') {
1352 advance(subj);
1353 new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
1354 push_bracket(subj, true, new_inl);
1355 } else {
1356 new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
1357 }
1358 break;
1359 default:
1360 endpos = subject_find_special_char(subj, options);
1361 contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1362 startpos = subj->pos;
1363 subj->pos = endpos;
1364
1365 // if we're at a newline, strip trailing spaces.
1366 if (S_is_line_end_char(peek_char(subj))) {
1367 cmark_chunk_rtrim(&contents);
1368 }
1369
1370 new_inl = make_str(subj, startpos, endpos - 1, contents);
1371 }
1372 if (new_inl != NULL) {
1373 append_child(parent, new_inl);
1374 }
1375
1376 return 1;
1377}
1378
1379// Parse inlines from parent's string_content, adding as children of parent.
1380void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
1381 cmark_reference_map *refmap, int options) {
1382 subject subj;
1383 cmark_chunk content = {parent->data, parent->len};
1384 subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
1385 cmark_chunk_rtrim(&subj.input);
1386
1387 while (!is_eof(&subj) && parse_inline(&subj, parent, options))
1388 ;
1389
1390 process_emphasis(&subj, NULL);
1391 // free bracket and delim stack
1392 while (subj.last_delim) {
1393 remove_delimiter(&subj, subj.last_delim);
1394 }
1395 while (subj.last_bracket) {
1396 pop_bracket(&subj);
1397 }
1398}
1399
1400// Parse zero or more space characters, including at most one newline.
1401static void spnl(subject *subj) {
1402 skip_spaces(subj);
1403 if (skip_line_end(subj)) {
1404 skip_spaces(subj);
1405 }
1406}
1407
1408// Parse reference. Assumes string begins with '[' character.
1409// Modify refmap if a reference is encountered.
1410// Return 0 if no reference found, otherwise position of subject
1411// after reference is parsed.
1412bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
1413 cmark_reference_map *refmap) {
1414 subject subj;
1415
1416 cmark_chunk lab;
1417 cmark_chunk url;
1418 cmark_chunk title;
1419
1420 bufsize_t matchlen = 0;
1421 bufsize_t beforetitle;
1422
1423 subject_from_buf(mem, -1, 0, &subj, input, NULL);
1424
1425 // parse label:
1426 if (!link_label(&subj, &lab) || lab.len == 0)
1427 return 0;
1428
1429 // colon:
1430 if (peek_char(&subj) == ':') {
1431 advance(&subj);
1432 } else {
1433 return 0;
1434 }
1435
1436 // parse link url:
1437 spnl(&subj);
1438 if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
1439 subj.pos += matchlen;
1440 } else {
1441 return 0;
1442 }
1443
1444 // parse optional link_title
1445 beforetitle = subj.pos;
1446 spnl(&subj);
1447 matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos);
1448 if (matchlen) {
1449 title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1450 subj.pos += matchlen;
1451 } else {
1452 subj.pos = beforetitle;
1453 title = cmark_chunk_literal("");
1454 }
1455
1456 // parse final spaces and newline:
1457 skip_spaces(&subj);
1458 if (!skip_line_end(&subj)) {
1459 if (matchlen) { // try rewinding before title
1460 subj.pos = beforetitle;
1461 skip_spaces(&subj);
1462 if (!skip_line_end(&subj)) {
1463 return 0;
1464 }
1465 } else {
1466 return 0;
1467 }
1468 }
1469 // insert reference into refmap
1470 cmark_reference_create(refmap, &lab, &url, &title);
1471 return subj.pos;
1472}
1473