md4c.c source code [Qt/src/3rdparty/md4c/md4c.c]

1	/*
2	* MD4C: Markdown parser for C
3	* (http://github.com/mity/md4c)
4	*
5	* Copyright (c) 2016-2020 Martin Mitas
6	*
7	* Permission is hereby granted, free of charge, to any person obtaining a
8	* copy of this software and associated documentation files (the "Software"),
9	* to deal in the Software without restriction, including without limitation
10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
11	* and/or sell copies of the Software, and to permit persons to whom the
12	* Software is furnished to do so, subject to the following conditions:
13	*
14	* The above copyright notice and this permission notice shall be included in
15	* all copies or substantial portions of the Software.
16	*
17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23	* IN THE SOFTWARE.
24	*/
25
26	#include "md4c.h"
27
28	#include <limits.h>
29	#include <stdio.h>
30	#include <stdlib.h>
31	#include <string.h>
32
33
34	/*****************************
35	* Miscellaneous Stuff *
36	*****************************/
37
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    /* The "inline" keyword became part of standard C only with C99
     * (__STDC_VERSION__ == 199901L). C89/C90, C94 (199409L) or old compilers
     * in general may not understand it, so map it to a compiler specific
     * spelling, or drop it altogether when no such spelling is known. */
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif
48
/* Unless the build selects one of the supported encodings explicitly,
 * fall back to UTF-8. */
#if !defined(MD4C_USE_ASCII) && !defined(MD4C_USE_UTF8) && !defined(MD4C_USE_UTF16)
    #define MD4C_USE_UTF8
#endif
53
/* _T() wraps character and string literals: with MD4C_USE_UTF16 it turns
 * them into wide literals (L'x', L"x"), otherwise it leaves them untouched.
 * Any definition of _T inherited from elsewhere is dropped first. */
#ifdef _T
    #undef _T
#endif
#ifdef MD4C_USE_UTF16
    #define _T(x)           L##x
#else
    #define _T(x)           x
#endif
63
/* Misc. macros. */

/* Count of elements of a real array (must not be used with a pointer). */
#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof((a)[0]))

/* Two-level stringification so that macro arguments get expanded first. */
#define STRINGIZE_(x)       #x
#define STRINGIZE(x)        STRINGIZE_(x)

/* Boolean constants; each one is only provided when the environment did not
 * define it already. */
#ifndef TRUE
    #define TRUE            1
#endif
#ifndef FALSE
    #define FALSE           0
#endif
74
75
76	/************************
77	* Internal Types *
78	************************/
79
/* Short aliases for the types which are used all over the place. */
#define CHAR    MD_CHAR
#define SZ      MD_SIZE
#define OFF     MD_OFFSET

/* Forward declarations of the internal structures. */
typedef struct MD_MARK_tag      MD_MARK;
typedef struct MD_BLOCK_tag     MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
typedef struct MD_REF_DEF_tag   MD_REF_DEF;
89
90
/* During the analysis of inline marks we need to manage several "mark
 * chains" of (yet unresolved) openers. This structure only records where
 * a chain starts and where it ends; the links between the members of the
 * chain are realized through MD_MARK::prev and MD_MARK::next.
 */
typedef struct MD_MARKCHAIN_tag {
    int head;   /* Index of the first mark in the chain, or -1 if empty. */
    int tail;   /* Index of the last mark in the chain, or -1 if empty. */
} MD_MARKCHAIN;
100
101	/ Context propagated through all the parsing. /
102	typedef struct MD_CTX_tag MD_CTX;
103	struct MD_CTX_tag {
104	/ Immutable stuff (parameters of md_parse()). /
105	const CHAR* text;
106	SZ size;
107	MD_PARSER parser;
108	void* userdata;
109
110	/ When this is true, it allows some optimizations. /
111	int doc_ends_with_newline;
112
113	/ Helper temporary growing buffer. /
114	CHAR* buffer;
115	unsigned alloc_buffer;
116
117	/ Reference definitions. /
118	MD_REF_DEF* ref_defs;
119	int n_ref_defs;
120	int alloc_ref_defs;
121	void** ref_def_hashtable;
122	int ref_def_hashtable_size;
123
124	/ Stack of inline/span markers.*
125	* This is only used for parsing a single block contents but by storing it
126	* here we may reuse the stack for subsequent blocks; i.e. we have fewer
127	* (re)allocations. */
128	MD_MARK* marks;
129	int n_marks;
130	int alloc_marks;
131
132	#if defined MD4C_USE_UTF16
133	char mark_char_map[`128`];
134	#else
135	char mark_char_map[`256`];
136	#endif
137
138	/ For resolving of inline spans. /
139	MD_MARKCHAIN mark_chains[`13`];
140	#define PTR_CHAIN ctx->mark_chains[0]
141	#define TABLECELLBOUNDARIES ctx->mark_chains[1]
142	#define ASTERISK_OPENERS_extraword_mod3_0 ctx->mark_chains[2]
143	#define ASTERISK_OPENERS_extraword_mod3_1 ctx->mark_chains[3]
144	#define ASTERISK_OPENERS_extraword_mod3_2 ctx->mark_chains[4]
145	#define ASTERISK_OPENERS_intraword_mod3_0 ctx->mark_chains[5]
146	#define ASTERISK_OPENERS_intraword_mod3_1 ctx->mark_chains[6]
147	#define ASTERISK_OPENERS_intraword_mod3_2 ctx->mark_chains[7]
148	#define UNDERSCORE_OPENERS ctx->mark_chains[8]
149	#define TILDE_OPENERS_1 ctx->mark_chains[9]
150	#define TILDE_OPENERS_2 ctx->mark_chains[10]
151	#define BRACKET_OPENERS ctx->mark_chains[11]
152	#define DOLLAR_OPENERS ctx->mark_chains[12]
153	#define OPENERS_CHAIN_FIRST 2
154	#define OPENERS_CHAIN_LAST 12
155
156	int n_table_cell_boundaries;
157
158	/ For resolving links. /
159	int unresolved_link_head;
160	int unresolved_link_tail;
161
162	/ For resolving raw HTML. /
163	OFF html_comment_horizon;
164	OFF html_proc_instr_horizon;
165	OFF html_decl_horizon;
166	OFF html_cdata_horizon;
167
168	/ For block analysis.*
169	* Notes:
170	* -- It holds MD_BLOCK as well as MD_LINE structures. After each
171	* MD_BLOCK, its (multiple) MD_LINE(s) follow.
172	* -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
173	* instead of MD_LINE(s).
174	*/
175	void* block_bytes;
176	MD_BLOCK* current_block;
177	int n_block_bytes;
178	int alloc_block_bytes;
179
180	/ For container block analysis. /
181	MD_CONTAINER* containers;
182	int n_containers;
183	int alloc_containers;
184
185	/ Minimal indentation to call the block "indented code block". /
186	unsigned code_indent_offset;
187
188	/ Contextual info for line analysis. /
189	SZ code_fence_length; / For checking closing fence length. /
190	int html_block_type; / For checking closing raw HTML condition. /
191	int last_line_has_list_loosening_effect;
192	int last_list_item_starts_with_two_blank_lines;
193	};
194
/* Kinds of lines distinguished by the line analysis
 * (see MD_LINE_ANALYSIS::type). */
typedef enum MD_LINETYPE_tag {
    MD_LINE_BLANK,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
} MD_LINETYPE;
209
210	typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
211	struct MD_LINE_ANALYSIS_tag {
212	MD_LINETYPE type : `16`;
213	unsigned data : `16`;
214	OFF beg;
215	OFF end;
216	unsigned indent; / Indentation level. /
217	};
218
219	typedef struct MD_LINE_tag MD_LINE;
220	struct MD_LINE_tag {
221	OFF beg;
222	OFF end;
223	};
224
225	typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
226	struct MD_VERBATIMLINE_tag {
227	OFF beg;
228	OFF end;
229	OFF indent;
230	};
231
232
233	/*******************
234	* Debugging *
235	*******************/
236
/* Pass the message to the debug_log() callback of the parser, if the
 * application has provided one. Expects a variable "ctx" in scope. */
#define MD_LOG(msg)                                                     \
    do {                                                                \
        if(ctx->parser.debug_log != NULL)                               \
            ctx->parser.debug_log((msg), ctx->userdata);                \
    } while(0)

/* MD_ASSERT() and MD_UNREACHABLE():
 *  -- In DEBUG builds a failed assertion is logged and the process exits.
 *  -- Otherwise the condition only serves as a hint for the optimizer
 *     (where the compiler offers a way to express it), or the macros
 *     expand to nothing.
 */
#if defined(DEBUG)
    #define MD_ASSERT(cond)                                             \
            do {                                                        \
                if(!(cond)) {                                           \
                    MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "        \
                           "Assertion '" STRINGIZE(cond) "' failed.");  \
                    exit(1);                                            \
                }                                                       \
            } while(0)

    #define MD_UNREACHABLE()    MD_ASSERT(1 == 0)
#elif defined(__GNUC__)
    #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
    #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
#elif defined(_MSC_VER)  &&  _MSC_VER > 120
    #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
    #define MD_UNREACHABLE()    do { __assume(0); } while(0)
#else
    #define MD_ASSERT(cond)     do {} while(0)
    #define MD_UNREACHABLE()    do {} while(0)
#endif
266
267
268	/*****************
269	* Helpers *
270	*****************/
271
/* Character accessors: the character at the given offset of the input text
 * and the pointer to it. Both expect a variable "ctx" in scope. */
#define CH(off)                 (*(ctx->text + (off)))
#define STR(off)                (&ctx->text[(off)])

/* Check whether the pointer points into ctx->text, i.e. into the range
 * starting at ctx->text and ctx->size characters long. */
#define IS_INPUT_STR(ptr)       ((ptr) >= ctx->text  &&  (ptr) < (ctx->text + ctx->size))
278
/* Character classification.
 *
 * The macros with the trailing underscore take a character value, their
 * counterparts without it take an offset into ctx->text.
 *
 * Note we assume ASCII compatibility of code points < 128 here. */

/* strchr()-like lookup matching the width of CHAR. */
#if defined MD4C_USE_UTF16
    #define md_strchr wcschr
#else
    #define md_strchr strchr
#endif

#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
/* The explicit check for the NUL character is needed because strchr() (and
 * wcschr()) treat the string terminator as a part of the string; without it
 * a NUL character in the input would match any palette. */
#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))

#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
#define ISASCII(off)                    ISASCII_(CH(off))
#define ISBLANK(off)                    ISBLANK_(CH(off))
#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
#define ISCNTRL(off)                    ISCNTRL_(CH(off))
#define ISPUNCT(off)                    ISPUNCT_(CH(off))
#define ISUPPER(off)                    ISUPPER_(CH(off))
#define ISLOWER(off)                    ISLOWER_(CH(off))
#define ISALPHA(off)                    ISALPHA_(CH(off))
#define ISDIGIT(off)                    ISDIGIT_(CH(off))
#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
#define ISALNUM(off)                    ISALNUM_(CH(off))
320
321
322	/ Case insensitive check of string equality. /
323	static inline int
324	md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
325	{
326	OFF i;
327	for(i = `0`; i < n; i++) {
328	CHAR ch1 = s1[i];
329	CHAR ch2 = s2[i];
330
331	if(ISLOWER_(ch1))
332	ch1 += (`'A'`-`'a'`);
333	if(ISLOWER_(ch2))
334	ch2 += (`'A'`-`'a'`);
335	if(ch1 != ch2)
336	return FALSE;
337	}
338	return TRUE;
339	}
340
341	static inline int
342	md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
343	{
344	return memcmp(s1, s2, n * sizeof(CHAR)) == `0`;
345	}
346
347	static int
348	md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
349	{
350	OFF off = `0`;
351	int ret = `0`;
352
353	while(`1`) {
354	while(off < size && str[off] != _T(`'\0'`))
355	off++;
356
357	if(off > `0`) {
358	ret = ctx->parser.text(type, str, off, ctx->userdata);
359	if(ret != `0`)
360	return ret;
361
362	str += off;
363	size -= off;
364	off = `0`;
365	}
366
367	if(off >= size)
368	return `0`;
369
370	ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), `1`, ctx->userdata);
371	if(ret != `0`)
372	return ret;
373	off++;
374	}
375	}
376
377
/* Evaluate the expression, store its value into the variable "ret" and, if
 * the value is negative, jump to the label "abort". Both the variable and
 * the label have to be provided by the function using the macro. */
#define MD_CHECK(func)                                                      \
    do {                                                                    \
        if((ret = (func)) < 0)                                              \
            goto abort;                                                     \
    } while(0)
384
385
/* Make sure the helper buffer ctx->buffer has capacity of at least (sz).
 *
 * If the current capacity (ctx->alloc_buffer) is not sufficient, the buffer
 * is reallocated to one and half of the requested size plus 128, rounded
 * down to a multiple of 128. When realloc() fails, the failure is logged,
 * "ret" is set to -1 and the control jumps to the label "abort"; the old
 * buffer stays valid in ctx->buffer in that case.
 *
 * Expects the variables "ctx" and "ret" and the label "abort" in scope.
 *
 * NOTE(review): the new size is handed over to realloc() as is, i.e. as
 * a count of bytes; presumably the callers specify (sz) in bytes too --
 * confirm for builds where CHAR is wider than one byte.
 */
#define MD_TEMP_BUFFER(sz)                                                  \
    do {                                                                    \
        if((sz) > ctx->alloc_buffer) {                                      \
            CHAR* new_buffer;                                               \
            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
                                                                            \
            new_buffer = realloc(ctx->buffer, new_size);                    \
            if(new_buffer == NULL) {                                        \
                MD_LOG("realloc() failed.");                                \
                ret = -1;                                                   \
                goto abort;                                                 \
            }                                                               \
                                                                            \
            ctx->buffer = new_buffer;                                       \
            ctx->alloc_buffer = new_size;                                   \
        }                                                                   \
    } while(0)
403
404
/* Wrappers for calling the callbacks of the parser.
 *
 * Each of them stores the value returned from the callback into the variable
 * "ret" and, if it is non-zero, logs a message and jumps to the label
 * "abort". They expect "ctx", "ret" and the label in scope.
 */

#define MD_ENTER_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_ENTER_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

/* The text() callback is called only if the text is not empty. */
#define MD_TEXT(type, str, size)                                            \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)

/* Like MD_TEXT(), but the text goes through
 * md_text_with_null_replacement() rather than directly to the callback. */
#define MD_TEXT_INSECURE(type, str, size)                                   \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = md_text_with_null_replacement(ctx, (type), (str), (size)); \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)
462
463
464
465	/*************************
466	* Unicode Support *
467	*************************/
468
/* Result of a case folding of a single codepoint: up to three codepoints. */
typedef struct MD_UNICODE_FOLD_INFO_tag {
    unsigned codepoints[3];     /* The codepoints the original one folds to. */
    int n_codepoints;           /* Count of the valid items in codepoints[]. */
} MD_UNICODE_FOLD_INFO;
474
475
476	#if defined MD4C_USE_UTF16 \|\| defined MD4C_USE_UTF8
/* Binary search over a sorted "map" of codepoints.
 *
 * A record of the map is either a single codepoint, or a range of
 * consecutive codepoints encoded as a pair of neighboring items:
 * (MIN_CODEPOINT | 0x40000000) followed by (MAX_CODEPOINT | 0x80000000).
 *
 * Returns index of the found record in the map (in the case of a range,
 * the index of its minimal value is used); or -1 on failure. */
static int
md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
{
    int lo = 0;
    int hi = (int) map_size - 1;

    while(lo <= hi) {
        int middle = (lo + hi) / 2;
        int first = middle;
        int last = middle;
        unsigned cp_min;
        unsigned cp_max;

        /* The pivot may be a range, not just a single value: if we have hit
         * one end of a range, extend the pivot to cover the other end too. */
        if(map[middle] & 0x40000000)
            last++;
        if(map[middle] & 0x80000000)
            first--;

        cp_min = map[first] & 0x00ffffff;
        cp_max = map[last] & 0x00ffffff;

        if(codepoint < cp_min) {
            hi = first - 1;
            continue;
        }
        if(codepoint > cp_max) {
            lo = last + 1;
            continue;
        }

        return first;
    }

    return -1;
}
509
/* Check whether the codepoint is a whitespace. Returns non-zero if it is. */
static int
md_is_unicode_whitespace__(unsigned codepoint)
{
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
    /* Unicode "Zs" category.
     * (generated by scripts/build_whitespace_map.py) */
    static const unsigned WHITESPACE_MAP[] = {
        S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
    };
#undef R
#undef S
    int index;

    if(codepoint > 0x7f) {
        index = md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP));
        return (index >= 0);
    }

    /* The ASCII ones are the most frequently used ones, also CommonMark
     * specification requests few more in this range. */
    return ISWHITESPACE_(codepoint);
}
530
/* Check whether the codepoint is a punctuation character. Returns non-zero
 * if it is. */
static int
md_is_unicode_punct__(unsigned codepoint)
{
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
    /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
     * (generated by scripts/build_punct_map.py) */
    static const unsigned PUNCT_MAP[] = {
        R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
        R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
        S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
        S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
        R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
        R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
        R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
        R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
        R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
        R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
        R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
        R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
        R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
        R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
        R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f), S(0x3030),
        S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
        R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
        S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
        S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
        R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
        R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
        S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
        R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), R(0x10f55,0x10f59), R(0x11047,0x1104d),
        R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175), R(0x111c5,0x111c8),
        S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9), R(0x1144b,0x1144f),
        S(0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643), R(0x11660,0x1166c),
        R(0x1173c,0x1173e), S(0x1183b), S(0x119e2), R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2),
        R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), S(0x11fff), R(0x12470,0x12474),
        R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44), R(0x16e97,0x16e9a), S(0x16fe2),
        S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
    };
#undef R
#undef S
    int index;

    if(codepoint > 0x7f) {
        index = md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP));
        return (index >= 0);
    }

    /* The ASCII ones are the most frequently used ones, also CommonMark
     * specification requests few more in this range. */
    return ISPUNCT_(codepoint);
}
580
581	static void
582	md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
583	{
584	#define R(cp_min, cp_max) ((cp_min) \| 0x40000000), ((cp_max) \| 0x80000000)
585	#define S(cp) (cp)
586	/ Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.*
587	* (generated by scripts/build_punct_map.py) */
588	static const unsigned FOLD_MAP_1[] = {
589	R(`0x0041`,`0x005a`), S(`0x00b5`), R(`0x00c0`,`0x00d6`), R(`0x00d8`,`0x00de`), R(`0x0100`,`0x012e`), R(`0x0132`,`0x0136`),
590	R(`0x0139`,`0x0147`), R(`0x014a`,`0x0176`), S(`0x0178`), R(`0x0179`,`0x017d`), S(`0x017f`), S(`0x0181`), S(`0x0182`),
591	S(`0x0186`), S(`0x0187`), S(`0x0189`), S(`0x018b`), S(`0x018e`), S(`0x018f`), S(`0x0190`), S(`0x0191`), S(`0x0193`),
592	S(`0x0194`), S(`0x0196`), S(`0x0197`), S(`0x0198`), S(`0x019c`), S(`0x019d`), S(`0x019f`), R(`0x01a0`,`0x01a4`), S(`0x01a6`),
593	S(`0x01a7`), S(`0x01a9`), S(`0x01ac`), S(`0x01ae`), S(`0x01af`), S(`0x01b1`), S(`0x01b3`), S(`0x01b7`), S(`0x01b8`),
594	S(`0x01bc`), S(`0x01c4`), S(`0x01c5`), S(`0x01c7`), S(`0x01c8`), S(`0x01ca`), R(`0x01cb`,`0x01db`), R(`0x01de`,`0x01ee`),
595	S(`0x01f1`), S(`0x01f2`), S(`0x01f6`), S(`0x01f7`), R(`0x01f8`,`0x021e`), S(`0x0220`), R(`0x0222`,`0x0232`), S(`0x023a`),
596	S(`0x023b`), S(`0x023d`), S(`0x023e`), S(`0x0241`), S(`0x0243`), S(`0x0244`), S(`0x0245`), R(`0x0246`,`0x024e`), S(`0x0345`),
597	S(`0x0370`), S(`0x0376`), S(`0x037f`), S(`0x0386`), R(`0x0388`,`0x038a`), S(`0x038c`), S(`0x038e`), R(`0x0391`,`0x03a1`),
598	R(`0x03a3`,`0x03ab`), S(`0x03c2`), S(`0x03cf`), S(`0x03d0`), S(`0x03d1`), S(`0x03d5`), S(`0x03d6`), R(`0x03d8`,`0x03ee`),
599	S(`0x03f0`), S(`0x03f1`), S(`0x03f4`), S(`0x03f5`), S(`0x03f7`), S(`0x03f9`), S(`0x03fa`), R(`0x03fd`,`0x03ff`),
600	R(`0x0400`,`0x040f`), R(`0x0410`,`0x042f`), R(`0x0460`,`0x0480`), R(`0x048a`,`0x04be`), S(`0x04c0`), R(`0x04c1`,`0x04cd`),
601	R(`0x04d0`,`0x052e`), R(`0x0531`,`0x0556`), R(`0x10a0`,`0x10c5`), S(`0x10c7`), S(`0x10cd`), R(`0x13f8`,`0x13fd`), S(`0x1c80`),
602	S(`0x1c81`), S(`0x1c82`), S(`0x1c83`), S(`0x1c85`), S(`0x1c86`), S(`0x1c87`), S(`0x1c88`), R(`0x1c90`,`0x1cba`),
603	R(`0x1cbd`,`0x1cbf`), R(`0x1e00`,`0x1e94`), S(`0x1e9b`), R(`0x1ea0`,`0x1efe`), R(`0x1f08`,`0x1f0f`), R(`0x1f18`,`0x1f1d`),
604	R(`0x1f28`,`0x1f2f`), R(`0x1f38`,`0x1f3f`), R(`0x1f48`,`0x1f4d`), S(`0x1f59`), S(`0x1f5b`), S(`0x1f5d`), S(`0x1f5f`),
605	R(`0x1f68`,`0x1f6f`), S(`0x1fb8`), S(`0x1fba`), S(`0x1fbe`), R(`0x1fc8`,`0x1fcb`), S(`0x1fd8`), S(`0x1fda`), S(`0x1fe8`),
606	S(`0x1fea`), S(`0x1fec`), S(`0x1ff8`), S(`0x1ffa`), S(`0x2126`), S(`0x212a`), S(`0x212b`), S(`0x2132`), R(`0x2160`,`0x216f`),
607	S(`0x2183`), R(`0x24b6`,`0x24cf`), R(`0x2c00`,`0x2c2e`), S(`0x2c60`), S(`0x2c62`), S(`0x2c63`), S(`0x2c64`),
608	R(`0x2c67`,`0x2c6b`), S(`0x2c6d`), S(`0x2c6e`), S(`0x2c6f`), S(`0x2c70`), S(`0x2c72`), S(`0x2c75`), S(`0x2c7e`),
609	R(`0x2c80`,`0x2ce2`), S(`0x2ceb`), S(`0x2cf2`), R(`0xa640`,`0xa66c`), R(`0xa680`,`0xa69a`), R(`0xa722`,`0xa72e`),
610	R(`0xa732`,`0xa76e`), S(`0xa779`), S(`0xa77d`), R(`0xa77e`,`0xa786`), S(`0xa78b`), S(`0xa78d`), S(`0xa790`),
611	R(`0xa796`,`0xa7a8`), S(`0xa7aa`), S(`0xa7ab`), S(`0xa7ac`), S(`0xa7ad`), S(`0xa7ae`), S(`0xa7b0`), S(`0xa7b1`), S(`0xa7b2`),
612	S(`0xa7b3`), R(`0xa7b4`,`0xa7be`), S(`0xa7c2`), S(`0xa7c4`), S(`0xa7c5`), S(`0xa7c6`), R(`0xab70`,`0xabbf`),
613	R(`0xff21`,`0xff3a`), R(`0x10400`,`0x10427`), R(`0x104b0`,`0x104d3`), R(`0x10c80`,`0x10cb2`), R(`0x118a0`,`0x118bf`),
614	R(`0x16e40`,`0x16e5f`), R(`0x1e900`,`0x1e921`)
615	};
616	static const unsigned FOLD_MAP_1_DATA[] = {
617	`0x0061`, `0x007a`, `0x03bc`, `0x00e0`, `0x00f6`, `0x00f8`, `0x00fe`, `0x0101`, `0x012f`, `0x0133`, `0x0137`, `0x013a`, `0x0148`,
618	`0x014b`, `0x0177`, `0x00ff`, `0x017a`, `0x017e`, `0x0073`, `0x0253`, `0x0183`, `0x0254`, `0x0188`, `0x0256`, `0x018c`, `0x01dd`,
619	`0x0259`, `0x025b`, `0x0192`, `0x0260`, `0x0263`, `0x0269`, `0x0268`, `0x0199`, `0x026f`, `0x0272`, `0x0275`, `0x01a1`, `0x01a5`,
620	`0x0280`, `0x01a8`, `0x0283`, `0x01ad`, `0x0288`, `0x01b0`, `0x028a`, `0x01b4`, `0x0292`, `0x01b9`, `0x01bd`, `0x01c6`, `0x01c6`,
621	`0x01c9`, `0x01c9`, `0x01cc`, `0x01cc`, `0x01dc`, `0x01df`, `0x01ef`, `0x01f3`, `0x01f3`, `0x0195`, `0x01bf`, `0x01f9`, `0x021f`,
622	`0x019e`, `0x0223`, `0x0233`, `0x2c65`, `0x023c`, `0x019a`, `0x2c66`, `0x0242`, `0x0180`, `0x0289`, `0x028c`, `0x0247`, `0x024f`,
623	`0x03b9`, `0x0371`, `0x0377`, `0x03f3`, `0x03ac`, `0x03ad`, `0x03af`, `0x03cc`, `0x03cd`, `0x03b1`, `0x03c1`, `0x03c3`, `0x03cb`,
624	`0x03c3`, `0x03d7`, `0x03b2`, `0x03b8`, `0x03c6`, `0x03c0`, `0x03d9`, `0x03ef`, `0x03ba`, `0x03c1`, `0x03b8`, `0x03b5`, `0x03f8`,
625	`0x03f2`, `0x03fb`, `0x037b`, `0x037d`, `0x0450`, `0x045f`, `0x0430`, `0x044f`, `0x0461`, `0x0481`, `0x048b`, `0x04bf`, `0x04cf`,
626	`0x04c2`, `0x04ce`, `0x04d1`, `0x052f`, `0x0561`, `0x0586`, `0x2d00`, `0x2d25`, `0x2d27`, `0x2d2d`, `0x13f0`, `0x13f5`, `0x0432`,
627	`0x0434`, `0x043e`, `0x0441`, `0x0442`, `0x044a`, `0x0463`, `0xa64b`, `0x10d0`, `0x10fa`, `0x10fd`, `0x10ff`, `0x1e01`, `0x1e95`,
628	`0x1e61`, `0x1ea1`, `0x1eff`, `0x1f00`, `0x1f07`, `0x1f10`, `0x1f15`, `0x1f20`, `0x1f27`, `0x1f30`, `0x1f37`, `0x1f40`, `0x1f45`,
629	`0x1f51`, `0x1f53`, `0x1f55`, `0x1f57`, `0x1f60`, `0x1f67`, `0x1fb0`, `0x1f70`, `0x03b9`, `0x1f72`, `0x1f75`, `0x1fd0`, `0x1f76`,
630	`0x1fe0`, `0x1f7a`, `0x1fe5`, `0x1f78`, `0x1f7c`, `0x03c9`, `0x006b`, `0x00e5`, `0x214e`, `0x2170`, `0x217f`, `0x2184`, `0x24d0`,
631	`0x24e9`, `0x2c30`, `0x2c5e`, `0x2c61`, `0x026b`, `0x1d7d`, `0x027d`, `0x2c68`, `0x2c6c`, `0x0251`, `0x0271`, `0x0250`, `0x0252`,
632	`0x2c73`, `0x2c76`, `0x023f`, `0x2c81`, `0x2ce3`, `0x2cec`, `0x2cf3`, `0xa641`, `0xa66d`, `0xa681`, `0xa69b`, `0xa723`, `0xa72f`,
633	`0xa733`, `0xa76f`, `0xa77a`, `0x1d79`, `0xa77f`, `0xa787`, `0xa78c`, `0x0265`, `0xa791`, `0xa797`, `0xa7a9`, `0x0266`, `0x025c`,
634	`0x0261`, `0x026c`, `0x026a`, `0x029e`, `0x0287`, `0x029d`, `0xab53`, `0xa7b5`, `0xa7bf`, `0xa7c3`, `0xa794`, `0x0282`, `0x1d8e`,
635	`0x13a0`, `0x13ef`, `0xff41`, `0xff5a`, `0x10428`, `0x1044f`, `0x104d8`, `0x104fb`, `0x10cc0`, `0x10cf2`, `0x118c0`, `0x118df`,
636	`0x16e60`, `0x16e7f`, `0x1e922`, `0x1e943`
637	};
638	static const unsigned FOLD_MAP_2[] = {
639	S(`0x00df`), S(`0x0130`), S(`0x0149`), S(`0x01f0`), S(`0x0587`), S(`0x1e96`), S(`0x1e97`), S(`0x1e98`), S(`0x1e99`),
640	S(`0x1e9a`), S(`0x1e9e`), S(`0x1f50`), R(`0x1f80`,`0x1f87`), R(`0x1f88`,`0x1f8f`), R(`0x1f90`,`0x1f97`), R(`0x1f98`,`0x1f9f`),
641	R(`0x1fa0`,`0x1fa7`), R(`0x1fa8`,`0x1faf`), S(`0x1fb2`), S(`0x1fb3`), S(`0x1fb4`), S(`0x1fb6`), S(`0x1fbc`), S(`0x1fc2`),
642	S(`0x1fc3`), S(`0x1fc4`), S(`0x1fc6`), S(`0x1fcc`), S(`0x1fd6`), S(`0x1fe4`), S(`0x1fe6`), S(`0x1ff2`), S(`0x1ff3`),
643	S(`0x1ff4`), S(`0x1ff6`), S(`0x1ffc`), S(`0xfb00`), S(`0xfb01`), S(`0xfb02`), S(`0xfb05`), S(`0xfb06`), S(`0xfb13`),
644	S(`0xfb14`), S(`0xfb15`), S(`0xfb16`), S(`0xfb17`)
645	};
646	static const unsigned FOLD_MAP_2_DATA[] = {
647	`0x0073`,`0x0073`, `0x0069`,`0x0307`, `0x02bc`,`0x006e`, `0x006a`,`0x030c`, `0x0565`,`0x0582`, `0x0068`,`0x0331`, `0x0074`,`0x0308`,
648	`0x0077`,`0x030a`, `0x0079`,`0x030a`, `0x0061`,`0x02be`, `0x0073`,`0x0073`, `0x03c5`,`0x0313`, `0x1f00`,`0x03b9`, `0x1f07`,`0x03b9`,
649	`0x1f00`,`0x03b9`, `0x1f07`,`0x03b9`, `0x1f20`,`0x03b9`, `0x1f27`,`0x03b9`, `0x1f20`,`0x03b9`, `0x1f27`,`0x03b9`, `0x1f60`,`0x03b9`,
650	`0x1f67`,`0x03b9`, `0x1f60`,`0x03b9`, `0x1f67`,`0x03b9`, `0x1f70`,`0x03b9`, `0x03b1`,`0x03b9`, `0x03ac`,`0x03b9`, `0x03b1`,`0x0342`,
651	`0x03b1`,`0x03b9`, `0x1f74`,`0x03b9`, `0x03b7`,`0x03b9`, `0x03ae`,`0x03b9`, `0x03b7`,`0x0342`, `0x03b7`,`0x03b9`, `0x03b9`,`0x0342`,
652	`0x03c1`,`0x0313`, `0x03c5`,`0x0342`, `0x1f7c`,`0x03b9`, `0x03c9`,`0x03b9`, `0x03ce`,`0x03b9`, `0x03c9`,`0x0342`, `0x03c9`,`0x03b9`,
653	`0x0066`,`0x0066`, `0x0066`,`0x0069`, `0x0066`,`0x006c`, `0x0073`,`0x0074`, `0x0073`,`0x0074`, `0x0574`,`0x0576`, `0x0574`,`0x0565`,
654	`0x0574`,`0x056b`, `0x057e`,`0x0576`, `0x0574`,`0x056d`
655	};
656	static const unsigned FOLD_MAP_3[] = {
657	S(`0x0390`), S(`0x03b0`), S(`0x1f52`), S(`0x1f54`), S(`0x1f56`), S(`0x1fb7`), S(`0x1fc7`), S(`0x1fd2`), S(`0x1fd3`),
658	S(`0x1fd7`), S(`0x1fe2`), S(`0x1fe3`), S(`0x1fe7`), S(`0x1ff7`), S(`0xfb03`), S(`0xfb04`)
659	};
660	static const unsigned FOLD_MAP_3_DATA[] = {
661	`0x03b9`,`0x0308`,`0x0301`, `0x03c5`,`0x0308`,`0x0301`, `0x03c5`,`0x0313`,`0x0300`, `0x03c5`,`0x0313`,`0x0301`,
662	`0x03c5`,`0x0313`,`0x0342`, `0x03b1`,`0x0342`,`0x03b9`, `0x03b7`,`0x0342`,`0x03b9`, `0x03b9`,`0x0308`,`0x0300`,
663	`0x03b9`,`0x0308`,`0x0301`, `0x03b9`,`0x0308`,`0x0342`, `0x03c5`,`0x0308`,`0x0300`, `0x03c5`,`0x0308`,`0x0301`,
664	`0x03c5`,`0x0308`,`0x0342`, `0x03c9`,`0x0342`,`0x03b9`, `0x0066`,`0x0066`,`0x0069`, `0x0066`,`0x0066`,`0x006c`
665	};
666	#undef R
667	#undef S
668	static const struct {
669	const unsigned* map;
670	const unsigned* data;
671	size_t map_size;
672	int n_codepoints;
673	} FOLD_MAP_LIST[] = {
674	{ FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), `1` },
675	{ FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), `2` },
676	{ FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), `3` }
677	};
678
679	int i;
680
681	/ Fast path for ASCII characters. /
682	if(codepoint <= `0x7f`) {
683	info->codepoints[`0`] = codepoint;
684	if(ISUPPER_(codepoint))
685	info->codepoints[`0`] += `'a'` - `'A'`;
686	info->n_codepoints = `1`;
687	return;
688	}
689
690	/ Try to locate the codepoint in any of the maps. /
691	for(i = `0`; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
692	int index;
693
694	index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
695	if(index >= `0`) {
696	/ Found the mapping. /
697	int n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
698	const unsigned* map = FOLD_MAP_LIST[i].map;
699	const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
700
701	memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
702	info->n_codepoints = n_codepoints;
703
704	if(FOLD_MAP_LIST[i].map[index] != codepoint) {
705	/ The found mapping maps whole range of codepoints,*
706	* i.e. we have to offset info->codepoints[0] accordingly. */
707	if((map[index] & `0x00ffffff`)+`1` == codepoints[`0`]) {
708	/ Alternating type of the range. /
709	info->codepoints[`0`] = codepoint + ((codepoint & `0x1`) == (map[index] & `0x1`) ? `1` : `0`);
710	} else {
711	/ Range to range kind of mapping. /
712	info->codepoints[`0`] += (codepoint - (map[index] & `0x00ffffff`));
713	}
714	}
715
716	return;
717	}
718	}
719
720	/ No mapping found. Map the codepoint to itself. /
721	info->codepoints[`0`] = codepoint;
722	info->n_codepoints = `1`;
723	}
724	#endif
725
726
727	#if defined MD4C_USE_UTF16
728	#define IS_UTF16_SURROGATE_HI(word) (((WORD)(word) & 0xfc00) == 0xd800)
729	#define IS_UTF16_SURROGATE_LO(word) (((WORD)(word) & 0xfc00) == 0xdc00)
730	#define UTF16_DECODE_SURROGATE(hi, lo) (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) \| (((unsigned)(lo) & 0x3ff) << 0)))
731
732	static unsigned
733	md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
734	{
735	if(IS_UTF16_SURROGATE_HI(str[`0`])) {
736	if(`1` < str_size && IS_UTF16_SURROGATE_LO(str[`1`])) {
737	if(p_size != NULL)
738	*p_size = `2`;
739	return UTF16_DECODE_SURROGATE(str[`0`], str[`1`]);
740	}
741	}
742
743	if(p_size != NULL)
744	*p_size = `1`;
745	return str[`0`];
746	}
747
748	static unsigned
749	md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
750	{
751	if(off > `2` && IS_UTF16_SURROGATE_HI(CH(off-`2`)) && IS_UTF16_SURROGATE_LO(CH(off-`1`)))
752	return UTF16_DECODE_SURROGATE(CH(off-`2`), CH(off-`1`));
753
754	return CH(off);
755	}
756
/* Character classification for the UTF-16 build.
 *
 * No whitespace uses surrogates, so the whitespace tests look at a single
 * code unit and need no decoding. The punctuation tests decode a whole
 * codepoint first; they refer to a variable named 'ctx' which has to be in
 * scope at the point of use. */
#define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
#define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(CH(off))
#define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(CH((off)-1))

#define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
764
765	static inline int
766	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
767	{
768	return md_decode_utf16le__(str+off, str_size-off, p_char_size);
769	}
770	#elif defined MD4C_USE_UTF8
/* UTF-8 byte classification by the leading bits of the byte:
 *   0xxxxxxx  single-byte sequence
 *   110xxxxx  lead of a 2-byte sequence
 *   1110xxxx  lead of a 3-byte sequence
 *   11110xxx  lead of a 4-byte sequence
 *   10xxxxxx  continuation (tail) byte
 */
#define IS_UTF8_LEAD1(byte)     ((((unsigned char)(byte)) & 0x80) == 0x00)
#define IS_UTF8_LEAD2(byte)     ((((unsigned char)(byte)) >> 5) == 0x06)
#define IS_UTF8_LEAD3(byte)     ((((unsigned char)(byte)) >> 4) == 0x0e)
#define IS_UTF8_LEAD4(byte)     ((((unsigned char)(byte)) >> 3) == 0x1e)
#define IS_UTF8_TAIL(byte)      ((((unsigned char)(byte)) >> 6) == 0x02)
776
777	static unsigned
778	md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
779	{
780	if(!IS_UTF8_LEAD1(str[`0`])) {
781	if(IS_UTF8_LEAD2(str[`0`])) {
782	if(`1` < str_size && IS_UTF8_TAIL(str[`1`])) {
783	if(p_size != NULL)
784	*p_size = `2`;
785
786	return (((unsigned int)str[`0`] & `0x1f`) << `6`) \|
787	(((unsigned int)str[`1`] & `0x3f`) << `0`);
788	}
789	} else if(IS_UTF8_LEAD3(str[`0`])) {
790	if(`2` < str_size && IS_UTF8_TAIL(str[`1`]) && IS_UTF8_TAIL(str[`2`])) {
791	if(p_size != NULL)
792	*p_size = `3`;
793
794	return (((unsigned int)str[`0`] & `0x0f`) << `12`) \|
795	(((unsigned int)str[`1`] & `0x3f`) << `6`) \|
796	(((unsigned int)str[`2`] & `0x3f`) << `0`);
797	}
798	} else if(IS_UTF8_LEAD4(str[`0`])) {
799	if(`3` < str_size && IS_UTF8_TAIL(str[`1`]) && IS_UTF8_TAIL(str[`2`]) && IS_UTF8_TAIL(str[`3`])) {
800	if(p_size != NULL)
801	*p_size = `4`;
802
803	return (((unsigned int)str[`0`] & `0x07`) << `18`) \|
804	(((unsigned int)str[`1`] & `0x3f`) << `12`) \|
805	(((unsigned int)str[`2`] & `0x3f`) << `6`) \|
806	(((unsigned int)str[`3`] & `0x3f`) << `0`);
807	}
808	}
809	}
810
811	if(p_size != NULL)
812	*p_size = `1`;
813	return (unsigned) str[`0`];
814	}
815
816	static unsigned
817	md_decode_utf8_before__(MD_CTX* ctx, OFF off)
818	{
819	if(!IS_UTF8_LEAD1(CH(off-`1`))) {
820	if(off > `1` && IS_UTF8_LEAD2(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
821	return (((unsigned int)CH(off-`2`) & `0x1f`) << `6`) \|
822	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
823
824	if(off > `2` && IS_UTF8_LEAD3(CH(off-`3`)) && IS_UTF8_TAIL(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
825	return (((unsigned int)CH(off-`3`) & `0x0f`) << `12`) \|
826	(((unsigned int)CH(off-`2`) & `0x3f`) << `6`) \|
827	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
828
829	if(off > `3` && IS_UTF8_LEAD4(CH(off-`4`)) && IS_UTF8_TAIL(CH(off-`3`)) && IS_UTF8_TAIL(CH(off-`2`)) && IS_UTF8_TAIL(CH(off-`1`)))
830	return (((unsigned int)CH(off-`4`) & `0x07`) << `18`) \|
831	(((unsigned int)CH(off-`3`) & `0x3f`) << `12`) \|
832	(((unsigned int)CH(off-`2`) & `0x3f`) << `6`) \|
833	(((unsigned int)CH(off-`1`) & `0x3f`) << `0`);
834	}
835
836	return (unsigned) CH(off-`1`);
837	}
838
/* Character classification for the UTF-8 build.
 *
 * The offset-based variants decode the codepoint at (or just before) the
 * given document offset first. They refer to a variable named 'ctx' which
 * has to be in scope at the point of use. */
#define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
#define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))

#define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
845
846	static inline unsigned
847	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
848	{
849	return md_decode_utf8__(str+off, str_size-off, p_char_size);
850	}
851	#else
/* Character classification for the ASCII-only build: the "Unicode" tests
 * simply map to their ASCII counterparts. */
#define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
#define ISUNICODEWHITESPACE(off)        ISWHITESPACE(off)
#define ISUNICODEWHITESPACEBEFORE(off)  ISWHITESPACE((off)-1)

#define ISUNICODEPUNCT(off)             ISPUNCT(off)
#define ISUNICODEPUNCTBEFORE(off)       ISPUNCT((off)-1)
858
859	static inline void
860	md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
861	{
862	info->codepoints[`0`] = codepoint;
863	if(ISUPPER_(codepoint))
864	info->codepoints[`0`] += `'a'` - `'A'`;
865	info->n_codepoints = `1`;
866	}
867
868	static inline unsigned
869	md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
870	{
871	*p_size = `1`;
872	return (unsigned) str[off];
873	}
874	#endif
875
876
877	/*************************************
878	* Helper string manipulations *
879	*************************************/
880
881	/ Fill buffer with copy of the string between 'beg' and 'end' but replace any*
882	* line breaks with given replacement character.
883	*
884	* NOTE: Caller is responsible to make sure the buffer is large enough.
885	* (Given the output is always shorter then input, (end - beg) is good idea
886	* what the caller should allocate.)
887	*/
888	static void
889	md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
890	CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
891	{
892	CHAR* ptr = buffer;
893	int line_index = `0`;
894	OFF off = beg;
895
896	while(`1`) {
897	const MD_LINE* line = &lines[line_index];
898	OFF line_end = line->end;
899	if(end < line_end)
900	line_end = end;
901
902	while(off < line_end) {
903	*ptr = CH(off);
904	ptr++;
905	off++;
906	}
907
908	if(off >= end) {
909	*p_size = ptr - buffer;
910	return;
911	}
912
913	*ptr = line_break_replacement_char;
914	ptr++;
915
916	line_index++;
917	off = lines[line_index].beg;
918	}
919	}
920
921	/ Wrapper of md_merge_lines() which allocates new buffer for the output string.*
922	*/
923	static int
924	md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
925	CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
926	{
927	CHAR* buffer;
928
929	buffer = (CHAR) malloc(sizeof(CHAR) (end - beg));
930	if(buffer == NULL) {
931	MD_LOG("malloc() failed.");
932	return -`1`;
933	}
934
935	md_merge_lines(ctx, beg, end, lines, n_lines,
936	line_break_replacement_char, buffer, p_size);
937
938	*p_str = buffer;
939	return `0`;
940	}
941
942	static OFF
943	md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
944	{
945	SZ char_size;
946	unsigned codepoint;
947
948	while(off < size) {
949	codepoint = md_decode_unicode(label, off, size, &char_size);
950	if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
951	break;
952	off += char_size;
953	}
954
955	return off;
956	}
957
958
959	/******************************
960	* Recognizing raw HTML *
961	******************************/
962
963	/ md_is_html_tag() may be called when processing inlines (inline raw HTML)*
964	* or when breaking document to blocks (checking for start of HTML block type 7).
965	*
966	* When breaking document to blocks, we do not yet know line boundaries, but
967	* in that case the whole tag has to live on a single line. We distinguish this
968	* by n_lines == 0.
969	*/
970	static int
971	md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
972	{
973	int attr_state;
974	OFF off = beg;
975	OFF line_end = (n_lines > `0`) ? lines[`0`].end : ctx->size;
976	int i = `0`;
977
978	MD_ASSERT(CH(beg) == _T(`'<'`));
979
980	if(off + `1` >= line_end)
981	return FALSE;
982	off++;
983
984	/ For parsing attributes, we need a little state automaton below.*
985	* State -1: no attributes are allowed.
986	* State 0: attribute could follow after some whitespace.
987	* State 1: after a whitespace (attribute name may follow).
988	* State 2: after attribute name ('=' MAY follow).
989	* State 3: after '=' (value specification MUST follow).
990	* State 41: in middle of unquoted attribute value.
991	* State 42: in middle of single-quoted attribute value.
992	* State 43: in middle of double-quoted attribute value.
993	*/
994	attr_state = `0`;
995
996	if(CH(off) == _T(`'/'`)) {
997	/ Closer tag "</ ... >". No attributes may be present. /
998	attr_state = -`1`;
999	off++;
1000	}
1001
1002	/ Tag name /
1003	if(off >= line_end \|\| !ISALPHA(off))
1004	return FALSE;
1005	off++;
1006	while(off < line_end && (ISALNUM(off) \|\| CH(off) == _T(`'-'`)))
1007	off++;
1008
1009	/ (Optional) attributes (if not closer), (optional) '/' (if not closer)*
1010	* and final '>'. */
1011	while(`1`) {
1012	while(off < line_end && !ISNEWLINE(off)) {
1013	if(attr_state > `40`) {
1014	if(attr_state == `41` && (ISBLANK(off) \|\| ISANYOF(off, _T("\"'=<>`")))) {
1015	attr_state = `0`;
1016	off--; / Put the char back for re-inspection in the new state. /
1017	} else if(attr_state == `42` && CH(off) == _T(`'\''`)) {
1018	attr_state = `0`;
1019	} else if(attr_state == `43` && CH(off) == _T(`'"'`)) {
1020	attr_state = `0`;
1021	}
1022	off++;
1023	} else if(ISWHITESPACE(off)) {
1024	if(attr_state == `0`)
1025	attr_state = `1`;
1026	off++;
1027	} else if(attr_state <= `2` && CH(off) == _T(`'>'`)) {
1028	/ End. /
1029	goto done;
1030	} else if(attr_state <= `2` && CH(off) == _T(`'/'`) && off+`1` < line_end && CH(off+`1`) == _T(`'>'`)) {
1031	/ End with digraph '/>' /
1032	off++;
1033	goto done;
1034	} else if((attr_state == `1` \|\| attr_state == `2`) && (ISALPHA(off) \|\| CH(off) == _T(`'_'`) \|\| CH(off) == _T(`':'`))) {
1035	off++;
1036	/ Attribute name /
1037	while(off < line_end && (ISALNUM(off) \|\| ISANYOF(off, _T("_.:-"))))
1038	off++;
1039	attr_state = `2`;
1040	} else if(attr_state == `2` && CH(off) == _T(`'='`)) {
1041	/ Attribute assignment sign /
1042	off++;
1043	attr_state = `3`;
1044	} else if(attr_state == `3`) {
1045	/ Expecting start of attribute value. /
1046	if(CH(off) == _T(`'"'`))
1047	attr_state = `43`;
1048	else if(CH(off) == _T(`'\''`))
1049	attr_state = `42`;
1050	else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off))
1051	attr_state = `41`;
1052	else
1053	return FALSE;
1054	off++;
1055	} else {
1056	/ Anything unexpected. /
1057	return FALSE;
1058	}
1059	}
1060
1061	/ We have to be on a single line. See definition of start condition*
1062	* of HTML block, type 7. */
1063	if(n_lines == `0`)
1064	return FALSE;
1065
1066	i++;
1067	if(i >= n_lines)
1068	return FALSE;
1069
1070	off = lines[i].beg;
1071	line_end = lines[i].end;
1072
1073	if(attr_state == `0` \|\| attr_state == `41`)
1074	attr_state = `1`;
1075
1076	if(off >= max_end)
1077	return FALSE;
1078	}
1079
1080	done:
1081	if(off >= max_end)
1082	return FALSE;
1083
1084	*p_end = off+`1`;
1085	return TRUE;
1086	}
1087
1088	static int
1089	md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1090	const MD_LINE* lines, int n_lines,
1091	OFF beg, OFF max_end, OFF* p_end,
1092	OFF* p_scan_horizon)
1093	{
1094	OFF off = beg;
1095	int i = `0`;
1096
1097	if(off < p_scan_horizon && p_scan_horizon >= max_end - len) {
1098	/ We have already scanned the range up to the max_end so we know*
1099	* there is nothing to see. */
1100	return FALSE;
1101	}
1102
1103	while(TRUE) {
1104	while(off + len <= lines[i].end && off + len <= max_end) {
1105	if(md_ascii_eq(STR(off), str, len)) {
1106	/ Success. /
1107	*p_end = off + len;
1108	return TRUE;
1109	}
1110	off++;
1111	}
1112
1113	i++;
1114	if(off >= max_end \|\| i >= n_lines) {
1115	/ Failure. /
1116	*p_scan_horizon = off;
1117	return FALSE;
1118	}
1119
1120	off = lines[i].beg;
1121	}
1122	}
1123
1124	static int
1125	md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1126	{
1127	OFF off = beg;
1128
1129	MD_ASSERT(CH(beg) == _T(`'<'`));
1130
1131	if(off + `4` >= lines[`0`].end)
1132	return FALSE;
1133	if(CH(off+`1`) != _T(`'!'`) \|\| CH(off+`2`) != _T(`'-'`) \|\| CH(off+`3`) != _T(`'-'`))
1134	return FALSE;
1135	off += `4`;
1136
1137	/ ">" and "->" must not follow the opening. /
1138	if(off < lines[`0`].end && CH(off) == _T(`'>'`))
1139	return FALSE;
1140	if(off+`1` < lines[`0`].end && CH(off) == _T(`'-'`) && CH(off+`1`) == _T(`'>'`))
1141	return FALSE;
1142
1143	/ HTML comment must not contain "--", so we scan just for "--" instead*
1144	* of "-->" and verify manually that '>' follows. */
1145	if(md_scan_for_html_closer(ctx, _T("--"), `2`,
1146	lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon))
1147	{
1148	if(p_end < max_end && CH(p_end) == _T(`'>'`)) {
1149	p_end = p_end + `1`;
1150	return TRUE;
1151	}
1152	}
1153
1154	return FALSE;
1155	}
1156
1157	static int
1158	md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1159	{
1160	OFF off = beg;
1161
1162	if(off + `2` >= lines[`0`].end)
1163	return FALSE;
1164	if(CH(off+`1`) != _T(`'?'`))
1165	return FALSE;
1166	off += `2`;
1167
1168	return md_scan_for_html_closer(ctx, _T("?>"), `2`,
1169	lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
1170	}
1171
1172	static int
1173	md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1174	{
1175	OFF off = beg;
1176
1177	if(off + `2` >= lines[`0`].end)
1178	return FALSE;
1179	if(CH(off+`1`) != _T(`'!'`))
1180	return FALSE;
1181	off += `2`;
1182
1183	/ Declaration name. /
1184	if(off >= lines[`0`].end \|\| !ISALPHA(off))
1185	return FALSE;
1186	off++;
1187	while(off < lines[`0`].end && ISALPHA(off))
1188	off++;
1189	if(off < lines[`0`].end && !ISWHITESPACE(off))
1190	return FALSE;
1191
1192	return md_scan_for_html_closer(ctx, _T(">"), `1`,
1193	lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
1194	}
1195
1196	static int
1197	md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1198	{
1199	static const CHAR open_str[] = _T("<![CDATA[");
1200	static const SZ open_size = SIZEOF_ARRAY(open_str) - `1`;
1201
1202	OFF off = beg;
1203
1204	if(off + open_size >= lines[`0`].end)
1205	return FALSE;
1206	if(memcmp(STR(off), open_str, open_size) != `0`)
1207	return FALSE;
1208	off += open_size;
1209
1210	if(lines[n_lines-`1`].end < max_end)
1211	max_end = lines[n_lines-`1`].end - `2`;
1212
1213	return md_scan_for_html_closer(ctx, _T("]]>"), `3`,
1214	lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
1215	}
1216
1217	static int
1218	md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1219	{
1220	MD_ASSERT(CH(beg) == _T(`'<'`));
1221	return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1222	md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1223	md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1224	md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) \|\|
1225	md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1226	}
1227
1228
1229	/****************************
1230	* Recognizing Entity *
1231	****************************/
1232
1233	static int
1234	md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1235	{
1236	OFF off = beg;
1237
1238	while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= `8`)
1239	off++;
1240
1241	if(`1` <= off - beg && off - beg <= `6`) {
1242	*p_end = off;
1243	return TRUE;
1244	} else {
1245	return FALSE;
1246	}
1247	}
1248
1249	static int
1250	md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1251	{
1252	OFF off = beg;
1253
1254	while(off < max_end && ISDIGIT_(text[off]) && off - beg <= `8`)
1255	off++;
1256
1257	if(`1` <= off - beg && off - beg <= `7`) {
1258	*p_end = off;
1259	return TRUE;
1260	} else {
1261	return FALSE;
1262	}
1263	}
1264
1265	static int
1266	md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1267	{
1268	OFF off = beg;
1269
1270	if(off < max_end && ISALPHA_(text[off]))
1271	off++;
1272	else
1273	return FALSE;
1274
1275	while(off < max_end && ISALNUM_(text[off]) && off - beg <= `48`)
1276	off++;
1277
1278	if(`2` <= off - beg && off - beg <= `48`) {
1279	*p_end = off;
1280	return TRUE;
1281	} else {
1282	return FALSE;
1283	}
1284	}
1285
1286	static int
1287	md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1288	{
1289	int is_contents;
1290	OFF off = beg;
1291
1292	MD_ASSERT(text[off] == _T(`'&'`));
1293	off++;
1294
1295	if(off+`2` < max_end && text[off] == _T(`'#'`) && (text[off+`1`] == _T(`'x'`) \|\| text[off+`1`] == _T(`'X'`)))
1296	is_contents = md_is_hex_entity_contents(ctx, text, off+`2`, max_end, &off);
1297	else if(off+`1` < max_end && text[off] == _T(`'#'`))
1298	is_contents = md_is_dec_entity_contents(ctx, text, off+`1`, max_end, &off);
1299	else
1300	is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
1301
1302	if(is_contents && off < max_end && text[off] == _T(`';'`)) {
1303	*p_end = off+`1`;
1304	return TRUE;
1305	} else {
1306	return FALSE;
1307	}
1308	}
1309
1310	static inline int
1311	md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1312	{
1313	return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
1314	}
1315
1316
1317	/******************************
1318	* Attribute Management *
1319	******************************/
1320
1321	typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
1322	struct MD_ATTRIBUTE_BUILD_tag {
1323	CHAR* text;
1324	MD_TEXTTYPE* substr_types;
1325	OFF* substr_offsets;
1326	int substr_count;
1327	int substr_alloc;
1328	MD_TEXTTYPE trivial_types[`1`];
1329	OFF trivial_offsets[`2`];
1330	};
1331
1332
1333	#define MD_BUILD_ATTR_NO_ESCAPES 0x0001
1334
1335	static int
1336	md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1337	MD_TEXTTYPE type, OFF off)
1338	{
1339	if(build->substr_count >= build->substr_alloc) {
1340	MD_TEXTTYPE* new_substr_types;
1341	OFF* new_substr_offsets;
1342
1343	build->substr_alloc = (build->substr_alloc > `0`
1344	? build->substr_alloc + build->substr_alloc / `2`
1345	: `8`);
1346	new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
1347	build->substr_alloc * sizeof(MD_TEXTTYPE));
1348	if(new_substr_types == NULL) {
1349	MD_LOG("realloc() failed.");
1350	return -`1`;
1351	}
1352	/ Note +1 to reserve space for final offset (== raw_size). /
1353	new_substr_offsets = (OFF*) realloc(build->substr_offsets,
1354	(build->substr_alloc+`1`) * sizeof(OFF));
1355	if(new_substr_offsets == NULL) {
1356	MD_LOG("realloc() failed.");
1357	free(new_substr_types);
1358	return -`1`;
1359	}
1360
1361	build->substr_types = new_substr_types;
1362	build->substr_offsets = new_substr_offsets;
1363	}
1364
1365	build->substr_types[build->substr_count] = type;
1366	build->substr_offsets[build->substr_count] = off;
1367	build->substr_count++;
1368	return `0`;
1369	}
1370
1371	static void
1372	md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1373	{
1374	if(build->substr_alloc > `0`) {
1375	free(build->text);
1376	free(build->substr_types);
1377	free(build->substr_offsets);
1378	}
1379	}
1380
1381	static int
1382	md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1383	unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
1384	{
1385	OFF raw_off, off;
1386	int is_trivial;
1387	int ret = `0`;
1388
1389	memset(build, `0`, sizeof(MD_ATTRIBUTE_BUILD));
1390
1391	/ If there is no backslash and no ampersand, build trivial attribute*
1392	* without any malloc(). */
1393	is_trivial = TRUE;
1394	for(raw_off = `0`; raw_off < raw_size; raw_off++) {
1395	if(ISANYOF3_(raw_text[raw_off], _T(`'\\'`), _T(`'&'`), _T(`'\0'`))) {
1396	is_trivial = FALSE;
1397	break;
1398	}
1399	}
1400
1401	if(is_trivial) {
1402	build->text = (CHAR*) (raw_size ? raw_text : NULL);
1403	build->substr_types = build->trivial_types;
1404	build->substr_offsets = build->trivial_offsets;
1405	build->substr_count = `1`;
1406	build->substr_alloc = `0`;
1407	build->trivial_types[`0`] = MD_TEXT_NORMAL;
1408	build->trivial_offsets[`0`] = `0`;
1409	build->trivial_offsets[`1`] = raw_size;
1410	off = raw_size;
1411	} else {
1412	build->text = (CHAR) malloc(raw_size sizeof(CHAR));
1413	if(build->text == NULL) {
1414	MD_LOG("malloc() failed.");
1415	goto abort;
1416	}
1417
1418	raw_off = `0`;
1419	off = `0`;
1420
1421	while(raw_off < raw_size) {
1422	if(raw_text[raw_off] == _T(`'\0'`)) {
1423	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1424	memcpy(build->text + off, raw_text + raw_off, `1`);
1425	off++;
1426	raw_off++;
1427	continue;
1428	}
1429
1430	if(raw_text[raw_off] == _T(`'&'`)) {
1431	OFF ent_end;
1432
1433	if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
1434	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1435	memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
1436	off += ent_end - raw_off;
1437	raw_off = ent_end;
1438	continue;
1439	}
1440	}
1441
1442	if(build->substr_count == `0` \|\| build->substr_types[build->substr_count-`1`] != MD_TEXT_NORMAL)
1443	MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1444
1445	if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
1446	raw_text[raw_off] == _T(`'\\'`) && raw_off+`1` < raw_size &&
1447	(ISPUNCT_(raw_text[raw_off+`1`]) \|\| ISNEWLINE_(raw_text[raw_off+`1`])))
1448	raw_off++;
1449
1450	build->text[off++] = raw_text[raw_off++];
1451	}
1452	build->substr_offsets[build->substr_count] = off;
1453	}
1454
1455	attr->text = build->text;
1456	attr->size = off;
1457	attr->substr_offsets = build->substr_offsets;
1458	attr->substr_types = build->substr_types;
1459	return `0`;
1460
1461	abort:
1462	md_free_attribute(ctx, build);
1463	return -`1`;
1464	}
1465
1466
1467	/*********************************************
1468	* Dictionary of Reference Definitions *
1469	*********************************************/
1470
/* Parameters of the 32-bit FNV-1a hash: the initial value and the
 * multiplier. */
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* Compute the FNV-1a hash of 'n' bytes at 'data'.
 *
 * 'base' is the initial hash value: pass MD_FNV1A_BASE to start a new hash,
 * or the result of a previous call to continue hashing further data.
 * Returns the updated hash. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* byte = (const unsigned char*) data;
    unsigned hash;

    /* For each byte: XOR it in, then multiply by the prime. */
    for(hash = base; n > 0; n--, byte++)
        hash = (hash ^ *byte) * MD_FNV1A_PRIME;

    return hash;
}
1488
1489
1490	struct MD_REF_DEF_tag {
1491	CHAR* label;
1492	CHAR* title;
1493	unsigned hash;
1494	SZ label_size;
1495	SZ title_size;
1496	OFF dest_beg;
1497	OFF dest_end;
1498	};
1499
1500	/ Label equivalence is quite complicated with regards to whitespace and case*
1501	* folding. This complicates computing a hash of it as well as direct comparison
1502	* of two labels. */
1503
1504	static unsigned
1505	md_link_label_hash(const CHAR* label, SZ size)
1506	{
1507	unsigned hash = MD_FNV1A_BASE;
1508	OFF off;
1509	unsigned codepoint;
1510	int is_whitespace = FALSE;
1511
1512	off = md_skip_unicode_whitespace(label, `0`, size);
1513	while(off < size) {
1514	SZ char_size;
1515
1516	codepoint = md_decode_unicode(label, off, size, &char_size);
1517	is_whitespace = ISUNICODEWHITESPACE_(codepoint) \|\| ISNEWLINE_(label[off]);
1518
1519	if(is_whitespace) {
1520	codepoint = `' '`;
1521	hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
1522	off = md_skip_unicode_whitespace(label, off, size);
1523	} else {
1524	MD_UNICODE_FOLD_INFO fold_info;
1525
1526	md_get_unicode_fold_info(codepoint, &fold_info);
1527	hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
1528	off += char_size;
1529	}
1530	}
1531
1532	return hash;
1533	}
1534
1535	static OFF
1536	md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1537	MD_UNICODE_FOLD_INFO* fold_info)
1538	{
1539	unsigned codepoint;
1540	SZ char_size;
1541
1542	if(off >= size) {
1543	/ Treat end of a link label as a whitespace. /
1544	goto whitespace;
1545	}
1546
1547	if(ISNEWLINE_(label[off])) {
1548	/ Treat new lines as a whitespace. /
1549	off++;
1550	goto whitespace;
1551	}
1552
1553	codepoint = md_decode_unicode(label, off, size, &char_size);
1554	off += char_size;
1555	if(ISUNICODEWHITESPACE_(codepoint)) {
1556	/ Treat all whitespace as equivalent /
1557	goto whitespace;
1558	}
1559
1560	/ Get real folding info. /
1561	md_get_unicode_fold_info(codepoint, fold_info);
1562	return off;
1563
1564	whitespace:
1565	fold_info->codepoints[`0`] = _T(`' '`);
1566	fold_info->n_codepoints = `1`;
1567	return md_skip_unicode_whitespace(label, off, size);
1568	}
1569
1570	static int
1571	md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1572	{
1573	OFF a_off;
1574	OFF b_off;
1575	int a_reached_end = FALSE;
1576	int b_reached_end = FALSE;
1577	MD_UNICODE_FOLD_INFO a_fi = { `0` };
1578	MD_UNICODE_FOLD_INFO b_fi = { `0` };
1579	OFF a_fi_off = `0`;
1580	OFF b_fi_off = `0`;
1581	int cmp;
1582
1583	a_off = md_skip_unicode_whitespace(a_label, `0`, a_size);
1584	b_off = md_skip_unicode_whitespace(b_label, `0`, b_size);
1585	while(!a_reached_end \|\| !b_reached_end) {
1586	/ If needed, load fold info for next char. /
1587	if(a_fi_off >= a_fi.n_codepoints) {
1588	a_fi_off = `0`;
1589	a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
1590	a_reached_end = (a_off >= a_size);
1591	}
1592	if(b_fi_off >= b_fi.n_codepoints) {
1593	b_fi_off = `0`;
1594	b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
1595	b_reached_end = (b_off >= b_size);
1596	}
1597
1598	cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1599	if(cmp != `0`)
1600	return cmp;
1601
1602	a_fi_off++;
1603	b_fi_off++;
1604	}
1605
1606	return `0`;
1607	}
1608
1609	typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
1610	struct MD_REF_DEF_LIST_tag {
1611	int n_ref_defs;
1612	int alloc_ref_defs;
1613	MD_REF_DEF* ref_defs[]; / Valid items always point into ctx->ref_defs[] /
1614	};
1615
1616	static int
1617	md_ref_def_cmp(const void* a, const void* b)
1618	{
1619	const MD_REF_DEF* a_ref = (const* MD_REF_DEF**)a;
1620	const MD_REF_DEF* b_ref = (const* MD_REF_DEF**)b;
1621
1622	if(a_ref->hash < b_ref->hash)
1623	return -`1`;
1624	else if(a_ref->hash > b_ref->hash)
1625	return +`1`;
1626	else
1627	return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
1628	}
1629
1630	static int
1631	md_ref_def_cmp_for_sort(const void* a, const void* b)
1632	{
1633	int cmp;
1634
1635	cmp = md_ref_def_cmp(a, b);
1636
1637	/ Ensure stability of the sorting. /
1638	if(cmp == `0`) {
1639	const MD_REF_DEF* a_ref = (const* MD_REF_DEF**)a;
1640	const MD_REF_DEF* b_ref = (const* MD_REF_DEF**)b;
1641
1642	if(a_ref < b_ref)
1643	cmp = -`1`;
1644	else if(a_ref > b_ref)
1645	cmp = +`1`;
1646	else
1647	cmp = `0`;
1648	}
1649
1650	return cmp;
1651	}
1652
1653	static int
1654	md_build_ref_def_hashtable(MD_CTX* ctx)
1655	{
1656	int i, j;
1657
1658	if(ctx->n_ref_defs == `0`)
1659	return `0`;
1660
1661	ctx->ref_def_hashtable_size = (ctx->n_ref_defs * `5`) / `4`;
1662	ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
1663	if(ctx->ref_def_hashtable == NULL) {
1664	MD_LOG("malloc() failed.");
1665	goto abort;
1666	}
1667	memset(ctx->ref_def_hashtable, `0`, ctx->ref_def_hashtable_size * sizeof(void*));
1668
1669	/ Each member of ctx->ref_def_hashtable[] can be:*
1670	* -- NULL,
1671	* -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
1672	* -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
1673	* such MD_REF_DEFs.
1674	*/
1675	for(i = `0`; i < ctx->n_ref_defs; i++) {
1676	MD_REF_DEF* def = &ctx->ref_defs[i];
1677	void* bucket;
1678	MD_REF_DEF_LIST* list;
1679
1680	def->hash = md_link_label_hash(def->label, def->label_size);
1681	bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1682
1683	if(bucket == NULL) {
1684	/ The bucket is empty. Make it just point to the def. /
1685	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1686	continue;
1687	}
1688
1689	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1690	/ The bucket already contains one ref. def. Lets see whether it*
1691	* is the same label (ref. def. duplicate) or different one
1692	* (hash conflict). */
1693	MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1694
1695	if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == `0`) {
1696	/ Duplicate label: Ignore this ref. def. /
1697	continue;
1698	}
1699
1700	/ Make the bucket complex, i.e. able to hold more ref. defs. /
1701	list = (MD_REF_DEF_LIST) malloc(sizeof(MD_REF_DEF_LIST) + `2` sizeof(MD_REF_DEF*));
1702	if(list == NULL) {
1703	MD_LOG("malloc() failed.");
1704	goto abort;
1705	}
1706	list->ref_defs[`0`] = old_def;
1707	list->ref_defs[`1`] = def;
1708	list->n_ref_defs = `2`;
1709	list->alloc_ref_defs = `2`;
1710	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1711	continue;
1712	}
1713
1714	/ Append the def to the complex bucket list.*
1715	*
1716	* Note in this case we ignore potential duplicates to avoid expensive
1717	* iterating over the complex bucket. Below, we revisit all the complex
1718	* buckets and handle it more cheaply after the complex bucket contents
1719	* is sorted. */
1720	list = (MD_REF_DEF_LIST*) bucket;
1721	if(list->n_ref_defs >= list->alloc_ref_defs) {
1722	int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / `2`;
1723	MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
1724	sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1725	if(list_tmp == NULL) {
1726	MD_LOG("realloc() failed.");
1727	goto abort;
1728	}
1729	list = list_tmp;
1730	list->alloc_ref_defs = alloc_ref_defs;
1731	ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1732	}
1733
1734	list->ref_defs[list->n_ref_defs] = def;
1735	list->n_ref_defs++;
1736	}
1737
1738	/ Sort the complex buckets so we can use bsearch() with them. /
1739	for(i = `0`; i < ctx->ref_def_hashtable_size; i++) {
1740	void* bucket = ctx->ref_def_hashtable[i];
1741	MD_REF_DEF_LIST* list;
1742
1743	if(bucket == NULL)
1744	continue;
1745	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs)
1746	continue;
1747
1748	list = (MD_REF_DEF_LIST*) bucket;
1749	qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
1750
1751	/ Disable all duplicates in the complex bucket by forcing all such*
1752	* records to point to the 1st such ref. def. I.e. no matter which
1753	* record is found during the lookup, it will always point to the right
1754	* ref. def. in ctx->ref_defs[]. */
1755	for(j = `1`; j < list->n_ref_defs; j++) {
1756	if(md_ref_def_cmp(&list->ref_defs[j-`1`], &list->ref_defs[j]) == `0`)
1757	list->ref_defs[j] = list->ref_defs[j-`1`];
1758	}
1759	}
1760
1761	return `0`;
1762
1763	abort:
1764	return -`1`;
1765	}
1766
1767	static void
1768	md_free_ref_def_hashtable(MD_CTX* ctx)
1769	{
1770	if(ctx->ref_def_hashtable != NULL) {
1771	int i;
1772
1773	for(i = `0`; i < ctx->ref_def_hashtable_size; i++) {
1774	void* bucket = ctx->ref_def_hashtable[i];
1775	if(bucket == NULL)
1776	continue;
1777	if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs)
1778	continue;
1779	free(bucket);
1780	}
1781
1782	free(ctx->ref_def_hashtable);
1783	}
1784	}
1785
1786	static const MD_REF_DEF*
1787	md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1788	{
1789	unsigned hash;
1790	void* bucket;
1791
1792	if(ctx->ref_def_hashtable_size == `0`)
1793	return NULL;
1794
1795	hash = md_link_label_hash(label, label_size);
1796	bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1797
1798	if(bucket == NULL) {
1799	return NULL;
1800	} else if(ctx->ref_defs <= (MD_REF_DEF) bucket && (MD_REF_DEF) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1801	const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1802
1803	if(md_link_label_cmp(def->label, def->label_size, label, label_size) == `0`)
1804	return def;
1805	else
1806	return NULL;
1807	} else {
1808	MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1809	MD_REF_DEF key_buf;
1810	const MD_REF_DEF* key = &key_buf;
1811	const MD_REF_DEF** ret;
1812
1813	key_buf.label = (CHAR*) label;
1814	key_buf.label_size = label_size;
1815	key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
1816
1817	ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
1818	list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
1819	if(ret != NULL)
1820	return *ret;
1821	else
1822	return NULL;
1823	}
1824	}
1825
1826
1827	/***************************
1828	* Recognizing Links *
1829	***************************/
1830
/* Note this code is partially shared between processing inlines and blocks
 * as reference definitions and links share some helper parser functions.
 */
1834
1835	typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
1836	struct MD_LINK_ATTR_tag {
1837	OFF dest_beg;
1838	OFF dest_end;
1839
1840	CHAR* title;
1841	SZ title_size;
1842	int title_needs_free;
1843	};
1844
1845
1846	static int
1847	md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
1848	OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
1849	OFF* p_contents_beg, OFF* p_contents_end)
1850	{
1851	OFF off = beg;
1852	OFF contents_beg = `0`;
1853	OFF contents_end = `0`;
1854	int line_index = `0`;
1855	int len = `0`;
1856
1857	if(CH(off) != _T(`'['`))
1858	return FALSE;
1859	off++;
1860
1861	while(`1`) {
1862	OFF line_end = lines[line_index].end;
1863
1864	while(off < line_end) {
1865	if(CH(off) == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
1866	if(contents_end == `0`) {
1867	contents_beg = off;
1868	*p_beg_line_index = line_index;
1869	}
1870	contents_end = off + `2`;
1871	off += `2`;
1872	} else if(CH(off) == _T(`'['`)) {
1873	return FALSE;
1874	} else if(CH(off) == _T(`']'`)) {
1875	if(contents_beg < contents_end) {
1876	/ Success. /
1877	*p_contents_beg = contents_beg;
1878	*p_contents_end = contents_end;
1879	*p_end = off+`1`;
1880	*p_end_line_index = line_index;
1881	return TRUE;
1882	} else {
1883	/ Link label must have some non-whitespace contents. /
1884	return FALSE;
1885	}
1886	} else {
1887	unsigned codepoint;
1888	SZ char_size;
1889
1890	codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
1891	if(!ISUNICODEWHITESPACE_(codepoint)) {
1892	if(contents_end == `0`) {
1893	contents_beg = off;
1894	*p_beg_line_index = line_index;
1895	}
1896	contents_end = off + char_size;
1897	}
1898
1899	off += char_size;
1900	}
1901
1902	len++;
1903	if(len > `999`)
1904	return FALSE;
1905	}
1906
1907	line_index++;
1908	len++;
1909	if(line_index < n_lines)
1910	off = lines[line_index].beg;
1911	else
1912	break;
1913	}
1914
1915	return FALSE;
1916	}
1917
1918	static int
1919	md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1920	OFF* p_contents_beg, OFF* p_contents_end)
1921	{
1922	OFF off = beg;
1923
1924	if(off >= max_end \|\| CH(off) != _T(`'<'`))
1925	return FALSE;
1926	off++;
1927
1928	while(off < max_end) {
1929	if(CH(off) == _T(`'\\'`) && off+`1` < max_end && ISPUNCT(off+`1`)) {
1930	off += `2`;
1931	continue;
1932	}
1933
1934	if(ISNEWLINE(off) \|\| CH(off) == _T(`'<'`))
1935	return FALSE;
1936
1937	if(CH(off) == _T(`'>'`)) {
1938	/ Success. /
1939	*p_contents_beg = beg+`1`;
1940	*p_contents_end = off;
1941	*p_end = off+`1`;
1942	return TRUE;
1943	}
1944
1945	off++;
1946	}
1947
1948	return FALSE;
1949	}
1950
1951	static int
1952	md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1953	OFF* p_contents_beg, OFF* p_contents_end)
1954	{
1955	OFF off = beg;
1956	int parenthesis_level = `0`;
1957
1958	while(off < max_end) {
1959	if(CH(off) == _T(`'\\'`) && off+`1` < max_end && ISPUNCT(off+`1`)) {
1960	off += `2`;
1961	continue;
1962	}
1963
1964	if(ISWHITESPACE(off) \|\| ISCNTRL(off))
1965	break;
1966
1967	/ Link destination may include balanced pairs of unescaped '(' ')'.*
1968	* Note we limit the maximal nesting level by 32 to protect us from
1969	* https://github.com/jgm/cmark/issues/214 */
1970	if(CH(off) == _T(`'('`)) {
1971	parenthesis_level++;
1972	if(parenthesis_level > `32`)
1973	return FALSE;
1974	} else if(CH(off) == _T(`')'`)) {
1975	if(parenthesis_level == `0`)
1976	break;
1977	parenthesis_level--;
1978	}
1979
1980	off++;
1981	}
1982
1983	if(parenthesis_level != `0` \|\| off == beg)
1984	return FALSE;
1985
1986	/ Success. /
1987	*p_contents_beg = beg;
1988	*p_contents_end = off;
1989	*p_end = off;
1990	return TRUE;
1991	}
1992
1993	static inline int
1994	md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1995	OFF* p_contents_beg, OFF* p_contents_end)
1996	{
1997	if(CH(beg) == _T(`'<'`))
1998	return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
1999	else
2000	return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2001	}
2002
2003	static int
2004	md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2005	OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
2006	OFF* p_contents_beg, OFF* p_contents_end)
2007	{
2008	OFF off = beg;
2009	CHAR closer_char;
2010	int line_index = `0`;
2011
2012	/ White space with up to one line break. /
2013	while(off < lines[line_index].end && ISWHITESPACE(off))
2014	off++;
2015	if(off >= lines[line_index].end) {
2016	line_index++;
2017	if(line_index >= n_lines)
2018	return FALSE;
2019	off = lines[line_index].beg;
2020	}
2021	if(off == beg)
2022	return FALSE;
2023
2024	*p_beg_line_index = line_index;
2025
2026	/ First char determines how to detect end of it. /
2027	switch(CH(off)) {
2028	case _T(`'"'`): closer_char = _T(`'"'`); break;
2029	case _T(`'\''`): closer_char = _T(`'\''`); break;
2030	case _T(`'('`): closer_char = _T(`')'`); break;
2031	default: return FALSE;
2032	}
2033	off++;
2034
2035	*p_contents_beg = off;
2036
2037	while(line_index < n_lines) {
2038	OFF line_end = lines[line_index].end;
2039
2040	while(off < line_end) {
2041	if(CH(off) == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
2042	off++;
2043	} else if(CH(off) == closer_char) {
2044	/ Success. /
2045	*p_contents_end = off;
2046	*p_end = off+`1`;
2047	*p_end_line_index = line_index;
2048	return TRUE;
2049	} else if(closer_char == _T(`')'`) && CH(off) == _T(`'('`)) {
2050	/ ()-style title cannot contain (unescaped '(')) /
2051	return FALSE;
2052	}
2053
2054	off++;
2055	}
2056
2057	line_index++;
2058	}
2059
2060	return FALSE;
2061	}
2062
2063	/ Returns 0 if it is not a reference definition.*
2064	*
2065	* Returns N > 0 if it is a reference definition. N then corresponds to the
2066	* number of lines forming it). In this case the definition is stored for
2067	* resolving any links referring to it.
2068	*
2069	* Returns -1 in case of an error (out of memory).
2070	*/
2071	static int
2072	md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
2073	{
2074	OFF label_contents_beg;
2075	OFF label_contents_end;
2076	int label_contents_line_index = -`1`;
2077	int label_is_multiline;
2078	CHAR* label = NULL;
2079	SZ label_size;
2080	OFF dest_contents_beg;
2081	OFF dest_contents_end;
2082	OFF title_contents_beg;
2083	OFF title_contents_end;
2084	int title_contents_line_index;
2085	int title_is_multiline;
2086	OFF off;
2087	int line_index = `0`;
2088	int tmp_line_index;
2089	MD_REF_DEF* def;
2090	int ret;
2091
2092	/ Link label. /
2093	if(!md_is_link_label(ctx, lines, n_lines, lines[`0`].beg,
2094	&off, &label_contents_line_index, &line_index,
2095	&label_contents_beg, &label_contents_end))
2096	return FALSE;
2097	label_is_multiline = (label_contents_line_index != line_index);
2098
2099	/ Colon. /
2100	if(off >= lines[line_index].end \|\| CH(off) != _T(`':'`))
2101	return FALSE;
2102	off++;
2103
2104	/ Optional white space with up to one line break. /
2105	while(off < lines[line_index].end && ISWHITESPACE(off))
2106	off++;
2107	if(off >= lines[line_index].end) {
2108	line_index++;
2109	if(line_index >= n_lines)
2110	return FALSE;
2111	off = lines[line_index].beg;
2112	}
2113
2114	/ Link destination. /
2115	if(!md_is_link_destination(ctx, off, lines[line_index].end,
2116	&off, &dest_contents_beg, &dest_contents_end))
2117	return FALSE;
2118
2119	/ (Optional) title. Note we interpret it as an title only if nothing*
2120	* more follows on its last line. */
2121	if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2122	&off, &title_contents_line_index, &tmp_line_index,
2123	&title_contents_beg, &title_contents_end)
2124	&& off >= lines[line_index + tmp_line_index].end)
2125	{
2126	title_is_multiline = (tmp_line_index != title_contents_line_index);
2127	title_contents_line_index += line_index;
2128	line_index += tmp_line_index;
2129	} else {
2130	/ Not a title. /
2131	title_is_multiline = FALSE;
2132	title_contents_beg = off;
2133	title_contents_end = off;
2134	title_contents_line_index = `0`;
2135	}
2136
2137	/ Nothing more can follow on the last line. /
2138	if(off < lines[line_index].end)
2139	return FALSE;
2140
2141	/ Construct label. /
2142	if(!label_is_multiline) {
2143	label = (CHAR*) STR(label_contents_beg);
2144	label_size = label_contents_end - label_contents_beg;
2145	} else {
2146	MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2147	lines + label_contents_line_index, n_lines - label_contents_line_index,
2148	_T(`' '`), &label, &label_size));
2149	}
2150
2151	/ Store the reference definition. /
2152	if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
2153	MD_REF_DEF* new_defs;
2154
2155	ctx->alloc_ref_defs = (ctx->alloc_ref_defs > `0`
2156	? ctx->alloc_ref_defs + ctx->alloc_ref_defs / `2`
2157	: `16`);
2158	new_defs = (MD_REF_DEF) realloc(ctx->ref_defs, ctx->alloc_ref_defs sizeof(MD_REF_DEF));
2159	if(new_defs == NULL) {
2160	MD_LOG("realloc() failed.");
2161	goto abort;
2162	}
2163
2164	ctx->ref_defs = new_defs;
2165	}
2166
2167	def = &ctx->ref_defs[ctx->n_ref_defs];
2168	memset(def, `0`, sizeof(MD_REF_DEF));
2169
2170	def->label = label;
2171	def->label_size = label_size;
2172
2173	def->dest_beg = dest_contents_beg;
2174	def->dest_end = dest_contents_end;
2175
2176	if(title_contents_beg >= title_contents_end) {
2177	def->title = NULL;
2178	def->title_size = `0`;
2179	} else if(!title_is_multiline) {
2180	def->title = (CHAR*) STR(title_contents_beg);
2181	def->title_size = title_contents_end - title_contents_beg;
2182	} else {
2183	MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2184	lines + title_contents_line_index, n_lines - title_contents_line_index,
2185	_T(`'\n'`), &def->title, &def->title_size));
2186	}
2187
2188	/ Success. /
2189	ctx->n_ref_defs++;
2190	return line_index + `1`;
2191
2192	abort:
2193	/ Failure. /
2194	if(!IS_INPUT_STR(label))
2195	free(label);
2196	return ret;
2197	}
2198
2199	static int
2200	md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2201	OFF beg, OFF end, MD_LINK_ATTR* attr)
2202	{
2203	const MD_REF_DEF* def;
2204	const MD_LINE* beg_line;
2205	const MD_LINE* end_line;
2206	CHAR* label;
2207	SZ label_size;
2208	int ret;
2209
2210	MD_ASSERT(CH(beg) == _T(`'['`) \|\| CH(beg) == _T(`'!'`));
2211	MD_ASSERT(CH(end-`1`) == _T(`']'`));
2212
2213	beg += (CH(beg) == _T(`'!'`) ? `2` : `1`);
2214	end--;
2215
2216	/ Find lines corresponding to the beg and end positions. /
2217	MD_ASSERT(lines[`0`].beg <= beg);
2218	beg_line = lines;
2219	while(beg >= beg_line->end)
2220	beg_line++;
2221
2222	MD_ASSERT(end <= lines[n_lines-`1`].end);
2223	end_line = beg_line;
2224	while(end >= end_line->end)
2225	end_line++;
2226
2227	if(beg_line != end_line) {
2228	MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2229	n_lines - (beg_line - lines), _T(`' '`), &label, &label_size));
2230	} else {
2231	label = (CHAR*) STR(beg);
2232	label_size = end - beg;
2233	}
2234
2235	def = md_lookup_ref_def(ctx, label, label_size);
2236	if(def != NULL) {
2237	attr->dest_beg = def->dest_beg;
2238	attr->dest_end = def->dest_end;
2239	attr->title = def->title;
2240	attr->title_size = def->title_size;
2241	attr->title_needs_free = FALSE;
2242	}
2243
2244	if(!IS_INPUT_STR(label))
2245	free(label);
2246
2247	ret = (def != NULL);
2248
2249	abort:
2250	return ret;
2251	}
2252
2253	static int
2254	md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2255	OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2256	{
2257	int line_index = `0`;
2258	int tmp_line_index;
2259	OFF title_contents_beg;
2260	OFF title_contents_end;
2261	int title_contents_line_index;
2262	int title_is_multiline;
2263	OFF off = beg;
2264	int ret = FALSE;
2265
2266	while(off >= lines[line_index].end)
2267	line_index++;
2268
2269	MD_ASSERT(CH(off) == _T(`'('`));
2270	off++;
2271
2272	/ Optional white space with up to one line break. /
2273	while(off < lines[line_index].end && ISWHITESPACE(off))
2274	off++;
2275	if(off >= lines[line_index].end && ISNEWLINE(off)) {
2276	line_index++;
2277	if(line_index >= n_lines)
2278	return FALSE;
2279	off = lines[line_index].beg;
2280	}
2281
2282	/ Link destination may be omitted, but only when not also having a title. /
2283	if(off < ctx->size && CH(off) == _T(`')'`)) {
2284	attr->dest_beg = off;
2285	attr->dest_end = off;
2286	attr->title = NULL;
2287	attr->title_size = `0`;
2288	attr->title_needs_free = FALSE;
2289	off++;
2290	*p_end = off;
2291	return TRUE;
2292	}
2293
2294	/ Link destination. /
2295	if(!md_is_link_destination(ctx, off, lines[line_index].end,
2296	&off, &attr->dest_beg, &attr->dest_end))
2297	return FALSE;
2298
2299	/ (Optional) title. /
2300	if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2301	&off, &title_contents_line_index, &tmp_line_index,
2302	&title_contents_beg, &title_contents_end))
2303	{
2304	title_is_multiline = (tmp_line_index != title_contents_line_index);
2305	title_contents_line_index += line_index;
2306	line_index += tmp_line_index;
2307	} else {
2308	/ Not a title. /
2309	title_is_multiline = FALSE;
2310	title_contents_beg = off;
2311	title_contents_end = off;
2312	title_contents_line_index = `0`;
2313	}
2314
2315	/ Optional whitespace followed with final ')'. /
2316	while(off < lines[line_index].end && ISWHITESPACE(off))
2317	off++;
2318	if(off >= lines[line_index].end && ISNEWLINE(off)) {
2319	line_index++;
2320	if(line_index >= n_lines)
2321	return FALSE;
2322	off = lines[line_index].beg;
2323	}
2324	if(CH(off) != _T(`')'`))
2325	goto abort;
2326	off++;
2327
2328	if(title_contents_beg >= title_contents_end) {
2329	attr->title = NULL;
2330	attr->title_size = `0`;
2331	attr->title_needs_free = FALSE;
2332	} else if(!title_is_multiline) {
2333	attr->title = (CHAR*) STR(title_contents_beg);
2334	attr->title_size = title_contents_end - title_contents_beg;
2335	attr->title_needs_free = FALSE;
2336	} else {
2337	MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2338	lines + title_contents_line_index, n_lines - title_contents_line_index,
2339	_T(`'\n'`), &attr->title, &attr->title_size));
2340	attr->title_needs_free = TRUE;
2341	}
2342
2343	*p_end = off;
2344	ret = TRUE;
2345
2346	abort:
2347	return ret;
2348	}
2349
2350	static void
2351	md_free_ref_defs(MD_CTX* ctx)
2352	{
2353	int i;
2354
2355	for(i = `0`; i < ctx->n_ref_defs; i++) {
2356	MD_REF_DEF* def = &ctx->ref_defs[i];
2357
2358	if(!IS_INPUT_STR(def->label))
2359	free(def->label);
2360	if(!IS_INPUT_STR(def->title))
2361	free(def->title);
2362	}
2363
2364	free(ctx->ref_defs);
2365	}
2366
2367
2368	/******************************************
2369	* Processing Inlines (a.k.a Spans) *
2370	******************************************/
2371
/* We process inlines in a few phases:
 *
 * (1) We go through the block text and collect all significant characters
 *     which may start/end a span or some other significant position into
 *     ctx->marks[]. Core of this is what md_collect_marks() does.
 *
 *     We also do some very brief preliminary context-less analysis, whether
 *     it might be opener or closer (e.g. of an emphasis span).
 *
 *     This speeds up the other steps as we do not need to re-iterate over
 *     all characters anymore.
 *
 * (2) We analyze each potential mark type, in order by their precedence.
 *
 *     In each md_analyze_XXX() function, we re-iterate the list of the marks,
 *     skipping already resolved regions (in preceding precedences) and try to
 *     resolve them.
 *
 * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
 *       them as resolved.
 *
 * (2.2) For range-type marks, we analyze whether the mark could be a closer
 *       and, if yes, whether there is some preceding opener it could satisfy.
 *
 *       If not, we check whether it could really be an opener and if yes, we
 *       remember it so subsequent closers may resolve it.
 *
 * (3) Finally, when all marks were analyzed, we render the block contents
 *     by calling MD_RENDERER::text() callback, interrupting by ::enter_span()
 *     or ::close_span() whenever we reach a resolved mark.
 */
2403
2404
2405	/ The mark structure.*
2406	*
2407	* '\\': Maybe escape sequence.
2408	* '\0': NULL char.
2409	* '*': Maybe (strong) emphasis start/end.
2410	* '_': Maybe (strong) emphasis start/end.
2411	* '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
2412	* '`': Maybe code span start/end.
2413	* '&': Maybe start of entity.
2414	* ';': Maybe end of entity.
2415	* '<': Maybe start of raw HTML or autolink.
2416	* '>': Maybe end of raw HTML or autolink.
2417	* '[': Maybe start of link label or link text.
2418	* '!': Equivalent of '[' for image.
2419	* ']': Maybe end of link label or link text.
2420	* '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
2421	* ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
2422	* '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
2423	* 'D': Dummy mark, it reserves a space for splitting a previous mark
2424	* (e.g. emphasis) or to make more space for storing some special data
2425	* related to the preceding mark (e.g. link).
2426	*
2427	* Note that not all instances of these chars in the text imply creation of the
2428	* structure. Only those which have (or may have, after we see more context)
2429	* the special meaning.
2430	*
2431	* (Keep this struct as small as possible to fit as much of them into CPU
2432	* cache line.)
2433	*/
2434	struct MD_MARK_tag {
2435	OFF beg;
2436	OFF end;
2437
2438	/ For unresolved openers, 'prev' and 'next' form the chain of open openers*
2439	* of given type 'ch'.
2440	*
2441	* During resolving, we disconnect from the chain and point to the
2442	* corresponding counterpart so opener points to its closer and vice versa.
2443	*/
2444	int prev;
2445	int next;
2446	CHAR ch;
2447	unsigned char flags;
2448	};
2449
/* Mark flags (these apply to ALL mark types). */
#define MD_MARK_POTENTIAL_OPENER            0x01  /* Maybe opener. */
#define MD_MARK_POTENTIAL_CLOSER            0x02  /* Maybe closer. */
#define MD_MARK_OPENER                      0x04  /* Definitely opener. */
#define MD_MARK_CLOSER                      0x08  /* Definitely closer. */
#define MD_MARK_RESOLVED                    0x10  /* Resolved in any definite way. */

/* Mark flags specific for various mark types. As these never meet in the
 * same mark, they can share the bits: Note 0x20 has three type-dependent
 * meanings below. */
#define MD_MARK_EMPH_INTRAWORD              0x20  /* Helper for the "rule of 3". */
#define MD_MARK_EMPH_MOD3_0                 0x40
#define MD_MARK_EMPH_MOD3_1                 0x80
#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)
#define MD_MARK_AUTOLINK                    0x20  /* Distinguisher for '<', '>'. */
#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20  /* For permissive autolinks. */
2465
2466	static MD_MARKCHAIN*
2467	md_asterisk_chain(MD_CTX* ctx, unsigned flags)
2468	{
2469	switch(flags & (MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_MASK)) {
2470	case MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_intraword_mod3_0;
2471	case MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_intraword_mod3_1;
2472	case MD_MARK_EMPH_INTRAWORD \| MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_intraword_mod3_2;
2473	case MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_extraword_mod3_0;
2474	case MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_extraword_mod3_1;
2475	case MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_extraword_mod3_2;
2476	default: MD_UNREACHABLE();
2477	}
2478	return NULL;
2479	}
2480
2481	static MD_MARKCHAIN*
2482	md_mark_chain(MD_CTX* ctx, int mark_index)
2483	{
2484	MD_MARK* mark = &ctx->marks[mark_index];
2485
2486	switch(mark->ch) {
2487	case _T(`''`): return* md_asterisk_chain(ctx, mark->flags);
2488	case _T(`'_'`): return &UNDERSCORE_OPENERS;
2489	case _T(`'~'`): return (mark->end - mark->beg == `1`) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2490	case _T(`'['`): return &BRACKET_OPENERS;
2491	case _T(`'\|'`): return &TABLECELLBOUNDARIES;
2492	default: return NULL;
2493	}
2494	}
2495
2496	static MD_MARK*
2497	md_push_mark(MD_CTX* ctx)
2498	{
2499	if(ctx->n_marks >= ctx->alloc_marks) {
2500	MD_MARK* new_marks;
2501
2502	ctx->alloc_marks = (ctx->alloc_marks > `0`
2503	? ctx->alloc_marks + ctx->alloc_marks / `2`
2504	: `64`);
2505	new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
2506	if(new_marks == NULL) {
2507	MD_LOG("realloc() failed.");
2508	return NULL;
2509	}
2510
2511	ctx->marks = new_marks;
2512	}
2513
2514	return &ctx->marks[ctx->n_marks++];
2515	}
2516
/* Helper macros for adding a new mark. They expect the following to exist
 * in the scope where they are used: the variables 'ctx', 'mark' and 'ret',
 * and the label 'abort' (where we jump, with ret set to -1, if the mark
 * cannot be allocated). */

/* Allocate the mark, leaving it uninitialized. */
#define PUSH_MARK_()                                                    \
        do {                                                            \
            mark = md_push_mark(ctx);                                   \
            if(mark == NULL) {                                          \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)

/* Allocate the mark and initialize it as not linked into any chain. */
#define PUSH_MARK(ch_, beg_, end_, flags_)                              \
        do {                                                            \
            PUSH_MARK_();                                               \
            mark->beg = (beg_);                                         \
            mark->end = (end_);                                         \
            mark->prev = -1;                                            \
            mark->next = -1;                                            \
            mark->ch = (char)(ch_);                                     \
            mark->flags = (flags_);                                     \
        } while(0)
2536
2537
2538	static void
2539	md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
2540	{
2541	if(chain->tail >= `0`)
2542	ctx->marks[chain->tail].next = mark_index;
2543	else
2544	chain->head = mark_index;
2545
2546	ctx->marks[mark_index].prev = chain->tail;
2547	ctx->marks[mark_index].next = -`1`;
2548	chain->tail = mark_index;
2549	}
2550
2551	/ Sometimes, we need to store a pointer into the mark. It is quite rare*
2552	* so we do not bother to make MD_MARK use union, and it can only happen
2553	* for dummy marks. */
2554	static inline void
2555	md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2556	{
2557	MD_MARK* mark = &ctx->marks[mark_index];
2558	MD_ASSERT(mark->ch == `'D'`);
2559
2560	/ Check only members beg and end are misused for this. /
2561	MD_ASSERT(sizeof(void) <= `2` sizeof(OFF));
2562	memcpy(mark, &ptr, sizeof(void*));
2563	}
2564
2565	static inline void*
2566	md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2567	{
2568	void* ptr;
2569	MD_MARK* mark = &ctx->marks[mark_index];
2570	MD_ASSERT(mark->ch == `'D'`);
2571	memcpy(&ptr, mark, sizeof(void*));
2572	return ptr;
2573	}
2574
2575	static void
2576	md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
2577	{
2578	MD_MARK* opener = &ctx->marks[opener_index];
2579	MD_MARK* closer = &ctx->marks[closer_index];
2580
2581	/ Remove opener from the list of openers. /
2582	if(chain != NULL) {
2583	if(opener->prev >= `0`)
2584	ctx->marks[opener->prev].next = opener->next;
2585	else
2586	chain->head = opener->next;
2587
2588	if(opener->next >= `0`)
2589	ctx->marks[opener->next].prev = opener->prev;
2590	else
2591	chain->tail = opener->prev;
2592	}
2593
2594	/ Interconnect opener and closer and mark both as resolved. /
2595	opener->next = closer_index;
2596	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
2597	closer->prev = opener_index;
2598	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
2599	}
2600
2601
2602	#define MD_ROLLBACK_ALL 0
2603	#define MD_ROLLBACK_CROSSING 1
2604
2605	/ In the range ctx->marks[opener_index] ... [closer_index], undo some or all*
2606	* resolvings accordingly to these rules:
2607	*
2608	* (1) All openers BEFORE the range corresponding to any closer inside the
2609	* range are un-resolved and they are re-added to their respective chains
2610	* of unresolved openers. This ensures we can reuse the opener for closers
2611	* AFTER the range.
2612	*
2613	* (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
2614	* are discarded.
2615	*
2616	* (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
2617	* in (1) are discarded. I.e. pairs of openers and closers which are both
2618	* inside the range are retained as well as any unpaired marks.
2619	*/
2620	static void
2621	md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2622	{
2623	int i;
2624	int mark_index;
2625
2626	/ Cut all unresolved openers at the mark index. /
2627	for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+`1`; i++) {
2628	MD_MARKCHAIN* chain = &ctx->mark_chains[i];
2629
2630	while(chain->tail >= opener_index)
2631	chain->tail = ctx->marks[chain->tail].prev;
2632
2633	if(chain->tail >= `0`)
2634	ctx->marks[chain->tail].next = -`1`;
2635	else
2636	chain->head = -`1`;
2637	}
2638
2639	/ Go backwards so that unresolved openers are re-added into their*
2640	* respective chains, in the right order. */
2641	mark_index = closer_index - `1`;
2642	while(mark_index > opener_index) {
2643	MD_MARK* mark = &ctx->marks[mark_index];
2644	int mark_flags = mark->flags;
2645	int discard_flag = (how == MD_ROLLBACK_ALL);
2646
2647	if(mark->flags & MD_MARK_CLOSER) {
2648	int mark_opener_index = mark->prev;
2649
2650	/ Undo opener BEFORE the range. /
2651	if(mark_opener_index < opener_index) {
2652	MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
2653	MD_MARKCHAIN* chain;
2654
2655	mark_opener->flags &= ~(MD_MARK_OPENER \| MD_MARK_CLOSER \| MD_MARK_RESOLVED);
2656	chain = md_mark_chain(ctx, opener_index);
2657	if(chain != NULL) {
2658	md_mark_chain_append(ctx, chain, mark_opener_index);
2659	discard_flag = `1`;
2660	}
2661	}
2662	}
2663
2664	/ And reset our flags. /
2665	if(discard_flag)
2666	mark->flags &= ~(MD_MARK_OPENER \| MD_MARK_CLOSER \| MD_MARK_RESOLVED);
2667
2668	/ Jump as far as we can over unresolved or non-interesting marks. /
2669	switch(how) {
2670	case MD_ROLLBACK_CROSSING:
2671	if((mark_flags & MD_MARK_CLOSER) && mark->prev > opener_index) {
2672	/ If we are closer with opener INSIDE the range, there may*
2673	* not be any other crosser inside the subrange. */
2674	mark_index = mark->prev;
2675	break;
2676	}
2677	/ Pass through. /
2678	default:
2679	mark_index--;
2680	break;
2681	}
2682	}
2683	}
2684
2685	static void
2686	md_build_mark_char_map(MD_CTX* ctx)
2687	{
2688	memset(ctx->mark_char_map, `0`, sizeof(ctx->mark_char_map));
2689
2690	ctx->mark_char_map[`'\\'`] = `1`;
2691	ctx->mark_char_map[`'*'`] = `1`;
2692	ctx->mark_char_map[`'_'`] = `1`;
2693	ctx->mark_char_map['`'] = `1`;
2694	ctx->mark_char_map[`'&'`] = `1`;
2695	ctx->mark_char_map[`';'`] = `1`;
2696	ctx->mark_char_map[`'<'`] = `1`;
2697	ctx->mark_char_map[`'>'`] = `1`;
2698	ctx->mark_char_map[`'['`] = `1`;
2699	ctx->mark_char_map[`'!'`] = `1`;
2700	ctx->mark_char_map[`']'`] = `1`;
2701	ctx->mark_char_map[`'\0'`] = `1`;
2702
2703	if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2704	ctx->mark_char_map[`'~'`] = `1`;
2705
2706	if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2707	ctx->mark_char_map[`'$'`] = `1`;
2708
2709	if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2710	ctx->mark_char_map[`'@'`] = `1`;
2711
2712	if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2713	ctx->mark_char_map[`':'`] = `1`;
2714
2715	if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2716	ctx->mark_char_map[`'.'`] = `1`;
2717
2718	if((ctx->parser.flags & MD_FLAG_TABLES) \|\| (ctx->parser.flags & MD_FLAG_WIKILINKS))
2719	ctx->mark_char_map[`'\|'`] = `1`;
2720
2721	if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2722	int i;
2723
2724	for(i = `0`; i < (int) sizeof(ctx->mark_char_map); i++) {
2725	if(ISWHITESPACE_(i))
2726	ctx->mark_char_map[i] = `1`;
2727	}
2728	}
2729	}
2730
2731	/ We limit code span marks to lower then 32 backticks. This solves the*
2732	* pathologic case of too many openers, each of different length: Their
2733	* resolving would be then O(n^2). */
2734	#define CODESPAN_MARK_MAXLEN 32
2735
2736	static int
2737	md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2738	OFF* p_opener_beg, OFF* p_opener_end,
2739	OFF* p_closer_beg, OFF* p_closer_end,
2740	OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2741	int* p_reached_paragraph_end)
2742	{
2743	OFF opener_beg = beg;
2744	OFF opener_end;
2745	OFF closer_beg;
2746	OFF closer_end;
2747	SZ mark_len;
2748	OFF line_end;
2749	int has_space_after_opener = FALSE;
2750	int has_eol_after_opener = FALSE;
2751	int has_space_before_closer = FALSE;
2752	int has_eol_before_closer = FALSE;
2753	int has_only_space = TRUE;
2754	int line_index = `0`;
2755
2756	line_end = lines[`0`].end;
2757	opener_end = opener_beg;
2758	while(opener_end < line_end && CH(opener_end) == _T('`'))
2759	opener_end++;
2760	has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(`' '`));
2761	has_eol_after_opener = (opener_end == line_end);
2762
2763	/ The caller needs to know end of the opening mark even if we fail. /
2764	*p_opener_end = opener_end;
2765
2766	mark_len = opener_end - opener_beg;
2767	if(mark_len > CODESPAN_MARK_MAXLEN)
2768	return FALSE;
2769
2770	/ Check whether we already know there is no closer of this length.*
2771	* If so, re-scan does no sense. This fixes issue #59. */
2772	if(last_potential_closers[mark_len-`1`] >= lines[n_lines-`1`].end \|\|
2773	(*p_reached_paragraph_end && last_potential_closers[mark_len-`1`] < opener_end))
2774	return FALSE;
2775
2776	closer_beg = opener_end;
2777	closer_end = opener_end;
2778
2779	/ Find closer mark. /
2780	while(TRUE) {
2781	while(closer_beg < line_end && CH(closer_beg) != _T('`')) {
2782	if(CH(closer_beg) != _T(`' '`))
2783	has_only_space = FALSE;
2784	closer_beg++;
2785	}
2786	closer_end = closer_beg;
2787	while(closer_end < line_end && CH(closer_end) == _T('`'))
2788	closer_end++;
2789
2790	if(closer_end - closer_beg == mark_len) {
2791	/ Success. /
2792	has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-`1`) == _T(`' '`));
2793	has_eol_before_closer = (closer_beg == lines[line_index].beg);
2794	break;
2795	}
2796
2797	if(closer_end - closer_beg > `0`) {
2798	/ We have found a back-tick which is not part of the closer. /
2799	has_only_space = FALSE;
2800
2801	/ But if we eventually fail, remember it as a potential closer*
2802	* of its own length for future attempts. This mitigates needs for
2803	* rescans. */
2804	if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
2805	if(closer_beg > last_potential_closers[closer_end - closer_beg - `1`])
2806	last_potential_closers[closer_end - closer_beg - `1`] = closer_beg;
2807	}
2808	}
2809
2810	if(closer_end >= line_end) {
2811	line_index++;
2812	if(line_index >= n_lines) {
2813	/ Reached end of the paragraph and still nothing. /
2814	*p_reached_paragraph_end = TRUE;
2815	return FALSE;
2816	}
2817	/ Try on the next line. /
2818	line_end = lines[line_index].end;
2819	closer_beg = lines[line_index].beg;
2820	} else {
2821	closer_beg = closer_end;
2822	}
2823	}
2824
2825	/ If there is a space or a new line both after and before the opener*
2826	* (and if the code span is not made of spaces only), consume one initial
2827	* and one trailing space as part of the marks. */
2828	if(!has_only_space &&
2829	(has_space_after_opener \|\| has_eol_after_opener) &&
2830	(has_space_before_closer \|\| has_eol_before_closer))
2831	{
2832	if(has_space_after_opener)
2833	opener_end++;
2834	else
2835	opener_end = lines[`1`].beg;
2836
2837	if(has_space_before_closer)
2838	closer_beg--;
2839	else {
2840	closer_beg = lines[line_index-`1`].end;
2841	/ We need to eat the preceding "\r\n" but not any line trailing*
2842	* spaces. */
2843	while(closer_beg < ctx->size && ISBLANK(closer_beg))
2844	closer_beg++;
2845	}
2846	}
2847
2848	*p_opener_beg = opener_beg;
2849	*p_opener_end = opener_end;
2850	*p_closer_beg = closer_beg;
2851	*p_closer_end = closer_end;
2852	return TRUE;
2853	}
2854
2855	static int
2856	md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2857	{
2858	OFF off = beg+`1`;
2859
2860	MD_ASSERT(CH(beg) == _T(`'<'`));
2861
2862	/ Check for scheme. /
2863	if(off >= max_end \|\| !ISASCII(off))
2864	return FALSE;
2865	off++;
2866	while(`1`) {
2867	if(off >= max_end)
2868	return FALSE;
2869	if(off - beg > `32`)
2870	return FALSE;
2871	if(CH(off) == _T(`':'`) && off - beg >= `3`)
2872	break;
2873	if(!ISALNUM(off) && CH(off) != _T(`'+'`) && CH(off) != _T(`'-'`) && CH(off) != _T(`'.'`))
2874	return FALSE;
2875	off++;
2876	}
2877
2878	/ Check the path after the scheme. /
2879	while(off < max_end && CH(off) != _T(`'>'`)) {
2880	if(ISWHITESPACE(off) \|\| ISCNTRL(off) \|\| CH(off) == _T(`'<'`))
2881	return FALSE;
2882	off++;
2883	}
2884
2885	if(off >= max_end)
2886	return FALSE;
2887
2888	MD_ASSERT(CH(off) == _T(`'>'`));
2889	*p_end = off+`1`;
2890	return TRUE;
2891	}
2892
2893	static int
2894	md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2895	{
2896	OFF off = beg + `1`;
2897	int label_len;
2898
2899	MD_ASSERT(CH(beg) == _T(`'<'`));
2900
2901	/ The code should correspond to this regexp:*
2902	/^[a-zA-Z0-9.!#$%&'+\/=?^_`{\|}~-]+*
2903	@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
2904	(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)$/*
2905	*/
2906
2907	/ Username (before '@'). /
2908	while(off < max_end && (ISALNUM(off) \|\| ISANYOF(off, _T(".!#$%&'*+/=?^_`{\|}~-"))))
2909	off++;
2910	if(off <= beg+`1`)
2911	return FALSE;
2912
2913	/ '@' /
2914	if(off >= max_end \|\| CH(off) != _T(`'@'`))
2915	return FALSE;
2916	off++;
2917
2918	/ Labels delimited with '.'; each label is sequence of 1 - 63 alnum*
2919	* characters or '-', but '-' is not allowed as first or last char. */
2920	label_len = `0`;
2921	while(off < max_end) {
2922	if(ISALNUM(off))
2923	label_len++;
2924	else if(CH(off) == _T(`'-'`) && label_len > `0`)
2925	label_len++;
2926	else if(CH(off) == _T(`'.'`) && label_len > `0` && CH(off-`1`) != _T(`'-'`))
2927	label_len = `0`;
2928	else
2929	break;
2930
2931	if(label_len > `63`)
2932	return FALSE;
2933
2934	off++;
2935	}
2936
2937	if(label_len <= `0` \|\| off >= max_end \|\| CH(off) != _T(`'>'`) \|\| CH(off-`1`) == _T(`'-'`))
2938	return FALSE;
2939
2940	*p_end = off+`1`;
2941	return TRUE;
2942	}
2943
2944	static int
2945	md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2946	{
2947	if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2948	*p_missing_mailto = FALSE;
2949	return TRUE;
2950	}
2951
2952	if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2953	*p_missing_mailto = TRUE;
2954	return TRUE;
2955	}
2956
2957	return FALSE;
2958	}
2959
2960	static int
2961	md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
2962	{
2963	int i;
2964	int ret = `0`;
2965	MD_MARK* mark;
2966	OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { `0` };
2967	int codespan_scanned_till_paragraph_end = FALSE;
2968
2969	for(i = `0`; i < n_lines; i++) {
2970	const MD_LINE* line = &lines[i];
2971	OFF off = line->beg;
2972	OFF line_end = line->end;
2973
2974	while(TRUE) {
2975	CHAR ch;
2976
2977	#ifdef MD4C_USE_UTF16
2978	/ For UTF-16, mark_char_map[] covers only ASCII. /
2979	#define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \
2980	(ctx->mark_char_map[(unsigned char) CH(off)]))
2981	#else
2982	/ For 8-bit encodings, mark_char_map[] covers all 256 elements. /
2983	#define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
2984	#endif
2985
2986	/ Optimization: Use some loop unrolling. /
2987	while(off + `3` < line_end && !IS_MARK_CHAR(off+`0`) && !IS_MARK_CHAR(off+`1`)
2988	&& !IS_MARK_CHAR(off+`2`) && !IS_MARK_CHAR(off+`3`))
2989	off += `4`;
2990	while(off < line_end && !IS_MARK_CHAR(off+`0`))
2991	off++;
2992
2993	if(off >= line_end)
2994	break;
2995
2996	ch = CH(off);
2997
2998	/ A backslash escape.*
2999	* It can go beyond line->end as it may involve escaped new
3000	* line to form a hard break. */
3001	if(ch == _T(`'\\'`) && off+`1` < ctx->size && (ISPUNCT(off+`1`) \|\| ISNEWLINE(off+`1`))) {
3002	/ Hard-break cannot be on the last line of the block. /
3003	if(!ISNEWLINE(off+`1`) \|\| i+`1` < n_lines)
3004	PUSH_MARK(ch, off, off+`2`, MD_MARK_RESOLVED);
3005	off += `2`;
3006	continue;
3007	}
3008
3009	/ A potential (string) emphasis start/end. /
3010	if(ch == _T(`'*'`) \|\| ch == _T(`'_'`)) {
3011	OFF tmp = off+`1`;
3012	int left_level; / What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. /
3013	int right_level; / What follows: 0 = whitespace; 1 = punctuation; 2 = other char. /
3014
3015	while(tmp < line_end && CH(tmp) == ch)
3016	tmp++;
3017
3018	if(off == line->beg \|\| ISUNICODEWHITESPACEBEFORE(off))
3019	left_level = `0`;
3020	else if(ISUNICODEPUNCTBEFORE(off))
3021	left_level = `1`;
3022	else
3023	left_level = `2`;
3024
3025	if(tmp == line_end \|\| ISUNICODEWHITESPACE(tmp))
3026	right_level = `0`;
3027	else if(ISUNICODEPUNCT(tmp))
3028	right_level = `1`;
3029	else
3030	right_level = `2`;
3031
3032	/ Intra-word underscore doesn't have special meaning. /
3033	if(ch == _T(`'_'`) && left_level == `2` && right_level == `2`) {
3034	left_level = `0`;
3035	right_level = `0`;
3036	}
3037
3038	if(left_level != `0` \|\| right_level != `0`) {
3039	unsigned flags = `0`;
3040
3041	if(left_level > `0` && left_level >= right_level)
3042	flags \|= MD_MARK_POTENTIAL_CLOSER;
3043	if(right_level > `0` && right_level >= left_level)
3044	flags \|= MD_MARK_POTENTIAL_OPENER;
3045	if(left_level == `2` && right_level == `2`)
3046	flags \|= MD_MARK_EMPH_INTRAWORD;
3047
3048	/ For "the rule of three" we need to remember the original*
3049	* size of the mark (modulo three), before we potentially
3050	* split the mark when being later resolved partially by some
3051	* shorter closer. */
3052	switch((tmp - off) % `3`) {
3053	case `0`: flags \|= MD_MARK_EMPH_MOD3_0; break;
3054	case `1`: flags \|= MD_MARK_EMPH_MOD3_1; break;
3055	case `2`: flags \|= MD_MARK_EMPH_MOD3_2; break;
3056	}
3057
3058	PUSH_MARK(ch, off, tmp, flags);
3059
3060	/ During resolving, multiple asterisks may have to be*
3061	* split into independent span start/ends. Consider e.g.
3062	* "*foo bar*". Therefore we push also some empty dummy
3063	* marks to have enough space for that. */
3064	off++;
3065	while(off < tmp) {
3066	PUSH_MARK(`'D'`, off, off, `0`);
3067	off++;
3068	}
3069	continue;
3070	}
3071
3072	off = tmp;
3073	continue;
3074	}
3075
3076	/ A potential code span start/end. /
3077	if(ch == _T('`')) {
3078	OFF opener_beg, opener_end;
3079	OFF closer_beg, closer_end;
3080	int is_code_span;
3081
3082	is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off,
3083	&opener_beg, &opener_end, &closer_beg, &closer_end,
3084	codespan_last_potential_closers,
3085	&codespan_scanned_till_paragraph_end);
3086	if(is_code_span) {
3087	PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER \| MD_MARK_RESOLVED);
3088	PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER \| MD_MARK_RESOLVED);
3089	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3090	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3091
3092	off = closer_end;
3093
3094	/ Advance the current line accordingly. /
3095	while(off > line_end) {
3096	i++;
3097	line++;
3098	line_end = line->end;
3099	}
3100	continue;
3101	}
3102
3103	off = opener_end;
3104	continue;
3105	}
3106
3107	/ A potential entity start. /
3108	if(ch == _T(`'&'`)) {
3109	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_OPENER);
3110	off++;
3111	continue;
3112	}
3113
3114	/ A potential entity end. /
3115	if(ch == _T(`';'`)) {
3116	/ We surely cannot be entity unless the previous mark is '&'. /
3117	if(ctx->n_marks > `0` && ctx->marks[ctx->n_marks-`1`].ch == _T(`'&'`))
3118	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_CLOSER);
3119
3120	off++;
3121	continue;
3122	}
3123
3124	/ A potential autolink or raw HTML start/end. /
3125	if(ch == _T(`'<'`)) {
3126	int is_autolink;
3127	OFF autolink_end;
3128	int missing_mailto;
3129
3130	if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
3131	int is_html;
3132	OFF html_end;
3133
3134	/ Given the nature of the raw HTML, we have to recognize*
3135	* it here. Doing so later in md_analyze_lt_gt() could
3136	* open can of worms of quadratic complexity. */
3137	is_html = md_is_html_any(ctx, lines + i, n_lines - i, off,
3138	lines[n_lines-`1`].end, &html_end);
3139	if(is_html) {
3140	PUSH_MARK(_T(`'<'`), off, off, MD_MARK_OPENER \| MD_MARK_RESOLVED);
3141	PUSH_MARK(_T(`'>'`), html_end, html_end, MD_MARK_CLOSER \| MD_MARK_RESOLVED);
3142	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3143	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3144	off = html_end;
3145
3146	/ Advance the current line accordingly. /
3147	while(off > line_end) {
3148	i++;
3149	line++;
3150	line_end = line->end;
3151	}
3152	continue;
3153	}
3154	}
3155
3156	is_autolink = md_is_autolink(ctx, off, lines[n_lines-`1`].end,
3157	&autolink_end, &missing_mailto);
3158	if(is_autolink) {
3159	PUSH_MARK((missing_mailto ? _T(`'@'`) : _T(`'<'`)), off, off+`1`,
3160	MD_MARK_OPENER \| MD_MARK_RESOLVED \| MD_MARK_AUTOLINK);
3161	PUSH_MARK(_T(`'>'`), autolink_end-`1`, autolink_end,
3162	MD_MARK_CLOSER \| MD_MARK_RESOLVED \| MD_MARK_AUTOLINK);
3163	ctx->marks[ctx->n_marks-`2`].next = ctx->n_marks-`1`;
3164	ctx->marks[ctx->n_marks-`1`].prev = ctx->n_marks-`2`;
3165	off = autolink_end;
3166	continue;
3167	}
3168
3169	off++;
3170	continue;
3171	}
3172
3173	/ A potential link or its part. /
3174	if(ch == _T(`'['`) \|\| (ch == _T(`'!'`) && off+`1` < line_end && CH(off+`1`) == _T(`'['`))) {
3175	OFF tmp = (ch == _T(`'['`) ? off+`1` : off+`2`);
3176	PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
3177	off = tmp;
3178	/ Two dummies to make enough place for data we need if it is*
3179	* a link. */
3180	PUSH_MARK(`'D'`, off, off, `0`);
3181	PUSH_MARK(`'D'`, off, off, `0`);
3182	continue;
3183	}
3184	if(ch == _T(`']'`)) {
3185	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_CLOSER);
3186	off++;
3187	continue;
3188	}
3189
3190	/ A potential permissive e-mail autolink. /
3191	if(ch == _T(`'@'`)) {
3192	if(line->beg + `1` <= off && ISALNUM(off-`1`) &&
3193	off + `3` < line->end && ISALNUM(off+`1`))
3194	{
3195	PUSH_MARK(ch, off, off+`1`, MD_MARK_POTENTIAL_OPENER);
3196	/ Push a dummy as a reserve for a closer. /
3197	PUSH_MARK(`'D'`, off, off, `0`);
3198	}
3199
3200	off++;
3201	continue;
3202	}
3203
3204	/ A potential permissive URL autolink. /
3205	if(ch == _T(`':'`)) {
3206	static struct {
3207	const CHAR* scheme;
3208	SZ scheme_size;
3209	const CHAR* suffix;
3210	SZ suffix_size;
3211	} scheme_map[] = {
3212	/ In the order from the most frequently used, arguably. /
3213	{ _T("http"), `4`, _T("//"), `2` },
3214	{ _T("https"), `5`, _T("//"), `2` },
3215	{ _T("ftp"), `3`, _T("//"), `2` }
3216	};
3217	int scheme_index;
3218
3219	for(scheme_index = `0`; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
3220	const CHAR* scheme = scheme_map[scheme_index].scheme;
3221	const SZ scheme_size = scheme_map[scheme_index].scheme_size;
3222	const CHAR* suffix = scheme_map[scheme_index].suffix;
3223	const SZ suffix_size = scheme_map[scheme_index].suffix_size;
3224
3225	if(line->beg + scheme_size <= off && md_ascii_eq(STR(off-scheme_size), scheme, scheme_size) &&
3226	(line->beg + scheme_size == off \|\| ISWHITESPACE(off-scheme_size-`1`) \|\| ISANYOF(off-scheme_size-`1`, _T("*_~(["))) &&
3227	off + `1` + suffix_size < line->end && md_ascii_eq(STR(off+`1`), suffix, suffix_size))
3228	{
3229	PUSH_MARK(ch, off-scheme_size, off+`1`+suffix_size, MD_MARK_POTENTIAL_OPENER);
3230	/ Push a dummy as a reserve for a closer. /
3231	PUSH_MARK(`'D'`, off, off, `0`);
3232	off += `1` + suffix_size;
3233	continue;
3234	}
3235	}
3236
3237	off++;
3238	continue;
3239	}
3240
3241	/ A potential permissive WWW autolink. /
3242	if(ch == _T(`'.'`)) {
3243	if(line->beg + `3` <= off && md_ascii_eq(STR(off-`3`), _T("www"), `3`) &&
3244	(line->beg + `3` == off \|\| ISWHITESPACE(off-`4`) \|\| ISANYOF(off-`4`, _T("*_~(["))) &&
3245	off + `1` < line_end)
3246	{
3247	PUSH_MARK(ch, off-`3`, off+`1`, MD_MARK_POTENTIAL_OPENER);
3248	/ Push a dummy as a reserve for a closer. /
3249	PUSH_MARK(`'D'`, off, off, `0`);
3250	off++;
3251	continue;
3252	}
3253
3254	off++;
3255	continue;
3256	}
3257
3258	/ A potential table cell boundary or wiki link label delimiter. /
3259	if((table_mode \|\| ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T(`'\|'`)) {
3260	PUSH_MARK(ch, off, off+`1`, `0`);
3261	off++;
3262	continue;
3263	}
3264
3265	/ A potential strikethrough start/end. /
3266	if(ch == _T(`'~'`)) {
3267	OFF tmp = off+`1`;
3268
3269	while(tmp < line_end && CH(tmp) == _T(`'~'`))
3270	tmp++;
3271
3272	if(tmp - off < `3`) {
3273	unsigned flags = `0`;
3274
3275	if(tmp < line_end && !ISUNICODEWHITESPACE(tmp))
3276	flags \|= MD_MARK_POTENTIAL_OPENER;
3277	if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off))
3278	flags \|= MD_MARK_POTENTIAL_CLOSER;
3279	if(flags != `0`)
3280	PUSH_MARK(ch, off, tmp, flags);
3281	}
3282
3283	off = tmp;
3284	continue;
3285	}
3286
3287	/ A potential equation start/end /
3288	if(ch == _T(`'$'`)) {
3289	/ We can have at most two consecutive $ signs,*
3290	* where two dollar signs signify a display equation. */
3291	OFF tmp = off+`1`;
3292
3293	while(tmp < line_end && CH(tmp) == _T(`'$'`))
3294	tmp++;
3295
3296	if (tmp - off <= `2`)
3297	PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER \| MD_MARK_POTENTIAL_CLOSER);
3298	off = tmp;
3299	continue;
3300	}
3301
3302	/ Turn non-trivial whitespace into single space. /
3303	if(ISWHITESPACE_(ch)) {
3304	OFF tmp = off+`1`;
3305
3306	while(tmp < line_end && ISWHITESPACE(tmp))
3307	tmp++;
3308
3309	if(tmp - off > `1` \|\| ch != _T(`' '`))
3310	PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);
3311
3312	off = tmp;
3313	continue;
3314	}
3315
3316	/ NULL character. /
3317	if(ch == _T(`'\0'`)) {
3318	PUSH_MARK(ch, off, off+`1`, MD_MARK_RESOLVED);
3319	off++;
3320	continue;
3321	}
3322
3323	off++;
3324	}
3325	}
3326
3327	/ Add a dummy mark at the end of the mark vector to simplify*
3328	* process_inlines(). */
3329	PUSH_MARK(`127`, ctx->size, ctx->size, MD_MARK_RESOLVED);
3330
3331	abort:
3332	return ret;
3333	}
3334
3335	static void
3336	md_analyze_bracket(MD_CTX* ctx, int mark_index)
3337	{
3338	/ We cannot really resolve links here as for that we would need*
3339	* more context. E.g. a following pair of brackets (reference link),
3340	* or enclosing pair of brackets (if the inner is the link, the outer
3341	* one cannot be.)
3342	*
3343	* Therefore we here only construct a list of resolved '[' ']' pairs
3344	* ordered by position of the closer. This allows ur to analyze what is
3345	* or is not link in the right order, from inside to outside in case
3346	* of nested brackets.
3347	*
3348	* The resolving itself is deferred into md_resolve_links().
3349	*/
3350
3351	MD_MARK* mark = &ctx->marks[mark_index];
3352
3353	if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
3354	md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index);
3355	return;
3356	}
3357
3358	if(BRACKET_OPENERS.tail >= `0`) {
3359	/ Pop the opener from the chain. /
3360	int opener_index = BRACKET_OPENERS.tail;
3361	MD_MARK* opener = &ctx->marks[opener_index];
3362	if(opener->prev >= `0`)
3363	ctx->marks[opener->prev].next = -`1`;
3364	else
3365	BRACKET_OPENERS.head = -`1`;
3366	BRACKET_OPENERS.tail = opener->prev;
3367
3368	/ Interconnect the opener and closer. /
3369	opener->next = mark_index;
3370	mark->prev = opener_index;
3371
3372	/ Add the pair into chain of potential links for md_resolve_links().*
3373	* Note we misuse opener->prev for this as opener->next points to its
3374	* closer. */
3375	if(ctx->unresolved_link_tail >= `0`)
3376	ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
3377	else
3378	ctx->unresolved_link_head = opener_index;
3379	ctx->unresolved_link_tail = opener_index;
3380	opener->prev = -`1`;
3381	}
3382	}
3383
3384	/ Forward declaration. /
3385	static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3386	int mark_beg, int mark_end);
3387
3388	static int
3389	md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
3390	{
3391	int opener_index = ctx->unresolved_link_head;
3392	OFF last_link_beg = `0`;
3393	OFF last_link_end = `0`;
3394	OFF last_img_beg = `0`;
3395	OFF last_img_end = `0`;
3396
3397	while(opener_index >= `0`) {
3398	MD_MARK* opener = &ctx->marks[opener_index];
3399	int closer_index = opener->next;
3400	MD_MARK* closer = &ctx->marks[closer_index];
3401	int next_index = opener->prev;
3402	MD_MARK* next_opener;
3403	MD_MARK* next_closer;
3404	MD_LINK_ATTR attr;
3405	int is_link = FALSE;
3406
3407	if(next_index >= `0`) {
3408	next_opener = &ctx->marks[next_index];
3409	next_closer = &ctx->marks[next_opener->next];
3410	} else {
3411	next_opener = NULL;
3412	next_closer = NULL;
3413	}
3414
3415	/ If nested ("[ [ ] ]"), we need to make sure that:*
3416	* - The outer does not end inside of (...) belonging to the inner.
3417	* - The outer cannot be link if the inner is link (i.e. not image).
3418	*
3419	* (Note we here analyze from inner to outer as the marks are ordered
3420	* by closer->beg.)
3421	*/
3422	if((opener->beg < last_link_beg && closer->end < last_link_end) \|\|
3423	(opener->beg < last_img_beg && closer->end < last_img_end) \|\|
3424	(opener->beg < last_link_end && opener->ch == `'['`))
3425	{
3426	opener_index = next_index;
3427	continue;
3428	}
3429
3430	/ Recognize and resolve wiki links.*
3431	* Wiki-links maybe '[[destination]]' or '[[destination\|label]]'.
3432	*/
3433	if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3434	(opener->end - opener->beg == `1`) && / not image /
3435	next_opener != NULL && / double '[' opener /
3436	next_opener->ch == `'['` &&
3437	(next_opener->beg == opener->beg - `1`) &&
3438	(next_opener->end - next_opener->beg == `1`) &&
3439	next_closer != NULL && / double ']' closer /
3440	next_closer->ch == `']'` &&
3441	(next_closer->beg == closer->beg + `1`) &&
3442	(next_closer->end - next_closer->beg == `1`))
3443	{
3444	MD_MARK* delim = NULL;
3445	int delim_index;
3446	OFF dest_beg, dest_end;
3447
3448	is_link = TRUE;
3449
3450	/ We don't allow destination to be longer then 100 characters.*
3451	* Lets scan to see whether there is '\|'. (If not then the whole
3452	* wiki-link has to be below the 100 characters.) */
3453	delim_index = opener_index + `1`;
3454	while(delim_index < closer_index) {
3455	MD_MARK* m = &ctx->marks[delim_index];
3456	if(m->ch == `'\|'`) {
3457	delim = m;
3458	break;
3459	}
3460	if(m->ch != `'D'` && m->beg - opener->end > `100`)
3461	break;
3462	delim_index++;
3463	}
3464	dest_beg = opener->end;
3465	dest_end = (delim != NULL) ? delim->beg : closer->beg;
3466	if(dest_end - dest_beg == `0` \|\| dest_end - dest_beg > `100`)
3467	is_link = FALSE;
3468
3469	/ There may not be any new line in the destination. /
3470	if(is_link) {
3471	OFF off;
3472	for(off = dest_beg; off < dest_end; off++) {
3473	if(ISNEWLINE(off)) {
3474	is_link = FALSE;
3475	break;
3476	}
3477	}
3478	}
3479
3480	if(is_link) {
3481	if(delim != NULL) {
3482	if(delim->end < closer->beg) {
3483	opener->end = delim->beg;
3484	} else {
3485	/ The pipe is just before the closer: [[foo\|]] /
3486	closer->beg = delim->beg;
3487	delim = NULL;
3488	}
3489	}
3490
3491	opener->beg = next_opener->beg;
3492	opener->next = closer_index;
3493	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
3494
3495	closer->end = next_closer->end;
3496	closer->prev = opener_index;
3497	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
3498
3499	last_link_beg = opener->beg;
3500	last_link_end = closer->end;
3501
3502	if(delim != NULL) {
3503	delim->flags \|= MD_MARK_RESOLVED;
3504	md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
3505	md_analyze_link_contents(ctx, lines, n_lines, opener_index+`1`, closer_index);
3506	} else {
3507	md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3508	}
3509
3510	opener_index = next_opener->prev;
3511	continue;
3512	}
3513	}
3514
3515	if(next_opener != NULL && next_opener->beg == closer->end) {
3516	if(next_closer->beg > closer->end + `1`) {
3517	/ Might be full reference link. /
3518	is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
3519	} else {
3520	/ Might be shortcut reference link. /
3521	is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3522	}
3523
3524	if(is_link < `0`)
3525	return -`1`;
3526
3527	if(is_link) {
3528	/ Eat the 2nd "[...]". /
3529	closer->end = next_closer->end;
3530	}
3531	} else {
3532	if(closer->end < ctx->size && CH(closer->end) == _T(`'('`)) {
3533	/ Might be inline link. /
3534	OFF inline_link_end = UINT_MAX;
3535
3536	is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
3537	if(is_link < `0`)
3538	return -`1`;
3539
3540	/ Check the closing ')' is not inside an already resolved range*
3541	* (i.e. a range with a higher priority), e.g. a code span. */
3542	if(is_link) {
3543	int i = closer_index + `1`;
3544
3545	while(i < ctx->n_marks) {
3546	MD_MARK* mark = &ctx->marks[i];
3547
3548	if(mark->beg >= inline_link_end)
3549	break;
3550	if((mark->flags & (MD_MARK_OPENER \| MD_MARK_RESOLVED)) == (MD_MARK_OPENER \| MD_MARK_RESOLVED)) {
3551	if(ctx->marks[mark->next].beg >= inline_link_end) {
3552	/ Cancel the link status. /
3553	if(!IS_INPUT_STR(attr.title))
3554	free(attr.title);
3555	is_link = FALSE;
3556	break;
3557	}
3558
3559	i = mark->next + `1`;
3560	} else {
3561	i++;
3562	}
3563	}
3564	}
3565
3566	if(is_link) {
3567	/ Eat the "(...)" /
3568	closer->end = inline_link_end;
3569	}
3570	}
3571
3572	if(!is_link) {
3573	/ Might be collapsed reference link. /
3574	is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3575	if(is_link < `0`)
3576	return -`1`;
3577	}
3578	}
3579
3580	if(is_link) {
3581	/ Resolve the brackets as a link. /
3582	opener->flags \|= MD_MARK_OPENER \| MD_MARK_RESOLVED;
3583	closer->flags \|= MD_MARK_CLOSER \| MD_MARK_RESOLVED;
3584
3585	/ If it is a link, we store the destination and title in the two*
3586	* dummy marks after the opener. */
3587	MD_ASSERT(ctx->marks[opener_index+`1`].ch == `'D'`);
3588	ctx->marks[opener_index+`1`].beg = attr.dest_beg;
3589	ctx->marks[opener_index+`1`].end = attr.dest_end;
3590
3591	MD_ASSERT(ctx->marks[opener_index+`2`].ch == `'D'`);
3592	md_mark_store_ptr(ctx, opener_index+`2`, attr.title);
3593	/ The title might or might not have been allocated for us. /
3594	if(attr.title_needs_free)
3595	md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+`2`);
3596	ctx->marks[opener_index+`2`].prev = attr.title_size;
3597
3598	if(opener->ch == `'['`) {
3599	last_link_beg = opener->beg;
3600	last_link_end = closer->end;
3601	} else {
3602	last_img_beg = opener->beg;
3603	last_img_end = closer->end;
3604	}
3605
3606	md_analyze_link_contents(ctx, lines, n_lines, opener_index+`1`, closer_index);
3607	}
3608
3609	opener_index = next_index;
3610	}
3611
3612	return `0`;
3613	}
3614
3615	/ Analyze whether the mark '&' starts a HTML entity.*
3616	* If so, update its flags as well as flags of corresponding closer ';'. */
3617	static void
3618	md_analyze_entity(MD_CTX* ctx, int mark_index)
3619	{
3620	MD_MARK* opener = &ctx->marks[mark_index];
3621	MD_MARK* closer;
3622	OFF off;
3623
3624	/ Cannot be entity if there is no closer as the next mark.*
3625	* (Any other mark between would mean strange character which cannot be
3626	* part of the entity.
3627	*
3628	* So we can do all the work on '&' and do not call this later for the
3629	* closing mark ';'.
3630	*/
3631	if(mark_index + `1` >= ctx->n_marks)
3632	return;
3633	closer = &ctx->marks[mark_index+`1`];
3634	if(closer->ch != `';'`)
3635	return;
3636
3637	if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
3638	MD_ASSERT(off == closer->end);
3639
3640	md_resolve_range(ctx, NULL, mark_index, mark_index+`1`);
3641	opener->end = closer->end;
3642	}
3643	}
3644
3645	static void
3646	md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3647	{
3648	MD_MARK* mark = &ctx->marks[mark_index];
3649	mark->flags \|= MD_MARK_RESOLVED;
3650
3651	md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index);
3652	ctx->n_table_cell_boundaries++;
3653	}
3654
3655	/ Split a longer mark into two. The new mark takes the given count of*
3656	* characters. May only be called if an adequate number of dummy 'D' marks
3657	* follows.
3658	*/
3659	static int
3660	md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3661	{
3662	MD_MARK* mark = &ctx->marks[mark_index];
3663	int new_mark_index = mark_index + (mark->end - mark->beg - n);
3664	MD_MARK* dummy = &ctx->marks[new_mark_index];
3665
3666	MD_ASSERT(mark->end - mark->beg > n);
3667	MD_ASSERT(dummy->ch == `'D'`);
3668
3669	memcpy(dummy, mark, sizeof(MD_MARK));
3670	mark->end -= n;
3671	dummy->beg = mark->end;
3672
3673	return new_mark_index;
3674	}
3675
3676	static void
3677	md_analyze_emph(MD_CTX* ctx, int mark_index)
3678	{
3679	MD_MARK* mark = &ctx->marks[mark_index];
3680	MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3681
3682	/ If we can be a closer, try to resolve with the preceding opener. /
3683	if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
3684	MD_MARK* opener = NULL;
3685	int opener_index;
3686
3687	if(mark->ch == _T(`'*'`)) {
3688	MD_MARKCHAIN* opener_chains[`6`];
3689	int i, n_opener_chains;
3690	unsigned flags = mark->flags;
3691
3692	/ Apply the "rule of three". /
3693	n_opener_chains = `0`;
3694	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
3695	if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3696	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
3697	if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3698	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
3699	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
3700	if(!(flags & MD_MARK_EMPH_INTRAWORD) \|\| (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3701	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
3702	if(!(flags & MD_MARK_EMPH_INTRAWORD) \|\| (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3703	opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
3704
3705	/ Opener is the most recent mark from the allowed chains. /
3706	for(i = `0`; i < n_opener_chains; i++) {
3707	if(opener_chains[i]->tail >= `0`) {
3708	int tmp_index = opener_chains[i]->tail;
3709	MD_MARK* tmp_mark = &ctx->marks[tmp_index];
3710	if(opener == NULL \|\| tmp_mark->end > opener->end) {
3711	opener_index = tmp_index;
3712	opener = tmp_mark;
3713	}
3714	}
3715	}
3716	} else {
3717	/ Simple emph. mark /
3718	if(chain->tail >= `0`) {
3719	opener_index = chain->tail;
3720	opener = &ctx->marks[opener_index];
3721	}
3722	}
3723
3724	/ Resolve, if we have found matching opener. /
3725	if(opener != NULL) {
3726	SZ opener_size = opener->end - opener->beg;
3727	SZ closer_size = mark->end - mark->beg;
3728	MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index);
3729
3730	if(opener_size > closer_size) {
3731	opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
3732	md_mark_chain_append(ctx, opener_chain, opener_index);
3733	} else if(opener_size < closer_size) {
3734	md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
3735	}
3736
3737	md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3738	md_resolve_range(ctx, opener_chain, opener_index, mark_index);
3739	return;
3740	}
3741	}
3742
3743	/ If we could not resolve as closer, we may be yet be an opener. /
3744	if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3745	md_mark_chain_append(ctx, chain, mark_index);
3746	}
3747
3748	static void
3749	md_analyze_tilde(MD_CTX* ctx, int mark_index)
3750	{
3751	MD_MARK* mark = &ctx->marks[mark_index];
3752	MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3753
3754	/ We attempt to be Github Flavored Markdown compatible here. GFM accepts*
3755	* only tildes sequences of length 1 and 2, and the length of the opener
3756	* and closer has to match. */
3757
3758	if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= `0`) {
3759	int opener_index = chain->head;
3760
3761	md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3762	md_resolve_range(ctx, chain, opener_index, mark_index);
3763	return;
3764	}
3765
3766	if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3767	md_mark_chain_append(ctx, chain, mark_index);
3768	}
3769
3770	static void
3771	md_analyze_dollar(MD_CTX* ctx, int mark_index)
3772	{
3773	/ This should mimic the way inline equations work in LaTeX, so there*
3774	* can only ever be one item in the chain (i.e. the dollars can't be
3775	* nested). This is basically the same as the md_analyze_tilde function,
3776	* except that we require matching openers and closers to be of the same
3777	* length.
3778	*
3779	* E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
3780	if(DOLLAR_OPENERS.head >= `0`) {
3781	/ If the potential closer has a non-matching number of $, discard /
3782	MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head];
3783	MD_MARK* close = &ctx->marks[mark_index];
3784
3785	int opener_index = DOLLAR_OPENERS.head;
3786	md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);
3787	if (open->end - open->beg == close->end - close->beg) {
3788	/ We are the matching closer /
3789	md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index);
3790	} else {
3791	/ We don't match the opener, so discard old opener and insert as opener /
3792	md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3793	}
3794	} else {
3795	/ No unmatched openers, so we are opener /
3796	md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3797	}
3798	}
3799
3800	static void
3801	md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
3802	{
3803	MD_MARK* opener = &ctx->marks[mark_index];
3804	int closer_index = mark_index + `1`;
3805	MD_MARK* closer = &ctx->marks[closer_index];
3806	MD_MARK* next_resolved_mark;
3807	OFF off = opener->end;
3808	int n_dots = FALSE;
3809	int has_underscore_in_last_seg = FALSE;
3810	int has_underscore_in_next_to_last_seg = FALSE;
3811	int n_opened_parenthesis = `0`;
3812
3813	/ Check for domain. /
3814	while(off < ctx->size) {
3815	if(ISALNUM(off) \|\| CH(off) == _T(`'-'`)) {
3816	off++;
3817	} else if(CH(off) == _T(`'.'`)) {
3818	/ We must see at least one period. /
3819	n_dots++;
3820	has_underscore_in_next_to_last_seg = has_underscore_in_last_seg;
3821	has_underscore_in_last_seg = FALSE;
3822	off++;
3823	} else if(CH(off) == _T(`'_'`)) {
3824	/ No underscore may be present in the last two domain segments. /
3825	has_underscore_in_last_seg = TRUE;
3826	off++;
3827	} else {
3828	break;
3829	}
3830	}
3831	if(off > opener->end && CH(off-`1`) == _T(`'.'`)) {
3832	off--;
3833	n_dots--;
3834	}
3835	if(off <= opener->end \|\| n_dots == `0` \|\| has_underscore_in_next_to_last_seg \|\| has_underscore_in_last_seg)
3836	return;
3837
3838	/ Check for path. /
3839	next_resolved_mark = closer + `1`;
3840	while(next_resolved_mark->ch == `'D'` \|\| !(next_resolved_mark->flags & MD_MARK_RESOLVED))
3841	next_resolved_mark++;
3842	while(off < next_resolved_mark->beg && CH(off) != _T(`'<'`) && !ISWHITESPACE(off) && !ISNEWLINE(off)) {
3843	/ Parenthesis must be balanced. /
3844	if(CH(off) == _T(`'('`)) {
3845	n_opened_parenthesis++;
3846	} else if(CH(off) == _T(`')'`)) {
3847	if(n_opened_parenthesis > `0`)
3848	n_opened_parenthesis--;
3849	else
3850	break;
3851	}
3852
3853	off++;
3854	}
3855	/ These cannot be last char In such case they are more likely normal*
3856	* punctuation. */
3857	if(ISANYOF(off-`1`, _T("?!.,:*_~")))
3858	off--;
3859
3860	/ Ok. Lets call it auto-link. Adapt opener and create closer to zero*
3861	* length so all the contents becomes the link text. */
3862	MD_ASSERT(closer->ch == `'D'`);
3863	opener->end = opener->beg;
3864	closer->ch = opener->ch;
3865	closer->beg = off;
3866	closer->end = off;
3867	md_resolve_range(ctx, NULL, mark_index, closer_index);
3868	}
3869
3870	/ The permissive autolinks do not have to be enclosed in '<' '>' but we*
3871	* instead impose stricter rules what is understood as an e-mail address
3872	* here. Actually any non-alphanumeric characters with exception of '.'
3873	* are prohibited both in username and after '@'. */
3874	static void
3875	md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
3876	{
3877	MD_MARK* opener = &ctx->marks[mark_index];
3878	int closer_index;
3879	MD_MARK* closer;
3880	OFF beg = opener->beg;
3881	OFF end = opener->end;
3882	int dot_count = `0`;
3883
3884	MD_ASSERT(CH(beg) == _T(`'@'`));
3885
3886	/ Scan for name before '@'. /
3887	while(beg > `0` && (ISALNUM(beg-`1`) \|\| ISANYOF(beg-`1`, _T(".-_+"))))
3888	beg--;
3889
3890	/ Scan for domain after '@'. /
3891	while(end < ctx->size && (ISALNUM(end) \|\| ISANYOF(end, _T(".-_")))) {
3892	if(CH(end) == _T(`'.'`))
3893	dot_count++;
3894	end++;
3895	}
3896	if(CH(end-`1`) == _T(`'.'`)) { / Final '.' not part of it. /
3897	dot_count--;
3898	end--;
3899	}
3900	else if(ISANYOF2(end-`1`, _T(`'-'`), _T(`'_'`))) / These are forbidden at the end. /
3901	return;
3902	if(CH(end-`1`) == _T(`'@'`) \|\| dot_count == `0`)
3903	return;
3904
3905	/ Ok. Lets call it auto-link. Adapt opener and create closer to zero*
3906	* length so all the contents becomes the link text. */
3907	closer_index = mark_index + `1`;
3908	closer = &ctx->marks[closer_index];
3909	MD_ASSERT(closer->ch == `'D'`);
3910
3911	opener->beg = beg;
3912	opener->end = beg;
3913	closer->ch = opener->ch;
3914	closer->beg = end;
3915	closer->end = end;
3916	md_resolve_range(ctx, NULL, mark_index, closer_index);
3917	}
3918
3919	static inline void
3920	md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3921	int mark_beg, int mark_end, const CHAR* mark_chars)
3922	{
3923	int i = mark_beg;
3924
3925	while(i < mark_end) {
3926	MD_MARK* mark = &ctx->marks[i];
3927
3928	/ Skip resolved spans. /
3929	if(mark->flags & MD_MARK_RESOLVED) {
3930	if(mark->flags & MD_MARK_OPENER) {
3931	MD_ASSERT(i < mark->next);
3932	i = mark->next + `1`;
3933	} else {
3934	i++;
3935	}
3936	continue;
3937	}
3938
3939	/ Skip marks we do not want to deal with. /
3940	if(!ISANYOF_(mark->ch, mark_chars)) {
3941	i++;
3942	continue;
3943	}
3944
3945	/ Analyze the mark. /
3946	switch(mark->ch) {
3947	case `'['`: / Pass through. /
3948	case `'!'`: / Pass through. /
3949	case `']'`: md_analyze_bracket(ctx, i); break;
3950	case `'&'`: md_analyze_entity(ctx, i); break;
3951	case `'\|'`: md_analyze_table_cell_boundary(ctx, i); break;
3952	case `'_'`: / Pass through. /
3953	case `''`: md_analyze_emph(ctx, i); break*;
3954	case `'~'`: md_analyze_tilde(ctx, i); break;
3955	case `'$'`: md_analyze_dollar(ctx, i); break;
3956	case `'.'`: / Pass through. /
3957	case `':'`: md_analyze_permissive_url_autolink(ctx, i); break;
3958	case `'@'`: md_analyze_permissive_email_autolink(ctx, i); break;
3959	}
3960
3961	i++;
3962	}
3963	}
3964
3965	/ Analyze marks (build ctx->marks). /
3966	static int
3967	md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
3968	{
3969	int ret;
3970
3971	/ Reset the previously collected stack of marks. /
3972	ctx->n_marks = `0`;
3973
3974	/ Collect all marks. /
3975	MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
3976
3977	/ We analyze marks in few groups to handle their precedence. /
3978	/ (1) Entities; code spans; autolinks; raw HTML. /
3979	md_analyze_marks(ctx, lines, n_lines, `0`, ctx->n_marks, _T("&"));
3980
3981	/ (2) Links. /
3982	md_analyze_marks(ctx, lines, n_lines, `0`, ctx->n_marks, _T("[]!"));
3983	MD_CHECK(md_resolve_links(ctx, lines, n_lines));
3984	BRACKET_OPENERS.head = -`1`;
3985	BRACKET_OPENERS.tail = -`1`;
3986	ctx->unresolved_link_head = -`1`;
3987	ctx->unresolved_link_tail = -`1`;
3988
3989	if(table_mode) {
3990	/ (3) Analyze table cell boundaries.*
3991	* Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
3992	* not after, because caller may need it. */
3993	MD_ASSERT(n_lines == `1`);
3994	TABLECELLBOUNDARIES.head = -`1`;
3995	TABLECELLBOUNDARIES.tail = -`1`;
3996	ctx->n_table_cell_boundaries = `0`;
3997	md_analyze_marks(ctx, lines, n_lines, `0`, ctx->n_marks, _T("\|"));
3998	return ret;
3999	}
4000
4001	/ (4) Emphasis and strong emphasis; permissive autolinks. /
4002	md_analyze_link_contents(ctx, lines, n_lines, `0`, ctx->n_marks);
4003
4004	abort:
4005	return ret;
4006	}
4007
4008	static void
4009	md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
4010	int mark_beg, int mark_end)
4011	{
4012	int i;
4013
4014	md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
4015
4016	for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) {
4017	ctx->mark_chains[i].head = -`1`;
4018	ctx->mark_chains[i].tail = -`1`;
4019	}
4020	}
4021
4022	static int
4023	md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
4024	const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
4025	const CHAR* title, SZ title_size)
4026	{
4027	MD_ATTRIBUTE_BUILD href_build = { `0` };
4028	MD_ATTRIBUTE_BUILD title_build = { `0` };
4029	MD_SPAN_A_DETAIL det;
4030	int ret = `0`;
4031
4032	/ Note we here rely on fact that MD_SPAN_A_DETAIL and*
4033	* MD_SPAN_IMG_DETAIL are binary-compatible. */
4034	memset(&det, `0`, sizeof(MD_SPAN_A_DETAIL));
4035	MD_CHECK(md_build_attribute(ctx, dest, dest_size,
4036	(prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : `0`),
4037	&det.href, &href_build));
4038	MD_CHECK(md_build_attribute(ctx, title, title_size, `0`, &det.title, &title_build));
4039
4040	if(enter)
4041	MD_ENTER_SPAN(type, &det);
4042	else
4043	MD_LEAVE_SPAN(type, &det);
4044
4045	abort:
4046	md_free_attribute(ctx, &href_build);
4047	md_free_attribute(ctx, &title_build);
4048	return ret;
4049	}
4050
4051	static int
4052	md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
4053	{
4054	MD_ATTRIBUTE_BUILD target_build = { `0` };
4055	MD_SPAN_WIKILINK_DETAIL det;
4056	int ret = `0`;
4057
4058	memset(&det, `0`, sizeof(MD_SPAN_WIKILINK_DETAIL));
4059	MD_CHECK(md_build_attribute(ctx, target, target_size, `0`, &det.target, &target_build));
4060
4061	if (enter)
4062	MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
4063	else
4064	MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
4065
4066	abort:
4067	md_free_attribute(ctx, &target_build);
4068	return ret;
4069	}
4070
4071
/* Render the output, accordingly to the analyzed ctx->marks.
 *
 * The function walks the text of the given lines from lines[0].beg to
 * lines[n_lines-1].end. Text between resolved marks is emitted via MD_TEXT()
 * with the current text type; each resolved mark is translated into the
 * corresponding span enter/leave callback or special text; and each line
 * boundary is translated into a soft break, hard break, space or verbatim
 * new line depending on the current text type.
 *
 * Returns zero on success, or non-zero when a callback or a helper fails
 * (the MD_TEXT/MD_ENTER_SPAN/MD_LEAVE_SPAN/MD_CHECK macros presumably jump
 * to the abort label with ret set).
 */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    MD_TEXTTYPE text_type;              /* Type used for plain text output. */
    const MD_LINE* line = lines;        /* Line currently being rendered. */
    MD_MARK* prev_mark = NULL;          /* The last mark processed, if any. */
    MD_MARK* mark;                      /* The next resolved mark to process. */
    OFF off = lines[0].beg;             /* Current position in the input. */
    OFF end = lines[n_lines-1].end;     /* End of the very last line. */
    int enforce_hardbreak = 0;          /* Set by a backslash before a new line. */
    int ret = 0;

    /* Find first resolved mark. Note there is always at least one resolved
     * mark, the dummy last one after the end of the latest line we actually
     * never really reach. This saves us of a lot of special checks and cases
     * in this function. */
    mark = ctx->marks;
    while(!(mark->flags & MD_MARK_RESOLVED))
        mark++;

    text_type = MD_TEXT_NORMAL;

    while(1) {
        /* Process the text up to the next mark or end-of-line. */
        OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
        if(tmp > off) {
            MD_TEXT(text_type, STR(off), tmp - off);
            off = tmp;
        }

        /* If reached the mark, process it and move to next one. */
        if(off >= mark->beg) {
            switch(mark->ch) {
                case '\\':      /* Backslash escape. */
                    /* A backslash just before a new line requests a hard
                     * break; otherwise the escaped character is output. */
                    if(ISNEWLINE(mark->beg+1))
                        enforce_hardbreak = 1;
                    else
                        MD_TEXT(text_type, STR(mark->beg+1), 1);
                    break;

                case ' ':       /* Non-trivial space. */
                    /* The whole mark is replaced with a single space. */
                    MD_TEXT(text_type, _T(" "), 1);
                    break;

                case '`':       /* Code span. */
                    if(mark->flags & MD_MARK_OPENER) {
                        MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
                        text_type = MD_TEXT_CODE;
                    } else {
                        MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '_':       /* Underline (or emphasis if we fall through). */
                    if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
                        /* One MD_SPAN_U is entered/left per each underscore
                         * character of the mark. */
                        if(mark->flags & MD_MARK_OPENER) {
                            while(off < mark->end) {
                                MD_ENTER_SPAN(MD_SPAN_U, NULL);
                                off++;
                            }
                        } else {
                            while(off < mark->end) {
                                MD_LEAVE_SPAN(MD_SPAN_U, NULL);
                                off++;
                            }
                        }
                        break;
                    }
                    /* Fall through. */

                case '*':       /* Emphasis, strong emphasis. */
                    /* An odd mark length yields one MD_SPAN_EM; every
                     * remaining pair of characters yields one MD_SPAN_STRONG.
                     * Openers enter EM first, closers leave it last, so the
                     * spans nest properly. */
                    if(mark->flags & MD_MARK_OPENER) {
                        if((mark->end - off) % 2) {
                            MD_ENTER_SPAN(MD_SPAN_EM, NULL);
                            off++;
                        }
                        while(off + 1 < mark->end) {
                            MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
                            off += 2;
                        }
                    } else {
                        while(off + 1 < mark->end) {
                            MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
                            off += 2;
                        }
                        if((mark->end - off) % 2) {
                            MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
                            off++;
                        }
                    }
                    break;

                case '~':       /* Strikethrough. */
                    if(mark->flags & MD_MARK_OPENER)
                        MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
                    else
                        MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
                    break;

                case '$':       /* LaTeX math. */
                    /* Odd mark length means inline math, even length means
                     * display math. */
                    if(mark->flags & MD_MARK_OPENER) {
                        MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
                        text_type = MD_TEXT_LATEXMATH;
                    } else {
                        MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '[':       /* Link, wiki link, image. */
                case '!':
                case ']':
                {
                    /* For ']' the opener is reached through mark->prev; the
                     * closer is always reached through opener->next. */
                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
                    const MD_MARK* closer = &ctx->marks[opener->next];
                    const MD_MARK* dest_mark;
                    const MD_MARK* title_mark;

                    /* A '[' ... ']' pair whose opener and closer are both at
                     * least two characters long is handled as a wiki link. */
                    if ((opener->ch == '[' && closer->ch == ']') &&
                        opener->end - opener->beg >= 2 &&
                        closer->end - closer->beg >= 2)
                    {
                        /* An opener longer than two characters carries the
                         * target inside of the opener mark itself (after the
                         * two-character prefix); otherwise the target is
                         * the text between the opener and the closer. */
                        int has_label = (opener->end - opener->beg > 2);
                        SZ target_sz;

                        if(has_label)
                            target_sz = opener->end - (opener->beg+2);
                        else
                            target_sz = closer->beg - opener->end;

                        MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
                                 has_label ? STR(opener->beg+2) : STR(opener->end),
                                 target_sz));

                        break;
                    }

                    /* Ordinary link or image: the two dummy marks right
                     * after the opener describe the destination and the
                     * title. The title size is kept in title_mark->prev. */
                    dest_mark = opener+1;
                    MD_ASSERT(dest_mark->ch == 'D');
                    title_mark = opener+2;
                    MD_ASSERT(title_mark->ch == 'D');

                    MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
                                (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
                                STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
                                md_mark_get_ptr(ctx, title_mark - ctx->marks), title_mark->prev));

                    /* link/image closer may span multiple lines. */
                    if(mark->ch == ']') {
                        while(mark->end > line->end)
                            line++;
                    }

                    break;
                }

                case '<':
                case '>':       /* Autolink or raw HTML. */
                    if(!(mark->flags & MD_MARK_AUTOLINK)) {
                        /* Raw HTML. */
                        if(mark->flags & MD_MARK_OPENER)
                            text_type = MD_TEXT_HTML;
                        else
                            text_type = MD_TEXT_NORMAL;
                        break;
                    }
                    /* Fall through, if auto-link. */

                case '@':       /* Permissive e-mail autolink. */
                case ':':       /* Permissive URL autolink. */
                case '.':       /* Permissive WWW autolink. */
                {
                    MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
                    MD_MARK* closer = &ctx->marks[opener->next];
                    const CHAR* dest = STR(opener->end);
                    SZ dest_size = closer->beg - opener->end;

                    /* For permissive auto-links we do not know closer mark
                     * position at the time of md_collect_marks(), therefore
                     * it can be out-of-order in ctx->marks[].
                     *
                     * With this flag, we make sure that we output the closer
                     * only if we processed the opener. */
                    if(mark->flags & MD_MARK_OPENER)
                        closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;

                    /* E-mail and WWW autolinks have no scheme in the text,
                     * so the destination is composed in ctx->buffer with
                     * a 7-character prefix ("mailto:" or "http://"). */
                    if(opener->ch == '@' || opener->ch == '.') {
                        dest_size += 7;
                        MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
                        memcpy(ctx->buffer,
                                (opener->ch == '@' ? _T("mailto:") : _T("http://")),
                                7 * sizeof(CHAR));
                        memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
                        dest = ctx->buffer;
                    }

                    if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
                        MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
                                    MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
                    break;
                }

                case '&':       /* Entity. */
                    MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
                    break;

                case '\0':      /* NULL character in the input. */
                    MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
                    break;

                case 127:
                    /* NOTE(review): presumably the dummy terminating mark
                     * mentioned above -- confirm where it is pushed. */
                    goto abort;
            }

            off = mark->end;

            /* Move to next resolved mark. Marks beginning before the current
             * offset are skipped too. */
            prev_mark = mark;
            mark++;
            while(!(mark->flags & MD_MARK_RESOLVED)  ||  mark->beg < off)
                mark++;
        }

        /* If reached end of line, move to next one. */
        if(off >= line->end) {
            /* If it is the last line, we are done. */
            if(off >= end)
                break;

            if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
                OFF tmp;

                MD_ASSERT(prev_mark != NULL);
                MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$')  &&  (prev_mark->flags & MD_MARK_OPENER));
                MD_ASSERT(ISANYOF2_(mark->ch, '`', '$')  &&  (mark->flags & MD_MARK_CLOSER));

                /* Inside a code span, trailing line whitespace has to be
                 * outputted. */
                tmp = off;
                while(off < ctx->size  &&  ISBLANK(off))
                    off++;
                if(off > tmp)
                    MD_TEXT(text_type, STR(tmp), off-tmp);

                /* and new lines are transformed into single spaces. */
                if(prev_mark->end < off  &&  off < mark->beg)
                    MD_TEXT(text_type, _T(" "), 1);
            } else if(text_type == MD_TEXT_HTML) {
                /* Inside raw HTML, we output the new line verbatim, including
                 * any trailing spaces. */
                OFF tmp = off;

                while(tmp < end  &&  ISBLANK(tmp))
                    tmp++;
                if(tmp > off)
                    MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
                MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
            } else {
                /* Output soft or hard line break. */
                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;

                /* A hard break is requested either by a backslash before
                 * the new line or by two spaces at the end of the line. */
                if(text_type == MD_TEXT_NORMAL) {
                    if(enforce_hardbreak)
                        break_type = MD_TEXT_BR;
                    else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
                        break_type = MD_TEXT_BR;
                }

                MD_TEXT(break_type, _T("\n"), 1);
            }

            /* Move to the next line. */
            line++;
            off = line->beg;

            enforce_hardbreak = 0;
        }
    }

abort:
    return ret;
}
4356
4357
4358	/***************************
4359	* Processing Tables *
4360	***************************/
4361
4362	static void
4363	md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
4364	{
4365	static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
4366	OFF off = beg;
4367
4368	while(n_align > `0`) {
4369	int index = `0`; / index into align_map[] /
4370
4371	while(CH(off) != _T(`'-'`))
4372	off++;
4373	if(off > beg && CH(off-`1`) == _T(`':'`))
4374	index \|= `1`;
4375	while(off < end && CH(off) == _T(`'-'`))
4376	off++;
4377	if(off < end && CH(off) == _T(`':'`))
4378	index \|= `2`;
4379
4380	*align = align_map[index];
4381	align++;
4382	n_align--;
4383	}
4384
4385	}
4386
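/* Forward declaration: the contents of a table cell are processed as the
 * contents of a normal block, which is implemented further below. */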
4388	static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
4389
4390	static int
4391	md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4392	{
4393	MD_LINE line;
4394	MD_BLOCK_TD_DETAIL det;
4395	int ret = `0`;
4396
4397	while(beg < end && ISWHITESPACE(beg))
4398	beg++;
4399	while(end > beg && ISWHITESPACE(end-`1`))
4400	end--;
4401
4402	det.align = align;
4403	line.beg = beg;
4404	line.end = end;
4405
4406	MD_ENTER_BLOCK(cell_type, &det);
4407	MD_CHECK(md_process_normal_block_contents(ctx, &line, `1`));
4408	MD_LEAVE_BLOCK(cell_type, &det);
4409
4410	abort:
4411	return ret;
4412	}
4413
4414	static int
4415	md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4416	const MD_ALIGN* align, int col_count)
4417	{
4418	MD_LINE line;
4419	OFF* pipe_offs = NULL;
4420	int i, j, k, n;
4421	int ret = `0`;
4422
4423	line.beg = beg;
4424	line.end = end;
4425
4426	/ Break the line into table cells by identifying pipe characters who*
4427	* form the cell boundary. */
4428	MD_CHECK(md_analyze_inlines(ctx, &line, `1`, TRUE));
4429
4430	/ We have to remember the cell boundaries in local buffer because*
4431	* ctx->marks[] shall be reused during cell contents processing. */
4432	n = ctx->n_table_cell_boundaries + `2`;
4433	pipe_offs = (OFF) malloc(n sizeof(OFF));
4434	if(pipe_offs == NULL) {
4435	MD_LOG("malloc() failed.");
4436	ret = -`1`;
4437	goto abort;
4438	}
4439	j = `0`;
4440	pipe_offs[j++] = beg;
4441	for(i = TABLECELLBOUNDARIES.head; i >= `0`; i = ctx->marks[i].next) {
4442	MD_MARK* mark = &ctx->marks[i];
4443	pipe_offs[j++] = mark->end;
4444	}
4445	pipe_offs[j++] = end+`1`;
4446
4447	/ Process cells. /
4448	MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
4449	k = `0`;
4450	for(i = `0`; i < j-`1` && k < col_count; i++) {
4451	if(pipe_offs[i] < pipe_offs[i+`1`]-`1`)
4452	MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+`1`]-`1`));
4453	}
4454	/ Make sure we call enough table cells even if the current table contains*
4455	* too few of them. */
4456	while(k < col_count)
4457	MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], `0`, `0`));
4458	MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
4459
4460	abort:
4461	free(pipe_offs);
4462
4463	/ Free any temporary memory blocks stored within some dummy marks. /
4464	for(i = PTR_CHAIN.head; i >= `0`; i = ctx->marks[i].next)
4465	free(md_mark_get_ptr(ctx, i));
4466	PTR_CHAIN.head = -`1`;
4467	PTR_CHAIN.tail = -`1`;
4468
4469	return ret;
4470	}
4471
4472	static int
4473	md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
4474	{
4475	MD_ALIGN* align;
4476	int i;
4477	int ret = `0`;
4478
4479	/ At least two lines have to be present: The column headers and the line*
4480	* with the underlines. */
4481	MD_ASSERT(n_lines >= `2`);
4482
4483	align = malloc(col_count * sizeof(MD_ALIGN));
4484	if(align == NULL) {
4485	MD_LOG("malloc() failed.");
4486	ret = -`1`;
4487	goto abort;
4488	}
4489
4490	md_analyze_table_alignment(ctx, lines[`1`].beg, lines[`1`].end, align, col_count);
4491
4492	MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
4493	MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4494	lines[`0`].beg, lines[`0`].end, align, col_count));
4495	MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
4496
4497	MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
4498	for(i = `2`; i < n_lines; i++) {
4499	MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4500	lines[i].beg, lines[i].end, align, col_count));
4501	}
4502	MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
4503
4504	abort:
4505	free(align);
4506	return ret;
4507	}
4508
4509
4510	/**************************
4511	* Processing Block *
4512	**************************/
4513
/* Flags stored in MD_BLOCK::flags. */

/* The block record opens a container block: md_process_all_blocks() calls
 * the enter_block callback for it. */
#define MD_BLOCK_CONTAINER_OPENER   0x01
/* The block record closes a container block: md_process_all_blocks() calls
 * the leave_block callback for it. */
#define MD_BLOCK_CONTAINER_CLOSER   0x02
/* Mask for testing whether the record is a container opener or closer
 * (as opposed to a leaf block followed by its lines). */
#define MD_BLOCK_CONTAINER          (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)
/* MD_BLOCK_UL, MD_BLOCK_OL: The list is loose (reported as not tight). */
#define MD_BLOCK_LOOSE_LIST         0x04
/* MD_BLOCK_H: The header is a setext header, i.e. its last line is the
 * underline which md_end_current_block() removes. */
#define MD_BLOCK_SETEXT_HEADER      0x08
4519
/* Record describing one block, as stored in ctx->block_bytes.
 *
 * The record of a leaf block is immediately followed by n_lines line
 * records (MD_VERBATIMLINE for MD_BLOCK_CODE and MD_BLOCK_HTML, MD_LINE
 * otherwise). Container records (see MD_BLOCK_CONTAINER) are followed by
 * no lines. */
struct MD_BLOCK_tag {
    MD_BLOCKTYPE type  :  8;
    /* Combination of the MD_BLOCK_xxxx flags defined above. */
    unsigned flags     :  8;

    /* MD_BLOCK_H:      Header level (1 - 6)
     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
     * MD_BLOCK_UL:     List mark character (reported as MD_BLOCK_UL_DETAIL::mark).
     * MD_BLOCK_OL:     Mark delimiter character (reported as
     *                  MD_BLOCK_OL_DETAIL::mark_delimiter).
     */
    unsigned data      : 16;

    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
     * MD_BLOCK_LI:     Task mark offset in the input doc.
     * MD_BLOCK_OL:     Start item number.
     */
    unsigned n_lines;
};
4537
/* Record describing one container, as kept in the ctx->containers vector.
 *
 * NOTE(review): only is_loose is used by the code visible in this part of
 * the file; the notes on the other members are inferred from their names
 * and should be confirmed against the line analysis code. */
struct MD_CONTAINER_tag {
    CHAR ch;                    /* Presumably the container mark character. */
    unsigned is_loose    : 8;   /* Non-zero if the container is loose; during
                                 * block processing a leaf MD_BLOCK_P is
                                 * reported only if the innermost container
                                 * is loose. */
    unsigned is_task     : 8;   /* Presumably non-zero for a task list item. */
    unsigned start;             /* Presumably start number of an ordered list. */
    unsigned mark_indent;       /* Presumably indentation of the mark. */
    unsigned contents_indent;   /* Presumably indentation of the contents. */
    OFF block_byte_off;         /* Presumably offset of the container's block
                                 * record in ctx->block_bytes. */
    OFF task_mark_off;          /* Presumably offset of the task mark in the
                                 * input document. */
};
4548
4549
4550	static int
4551	md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4552	{
4553	int i;
4554	int ret;
4555
4556	MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4557	MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4558
4559	abort:
4560	/ Free any temporary memory blocks stored within some dummy marks. /
4561	for(i = PTR_CHAIN.head; i >= `0`; i = ctx->marks[i].next)
4562	free(md_mark_get_ptr(ctx, i));
4563	PTR_CHAIN.head = -`1`;
4564	PTR_CHAIN.tail = -`1`;
4565
4566	return ret;
4567	}
4568
4569	static int
4570	md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
4571	{
4572	static const CHAR indent_chunk_str[] = _T(" ");
4573	static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - `1`;
4574
4575	int i;
4576	int ret = `0`;
4577
4578	for(i = `0`; i < n_lines; i++) {
4579	const MD_VERBATIMLINE* line = &lines[i];
4580	int indent = line->indent;
4581
4582	MD_ASSERT(indent >= `0`);
4583
4584	/ Output code indentation. /
4585	while(indent > (int) SIZEOF_ARRAY(indent_chunk_str)) {
4586	MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4587	indent -= SIZEOF_ARRAY(indent_chunk_str);
4588	}
4589	if(indent > `0`)
4590	MD_TEXT(text_type, indent_chunk_str, indent);
4591
4592	/ Output the code line itself. /
4593	MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4594
4595	/ Enforce end-of-line. /
4596	MD_TEXT(text_type, _T("\n"), `1`);
4597	}
4598
4599	abort:
4600	return ret;
4601	}
4602
4603	static int
4604	md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
4605	{
4606	if(is_fenced) {
4607	/ Skip the first line in case of fenced code: It is the fence.*
4608	* (Only the starting fence is present due to logic in md_analyze_line().) */
4609	lines++;
4610	n_lines--;
4611	} else {
4612	/ Ignore blank lines at start/end of indented code block. /
4613	while(n_lines > `0` && lines[`0`].beg == lines[`0`].end) {
4614	lines++;
4615	n_lines--;
4616	}
4617	while(n_lines > `0` && lines[n_lines-`1`].beg == lines[n_lines-`1`].end) {
4618	n_lines--;
4619	}
4620	}
4621
4622	if(n_lines == `0`)
4623	return `0`;
4624
4625	return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
4626	}
4627
4628	static int
4629	md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4630	MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4631	{
4632	const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + `1`);
4633	OFF beg = fence_line->beg;
4634	OFF end = fence_line->end;
4635	OFF lang_end;
4636	CHAR fence_ch = CH(fence_line->beg);
4637	int ret = `0`;
4638
4639	/ Skip the fence itself. /
4640	while(beg < ctx->size && CH(beg) == fence_ch)
4641	beg++;
4642	/ Trim initial spaces. /
4643	while(beg < ctx->size && CH(beg) == _T(`' '`))
4644	beg++;
4645
4646	/ Trim trailing spaces. /
4647	while(end > beg && CH(end-`1`) == _T(`' '`))
4648	end--;
4649
4650	/ Build info string attribute. /
4651	MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, `0`, &det->info, info_build));
4652
4653	/ Build info string attribute. /
4654	lang_end = beg;
4655	while(lang_end < end && !ISWHITESPACE(lang_end))
4656	lang_end++;
4657	MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, `0`, &det->lang, lang_build));
4658
4659	det->fence_char = fence_ch;
4660
4661	abort:
4662	return ret;
4663	}
4664
4665	static int
4666	md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4667	{
4668	union {
4669	MD_BLOCK_H_DETAIL header;
4670	MD_BLOCK_CODE_DETAIL code;
4671	} det;
4672	MD_ATTRIBUTE_BUILD info_build;
4673	MD_ATTRIBUTE_BUILD lang_build;
4674	int is_in_tight_list;
4675	int clean_fence_code_detail = FALSE;
4676	int ret = `0`;
4677
4678	memset(&det, `0`, sizeof(det));
4679
4680	if(ctx->n_containers == `0`)
4681	is_in_tight_list = FALSE;
4682	else
4683	is_in_tight_list = !ctx->containers[ctx->n_containers-`1`].is_loose;
4684
4685	switch(block->type) {
4686	case MD_BLOCK_H:
4687	det.header.level = block->data;
4688	break;
4689
4690	case MD_BLOCK_CODE:
4691	/ For fenced code block, we may need to set the info string. /
4692	if(block->data != `0`) {
4693	memset(&det.code, `0`, sizeof(MD_BLOCK_CODE_DETAIL));
4694	clean_fence_code_detail = TRUE;
4695	MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4696	}
4697	break;
4698
4699	default:
4700	/ Noop. /
4701	break;
4702	}
4703
4704	if(!is_in_tight_list \|\| block->type != MD_BLOCK_P)
4705	MD_ENTER_BLOCK(block->type, (void*) &det);
4706
4707	/ Process the block contents accordingly to is type. /
4708	switch(block->type) {
4709	case MD_BLOCK_HR:
4710	/ noop /
4711	break;
4712
4713	case MD_BLOCK_CODE:
4714	MD_CHECK(md_process_code_block_contents(ctx, (block->data != `0`),
4715	(const MD_VERBATIMLINE*)(block + `1`), block->n_lines));
4716	break;
4717
4718	case MD_BLOCK_HTML:
4719	MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4720	(const MD_VERBATIMLINE*)(block + `1`), block->n_lines));
4721	break;
4722
4723	case MD_BLOCK_TABLE:
4724	MD_CHECK(md_process_table_block_contents(ctx, block->data,
4725	(const MD_LINE*)(block + `1`), block->n_lines));
4726	break;
4727
4728	default:
4729	MD_CHECK(md_process_normal_block_contents(ctx,
4730	(const MD_LINE*)(block + `1`), block->n_lines));
4731	break;
4732	}
4733
4734	if(!is_in_tight_list \|\| block->type != MD_BLOCK_P)
4735	MD_LEAVE_BLOCK(block->type, (void*) &det);
4736
4737	abort:
4738	if(clean_fence_code_detail) {
4739	md_free_attribute(ctx, &info_build);
4740	md_free_attribute(ctx, &lang_build);
4741	}
4742	return ret;
4743	}
4744
4745	static int
4746	md_process_all_blocks(MD_CTX* ctx)
4747	{
4748	int byte_off = `0`;
4749	int ret = `0`;
4750
4751	/ ctx->containers now is not needed for detection of lists and list items*
4752	* so we reuse it for tracking what lists are loose or tight. We rely
4753	* on the fact the vector is large enough to hold the deepest nesting
4754	* level of lists. */
4755	ctx->n_containers = `0`;
4756
4757	while(byte_off < ctx->n_block_bytes) {
4758	MD_BLOCK* block = (MD_BLOCK)((char**)ctx->block_bytes + byte_off);
4759	union {
4760	MD_BLOCK_UL_DETAIL ul;
4761	MD_BLOCK_OL_DETAIL ol;
4762	MD_BLOCK_LI_DETAIL li;
4763	} det;
4764
4765	switch(block->type) {
4766	case MD_BLOCK_UL:
4767	det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4768	det.ul.mark = (CHAR) block->data;
4769	break;
4770
4771	case MD_BLOCK_OL:
4772	det.ol.start = block->n_lines;
4773	det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4774	det.ol.mark_delimiter = (CHAR) block->data;
4775	break;
4776
4777	case MD_BLOCK_LI:
4778	det.li.is_task = (block->data != `0`);
4779	det.li.task_mark = (CHAR) block->data;
4780	det.li.task_mark_offset = (OFF) block->n_lines;
4781	break;
4782
4783	default:
4784	/ noop /
4785	break;
4786	}
4787
4788	if(block->flags & MD_BLOCK_CONTAINER) {
4789	if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4790	MD_LEAVE_BLOCK(block->type, &det);
4791
4792	if(block->type == MD_BLOCK_UL \|\| block->type == MD_BLOCK_OL \|\| block->type == MD_BLOCK_QUOTE)
4793	ctx->n_containers--;
4794	}
4795
4796	if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4797	MD_ENTER_BLOCK(block->type, &det);
4798
4799	if(block->type == MD_BLOCK_UL \|\| block->type == MD_BLOCK_OL) {
4800	ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4801	ctx->n_containers++;
4802	} else if(block->type == MD_BLOCK_QUOTE) {
4803	/ This causes that any text in a block quote, even if*
4804	* nested inside a tight list item, is wrapped with
4805	* <p>...</p>. */
4806	ctx->containers[ctx->n_containers].is_loose = TRUE;
4807	ctx->n_containers++;
4808	}
4809	}
4810	} else {
4811	MD_CHECK(md_process_leaf_block(ctx, block));
4812
4813	if(block->type == MD_BLOCK_CODE \|\| block->type == MD_BLOCK_HTML)
4814	byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4815	else
4816	byte_off += block->n_lines * sizeof(MD_LINE);
4817	}
4818
4819	byte_off += sizeof(MD_BLOCK);
4820	}
4821
4822	ctx->n_block_bytes = `0`;
4823
4824	abort:
4825	return ret;
4826	}
4827
4828
4829	/************************************
4830	* Grouping Lines into Blocks *
4831	************************************/
4832
4833	static void*
4834	md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4835	{
4836	void* ptr;
4837
4838	if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4839	void* new_block_bytes;
4840
4841	ctx->alloc_block_bytes = (ctx->alloc_block_bytes > `0`
4842	? ctx->alloc_block_bytes + ctx->alloc_block_bytes / `2`
4843	: `512`);
4844	new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
4845	if(new_block_bytes == NULL) {
4846	MD_LOG("realloc() failed.");
4847	return NULL;
4848	}
4849
4850	/ Fix the ->current_block after the reallocation. /
4851	if(ctx->current_block != NULL) {
4852	OFF off_current_block = (char) ctx->current_block - (char**) ctx->block_bytes;
4853	ctx->current_block = (MD_BLOCK) ((char**) new_block_bytes + off_current_block);
4854	}
4855
4856	ctx->block_bytes = new_block_bytes;
4857	}
4858
4859	ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
4860	ctx->n_block_bytes += n_bytes;
4861	return ptr;
4862	}
4863
4864	static int
4865	md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
4866	{
4867	MD_BLOCK* block;
4868
4869	MD_ASSERT(ctx->current_block == NULL);
4870
4871	block = (MD_BLOCK) md_push_block_bytes(ctx, sizeof*(MD_BLOCK));
4872	if(block == NULL)
4873	return -`1`;
4874
4875	switch(line->type) {
4876	case MD_LINE_HR:
4877	block->type = MD_BLOCK_HR;
4878	break;
4879
4880	case MD_LINE_ATXHEADER:
4881	case MD_LINE_SETEXTHEADER:
4882	block->type = MD_BLOCK_H;
4883	break;
4884
4885	case MD_LINE_FENCEDCODE:
4886	case MD_LINE_INDENTEDCODE:
4887	block->type = MD_BLOCK_CODE;
4888	break;
4889
4890	case MD_LINE_TEXT:
4891	block->type = MD_BLOCK_P;
4892	break;
4893
4894	case MD_LINE_HTML:
4895	block->type = MD_BLOCK_HTML;
4896	break;
4897
4898	case MD_LINE_BLANK:
4899	case MD_LINE_SETEXTUNDERLINE:
4900	case MD_LINE_TABLEUNDERLINE:
4901	default:
4902	MD_UNREACHABLE();
4903	break;
4904	}
4905
4906	block->flags = `0`;
4907	block->data = line->data;
4908	block->n_lines = `0`;
4909
4910	ctx->current_block = block;
4911	return `0`;
4912	}
4913
4914	/ Eat from start of current (textual) block any reference definitions and*
4915	* remember them so we can resolve any links referring to them.
4916	*
4917	* (Reference definitions can only be at start of it as they cannot break
4918	* a paragraph.)
4919	*/
4920	static int
4921	md_consume_link_reference_definitions(MD_CTX* ctx)
4922	{
4923	MD_LINE* lines = (MD_LINE*) (ctx->current_block + `1`);
4924	int n_lines = ctx->current_block->n_lines;
4925	int n = `0`;
4926
4927	/ Compute how many lines at the start of the block form one or more*
4928	* reference definitions. */
4929	while(n < n_lines) {
4930	int n_link_ref_lines;
4931
4932	n_link_ref_lines = md_is_link_reference_definition(ctx,
4933	lines + n, n_lines - n);
4934	/ Not a reference definition? /
4935	if(n_link_ref_lines == `0`)
4936	break;
4937
4938	/ We fail if it is the ref. def. but it could not be stored due*
4939	* a memory allocation error. */
4940	if(n_link_ref_lines < `0`)
4941	return -`1`;
4942
4943	n += n_link_ref_lines;
4944	}
4945
4946	/ If there was at least one reference definition, we need to remove*
4947	* its lines from the block, or perhaps even the whole block. */
4948	if(n > `0`) {
4949	if(n == n_lines) {
4950	/ Remove complete block. /
4951	ctx->n_block_bytes -= n * sizeof(MD_LINE);
4952	ctx->n_block_bytes -= sizeof(MD_BLOCK);
4953	ctx->current_block = NULL;
4954	} else {
4955	/ Remove just some initial lines from the block. /
4956	memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE));
4957	ctx->current_block->n_lines -= n;
4958	ctx->n_block_bytes -= n * sizeof(MD_LINE);
4959	}
4960	}
4961
4962	return `0`;
4963	}
4964
4965	static int
4966	md_end_current_block(MD_CTX* ctx)
4967	{
4968	int ret = `0`;
4969
4970	if(ctx->current_block == NULL)
4971	return ret;
4972
4973	/ Check whether there is a reference definition. (We do this here instead*
4974	* of in md_analyze_line() because reference definition can take multiple
4975	* lines.) */
4976	if(ctx->current_block->type == MD_BLOCK_P \|\|
4977	(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
4978	{
4979	MD_LINE* lines = (MD_LINE*) (ctx->current_block + `1`);
4980	if(CH(lines[`0`].beg) == _T(`'['`)) {
4981	MD_CHECK(md_consume_link_reference_definitions(ctx));
4982	if(ctx->current_block == NULL)
4983	return ret;
4984	}
4985	}
4986
4987	if(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
4988	int n_lines = ctx->current_block->n_lines;
4989
4990	if(n_lines > `1`) {
4991	/ Get rid of the underline. /
4992	ctx->current_block->n_lines--;
4993	ctx->n_block_bytes -= sizeof(MD_LINE);
4994	} else {
4995	/ Only the underline has left after eating the ref. defs.*
4996	* Keep the line as beginning of a new ordinary paragraph. */
4997	ctx->current_block->type = MD_BLOCK_P;
4998	return `0`;
4999	}
5000	}
5001
5002	/ Mark we are not building any block anymore. /
5003	ctx->current_block = NULL;
5004
5005	abort:
5006	return ret;
5007	}
5008
5009	static int
5010	md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
5011	{
5012	MD_ASSERT(ctx->current_block != NULL);
5013
5014	if(ctx->current_block->type == MD_BLOCK_CODE \|\| ctx->current_block->type == MD_BLOCK_HTML) {
5015	MD_VERBATIMLINE* line;
5016
5017	line = (MD_VERBATIMLINE) md_push_block_bytes(ctx, sizeof*(MD_VERBATIMLINE));
5018	if(line == NULL)
5019	return -`1`;
5020
5021	line->indent = analysis->indent;
5022	line->beg = analysis->beg;
5023	line->end = analysis->end;
5024	} else {
5025	MD_LINE* line;
5026
5027	line = (MD_LINE) md_push_block_bytes(ctx, sizeof*(MD_LINE));
5028	if(line == NULL)
5029	return -`1`;
5030
5031	line->beg = analysis->beg;
5032	line->end = analysis->end;
5033	}
5034	ctx->current_block->n_lines++;
5035
5036	return `0`;
5037	}
5038
5039	static int
5040	md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
5041	unsigned data, unsigned flags)
5042	{
5043	MD_BLOCK* block;
5044	int ret = `0`;
5045
5046	MD_CHECK(md_end_current_block(ctx));
5047
5048	block = (MD_BLOCK) md_push_block_bytes(ctx, sizeof*(MD_BLOCK));
5049	if(block == NULL)
5050	return -`1`;
5051
5052	block->type = type;
5053	block->flags = flags;
5054	block->data = data;
5055	block->n_lines = start;
5056
5057	abort:
5058	return ret;
5059	}
5060
5061
5062
/***********************
 ***  Line Analysis  ***
 ***********************/
5066
5067	static int
5068	md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
5069	{
5070	OFF off = beg + `1`;
5071	int n = `1`;
5072
5073	while(off < ctx->size && (CH(off) == CH(beg) \|\| CH(off) == _T(`' '`) \|\| CH(off) == _T(`'\t'`))) {
5074	if(CH(off) == CH(beg))
5075	n++;
5076	off++;
5077	}
5078
5079	if(n < `3`) {
5080	*p_killer = off;
5081	return FALSE;
5082	}
5083
5084	/ Nothing else can be present on the line. /
5085	if(off < ctx->size && !ISNEWLINE(off)) {
5086	*p_killer = off;
5087	return FALSE;
5088	}
5089
5090	*p_end = off;
5091	return TRUE;
5092	}
5093
5094	static int
5095	md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
5096	{
5097	int n;
5098	OFF off = beg + `1`;
5099
5100	while(off < ctx->size && CH(off) == _T(`'#'`) && off - beg < `7`)
5101	off++;
5102	n = off - beg;
5103
5104	if(n > `6`)
5105	return FALSE;
5106	*p_level = n;
5107
5108	if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size &&
5109	CH(off) != _T(`' '`) && CH(off) != _T(`'\t'`) && !ISNEWLINE(off))
5110	return FALSE;
5111
5112	while(off < ctx->size && CH(off) == _T(`' '`))
5113	off++;
5114	*p_beg = off;
5115	*p_end = off;
5116	return TRUE;
5117	}
5118
5119	static int
5120	md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
5121	{
5122	OFF off = beg + `1`;
5123
5124	while(off < ctx->size && CH(off) == CH(beg))
5125	off++;
5126
5127	/ Optionally, space(s) can follow. /
5128	while(off < ctx->size && CH(off) == _T(`' '`))
5129	off++;
5130
5131	/ But nothing more is allowed on the line. /
5132	if(off < ctx->size && !ISNEWLINE(off))
5133	return FALSE;
5134
5135	*p_level = (CH(beg) == _T(`'='`) ? `1` : `2`);
5136	*p_end = off;
5137	return TRUE;
5138	}
5139
5140	static int
5141	md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
5142	{
5143	OFF off = beg;
5144	int found_pipe = FALSE;
5145	unsigned col_count = `0`;
5146
5147	if(off < ctx->size && CH(off) == _T(`'\|'`)) {
5148	found_pipe = TRUE;
5149	off++;
5150	while(off < ctx->size && ISWHITESPACE(off))
5151	off++;
5152	}
5153
5154	while(`1`) {
5155	OFF cell_beg;
5156	int delimited = FALSE;
5157
5158	/ Cell underline ("-----", ":----", "----:" or ":----:") /
5159	cell_beg = off;
5160	if(off < ctx->size && CH(off) == _T(`':'`))
5161	off++;
5162	while(off < ctx->size && CH(off) == _T(`'-'`))
5163	off++;
5164	if(off < ctx->size && CH(off) == _T(`':'`))
5165	off++;
5166	if(off - cell_beg < `3`)
5167	return FALSE;
5168
5169	col_count++;
5170
5171	/ Pipe delimiter (optional at the end of line). /
5172	while(off < ctx->size && ISWHITESPACE(off))
5173	off++;
5174	if(off < ctx->size && CH(off) == _T(`'\|'`)) {
5175	delimited = TRUE;
5176	found_pipe = TRUE;
5177	off++;
5178	while(off < ctx->size && ISWHITESPACE(off))
5179	off++;
5180	}
5181
5182	/ Success, if we reach end of line. /
5183	if(off >= ctx->size \|\| ISNEWLINE(off))
5184	break;
5185
5186	if(!delimited)
5187	return FALSE;
5188	}
5189
5190	if(!found_pipe)
5191	return FALSE;
5192
5193	*p_end = off;
5194	*p_col_count = col_count;
5195	return TRUE;
5196	}
5197
5198	static int
5199	md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
5200	{
5201	OFF off = beg;
5202
5203	while(off < ctx->size && CH(off) == CH(beg))
5204	off++;
5205
5206	/ Fence must have at least three characters. /
5207	if(off - beg < `3`)
5208	return FALSE;
5209
5210	ctx->code_fence_length = off - beg;
5211
5212	/ Optionally, space(s) can follow. /
5213	while(off < ctx->size && CH(off) == _T(`' '`))
5214	off++;
5215
5216	/ Optionally, an info string can follow. /
5217	while(off < ctx->size && !ISNEWLINE(off)) {
5218	/ Backtick-based fence must not contain '`' in the info string. /
5219	if(CH(beg) == _T('`') && CH(off) == _T('`'))
5220	return FALSE;
5221	off++;
5222	}
5223
5224	*p_end = off;
5225	return TRUE;
5226	}
5227
5228	static int
5229	md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
5230	{
5231	OFF off = beg;
5232	int ret = FALSE;
5233
5234	/ Closing fence must have at least the same length and use same char as*
5235	* opening one. */
5236	while(off < ctx->size && CH(off) == ch)
5237	off++;
5238	if(off - beg < ctx->code_fence_length)
5239	goto out;
5240
5241	/ Optionally, space(s) can follow /
5242	while(off < ctx->size && CH(off) == _T(`' '`))
5243	off++;
5244
5245	/ But nothing more is allowed on the line. /
5246	if(off < ctx->size && !ISNEWLINE(off))
5247	goto out;
5248
5249	ret = TRUE;
5250
5251	out:
5252	/ Note we set p_end even on failure: If we are not closing fence, caller
5253	* would eat the line anyway without any parsing. */
5254	*p_end = off;
5255	return ret;
5256	}
5257
5258	/ Returns type of the raw HTML block, or FALSE if it is not HTML block.*
5259	* (Refer to CommonMark specification for details about the types.)
5260	*/
5261	static int
5262	md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
5263	{
5264	typedef struct TAG_tag TAG;
5265	struct TAG_tag {
5266	const CHAR* name;
5267	unsigned len : `8`;
5268	};
5269
5270	/ Type 6 is started by a long list of allowed tags. We use two-level*
5271	* tree to speed-up the search. */
5272	#ifdef X
5273	#undef X
5274	#endif
5275	#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
5276	#define Xend { NULL, 0 }
5277	static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend };
5278
5279	static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
5280	static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
5281	static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
5282	static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
5283	X("div"), X("dl"), X("dt"), Xend };
5284	static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
5285	X("form"), X("frame"), X("frameset"), Xend };
5286	static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend };
5287	static const TAG i6[] = { X("iframe"), Xend };
5288	static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
5289	static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
5290	static const TAG n6[] = { X("nav"), X("noframes"), Xend };
5291	static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
5292	static const TAG p6[] = { X("p"), X("param"), Xend };
5293	static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend };
5294	static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
5295	X("thead"), X("title"), X("tr"), X("track"), Xend };
5296	static const TAG u6[] = { X("ul"), Xend };
5297	static const TAG xx[] = { Xend };
5298	#undef X
5299
5300	static const TAG* map6[`26`] = {
5301	a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
5302	n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
5303	};
5304	OFF off = beg + `1`;
5305	int i;
5306
5307	/ Check for type 1: <script, <pre, or <style /
5308	for(i = `0`; t1[i].name != NULL; i++) {
5309	if(off + t1[i].len <= ctx->size) {
5310	if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
5311	return `1`;
5312	}
5313	}
5314
5315	/ Check for type 2: <!-- /
5316	if(off + `3` < ctx->size && CH(off) == _T(`'!'`) && CH(off+`1`) == _T(`'-'`) && CH(off+`2`) == _T(`'-'`))
5317	return `2`;
5318
5319	/ Check for type 3: <? /
5320	if(off < ctx->size && CH(off) == _T(`'?'`))
5321	return `3`;
5322
5323	/ Check for type 4 or 5: <! /
5324	if(off < ctx->size && CH(off) == _T(`'!'`)) {
5325	/ Check for type 4: <! followed by uppercase letter. /
5326	if(off + `1` < ctx->size && ISUPPER(off+`1`))
5327	return `4`;
5328
5329	/ Check for type 5: <![CDATA[ /
5330	if(off + `8` < ctx->size) {
5331	if(md_ascii_eq(STR(off), _T("![CDATA["), `8`))
5332	return `5`;
5333	}
5334	}
5335
5336	/ Check for type 6: Many possible starting tags listed above. /
5337	if(off + `1` < ctx->size && (ISALPHA(off) \|\| (CH(off) == _T(`'/'`) && ISALPHA(off+`1`)))) {
5338	int slot;
5339	const TAG* tags;
5340
5341	if(CH(off) == _T(`'/'`))
5342	off++;
5343
5344	slot = (ISUPPER(off) ? CH(off) - `'A'` : CH(off) - `'a'`);
5345	tags = map6[slot];
5346
5347	for(i = `0`; tags[i].name != NULL; i++) {
5348	if(off + tags[i].len <= ctx->size) {
5349	if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
5350	OFF tmp = off + tags[i].len;
5351	if(tmp >= ctx->size)
5352	return `6`;
5353	if(ISBLANK(tmp) \|\| ISNEWLINE(tmp) \|\| CH(tmp) == _T(`'>'`))
5354	return `6`;
5355	if(tmp+`1` < ctx->size && CH(tmp) == _T(`'/'`) && CH(tmp+`1`) == _T(`'>'`))
5356	return `6`;
5357	break;
5358	}
5359	}
5360	}
5361	}
5362
5363	/ Check for type 7: any COMPLETE other opening or closing tag. /
5364	if(off + `1` < ctx->size) {
5365	OFF end;
5366
5367	if(md_is_html_tag(ctx, NULL, `0`, beg, ctx->size, &end)) {
5368	/ Only optional whitespace and new line may follow. /
5369	while(end < ctx->size && ISWHITESPACE(end))
5370	end++;
5371	if(end >= ctx->size \|\| ISNEWLINE(end))
5372	return `7`;
5373	}
5374	}
5375
5376	return FALSE;
5377	}
5378
5379	/ Case sensitive check whether there is a substring 'what' between 'beg'*
5380	* and end of line. */
5381	static int
5382	md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
5383	{
5384	OFF i;
5385	for(i = beg; i + what_len < ctx->size; i++) {
5386	if(ISNEWLINE(i))
5387	break;
5388	if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == `0`) {
5389	*p_end = i + what_len;
5390	return TRUE;
5391	}
5392	}
5393
5394	*p_end = i;
5395	return FALSE;
5396	}
5397
5398	/ Returns type of HTML block end condition or FALSE if not an end condition.*
5399	*
5400	* Note it fills p_end even when it is not end condition as the caller
5401	* does not need to analyze contents of a raw HTML block.
5402	*/
5403	static int
5404	md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
5405	{
5406	switch(ctx->html_block_type) {
5407	case `1`:
5408	{
5409	OFF off = beg;
5410
5411	while(off < ctx->size && !ISNEWLINE(off)) {
5412	if(CH(off) == _T(`'<'`)) {
5413	if(md_ascii_case_eq(STR(off), _T("</script>"), `9`)) {
5414	*p_end = off + `9`;
5415	return TRUE;
5416	}
5417
5418	if(md_ascii_case_eq(STR(off), _T("</style>"), `8`)) {
5419	*p_end = off + `8`;
5420	return TRUE;
5421	}
5422
5423	if(md_ascii_case_eq(STR(off), _T("</pre>"), `6`)) {
5424	*p_end = off + `6`;
5425	return TRUE;
5426	}
5427	}
5428
5429	off++;
5430	}
5431	*p_end = off;
5432	return FALSE;
5433	}
5434
5435	case `2`:
5436	return (md_line_contains(ctx, beg, _T("-->"), `3`, p_end) ? `2` : FALSE);
5437
5438	case `3`:
5439	return (md_line_contains(ctx, beg, _T("?>"), `2`, p_end) ? `3` : FALSE);
5440
5441	case `4`:
5442	return (md_line_contains(ctx, beg, _T(">"), `1`, p_end) ? `4` : FALSE);
5443
5444	case `5`:
5445	return (md_line_contains(ctx, beg, _T("]]>"), `3`, p_end) ? `5` : FALSE);
5446
5447	case `6`: / Pass through /
5448	case `7`:
5449	*p_end = beg;
5450	return (ISNEWLINE(beg) ? ctx->html_block_type : FALSE);
5451
5452	default:
5453	MD_UNREACHABLE();
5454	}
5455	return FALSE;
5456	}
5457
5458
5459	static int
5460	md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
5461	{
5462	/ Block quote has no "items" like lists. /
5463	if(container->ch == _T(`'>'`))
5464	return FALSE;
5465
5466	if(container->ch != pivot->ch)
5467	return FALSE;
5468	if(container->mark_indent > pivot->contents_indent)
5469	return FALSE;
5470
5471	return TRUE;
5472	}
5473
5474	static int
5475	md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5476	{
5477	if(ctx->n_containers >= ctx->alloc_containers) {
5478	MD_CONTAINER* new_containers;
5479
5480	ctx->alloc_containers = (ctx->alloc_containers > `0`
5481	? ctx->alloc_containers + ctx->alloc_containers / `2`
5482	: `16`);
5483	new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
5484	if(new_containers == NULL) {
5485	MD_LOG("realloc() failed.");
5486	return -`1`;
5487	}
5488
5489	ctx->containers = new_containers;
5490	}
5491
5492	memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER));
5493	return `0`;
5494	}
5495
5496	static int
5497	md_enter_child_containers(MD_CTX* ctx, int n_children, unsigned data)
5498	{
5499	int i;
5500	int ret = `0`;
5501
5502	for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5503	MD_CONTAINER* c = &ctx->containers[i];
5504	int is_ordered_list = FALSE;
5505
5506	switch(c->ch) {
5507	case _T(`')'`):
5508	case _T(`'.'`):
5509	is_ordered_list = TRUE;
5510	/ Pass through /
5511
5512	case _T(`'-'`):
5513	case _T(`'+'`):
5514	case _T(`'*'`):
5515	/ Remember offset in ctx->block_bytes so we can revisit the*
5516	* block if we detect it is a loose list. */
5517	md_end_current_block(ctx);
5518	c->block_byte_off = ctx->n_block_bytes;
5519
5520	MD_CHECK(md_push_container_bytes(ctx,
5521	(is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5522	c->start, data, MD_BLOCK_CONTAINER_OPENER));
5523	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5524	c->task_mark_off,
5525	(c->is_task ? CH(c->task_mark_off) : `0`),
5526	MD_BLOCK_CONTAINER_OPENER));
5527	break;
5528
5529	case _T(`'>'`):
5530	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, `0`, `0`, MD_BLOCK_CONTAINER_OPENER));
5531	break;
5532
5533	default:
5534	MD_UNREACHABLE();
5535	break;
5536	}
5537	}
5538
5539	abort:
5540	return ret;
5541	}
5542
5543	static int
5544	md_leave_child_containers(MD_CTX* ctx, int n_keep)
5545	{
5546	int ret = `0`;
5547
5548	while(ctx->n_containers > n_keep) {
5549	MD_CONTAINER* c = &ctx->containers[ctx->n_containers-`1`];
5550	int is_ordered_list = FALSE;
5551
5552	switch(c->ch) {
5553	case _T(`')'`):
5554	case _T(`'.'`):
5555	is_ordered_list = TRUE;
5556	/ Pass through /
5557
5558	case _T(`'-'`):
5559	case _T(`'+'`):
5560	case _T(`'*'`):
5561	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5562	c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : `0`),
5563	MD_BLOCK_CONTAINER_CLOSER));
5564	MD_CHECK(md_push_container_bytes(ctx,
5565	(is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), `0`,
5566	c->ch, MD_BLOCK_CONTAINER_CLOSER));
5567	break;
5568
5569	case _T(`'>'`):
5570	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, `0`,
5571	`0`, MD_BLOCK_CONTAINER_CLOSER));
5572	break;
5573
5574	default:
5575	MD_UNREACHABLE();
5576	break;
5577	}
5578
5579	ctx->n_containers--;
5580	}
5581
5582	abort:
5583	return ret;
5584	}
5585
5586	static int
5587	md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5588	{
5589	OFF off = beg;
5590	OFF max_end;
5591
5592	if(indent >= ctx->code_indent_offset)
5593	return FALSE;
5594
5595	/ Check for block quote mark. /
5596	if(off < ctx->size && CH(off) == _T(`'>'`)) {
5597	off++;
5598	p_container->ch = _T(`'>'`);
5599	p_container->is_loose = FALSE;
5600	p_container->is_task = FALSE;
5601	p_container->mark_indent = indent;
5602	p_container->contents_indent = indent + `1`;
5603	*p_end = off;
5604	return TRUE;
5605	}
5606
5607	/ Check for list item bullet mark. /
5608	if(off+`1` < ctx->size && ISANYOF(off, _T("-+*")) && (ISBLANK(off+`1`) \|\| ISNEWLINE(off+`1`))) {
5609	p_container->ch = CH(off);
5610	p_container->is_loose = FALSE;
5611	p_container->is_task = FALSE;
5612	p_container->mark_indent = indent;
5613	p_container->contents_indent = indent + `1`;
5614	*p_end = off + `1`;
5615	return TRUE;
5616	}
5617
5618	/ Check for ordered list item marks. /
5619	max_end = off + `9`;
5620	if(max_end > ctx->size)
5621	max_end = ctx->size;
5622	p_container->start = `0`;
5623	while(off < max_end && ISDIGIT(off)) {
5624	p_container->start = p_container->start * `10` + CH(off) - _T(`'0'`);
5625	off++;
5626	}
5627	if(off > beg && off+`1` < ctx->size &&
5628	(CH(off) == _T(`'.'`) \|\| CH(off) == _T(`')'`)) &&
5629	(ISBLANK(off+`1`) \|\| ISNEWLINE(off+`1`)))
5630	{
5631	p_container->ch = CH(off);
5632	p_container->is_loose = FALSE;
5633	p_container->is_task = FALSE;
5634	p_container->mark_indent = indent;
5635	p_container->contents_indent = indent + off - beg + `1`;
5636	*p_end = off + `1`;
5637	return TRUE;
5638	}
5639
5640	return FALSE;
5641	}
5642
5643	static unsigned
5644	md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5645	{
5646	OFF off = beg;
5647	unsigned indent = total_indent;
5648
5649	while(off < ctx->size && ISBLANK(off)) {
5650	if(CH(off) == _T(`'\t'`))
5651	indent = (indent + `4`) & ~`3`;
5652	else
5653	indent++;
5654	off++;
5655	}
5656
5657	*p_end = off;
5658	return indent - total_indent;
5659	}
5660
5661	static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, `0` };
5662
5663	/ Analyze type of the line and find some its properties. This serves as a*
5664	* main input for determining type and boundaries of a block. */
5665	static int
5666	md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5667	const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5668	{
5669	unsigned total_indent = `0`;
5670	int n_parents = `0`;
5671	int n_brothers = `0`;
5672	int n_children = `0`;
5673	MD_CONTAINER container = { `0` };
5674	int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5675	OFF off = beg;
5676	OFF hr_killer = `0`;
5677	int ret = `0`;
5678
5679	line->indent = md_line_indentation(ctx, total_indent, off, &off);
5680	total_indent += line->indent;
5681	line->beg = off;
5682
5683	/ Given the indentation and block quote marks '>', determine how many of*
5684	* the current containers are our parents. */
5685	while(n_parents < ctx->n_containers) {
5686	MD_CONTAINER* c = &ctx->containers[n_parents];
5687
5688	if(c->ch == _T(`'>'`) && line->indent < ctx->code_indent_offset &&
5689	off < ctx->size && CH(off) == _T(`'>'`))
5690	{
5691	/ Block quote mark. /
5692	off++;
5693	total_indent++;
5694	line->indent = md_line_indentation(ctx, total_indent, off, &off);
5695	total_indent += line->indent;
5696
5697	/ The optional 1st space after '>' is part of the block quote mark. /
5698	if(line->indent > `0`)
5699	line->indent--;
5700
5701	line->beg = off;
5702
5703	} else if(c->ch != _T(`'>'`) && line->indent >= c->contents_indent) {
5704	/ List. /
5705	line->indent -= c->contents_indent;
5706	} else {
5707	break;
5708	}
5709
5710	n_parents++;
5711	}
5712
5713	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5714	/ Blank line does not need any real indentation to be nested inside*
5715	* a list. */
5716	if(n_brothers + n_children == `0`) {
5717	while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T(`'>'`))
5718	n_parents++;
5719	}
5720	}
5721
5722	while(TRUE) {
5723	/ Check whether we are fenced code continuation. /
5724	if(pivot_line->type == MD_LINE_FENCEDCODE) {
5725	line->beg = off;
5726
5727	/ We are another MD_LINE_FENCEDCODE unless we are closing fence*
5728	* which we transform into MD_LINE_BLANK. */
5729	if(line->indent < ctx->code_indent_offset) {
5730	if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
5731	line->type = MD_LINE_BLANK;
5732	ctx->last_line_has_list_loosening_effect = FALSE;
5733	break;
5734	}
5735	}
5736
5737	/ Change indentation accordingly to the initial code fence. /
5738	if(n_parents == ctx->n_containers) {
5739	if(line->indent > pivot_line->indent)
5740	line->indent -= pivot_line->indent;
5741	else
5742	line->indent = `0`;
5743
5744	line->type = MD_LINE_FENCEDCODE;
5745	break;
5746	}
5747	}
5748
5749	/ Check whether we are HTML block continuation. /
5750	if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > `0`) {
5751	int html_block_type;
5752
5753	html_block_type = md_is_html_block_end_condition(ctx, off, &off);
5754	if(html_block_type > `0`) {
5755	MD_ASSERT(html_block_type == ctx->html_block_type);
5756
5757	/ Make sure this is the last line of the block. /
5758	ctx->html_block_type = `0`;
5759
5760	/ Some end conditions serve as blank lines at the same time. /
5761	if(html_block_type == `6` \|\| html_block_type == `7`) {
5762	line->type = MD_LINE_BLANK;
5763	line->indent = `0`;
5764	break;
5765	}
5766	}
5767
5768	if(n_parents == ctx->n_containers) {
5769	line->type = MD_LINE_HTML;
5770	break;
5771	}
5772	}
5773
5774	/ Check for blank line. /
5775	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5776	if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) {
5777	line->type = MD_LINE_INDENTEDCODE;
5778	if(line->indent > ctx->code_indent_offset)
5779	line->indent -= ctx->code_indent_offset;
5780	else
5781	line->indent = `0`;
5782	ctx->last_line_has_list_loosening_effect = FALSE;
5783	} else {
5784	line->type = MD_LINE_BLANK;
5785	ctx->last_line_has_list_loosening_effect = (n_parents > `0` &&
5786	n_brothers + n_children == `0` &&
5787	ctx->containers[n_parents-`1`].ch != _T(`'>'`));
5788
5789	#if 1
5790	/ See https://github.com/mity/md4c/issues/6*
5791	*
5792	* This ugly checking tests we are in (yet empty) list item but not
5793	* its very first line (with the list item mark).
5794	*
5795	* If we are such blank line, then any following non-blank line
5796	* which would be part of this list item actually ends the list
5797	* because "a list item can begin with at most one blank line."
5798	*/
5799	if(n_parents > `0` && ctx->containers[n_parents-`1`].ch != _T(`'>'`) &&
5800	n_brothers + n_children == `0` && ctx->current_block == NULL &&
5801	ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5802	{
5803	MD_BLOCK* top_block = (MD_BLOCK) ((char)ctx->block_bytes + ctx->n_block_bytes - sizeof**(MD_BLOCK));
5804	if(top_block->type == MD_BLOCK_LI)
5805	ctx->last_list_item_starts_with_two_blank_lines = TRUE;
5806	}
5807	#endif
5808	}
5809	break;
5810	} else {
5811	#if 1
5812	/ This is 2nd half of the hack. If the flag is set (that is there*
5813	* were 2nd blank line at the start of the list item) and we would also
5814	* belonging to such list item, then interrupt the list. */
5815	ctx->last_line_has_list_loosening_effect = FALSE;
5816	if(ctx->last_list_item_starts_with_two_blank_lines) {
5817	if(n_parents > `0` && ctx->containers[n_parents-`1`].ch != _T(`'>'`) &&
5818	n_brothers + n_children == `0` && ctx->current_block == NULL &&
5819	ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5820	{
5821	MD_BLOCK* top_block = (MD_BLOCK) ((char)ctx->block_bytes + ctx->n_block_bytes - sizeof**(MD_BLOCK));
5822	if(top_block->type == MD_BLOCK_LI)
5823	n_parents--;
5824	}
5825
5826	ctx->last_list_item_starts_with_two_blank_lines = FALSE;
5827	}
5828	#endif
5829	}
5830
5831	/ Check whether we are Setext underline. /
5832	if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT
5833	&& (CH(off) == _T(`'='`) \|\| CH(off) == _T(`'-'`))
5834	&& (n_parents == ctx->n_containers))
5835	{
5836	unsigned level;
5837
5838	if(md_is_setext_underline(ctx, off, &off, &level)) {
5839	line->type = MD_LINE_SETEXTUNDERLINE;
5840	line->data = level;
5841	break;
5842	}
5843	}
5844
5845	/ Check for thematic break line. /
5846	if(line->indent < ctx->code_indent_offset && ISANYOF(off, _T("-_*")) && off >= hr_killer) {
5847	if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
5848	line->type = MD_LINE_HR;
5849	break;
5850	}
5851	}
5852
5853	/ Check for "brother" container. I.e. whether we are another list item*
5854	* in already started list. */
5855	if(n_parents < ctx->n_containers && n_brothers + n_children == `0`) {
5856	OFF tmp;
5857
5858	if(md_is_container_mark(ctx, line->indent, off, &tmp, &container) &&
5859	md_is_container_compatible(&ctx->containers[n_parents], &container))
5860	{
5861	pivot_line = &md_dummy_blank_line;
5862
5863	off = tmp;
5864
5865	total_indent += container.contents_indent - container.mark_indent;
5866	line->indent = md_line_indentation(ctx, total_indent, off, &off);
5867	total_indent += line->indent;
5868	line->beg = off;
5869
5870	/ Some of the following whitespace actually still belongs to the mark. /
5871	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5872	container.contents_indent++;
5873	} else if(line->indent <= ctx->code_indent_offset) {
5874	container.contents_indent += line->indent;
5875	line->indent = `0`;
5876	} else {
5877	container.contents_indent += `1`;
5878	line->indent--;
5879	}
5880
5881	ctx->containers[n_parents].mark_indent = container.mark_indent;
5882	ctx->containers[n_parents].contents_indent = container.contents_indent;
5883
5884	n_brothers++;
5885	continue;
5886	}
5887	}
5888
5889	/ Check for indented code.*
5890	* Note indented code block cannot interrupt a paragraph. */
5891	if(line->indent >= ctx->code_indent_offset &&
5892	(pivot_line->type == MD_LINE_BLANK \|\| pivot_line->type == MD_LINE_INDENTEDCODE))
5893	{
5894	line->type = MD_LINE_INDENTEDCODE;
5895	MD_ASSERT(line->indent >= ctx->code_indent_offset);
5896	line->indent -= ctx->code_indent_offset;
5897	line->data = `0`;
5898	break;
5899	}
5900
5901	/ Check for start of a new container block. /
5902	if(line->indent < ctx->code_indent_offset &&
5903	md_is_container_mark(ctx, line->indent, off, &off, &container))
5904	{
5905	if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5906	(off >= ctx->size \|\| ISNEWLINE(off)) && container.ch != _T(`'>'`))
5907	{
5908	/ Noop. List mark followed by a blank line cannot interrupt a paragraph. /
5909	} else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5910	(container.ch == _T(`'.'`) \|\| container.ch == _T(`')'`)) && container.start != `1`)
5911	{
5912	/ Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. /
5913	} else {
5914	total_indent += container.contents_indent - container.mark_indent;
5915	line->indent = md_line_indentation(ctx, total_indent, off, &off);
5916	total_indent += line->indent;
5917
5918	line->beg = off;
5919	line->data = container.ch;
5920
5921	/ Some of the following whitespace actually still belongs to the mark. /
5922	if(off >= ctx->size \|\| ISNEWLINE(off)) {
5923	container.contents_indent++;
5924	} else if(line->indent <= ctx->code_indent_offset) {
5925	container.contents_indent += line->indent;
5926	line->indent = `0`;
5927	} else {
5928	container.contents_indent += `1`;
5929	line->indent--;
5930	}
5931
5932	if(n_brothers + n_children == `0`)
5933	pivot_line = &md_dummy_blank_line;
5934
5935	if(n_children == `0`)
5936	MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5937
5938	n_children++;
5939	MD_CHECK(md_push_container(ctx, &container));
5940	continue;
5941	}
5942	}
5943
5944	/ Check whether we are table continuation. /
5945	if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) {
5946	line->type = MD_LINE_TABLE;
5947	break;
5948	}
5949
5950	/ Check for ATX header. /
5951	if(line->indent < ctx->code_indent_offset && CH(off) == _T(`'#'`)) {
5952	unsigned level;
5953
5954	if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
5955	line->type = MD_LINE_ATXHEADER;
5956	line->data = level;
5957	break;
5958	}
5959	}
5960
5961	/ Check whether we are starting code fence. /
5962	if(CH(off) == _T('`') \|\| CH(off) == _T(`'~'`)) {
5963	if(md_is_opening_code_fence(ctx, off, &off)) {
5964	line->type = MD_LINE_FENCEDCODE;
5965	line->data = `1`;
5966	break;
5967	}
5968	}
5969
5970	/ Check for start of raw HTML block. /
5971	if(CH(off) == _T(`'<'`) && !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
5972	{
5973	ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
5974
5975	/ HTML block type 7 cannot interrupt paragraph. /
5976	if(ctx->html_block_type == `7` && pivot_line->type == MD_LINE_TEXT)
5977	ctx->html_block_type = `0`;
5978
5979	if(ctx->html_block_type > `0`) {
5980	/ The line itself also may immediately close the block. /
5981	if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
5982	/ Make sure this is the last line of the block. /
5983	ctx->html_block_type = `0`;
5984	}
5985
5986	line->type = MD_LINE_HTML;
5987	break;
5988	}
5989	}
5990
5991	/ Check for table underline. /
5992	if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT &&
5993	(CH(off) == _T(`'\|'`) \|\| CH(off) == _T(`'-'`) \|\| CH(off) == _T(`':'`)) &&
5994	n_parents == ctx->n_containers)
5995	{
5996	unsigned col_count;
5997
5998	if(ctx->current_block != NULL && ctx->current_block->n_lines == `1` &&
5999	md_is_table_underline(ctx, off, &off, &col_count))
6000	{
6001	line->data = col_count;
6002	line->type = MD_LINE_TABLEUNDERLINE;
6003	break;
6004	}
6005	}
6006
6007	/ By default, we are normal text line. /
6008	line->type = MD_LINE_TEXT;
6009	if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == `0`) {
6010	/ Lazy continuation. /
6011	n_parents = ctx->n_containers;
6012	}
6013
6014	/ Check for task mark. /
6015	if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > `0` &&
6016	ISANYOF_(ctx->containers[ctx->n_containers-`1`].ch, _T("-+*.)")))
6017	{
6018	OFF tmp = off;
6019
6020	while(tmp < ctx->size && tmp < off + `3` && ISBLANK(tmp))
6021	tmp++;
6022	if(tmp + `2` < ctx->size && CH(tmp) == _T(`'['`) &&
6023	ISANYOF(tmp+`1`, _T("xX ")) && CH(tmp+`2`) == _T(`']'`) &&
6024	(tmp + `3` == ctx->size \|\| ISBLANK(tmp+`3`) \|\| ISNEWLINE(tmp+`3`)))
6025	{
6026	MD_CONTAINER* task_container = (n_children > `0` ? &ctx->containers[ctx->n_containers-`1`] : &container);
6027	task_container->is_task = TRUE;
6028	task_container->task_mark_off = tmp + `1`;
6029	off = tmp + `3`;
6030	while(ISWHITESPACE(off))
6031	off++;
6032	line->beg = off;
6033	}
6034	}
6035
6036	break;
6037	}
6038
6039	/ Scan for end of the line.*
6040	*
6041	* Note this is quite a bottleneck of the parsing as we here iterate almost
6042	* over the complete document.
6043	*/
6044	#if defined __linux__ && !defined MD4C_USE_UTF16
6045	/ Recent glibc versions have superbly optimized strcspn(), even using*
6046	* vectorization if available. */
6047	if(ctx->doc_ends_with_newline && off < ctx->size) {
6048	while(TRUE) {
6049	off += (OFF) strcspn(STR(off), "\r\n");
6050
6051	/ strcspn() can stop on zero terminator; but that can appear*
6052	* anywhere in the Markdown input... */
6053	if(CH(off) == _T(`'\0'`))
6054	off++;
6055	else
6056	break;
6057	}
6058	} else
6059	#endif
6060	{
6061	/ Optimization: Use some loop unrolling. /
6062	while(off + `3` < ctx->size && !ISNEWLINE(off+`0`) && !ISNEWLINE(off+`1`)
6063	&& !ISNEWLINE(off+`2`) && !ISNEWLINE(off+`3`))
6064	off += `4`;
6065	while(off < ctx->size && !ISNEWLINE(off))
6066	off++;
6067	}
6068
6069	/ Set end of the line. /
6070	line->end = off;
6071
6072	/ But for ATX header, we should exclude the optional trailing mark. /
6073	if(line->type == MD_LINE_ATXHEADER) {
6074	OFF tmp = line->end;
6075	while(tmp > line->beg && CH(tmp-`1`) == _T(`' '`))
6076	tmp--;
6077	while(tmp > line->beg && CH(tmp-`1`) == _T(`'#'`))
6078	tmp--;
6079	if(tmp == line->beg \|\| CH(tmp-`1`) == _T(`' '`) \|\| (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
6080	line->end = tmp;
6081	}
6082
6083	/ Trim trailing spaces. /
6084	if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE) {
6085	while(line->end > line->beg && CH(line->end-`1`) == _T(`' '`))
6086	line->end--;
6087	}
6088
6089	/ Eat also the new line. /
6090	if(off < ctx->size && CH(off) == _T(`'\r'`))
6091	off++;
6092	if(off < ctx->size && CH(off) == _T(`'\n'`))
6093	off++;
6094
6095	*p_end = off;
6096
6097	/ If we belong to a list after seeing a blank line, the list is loose. /
6098	if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > `0`) {
6099	MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - `1`];
6100	if(c->ch != _T(`'>'`)) {
6101	MD_BLOCK* block = (MD_BLOCK) (((char**)ctx->block_bytes) + c->block_byte_off);
6102	block->flags \|= MD_BLOCK_LOOSE_LIST;
6103	}
6104	}
6105
6106	/ Leave any containers we are not part of anymore. /
6107	if(n_children == `0` && n_parents + n_brothers < ctx->n_containers)
6108	MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6109
6110	/ Enter any container we found a mark for. /
6111	if(n_brothers > `0`) {
6112	MD_ASSERT(n_brothers == `1`);
6113	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6114	ctx->containers[n_parents].task_mark_off,
6115	(ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : `0`),
6116	MD_BLOCK_CONTAINER_CLOSER));
6117	MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6118	container.task_mark_off,
6119	(container.is_task ? CH(container.task_mark_off) : `0`),
6120	MD_BLOCK_CONTAINER_OPENER));
6121	ctx->containers[n_parents].is_task = container.is_task;
6122	ctx->containers[n_parents].task_mark_off = container.task_mark_off;
6123	}
6124
6125	if(n_children > `0`)
6126	MD_CHECK(md_enter_child_containers(ctx, n_children, line->data));
6127
6128	abort:
6129	return ret;
6130	}
6131
6132	static int
6133	md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
6134	{
6135	const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
6136	int ret = `0`;
6137
6138	/ Blank line ends current leaf block. /
6139	if(line->type == MD_LINE_BLANK) {
6140	MD_CHECK(md_end_current_block(ctx));
6141	*p_pivot_line = &md_dummy_blank_line;
6142	return `0`;
6143	}
6144
6145	/ Some line types form block on their own. /
6146	if(line->type == MD_LINE_HR \|\| line->type == MD_LINE_ATXHEADER) {
6147	MD_CHECK(md_end_current_block(ctx));
6148
6149	/ Add our single-line block. /
6150	MD_CHECK(md_start_new_block(ctx, line));
6151	MD_CHECK(md_add_line_into_current_block(ctx, line));
6152	MD_CHECK(md_end_current_block(ctx));
6153	*p_pivot_line = &md_dummy_blank_line;
6154	return `0`;
6155	}
6156
6157	/ MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. /
6158	if(line->type == MD_LINE_SETEXTUNDERLINE) {
6159	MD_ASSERT(ctx->current_block != NULL);
6160	ctx->current_block->type = MD_BLOCK_H;
6161	ctx->current_block->data = line->data;
6162	ctx->current_block->flags \|= MD_BLOCK_SETEXT_HEADER;
6163	MD_CHECK(md_add_line_into_current_block(ctx, line));
6164	MD_CHECK(md_end_current_block(ctx));
6165	if(ctx->current_block == NULL) {
6166	*p_pivot_line = &md_dummy_blank_line;
6167	} else {
6168	/ This happens if we have consumed all the body as link ref. defs.*
6169	* and downgraded the underline into start of a new paragraph block. */
6170	line->type = MD_LINE_TEXT;
6171	*p_pivot_line = line;
6172	}
6173	return `0`;
6174	}
6175
6176	/ MD_LINE_TABLEUNDERLINE changes meaning of the current block. /
6177	if(line->type == MD_LINE_TABLEUNDERLINE) {
6178	MD_ASSERT(ctx->current_block != NULL);
6179	MD_ASSERT(ctx->current_block->n_lines == `1`);
6180	ctx->current_block->type = MD_BLOCK_TABLE;
6181	ctx->current_block->data = line->data;
6182	MD_ASSERT(pivot_line != &md_dummy_blank_line);
6183	((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
6184	MD_CHECK(md_add_line_into_current_block(ctx, line));
6185	return `0`;
6186	}
6187
6188	/ The current block also ends if the line has different type. /
6189	if(line->type != pivot_line->type)
6190	MD_CHECK(md_end_current_block(ctx));
6191
6192	/ The current line may start a new block. /
6193	if(ctx->current_block == NULL) {
6194	MD_CHECK(md_start_new_block(ctx, line));
6195	*p_pivot_line = line;
6196	}
6197
6198	/ In all other cases the line is just a continuation of the current block. /
6199	MD_CHECK(md_add_line_into_current_block(ctx, line));
6200
6201	abort:
6202	return ret;
6203	}
6204
6205	static int
6206	md_process_doc(MD_CTX *ctx)
6207	{
6208	const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
6209	MD_LINE_ANALYSIS line_buf[`2`];
6210	MD_LINE_ANALYSIS* line = &line_buf[`0`];
6211	OFF off = `0`;
6212	int ret = `0`;
6213
6214	MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
6215
6216	while(off < ctx->size) {
6217	if(line == pivot_line)
6218	line = (line == &line_buf[`0`] ? &line_buf[`1`] : &line_buf[`0`]);
6219
6220	MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
6221	MD_CHECK(md_process_line(ctx, &pivot_line, line));
6222	}
6223
6224	md_end_current_block(ctx);
6225
6226	MD_CHECK(md_build_ref_def_hashtable(ctx));
6227
6228	/ Process all blocks. /
6229	MD_CHECK(md_leave_child_containers(ctx, `0`));
6230	MD_CHECK(md_process_all_blocks(ctx));
6231
6232	MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
6233
6234	abort:
6235
6236	#if 0
6237	/ Output some memory consumption statistics. /
6238	{
6239	char buffer[`256`];
6240	sprintf(buffer, "Alloced %u bytes for block buffer.",
6241	(unsigned)(ctx->alloc_block_bytes));
6242	MD_LOG(buffer);
6243
6244	sprintf(buffer, "Alloced %u bytes for containers buffer.",
6245	(unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
6246	MD_LOG(buffer);
6247
6248	sprintf(buffer, "Alloced %u bytes for marks buffer.",
6249	(unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
6250	MD_LOG(buffer);
6251
6252	sprintf(buffer, "Alloced %u bytes for aux. buffer.",
6253	(unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
6254	MD_LOG(buffer);
6255	}
6256	#endif
6257
6258	return ret;
6259	}
6260
6261
6262	/********************
6263	* Public API *
6264	********************/
6265
6266	int
6267	md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
6268	{
6269	MD_CTX ctx;
6270	int i;
6271	int ret;
6272
6273	if(parser->abi_version != `0`) {
6274	if(parser->debug_log != NULL)
6275	parser->debug_log("Unsupported abi_version.", userdata);
6276	return -`1`;
6277	}
6278
6279	/ Setup context structure. /
6280	memset(&ctx, `0`, sizeof(MD_CTX));
6281	ctx.text = text;
6282	ctx.size = size;
6283	memcpy(&ctx.parser, parser, sizeof(MD_PARSER));
6284	ctx.userdata = userdata;
6285	ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-`1`) : `4`;
6286	md_build_mark_char_map(&ctx);
6287	ctx.doc_ends_with_newline = (size > `0` && ISNEWLINE_(text[size-`1`]));
6288
6289	/ Reset all unresolved opener mark chains. /
6290	for(i = `0`; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {
6291	ctx.mark_chains[i].head = -`1`;
6292	ctx.mark_chains[i].tail = -`1`;
6293	}
6294	ctx.unresolved_link_head = -`1`;
6295	ctx.unresolved_link_tail = -`1`;
6296
6297	/ All the work. /
6298	ret = md_process_doc(&ctx);
6299
6300	/ Clean-up. /
6301	md_free_ref_defs(&ctx);
6302	md_free_ref_def_hashtable(&ctx);
6303	free(ctx.buffer);
6304	free(ctx.marks);
6305	free(ctx.block_bytes);
6306	free(ctx.containers);
6307
6308	return ret;
6309	}
6310

Browse the source code of Qt/src/3rdparty/md4c/md4c.c