houdini_href_e.c source code [Aseprite/third_party/cmark/src/houdini_href_e.c]

1	#include <assert.h>
2	#include <stdio.h>
3	#include <string.h>
4
5	#include "houdini.h"
6
/*
 * The following characters will not be escaped:
 *
 *        -_.+!*'(),%#@?=;:/,+&$ alphanum
 *
 * Note that this character set is the union of:
 *
 *    - The characters which are safe to be in a URL
 *    - The characters which are not safe to be in
 *    a URL because they are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that
 * appears inside a URL is actually meant to
 * have its native function (i.e. as a URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp)
 * and ' (single quote) do not appear in the table.
 * They are meant to appear in the URL as components,
 * yet they require special HTML-entity escaping
 * to generate valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 *
 */
/*
 * Lookup table indexed by byte value (256 entries, one per possible byte).
 * A non-zero entry means houdini_escape_href copies the byte through
 * unchanged; a zero entry means the byte takes the escaping path.
 * '&' (0x26) and '\'' (0x27) are deliberately 0 so they can be handled
 * separately from the generic %XX escape.
 */
static const char HREF_SAFE[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
45
46	int houdini_escape_href(cmark_strbuf ob, const* uint8_t *src, bufsize_t size) {
47	static const uint8_t hex_chars[] = "0123456789ABCDEF";
48	bufsize_t i = `0`, org;
49	uint8_t hex_str[`3`];
50
51	hex_str[`0`] = `'%'`;
52
53	while (i < size) {
54	org = i;
55	while (i < size && HREF_SAFE[src[i]] != `0`)
56	i++;
57
58	if (likely(i > org))
59	cmark_strbuf_put(ob, src + org, i - org);
60
61	/ escaping /
62	if (i >= size)
63	break;
64
65	switch (src[i]) {
66	/ amp appears all the time in URLs, but needs*
67	* HTML-entity escaping to be inside an href */
68	case `'&'`:
69	cmark_strbuf_puts(ob, "&");
70	break;
71
72	/ the single quote is a valid URL character*
73	* according to the standard; it needs HTML
74	* entity escaping too */
75	case `'\''`:
76	cmark_strbuf_puts(ob, "'");
77	break;
78
79	/ the space can be escaped to %20 or a plus*
80	* sign. we're going with the generic escape
81	* for now. the plus thing is more commonly seen
82	* when building GET strings */
83	#if 0
84	case `' '`:
85	cmark_strbuf_putc(ob, `'+'`);
86	break;
87	#endif
88
89	/ every other character goes with a %XX escaping /
90	default:
91	hex_str[`1`] = hex_chars[(src[i] >> `4`) & `0xF`];
92	hex_str[`2`] = hex_chars[src[i] & `0xF`];
93	cmark_strbuf_put(ob, hex_str, `3`);
94	}
95
96	i++;
97	}
98
99	return `1`;
100	}
101

Browse the source code of Aseprite/third_party/cmark/src/houdini_href_e.c