1#include <assert.h>
2#include <stdio.h>
3#include <string.h>
4
5#include "houdini.h"
6
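/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#x27;     &apos; is not recommended
 * / --> &#x2F;     forward slash is included as it helps end an HTML entity
 *
 * Note: the replacement strings in this file use the equivalent decimal
 * character references, &#39; for the single quote and &#47; for the
 * forward slash, rather than the hexadecimal forms listed above.
 */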
/*
 * Per-byte lookup table: entry b is 0 when byte value b is copied through
 * unchanged, otherwise it is the index of the replacement string in
 * HTML_ESCAPES. Every slot not named below is implicitly zero.
 */
static const char HTML_ESCAPE_TABLE[256] = {
    [0x22] = 1, /* '"'  */
    [0x26] = 2, /* '&'  */
    [0x27] = 3, /* '\'' */
    [0x2F] = 4, /* '/'  */
    [0x3C] = 5, /* '<'  */
    [0x3E] = 6, /* '>'  */
};
31
/*
 * Replacement strings, indexed by the codes stored in HTML_ESCAPE_TABLE.
 * Slot 0 is a placeholder for "no escape" and is the empty string.
 */
static const char *HTML_ESCAPES[] = {
    "",       /* 0: unused */
    "&quot;", /* 1: '"'  */
    "&amp;",  /* 2: '&'  */
    "&#39;",  /* 3: '\'' */
    "&#47;",  /* 4: '/'  */
    "&lt;",   /* 5: '<'  */
    "&gt;",   /* 6: '>'  */
};
34
35int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
36 int secure) {
37 bufsize_t i = 0, org, esc = 0;
38
39 while (i < size) {
40 org = i;
41 while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
42 i++;
43
44 if (i > org)
45 cmark_strbuf_put(ob, src + org, i - org);
46
47 /* escaping */
48 if (unlikely(i >= size))
49 break;
50
51 /* The forward slash is only escaped in secure mode */
52 if ((src[i] == '/' || src[i] == '\'') && !secure) {
53 cmark_strbuf_putc(ob, src[i]);
54 } else {
55 cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
56 }
57
58 i++;
59 }
60
61 return 1;
62}
63
64int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
65 return houdini_escape_html0(ob, src, size, 1);
66}
67