1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * \file
31 * \brief Noodle literal matcher: build code.
32 */
33
34#include "noodle_build.h"
35
36#include "hwlm_literal.h"
37#include "noodle_internal.h"
38#include "util/bitutils.h"
39#include "util/compare.h"
40#include "util/verify_types.h"
41#include "ue2common.h"
42
43#include <cstring> // for memcpy
44#include <vector>
45
46using std::vector;
47
48namespace ue2 {
49
50static
51u64a make_u64a_mask(const vector<u8> &v) {
52 assert(v.size() <= sizeof(u64a));
53 if (v.size() > sizeof(u64a)) {
54 throw std::exception();
55 }
56
57 u64a mask = 0;
58 size_t len = v.size();
59 unsigned char *m = (unsigned char *)&mask;
60 DEBUG_PRINTF("making mask len %zu\n", len);
61 memcpy(m, &v[0], len);
62 return mask;
63}
64
65static
66size_t findNoodFragOffset(const hwlmLiteral &lit) {
67 const auto &s = lit.s;
68 const size_t len = lit.s.length();
69
70 size_t offset = 0;
71 for (size_t i = 0; i + 1 < len; i++) {
72 int diff = 0;
73 const char c = s[i];
74 const char d = s[i + 1];
75 if (lit.nocase && ourisalpha(c)) {
76 diff = (mytoupper(c) != mytoupper(d));
77 } else {
78 diff = (c != d);
79 }
80 offset = i;
81 if (diff) {
82 break;
83 }
84 }
85 return offset;
86}
87
88bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
89 const auto &s = lit.s;
90
91 size_t mask_len = std::max(s.length(), lit.msk.size());
92 DEBUG_PRINTF("mask is %zu bytes\n", lit.msk.size());
93 assert(mask_len <= 8);
94 assert(lit.msk.size() == lit.cmp.size());
95
96 vector<u8> n_msk(mask_len);
97 vector<u8> n_cmp(mask_len);
98
99 for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len;
100 i++, j++) {
101 DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n", i, lit.msk[j], i, lit.cmp[j]);
102 n_msk[i] = lit.msk[j];
103 n_cmp[i] = lit.cmp[j];
104 }
105
106 size_t s_off = mask_len - s.length();
107 for (unsigned i = s_off; i < mask_len; i++) {
108 u8 c = s[i - s_off];
109 u8 si_msk = lit.nocase && ourisalpha(c) ? (u8)CASE_CLEAR : (u8)0xff;
110 n_msk[i] |= si_msk;
111 n_cmp[i] |= c & si_msk;
112 assert((n_cmp[i] & si_msk) == c);
113 DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n", i, n_msk[i], i, n_cmp[i],
114 ourisprint(c) ? (char)c : '.');
115 }
116
117 auto n = make_zeroed_bytecode_ptr<noodTable>(sizeof(noodTable));
118 assert(n);
119 DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable));
120
121 size_t key_offset = findNoodFragOffset(lit);
122
123 n->id = lit.id;
124 n->single = s.length() == 1 ? 1 : 0;
125 n->key_offset = verify_u8(s.length() - key_offset);
126 n->nocase = lit.nocase ? 1 : 0;
127 n->key0 = s[key_offset];
128 if (n->single) {
129 n->key1 = 0;
130 } else {
131 n->key1 = s[key_offset + 1];
132 }
133 n->msk = make_u64a_mask(n_msk);
134 n->cmp = make_u64a_mask(n_cmp);
135 n->msk_len = mask_len;
136
137 return n;
138}
139
140size_t noodSize(const noodTable *) {
141 return sizeof(noodTable);
142}
143
144} // namespace ue2
145
146#ifdef DUMP_SUPPORT
147#include <cctype>
148
149namespace ue2 {
150
151void noodPrintStats(const noodTable *n, FILE *f) {
152 fprintf(f, "Noodle table\n");
153 fprintf(f, "Key Offset: %u\n", n->key_offset);
154 fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n",
155 n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len),
156 n->msk_len);
157 fprintf(f, "String: ");
158 for (u32 i = 0; i < n->msk_len; i++) {
159 const u8 *m = (const u8 *)&n->cmp;
160 if (isgraph(m[i]) && m[i] != '\\') {
161 fprintf(f, "%c", m[i]);
162 } else {
163 fprintf(f, "\\x%02hhx", m[i]);
164 }
165 }
166 fprintf(f, "\n");
167}
168
169} // namespace ue2
170
171#endif
172