1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Tools for string manipulation, ue2_literal definition.
31 */
32
33#include "ue2string.h"
34
35#include "charreach.h"
36#include "compare.h"
37#include "hash_dynamic_bitset.h"
38
39#include <algorithm>
40#include <cstring>
41#include <iomanip>
42#include <sstream>
43#include <string>
44
45using namespace std;
46
47namespace ue2 {
48
49#if defined(DUMP_SUPPORT) || defined(DEBUG)
50
51// Escape a string so that it's screen-printable
52string escapeString(const string &s) {
53 ostringstream os;
54 for (unsigned int i = 0; i < s.size(); ++i) {
55 char c = s[i];
56 if (0x20 <= c && c <= 0x7e && c != '\\') {
57 os << c;
58 } else if (c == '\n') {
59 os << "\\n";
60 } else if (c == '\r') {
61 os << "\\r";
62 } else if (c == '\t') {
63 os << "\\t";
64 } else {
65 os << "\\x" << hex << setw(2) << setfill('0')
66 << (unsigned)(c & 0xff) << dec;
67 }
68 }
69 return os.str();
70}
71
72string escapeString(const ue2_literal &lit) {
73 ostringstream os;
74 for (ue2_literal::const_iterator it = lit.begin(); it != lit.end(); ++it) {
75 char c = it->c;
76 if (0x20 <= c && c <= 0x7e && c != '\\') {
77 os << c;
78 } else if (c == '\n') {
79 os << "\\n";
80 } else {
81 os << "\\x" << hex << setw(2) << setfill('0')
82 << (unsigned)(c & 0xff) << dec;
83 }
84 }
85 return os.str();
86}
87
88// escape any metacharacters in a literal string
89string escapeStringMeta(const string &s) {
90 ostringstream os;
91 for (unsigned int i = 0; i < s.size(); ++i) {
92 char c = s[i];
93 switch (c) {
94 case '#': case '$': case '(': case ')':
95 case '*': case '+': case '.': case '/':
96 case '?': case '[': case ']': case '^':
97 case '|':
98 os << "\\" << c; break;
99 default:
100 os << c; break;
101 }
102 }
103 return os.str();
104}
105
106string dotEscapeString(const string &s) {
107 string ss = escapeString(s);
108 string out;
109 out.reserve(ss.size());
110 for (size_t i = 0; i != ss.size(); i++) {
111 char c = ss[i];
112 switch (c) {
113 case '\"':
114 case '\\':
115 out.push_back('\\');
116 // fall through
117 default:
118 out.push_back(c);
119 break;
120 }
121 }
122 return out;
123}
124
125string dumpString(const ue2_literal &lit) {
126 string s = escapeString(lit.get_string());
127 if (lit.any_nocase()) {
128 s += " (nocase)";
129 }
130
131 return s;
132}
133#endif
134
135void upperString(string &s) {
136 for (auto &c : s) {
137 c = mytoupper(c);
138 }
139}
140
141size_t maxStringOverlap(const string &a, const string &b, bool nocase) {
142 size_t lena = a.length(), lenb = b.length();
143 const char *astart = a.c_str();
144 const char *bstart = b.c_str();
145 const char *aend = astart + lena;
146 size_t i = lenb;
147
148 for (; i > lena; i--) {
149 if (!cmp(astart, bstart + i - lena, lena, nocase)) {
150 return i;
151 }
152 }
153
154 for (; i && cmp(aend - i, bstart, i, nocase); i--) {
155 ;
156 }
157
158 return i;
159}
160
161size_t maxStringOverlap(const ue2_literal &a, const ue2_literal &b) {
162 /* todo: handle nocase better */
163 return maxStringOverlap(a.get_string(), b.get_string(),
164 a.any_nocase() || b.any_nocase());
165}
166
167size_t maxStringSelfOverlap(const string &a, bool nocase) {
168 size_t lena = a.length();
169 const char *astart = a.c_str();
170 const char *bstart = a.c_str();
171 const char *aend = astart + lena;
172 size_t i = lena - 1;
173
174 for (; i && cmp(aend - i, bstart, i, nocase); i--) {
175 ;
176 }
177
178 return i;
179}
180
181u32 cmp(const char *a, const char *b, size_t len, bool nocase) {
182 if (!nocase) {
183 return memcmp(a, b, len);
184 }
185
186 for (const auto *a_end = a + len; a < a_end; a++, b++) {
187 if (mytoupper(*a) != mytoupper(*b)) {
188 return 1;
189 }
190 }
191 return 0;
192}
193
194case_iter::case_iter(const ue2_literal &ss) : s(ss.get_string()),
195 s_orig(ss.get_string()) {
196 for (ue2_literal::const_iterator it = ss.begin(); it != ss.end(); ++it) {
197 nocase.push_back(it->nocase);
198 }
199}
200
201case_iter caseIterateBegin(const ue2_literal &s) {
202 return case_iter(s);
203}
204
205case_iter caseIterateEnd() {
206 return case_iter(ue2_literal());
207}
208
209case_iter &case_iter::operator++ () {
210 for (size_t i = s.length(); i != 0; i--) {
211 char lower = mytolower(s[i - 1]);
212 if (nocase[i - 1] && lower != s[i - 1]) {
213 s[i - 1] = lower;
214 copy(s_orig.begin() + i, s_orig.end(), s.begin() + i);
215 return *this;
216 }
217 }
218
219 s.clear();
220 return *this;
221}
222
223static
224string toUpperString(string s) {
225 upperString(s);
226 return s;
227}
228
229ue2_literal::elem::operator CharReach () const {
230 if (!nocase) {
231 return CharReach(c);
232 } else {
233 CharReach rv;
234 rv.set(mytoupper(c));
235 rv.set(mytolower(c));
236 return rv;
237 }
238}
239
240const ue2_literal::size_type ue2_literal::npos = std::string::npos;
241
242ue2_literal::ue2_literal(const std::string &s_in, bool nc_in)
243 : s(nc_in ? toUpperString(s_in) : s_in), nocase(s_in.size()) {
244 if (nc_in) {
245 // Switch on nocase bit for all alpha characters.
246 for (size_t i = 0; i < s.length(); i++) {
247 if (ourisalpha(s[i])) {
248 nocase.set(i);
249 }
250 }
251 }
252}
253
254ue2_literal::ue2_literal(char c, bool nc)
255 : s(1, nc ? mytoupper(c) : c), nocase(1, ourisalpha(c) ? nc : false) {}
256
257ue2_literal ue2_literal::substr(size_type pos, size_type n) const {
258 ue2_literal rv;
259 rv.s = s.substr(pos, n);
260 size_type upper = nocase.size();
261 if (n != npos && n + pos < nocase.size()) {
262 upper = n + pos;
263 }
264
265 rv.nocase.resize(upper - pos, false);
266 for (size_t i = pos; i < upper; i++) {
267 rv.nocase.set(i - pos, nocase.test(i));
268 }
269 assert(s.size() == nocase.size());
270 return rv;
271}
272
273ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
274 s.erase(pos, n);
275
276 if (n != npos) {
277 for (size_type i = pos + n; i < nocase.size(); i++) {
278 nocase.set(i - n, nocase.test(i));
279 }
280 }
281 nocase.resize(s.size());
282 return *this;
283}
284
285void ue2_literal::push_back(char c, bool nc) {
286 assert(!nc || ourisalpha(c));
287 if (nc) {
288 c = mytoupper(c);
289 }
290 nocase.push_back(nc);
291 s.push_back(c);
292}
293
294void ue2_literal::reverse() {
295 std::reverse(s.begin(), s.end());
296
297 const size_t len = nocase.size();
298 for (size_t i = 0; i < len / 2; i++) {
299 size_t j = len - i - 1;
300 bool a = nocase.test(i);
301 bool b = nocase.test(j);
302 nocase.set(i, b);
303 nocase.set(j, a);
304 }
305}
306
307// Return a copy of this literal in reverse order.
308ue2_literal reverse_literal(const ue2_literal &in) {
309 auto out = in;
310 out.reverse();
311 return out;
312}
313
314bool ue2_literal::operator<(const ue2_literal &b) const {
315 if (s < b.s) {
316 return true;
317 }
318 if (s > b.s) {
319 return false;
320 }
321 return nocase < b.nocase;
322}
323
324void ue2_literal::operator+=(const ue2_literal &b) {
325 s += b.s;
326 size_t prefix = nocase.size();
327 nocase.resize(prefix + b.nocase.size());
328 for (size_t i = 0; i < b.nocase.size(); i++) {
329 nocase.set(prefix + i, b.nocase[i]);
330 }
331}
332
333bool ue2_literal::any_nocase() const {
334 return nocase.any();
335}
336
337size_t ue2_literal::hash() const {
338 return hash_all(s, hash_dynamic_bitset()(nocase));
339}
340
341void make_nocase(ue2_literal *lit) {
342 ue2_literal rv;
343
344 for (const auto &elem: *lit) {
345 rv.push_back(elem.c, ourisalpha(elem.c));
346 }
347
348 lit->swap(rv);
349}
350
351static
352bool testchar(char c, const CharReach &cr, bool nocase) {
353 if (nocase) {
354 return cr.test((unsigned char)mytolower(c))
355 || cr.test((unsigned char)mytoupper(c));
356 } else {
357 return cr.test((unsigned char)c);
358 }
359}
360
361// Returns true if the given literal contains a char in the given CharReach
362bool contains(const ue2_literal &s, const CharReach &cr) {
363 for (ue2_literal::const_iterator it = s.begin(), ite = s.end();
364 it != ite; ++it) {
365 if (testchar(it->c, cr, it->nocase)) {
366 return true;
367 }
368 }
369 return false;
370}
371
372size_t maxStringSelfOverlap(const ue2_literal &a) {
373 /* overly conservative if only part of the string is nocase, TODO: fix */
374 return maxStringSelfOverlap(a.get_string(), a.any_nocase());
375}
376
377size_t minStringPeriod(const ue2_literal &a) {
378 return a.length() - maxStringSelfOverlap(a);
379}
380
381// Returns true if `a' is a suffix of (or equal to) `b'.
382bool isSuffix(const ue2_literal &a, const ue2_literal &b) {
383 size_t alen = a.length(), blen = b.length();
384 if (alen > blen) {
385 return false;
386 }
387 return equal(a.begin(), a.end(), b.begin() + (blen - alen));
388}
389
390bool is_flood(const ue2_literal &s) {
391 assert(!s.empty());
392
393 ue2_literal::const_iterator it = s.begin(), ite = s.end();
394 ue2_literal::elem f = *it;
395 for (++it; it != ite; ++it) {
396 if (*it != f) {
397 return false;
398 }
399 }
400
401 return true;
402}
403
404} // namespace ue2
405