1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qbytearraymatcher.h"
41
42#include <limits.h>
43
44QT_BEGIN_NAMESPACE
45
46static inline void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
47{
48 int l = int(qMin(len, qsizetype(255)));
49 memset(skiptable, l, 256*sizeof(uchar));
50 cc += len - l;
51 while (l--)
52 skiptable[*cc++] = l;
53}
54
55static inline qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc,
56 qsizetype pl, const uchar *skiptable)
57{
58 if (pl == 0)
59 return index > l ? -1 : index;
60 const qsizetype pl_minus_one = pl - 1;
61
62 const uchar *current = cc + index + pl_minus_one;
63 const uchar *end = cc + l;
64 while (current < end) {
65 qsizetype skip = skiptable[*current];
66 if (!skip) {
67 // possible match
68 while (skip < pl) {
69 if (*(current - skip) != puc[pl_minus_one - skip])
70 break;
71 skip++;
72 }
73 if (skip > pl_minus_one) // we have a match
74 return (current - cc) - skip + 1;
75
76 // in case we don't have a match we are a bit inefficient as we only skip by one
77 // when we have the non matching char in the string.
78 if (skiptable[*(current - skip)] == pl)
79 skip = pl - skip;
80 else
81 skip = 1;
82 }
83 if (current > end - skip)
84 break;
85 current += skip;
86 }
87 return -1; // not found
88}
89
90/*! \class QByteArrayMatcher
91 \inmodule QtCore
92 \brief The QByteArrayMatcher class holds a sequence of bytes that
93 can be quickly matched in a byte array.
94
95 \ingroup tools
96 \ingroup string-processing
97
98 This class is useful when you have a sequence of bytes that you
99 want to repeatedly match against some byte arrays (perhaps in a
100 loop), or when you want to search for the same sequence of bytes
101 multiple times in the same byte array. Using a matcher object and
102 indexIn() is faster than matching a plain QByteArray with
103 QByteArray::indexOf() if repeated matching takes place. This
104 class offers no benefit if you are doing one-off byte array
105 matches.
106
107 Create the QByteArrayMatcher with the QByteArray you want to
108 search for. Then call indexIn() on the QByteArray that you want to
109 search.
110
111 \sa QByteArray, QStringMatcher
112*/
113
114/*!
115 Constructs an empty byte array matcher that won't match anything.
116 Call setPattern() to give it a pattern to match.
117*/
118QByteArrayMatcher::QByteArrayMatcher()
119 : d(nullptr)
120{
121 p.p = nullptr;
122 p.l = 0;
123 memset(p.q_skiptable, 0, sizeof(p.q_skiptable));
124}
125
126/*!
127 Constructs a byte array matcher from \a pattern. \a pattern
128 has the given \a length. \a pattern must remain in scope, but
129 the destructor does not delete \a pattern.
130 */
131QByteArrayMatcher::QByteArrayMatcher(const char *pattern, qsizetype length) : d(nullptr)
132{
133 p.p = reinterpret_cast<const uchar *>(pattern);
134 p.l = length;
135 bm_init_skiptable(p.p, p.l, p.q_skiptable);
136}
137
138/*!
139 Constructs a byte array matcher that will search for \a pattern.
140 Call indexIn() to perform a search.
141*/
142QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
143 : d(nullptr), q_pattern(pattern)
144{
145 p.p = reinterpret_cast<const uchar *>(pattern.constData());
146 p.l = pattern.size();
147 bm_init_skiptable(p.p, p.l, p.q_skiptable);
148}
149
150/*!
151 Copies the \a other byte array matcher to this byte array matcher.
152*/
153QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
154 : d(nullptr)
155{
156 operator=(other);
157}
158
159/*!
160 Destroys the byte array matcher.
161*/
162QByteArrayMatcher::~QByteArrayMatcher()
163{
164 Q_UNUSED(d);
165}
166
167/*!
168 Assigns the \a other byte array matcher to this byte array matcher.
169*/
170QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
171{
172 q_pattern = other.q_pattern;
173 memcpy(&p, &other.p, sizeof(p));
174 return *this;
175}
176
177/*!
178 Sets the byte array that this byte array matcher will search for
179 to \a pattern.
180
181 \sa pattern(), indexIn()
182*/
183void QByteArrayMatcher::setPattern(const QByteArray &pattern)
184{
185 q_pattern = pattern;
186 p.p = reinterpret_cast<const uchar *>(pattern.constData());
187 p.l = pattern.size();
188 bm_init_skiptable(p.p, p.l, p.q_skiptable);
189}
190
191/*!
192 Searches the byte array \a ba, from byte position \a from (default
193 0, i.e. from the first byte), for the byte array pattern() that
194 was set in the constructor or in the most recent call to
195 setPattern(). Returns the position where the pattern() matched in
196 \a ba, or -1 if no match was found.
197*/
198qsizetype QByteArrayMatcher::indexIn(const QByteArray &ba, qsizetype from) const
199{
200 if (from < 0)
201 from = 0;
202 return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from,
203 p.p, p.l, p.q_skiptable);
204}
205
206/*!
207 Searches the char string \a str, which has length \a len, from
208 byte position \a from (default 0, i.e. from the first byte), for
209 the byte array pattern() that was set in the constructor or in the
210 most recent call to setPattern(). Returns the position where the
211 pattern() matched in \a str, or -1 if no match was found.
212*/
213qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const
214{
215 if (from < 0)
216 from = 0;
217 return bm_find(reinterpret_cast<const uchar *>(str), len, from,
218 p.p, p.l, p.q_skiptable);
219}
220
221/*!
222 \fn QByteArray QByteArrayMatcher::pattern() const
223
224 Returns the byte array pattern that this byte array matcher will
225 search for.
226
227 \sa setPattern()
228*/
229
230
231static qsizetype findChar(const char *str, qsizetype len, char ch, qsizetype from)
232{
233 const uchar *s = (const uchar *)str;
234 uchar c = (uchar)ch;
235 if (from < 0)
236 from = qMax(from + len, qsizetype(0));
237 if (from < len) {
238 const uchar *n = s + from - 1;
239 const uchar *e = s + len;
240 while (++n != e)
241 if (*n == c)
242 return n - s;
243 }
244 return -1;
245}
246
247/*!
248 \internal
249 */
250static qsizetype qFindByteArrayBoyerMoore(
251 const char *haystack, qsizetype haystackLen, qsizetype haystackOffset,
252 const char *needle, qsizetype needleLen)
253{
254 uchar skiptable[256];
255 bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
256 if (haystackOffset < 0)
257 haystackOffset = 0;
258 return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
259 (const uchar *)needle, needleLen, skiptable);
260}
261
262#define REHASH(a) \
263 if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT) \
264 hashHaystack -= std::size_t(a) << sl_minus_1; \
265 hashHaystack <<= 1
266
267/*!
268 \internal
269 */
270qsizetype qFindByteArray(
271 const char *haystack0, qsizetype haystackLen, qsizetype from,
272 const char *needle, qsizetype needleLen)
273{
274 const auto l = haystackLen;
275 const auto sl = needleLen;
276 if (from < 0)
277 from += l;
278 if (std::size_t(sl + from) > std::size_t(l))
279 return -1;
280 if (!sl)
281 return from;
282 if (!l)
283 return -1;
284
285 if (sl == 1)
286 return findChar(haystack0, haystackLen, needle[0], from);
287
288 /*
289 We use the Boyer-Moore algorithm in cases where the overhead
290 for the skip table should pay off, otherwise we use a simple
291 hash function.
292 */
293 if (l > 500 && sl > 5)
294 return qFindByteArrayBoyerMoore(haystack0, haystackLen, from,
295 needle, needleLen);
296
297 /*
298 We use some hashing for efficiency's sake. Instead of
299 comparing strings, we compare the hash value of str with that
300 of a part of this QString. Only if that matches, we call memcmp().
301 */
302 const char *haystack = haystack0 + from;
303 const char *end = haystack0 + (l - sl);
304 const auto sl_minus_1 = std::size_t(sl - 1);
305 std::size_t hashNeedle = 0, hashHaystack = 0;
306 qsizetype idx;
307 for (idx = 0; idx < sl; ++idx) {
308 hashNeedle = ((hashNeedle<<1) + needle[idx]);
309 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
310 }
311 hashHaystack -= *(haystack + sl_minus_1);
312
313 while (haystack <= end) {
314 hashHaystack += *(haystack + sl_minus_1);
315 if (hashHaystack == hashNeedle && *needle == *haystack
316 && memcmp(needle, haystack, sl) == 0)
317 return haystack - haystack0;
318
319 REHASH(*haystack);
320 ++haystack;
321 }
322 return -1;
323}
324
325/*!
326 \class QStaticByteArrayMatcherBase
327 \since 5.9
328 \internal
329 \brief Non-template base class of QStaticByteArrayMatcher.
330*/
331
332/*!
333 \class QStaticByteArrayMatcher
334 \since 5.9
335 \inmodule QtCore
336 \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher.
337
338 \ingroup tools
339 \ingroup string-processing
340
341 This class is useful when you have a sequence of bytes that you
342 want to repeatedly match against some byte arrays (perhaps in a
343 loop), or when you want to search for the same sequence of bytes
344 multiple times in the same byte array. Using a matcher object and
345 indexIn() is faster than matching a plain QByteArray with
346 QByteArray::indexOf(), in particular if repeated matching takes place.
347
348 Unlike QByteArrayMatcher, this class calculates the internal
349 representation at \e{compile-time}, if your compiler supports
350 C++14-level \c{constexpr} (C++11 is not sufficient), so it can
351 even benefit if you are doing one-off byte array matches.
352
353 Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
354 passing it the C string literal you want to search for. Store the return
355 value of that function in a \c{static const auto} variable, so you don't need
356 to pass the \c{N} template parameter explicitly:
357
358 \snippet code/src_corelib_text_qbytearraymatcher.cpp 0
359
360 Then call indexIn() on the QByteArray in which you want to search, just like
361 with QByteArrayMatcher.
362
363 Since this class is designed to do all the up-front calculations at compile-time,
364 it does not offer a setPattern() method.
365
366 \note Qt detects the necessary C++14 compiler support by way of the feature
367 test recommendations from
368 \l{https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations}
369 {C++ Committee's Standing Document 6}.
370
371 \sa QByteArrayMatcher, QStringMatcher
372*/
373
374/*!
375 \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const
376
377 Searches the char string \a haystack, which has length \a hlen, from
378 byte position \a from (default 0, i.e. from the first byte), for
379 the byte array pattern() that was set in the constructor.
380
381 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
382*/
383
384/*!
385 \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const
386
387 Searches the char string \a haystack, from byte position \a from
388 (default 0, i.e. from the first byte), for the byte array pattern()
389 that was set in the constructor.
390
391 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
392*/
393
394/*!
395 \fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
396
397 Returns the byte array pattern that this byte array matcher will
398 search for.
399
400 \sa QByteArrayMatcher::setPattern()
401*/
402
403/*!
404 \internal
405*/
406int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept
407{
408 if (from < 0)
409 from = 0;
410 return bm_find(reinterpret_cast<const uchar *>(haystack), hlen, from,
411 reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data);
412}
413
414/*!
415 \fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
416 \internal
417*/
418
419/*!
420 \fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
421 \since 5.9
422 \relates QStaticByteArrayMatcher
423
424 Return a QStaticByteArrayMatcher with the correct \c{N} determined
425 automatically from the \a pattern passed.
426
427 To take full advantage of this function, assign the result to an
428 \c{auto} variable:
429
430 \snippet code/src_corelib_text_qbytearraymatcher.cpp 1
431*/
432
433
434QT_END_NAMESPACE
435