1/****************************************************************************
2**
3** Copyright (C) 2020 The Qt Company Ltd.
4** Copyright (C) 2019 Mail.ru Group.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41#include "qstringmatcher.h"
42
43QT_BEGIN_NAMESPACE
44
45static void bm_init_skiptable(QStringView needle, uchar *skiptable, Qt::CaseSensitivity cs)
46{
47 const char16_t *uc = needle.utf16();
48 const qsizetype len = needle.size();
49 qsizetype l = qMin(len, qsizetype(255));
50 memset(skiptable, l, 256 * sizeof(uchar));
51 uc += len - l;
52 if (cs == Qt::CaseSensitive) {
53 while (l--) {
54 skiptable[*uc & 0xff] = l;
55 ++uc;
56 }
57 } else {
58 const char16_t *start = uc;
59 while (l--) {
60 skiptable[foldCase(uc, start) & 0xff] = l;
61 ++uc;
62 }
63 }
64}
65
66static inline qsizetype bm_find(QStringView haystack, qsizetype index, QStringView needle,
67 const uchar *skiptable, Qt::CaseSensitivity cs)
68{
69 const char16_t *uc = haystack.utf16();
70 const qsizetype l = haystack.size();
71 const char16_t *puc = needle.utf16();
72 const qsizetype pl = needle.size();
73
74 if (pl == 0)
75 return index > l ? -1 : index;
76 const qsizetype pl_minus_one = pl - 1;
77
78 const char16_t *current = uc + index + pl_minus_one;
79 const char16_t *end = uc + l;
80 if (cs == Qt::CaseSensitive) {
81 while (current < end) {
82 qsizetype skip = skiptable[*current & 0xff];
83 if (!skip) {
84 // possible match
85 while (skip < pl) {
86 if (*(current - skip) != puc[pl_minus_one-skip])
87 break;
88 ++skip;
89 }
90 if (skip > pl_minus_one) // we have a match
91 return (current - uc) - pl_minus_one;
92
93 // in case we don't have a match we are a bit inefficient as we only skip by one
94 // when we have the non matching char in the string.
95 if (skiptable[*(current - skip) & 0xff] == pl)
96 skip = pl - skip;
97 else
98 skip = 1;
99 }
100 if (current > end - skip)
101 break;
102 current += skip;
103 }
104 } else {
105 while (current < end) {
106 qsizetype skip = skiptable[foldCase(current, uc) & 0xff];
107 if (!skip) {
108 // possible match
109 while (skip < pl) {
110 if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc))
111 break;
112 ++skip;
113 }
114 if (skip > pl_minus_one) // we have a match
115 return (current - uc) - pl_minus_one;
116 // in case we don't have a match we are a bit inefficient as we only skip by one
117 // when we have the non matching char in the string.
118 if (skiptable[foldCase(current - skip, uc) & 0xff] == pl)
119 skip = pl - skip;
120 else
121 skip = 1;
122 }
123 if (current > end - skip)
124 break;
125 current += skip;
126 }
127 }
128 return -1; // not found
129}
130
131void QStringMatcher::updateSkipTable()
132{
133 bm_init_skiptable(q_sv, q_skiptable, q_cs);
134}
135
136/*!
137 \class QStringMatcher
138 \inmodule QtCore
139 \brief The QStringMatcher class holds a sequence of characters that
140 can be quickly matched in a Unicode string.
141
142 \ingroup tools
143 \ingroup string-processing
144
145 This class is useful when you have a sequence of \l{QChar}s that
146 you want to repeatedly match against some strings (perhaps in a
147 loop), or when you want to search for the same sequence of
148 characters multiple times in the same string. Using a matcher
149 object and indexIn() is faster than matching a plain QString with
150 QString::indexOf() if repeated matching takes place. This class
151 offers no benefit if you are doing one-off string matches.
152
153 Create the QStringMatcher with the QString you want to search
154 for. Then call indexIn() on the QString that you want to search.
155
156 \sa QString, QByteArrayMatcher, QRegularExpression
157*/
158
159/*! \fn QStringMatcher::QStringMatcher()
160
161 Constructs an empty string matcher that won't match anything.
162 Call setPattern() to give it a pattern to match.
163*/
164
165/*!
166 Constructs a string matcher that will search for \a pattern, with
167 case sensitivity \a cs.
168
169 Call indexIn() to perform a search.
170*/
171QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
172 : d_ptr(nullptr), q_cs(cs), q_pattern(pattern)
173{
174 q_sv = q_pattern;
175 updateSkipTable();
176}
177
178/*!
179 \fn QStringMatcher::QStringMatcher(const QChar *uc, qsizetype length, Qt::CaseSensitivity cs)
180 \since 4.5
181
182 Constructs a string matcher that will search for the pattern referred to
183 by \a uc with the given \a length and case sensitivity specified by \a cs.
184*/
185
186/*!
187 \fn QStringMatcher::QStringMatcher(QStringView pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive)
188 \since 5.14
189
190 Constructs a string matcher that will search for \a pattern, with
191 case sensitivity \a cs.
192
193 Call indexIn() to perform a search.
194*/
195QStringMatcher::QStringMatcher(QStringView str, Qt::CaseSensitivity cs)
196 : d_ptr(nullptr), q_cs(cs), q_sv(str)
197{
198 updateSkipTable();
199}
200/*!
201 Copies the \a other string matcher to this string matcher.
202*/
203QStringMatcher::QStringMatcher(const QStringMatcher &other)
204 : d_ptr(nullptr)
205{
206 operator=(other);
207}
208
209/*!
210 Destroys the string matcher.
211*/
212QStringMatcher::~QStringMatcher()
213{
214 Q_UNUSED(d_ptr);
215}
216
217/*!
218 Assigns the \a other string matcher to this string matcher.
219*/
220QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
221{
222 if (this != &other) {
223 q_pattern = other.q_pattern;
224 q_cs = other.q_cs;
225 q_sv = other.q_sv;
226 memcpy(q_skiptable, other.q_skiptable, sizeof(q_skiptable));
227 }
228 return *this;
229}
230
231/*!
232 Sets the string that this string matcher will search for to \a
233 pattern.
234
235 \sa pattern(), setCaseSensitivity(), indexIn()
236*/
237void QStringMatcher::setPattern(const QString &pattern)
238{
239 q_pattern = pattern;
240 q_sv = q_pattern;
241 updateSkipTable();
242}
243
244/*!
245 \fn QString QStringMatcher::pattern() const
246
247 Returns the string pattern that this string matcher will search
248 for.
249
250 \sa setPattern()
251*/
252
253QString QStringMatcher::pattern() const
254{
255 if (!q_pattern.isEmpty())
256 return q_pattern;
257 return q_sv.toString();
258}
259
260/*!
261 Sets the case sensitivity setting of this string matcher to \a
262 cs.
263
264 \sa caseSensitivity(), setPattern(), indexIn()
265*/
266void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
267{
268 if (cs == q_cs)
269 return;
270 q_cs = cs;
271 updateSkipTable();
272}
273
274/*! \fn qsizetype QStringMatcher::indexIn(const QString &str, qsizetype from) const
275
276 Searches the string \a str from character position \a from
277 (default 0, i.e. from the first character), for the string
278 pattern() that was set in the constructor or in the most recent
279 call to setPattern(). Returns the position where the pattern()
280 matched in \a str, or -1 if no match was found.
281
282 \sa setPattern(), setCaseSensitivity()
283*/
284
285/*! \fn qsizetype QStringMatcher::indexIn(const QChar *str, qsizetype length, qsizetype from) const
286 \since 4.5
287
288 Searches the string starting at \a str (of length \a length) from
289 character position \a from (default 0, i.e. from the first
290 character), for the string pattern() that was set in the
291 constructor or in the most recent call to setPattern(). Returns
292 the position where the pattern() matched in \a str, or -1 if no
293 match was found.
294
295 \sa setPattern(), setCaseSensitivity()
296*/
297
298/*!
299 \since 5.14
300
301 Searches the string \a str from character position \a from
302 (default 0, i.e. from the first character), for the string
303 pattern() that was set in the constructor or in the most recent
304 call to setPattern(). Returns the position where the pattern()
305 matched in \a str, or -1 if no match was found.
306
307 \sa setPattern(), setCaseSensitivity()
308*/
309qsizetype QStringMatcher::indexIn(QStringView str, qsizetype from) const
310{
311 if (from < 0)
312 from = 0;
313 return bm_find(str, from, q_sv, q_skiptable, q_cs);
314}
315
316/*!
317 \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
318
319 Returns the case sensitivity setting for this string matcher.
320
321 \sa setCaseSensitivity()
322*/
323
324/*!
325 \internal
326*/
327
328qsizetype qFindStringBoyerMoore(
329 QStringView haystack, qsizetype haystackOffset,
330 QStringView needle, Qt::CaseSensitivity cs)
331{
332 uchar skiptable[256];
333 bm_init_skiptable(needle, skiptable, cs);
334 if (haystackOffset < 0)
335 haystackOffset = 0;
336 return bm_find(haystack, haystackOffset, needle, skiptable, cs);
337}
338
339QT_END_NAMESPACE
340