1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2020 The Qt Company Ltd. |
4 | ** Copyright (C) 2019 Mail.ru Group. |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the QtCore module of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:LGPL$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU Lesser General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
22 | ** packaging of this file. Please review the following information to |
23 | ** ensure the GNU Lesser General Public License version 3 requirements |
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
25 | ** |
26 | ** GNU General Public License Usage |
27 | ** Alternatively, this file may be used under the terms of the GNU |
28 | ** General Public License version 2.0 or (at your option) the GNU General |
29 | ** Public license version 3 or any later version approved by the KDE Free |
30 | ** Qt Foundation. The licenses are as published by the Free Software |
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
32 | ** included in the packaging of this file. Please review the following |
33 | ** information to ensure the GNU General Public License requirements will |
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
36 | ** |
37 | ** $QT_END_LICENSE$ |
38 | ** |
39 | ****************************************************************************/ |
40 | |
41 | #include "qstringmatcher.h" |
42 | |
43 | QT_BEGIN_NAMESPACE |
44 | |
45 | static void bm_init_skiptable(QStringView needle, uchar *skiptable, Qt::CaseSensitivity cs) |
46 | { |
47 | const char16_t *uc = needle.utf16(); |
48 | const qsizetype len = needle.size(); |
49 | qsizetype l = qMin(len, qsizetype(255)); |
50 | memset(skiptable, l, 256 * sizeof(uchar)); |
51 | uc += len - l; |
52 | if (cs == Qt::CaseSensitive) { |
53 | while (l--) { |
54 | skiptable[*uc & 0xff] = l; |
55 | ++uc; |
56 | } |
57 | } else { |
58 | const char16_t *start = uc; |
59 | while (l--) { |
60 | skiptable[foldCase(uc, start) & 0xff] = l; |
61 | ++uc; |
62 | } |
63 | } |
64 | } |
65 | |
66 | static inline qsizetype bm_find(QStringView haystack, qsizetype index, QStringView needle, |
67 | const uchar *skiptable, Qt::CaseSensitivity cs) |
68 | { |
69 | const char16_t *uc = haystack.utf16(); |
70 | const qsizetype l = haystack.size(); |
71 | const char16_t *puc = needle.utf16(); |
72 | const qsizetype pl = needle.size(); |
73 | |
74 | if (pl == 0) |
75 | return index > l ? -1 : index; |
76 | const qsizetype pl_minus_one = pl - 1; |
77 | |
78 | const char16_t *current = uc + index + pl_minus_one; |
79 | const char16_t *end = uc + l; |
80 | if (cs == Qt::CaseSensitive) { |
81 | while (current < end) { |
82 | qsizetype skip = skiptable[*current & 0xff]; |
83 | if (!skip) { |
84 | // possible match |
85 | while (skip < pl) { |
86 | if (*(current - skip) != puc[pl_minus_one-skip]) |
87 | break; |
88 | ++skip; |
89 | } |
90 | if (skip > pl_minus_one) // we have a match |
91 | return (current - uc) - pl_minus_one; |
92 | |
93 | // in case we don't have a match we are a bit inefficient as we only skip by one |
94 | // when we have the non matching char in the string. |
95 | if (skiptable[*(current - skip) & 0xff] == pl) |
96 | skip = pl - skip; |
97 | else |
98 | skip = 1; |
99 | } |
100 | if (current > end - skip) |
101 | break; |
102 | current += skip; |
103 | } |
104 | } else { |
105 | while (current < end) { |
106 | qsizetype skip = skiptable[foldCase(current, uc) & 0xff]; |
107 | if (!skip) { |
108 | // possible match |
109 | while (skip < pl) { |
110 | if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc)) |
111 | break; |
112 | ++skip; |
113 | } |
114 | if (skip > pl_minus_one) // we have a match |
115 | return (current - uc) - pl_minus_one; |
116 | // in case we don't have a match we are a bit inefficient as we only skip by one |
117 | // when we have the non matching char in the string. |
118 | if (skiptable[foldCase(current - skip, uc) & 0xff] == pl) |
119 | skip = pl - skip; |
120 | else |
121 | skip = 1; |
122 | } |
123 | if (current > end - skip) |
124 | break; |
125 | current += skip; |
126 | } |
127 | } |
128 | return -1; // not found |
129 | } |
130 | |
131 | void QStringMatcher::updateSkipTable() |
132 | { |
133 | bm_init_skiptable(q_sv, q_skiptable, q_cs); |
134 | } |
135 | |
136 | /*! |
137 | \class QStringMatcher |
138 | \inmodule QtCore |
139 | \brief The QStringMatcher class holds a sequence of characters that |
140 | can be quickly matched in a Unicode string. |
141 | |
142 | \ingroup tools |
143 | \ingroup string-processing |
144 | |
145 | This class is useful when you have a sequence of \l{QChar}s that |
146 | you want to repeatedly match against some strings (perhaps in a |
147 | loop), or when you want to search for the same sequence of |
148 | characters multiple times in the same string. Using a matcher |
149 | object and indexIn() is faster than matching a plain QString with |
150 | QString::indexOf() if repeated matching takes place. This class |
151 | offers no benefit if you are doing one-off string matches. |
152 | |
153 | Create the QStringMatcher with the QString you want to search |
154 | for. Then call indexIn() on the QString that you want to search. |
155 | |
156 | \sa QString, QByteArrayMatcher, QRegularExpression |
157 | */ |
158 | |
159 | /*! \fn QStringMatcher::QStringMatcher() |
160 | |
161 | Constructs an empty string matcher that won't match anything. |
162 | Call setPattern() to give it a pattern to match. |
163 | */ |
164 | |
165 | /*! |
166 | Constructs a string matcher that will search for \a pattern, with |
167 | case sensitivity \a cs. |
168 | |
169 | Call indexIn() to perform a search. |
170 | */ |
171 | QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs) |
172 | : d_ptr(nullptr), q_cs(cs), q_pattern(pattern) |
173 | { |
174 | q_sv = q_pattern; |
175 | updateSkipTable(); |
176 | } |
177 | |
178 | /*! |
179 | \fn QStringMatcher::QStringMatcher(const QChar *uc, qsizetype length, Qt::CaseSensitivity cs) |
180 | \since 4.5 |
181 | |
182 | Constructs a string matcher that will search for the pattern referred to |
183 | by \a uc with the given \a length and case sensitivity specified by \a cs. |
184 | */ |
185 | |
186 | /*! |
187 | \fn QStringMatcher::QStringMatcher(QStringView pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive) |
188 | \since 5.14 |
189 | |
190 | Constructs a string matcher that will search for \a pattern, with |
191 | case sensitivity \a cs. |
192 | |
193 | Call indexIn() to perform a search. |
194 | */ |
195 | QStringMatcher::QStringMatcher(QStringView str, Qt::CaseSensitivity cs) |
196 | : d_ptr(nullptr), q_cs(cs), q_sv(str) |
197 | { |
198 | updateSkipTable(); |
199 | } |
200 | /*! |
201 | Copies the \a other string matcher to this string matcher. |
202 | */ |
203 | QStringMatcher::QStringMatcher(const QStringMatcher &other) |
204 | : d_ptr(nullptr) |
205 | { |
206 | operator=(other); |
207 | } |
208 | |
209 | /*! |
210 | Destroys the string matcher. |
211 | */ |
212 | QStringMatcher::~QStringMatcher() |
213 | { |
214 | Q_UNUSED(d_ptr); |
215 | } |
216 | |
217 | /*! |
218 | Assigns the \a other string matcher to this string matcher. |
219 | */ |
220 | QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other) |
221 | { |
222 | if (this != &other) { |
223 | q_pattern = other.q_pattern; |
224 | q_cs = other.q_cs; |
225 | q_sv = other.q_sv; |
226 | memcpy(q_skiptable, other.q_skiptable, sizeof(q_skiptable)); |
227 | } |
228 | return *this; |
229 | } |
230 | |
231 | /*! |
232 | Sets the string that this string matcher will search for to \a |
233 | pattern. |
234 | |
235 | \sa pattern(), setCaseSensitivity(), indexIn() |
236 | */ |
237 | void QStringMatcher::setPattern(const QString &pattern) |
238 | { |
239 | q_pattern = pattern; |
240 | q_sv = q_pattern; |
241 | updateSkipTable(); |
242 | } |
243 | |
244 | /*! |
245 | \fn QString QStringMatcher::pattern() const |
246 | |
247 | Returns the string pattern that this string matcher will search |
248 | for. |
249 | |
250 | \sa setPattern() |
251 | */ |
252 | |
253 | QString QStringMatcher::pattern() const |
254 | { |
255 | if (!q_pattern.isEmpty()) |
256 | return q_pattern; |
257 | return q_sv.toString(); |
258 | } |
259 | |
260 | /*! |
261 | Sets the case sensitivity setting of this string matcher to \a |
262 | cs. |
263 | |
264 | \sa caseSensitivity(), setPattern(), indexIn() |
265 | */ |
266 | void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) |
267 | { |
268 | if (cs == q_cs) |
269 | return; |
270 | q_cs = cs; |
271 | updateSkipTable(); |
272 | } |
273 | |
274 | /*! \fn qsizetype QStringMatcher::indexIn(const QString &str, qsizetype from) const |
275 | |
276 | Searches the string \a str from character position \a from |
277 | (default 0, i.e. from the first character), for the string |
278 | pattern() that was set in the constructor or in the most recent |
279 | call to setPattern(). Returns the position where the pattern() |
280 | matched in \a str, or -1 if no match was found. |
281 | |
282 | \sa setPattern(), setCaseSensitivity() |
283 | */ |
284 | |
285 | /*! \fn qsizetype QStringMatcher::indexIn(const QChar *str, qsizetype length, qsizetype from) const |
286 | \since 4.5 |
287 | |
288 | Searches the string starting at \a str (of length \a length) from |
289 | character position \a from (default 0, i.e. from the first |
290 | character), for the string pattern() that was set in the |
291 | constructor or in the most recent call to setPattern(). Returns |
292 | the position where the pattern() matched in \a str, or -1 if no |
293 | match was found. |
294 | |
295 | \sa setPattern(), setCaseSensitivity() |
296 | */ |
297 | |
298 | /*! |
299 | \since 5.14 |
300 | |
301 | Searches the string \a str from character position \a from |
302 | (default 0, i.e. from the first character), for the string |
303 | pattern() that was set in the constructor or in the most recent |
304 | call to setPattern(). Returns the position where the pattern() |
305 | matched in \a str, or -1 if no match was found. |
306 | |
307 | \sa setPattern(), setCaseSensitivity() |
308 | */ |
309 | qsizetype QStringMatcher::indexIn(QStringView str, qsizetype from) const |
310 | { |
311 | if (from < 0) |
312 | from = 0; |
313 | return bm_find(str, from, q_sv, q_skiptable, q_cs); |
314 | } |
315 | |
316 | /*! |
317 | \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const |
318 | |
319 | Returns the case sensitivity setting for this string matcher. |
320 | |
321 | \sa setCaseSensitivity() |
322 | */ |
323 | |
324 | /*! |
325 | \internal |
326 | */ |
327 | |
328 | qsizetype qFindStringBoyerMoore( |
329 | QStringView haystack, qsizetype haystackOffset, |
330 | QStringView needle, Qt::CaseSensitivity cs) |
331 | { |
332 | uchar skiptable[256]; |
333 | bm_init_skiptable(needle, skiptable, cs); |
334 | if (haystackOffset < 0) |
335 | haystackOffset = 0; |
336 | return bm_find(haystack, haystackOffset, needle, skiptable, cs); |
337 | } |
338 | |
339 | QT_END_NAMESPACE |
340 | |