1/****************************************************************************
2**
3** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
4** Contact: http://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qstringtokenizer.h"
41#include "qstringalgorithms.h"
42
43QT_BEGIN_NAMESPACE
44
45/*!
46 \class QStringTokenizer
47 \inmodule QtCore
48 \since 6.0
49 \brief The QStringTokenizer class splits strings into tokens along given separators.
50 \reentrant
51 \ingroup tools
52 \ingroup string-processing
53
54 Splits a string into substrings wherever a given separator occurs,
55 returning a (lazily constructed) list of those strings. If the separator does
56 not match anywhere in the string, produces a single-element list
57 containing this string. If the separator is empty,
58 QStringTokenizer produces an empty string, followed by each of the
59 string's characters, followed by another empty string. The two
60 enumerations Qt::SplitBehavior and Qt::CaseSensitivity further
61 control the output.
62
63 QStringTokenizer drives QStringView::tokenize(), but, at least with a
64 recent compiler, you can use it directly, too:
65
66 \code
67 for (auto token : QStringTokenizer{string, separator})
68 use(token);
69 \endcode
70
71 \note You should never, ever, name the template arguments of a
72 QStringTokenizer explicitly. If you can use C++17 Class Template
73 Argument Deduction (CTAD), you may write
74 \c{QStringTokenizer{string, separator}} (without template
75 arguments). If you can't use C++17 CTAD, you must use the
76 QStringView::tokenize() or QLatin1String::tokenize() member functions
77 and store the return value only in \c{auto} variables:
78
79 \code
80 auto result = string.tokenize(sep);
81 \endcode
82
83 This is because the template arguments of QStringTokenizer have a
84 very subtle dependency on the specific string and separator types
85 with which they are constructed, and they don't usually
86 correspond to the actual types passed.
87
88 \section1 Lazy Sequences
89
90 QStringTokenizer acts as a so-called lazy sequence, that is, each
91 next element is only computed once you ask for it. Lazy sequences
92 have the advantage that they only require O(1) memory. They have
93 the disadvantage that, at least for QStringTokenizer, they only
94 allow forward, not random-access, iteration.
95
96 The intended use-case is that you just plug it into a range-based for loop:
97
98 \code
99 for (auto token : QStringTokenizer{string, separator})
100 use(token);
101 \endcode
102
103 or a C++20 ranges algorithm:
104
105 \code
106 std::ranges::for_each(QStringTokenizer{string, separator},
107 [] (auto token) { use(token); });
108 \endcode
109
110 \section1 End Sentinel
111
112 The QStringTokenizer iterators cannot be used with classical STL
113 algorithms, because those require iterator/iterator pairs, while
114 QStringTokenizer uses sentinels. That is, it uses a different
115 type, QStringTokenizer::sentinel, to mark the end of the
116 range. This improves performance, because the sentinel is an empty
117 type. Sentinels are supported from C++17 (for range-based for)
118 and C++20 (for algorithms using the new ranges library).
119
120 \section1 Temporaries
121
122 QStringTokenizer is very carefully designed to avoid dangling
123 references. If you construct a tokenizer from a temporary string
124 (an rvalue), that argument is stored internally, so the referenced
125 data isn't deleted before it is tokenized:
126
127 \code
128 auto tok = QStringTokenizer{widget.text(), u','};
129 // return value of `widget.text()` is destroyed, but content was moved into `tok`
130 for (auto e : tok)
131 use(e);
132 \endcode
133
134 If you pass named objects (lvalues), then QStringTokenizer does
135 not store a copy. You are responsible for keeping the named object's
136 data around for longer than the tokenizer operates on it:
137
138 \code
139 auto text = widget.text();
140 auto tok = QStringTokenizer{text, u','};
141 text.clear(); // destroy content of `text`
142 for (auto e : tok) // ERROR: `tok` references deleted data!
143 use(e);
144 \endcode
145
146 \sa QStringView::split(), QString::split(), QRegularExpression
147*/
148
149/*!
150 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::value_type
151
152 Alias for \c{const QStringView} or \c{const QLatin1String},
153 depending on the tokenizer's \c Haystack template argument.
154*/
155
156/*!
157 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::difference_type
158
159 Alias for qsizetype.
160*/
161
162/*!
163 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::size_type
164
165 Alias for qsizetype.
166*/
167
168/*!
169 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::reference
170
171 Alias for \c{value_type &}.
172
173 QStringTokenizer does not support mutable references, so this is
174 the same as const_reference.
175*/
176
177/*!
178 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::const_reference
179
180 Alias for \c{value_type &}.
181*/
182
183/*!
184 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::pointer
185
186 Alias for \c{value_type *}.
187
188 QStringTokenizer does not support mutable iterators, so this is
189 the same as const_pointer.
190*/
191
192/*!
193 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::const_pointer
194
195 Alias for \c{value_type *}.
196*/
197
198/*!
199 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::iterator
200
201 This typedef provides an STL-style const iterator for
202 QStringTokenizer.
203
204 QStringTokenizer does not support mutable iterators, so this is
205 the same as const_iterator.
206
207 \sa const_iterator
208*/
209
210/*!
211 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::const_iterator
212
213 This typedef provides an STL-style const iterator for
214 QStringTokenizer.
215
216 \sa iterator
217*/
218
219/*!
220 \typedef template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::sentinel
221
222 This typedef provides an STL-style sentinel for
223 QStringTokenizer::iterator and QStringTokenizer::const_iterator.
224
225 \sa const_iterator
226*/
227
228/*!
229 \fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::QStringTokenizer(Haystack haystack, Needle needle, Qt::CaseSensitivity cs, Qt::SplitBehavior sb)
230 \fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::QStringTokenizer(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs)
231
232 Constructs a string tokenizer that splits the string \a haystack
233 into substrings wherever \a needle occurs, and allows iteration
234 over those strings as they are found. If \a needle does not match
235 anywhere in \a haystack, a single element containing \a haystack
236 is produced.
237
238 \a cs specifies whether \a needle should be matched case
239 sensitively or case insensitively.
240
241 If \a sb is Qt::SkipEmptyParts, empty entries don't
242 appear in the result. By default, empty entries are included.
243
244 \sa QStringView::split(), QString::split(), Qt::CaseSensitivity, Qt::SplitBehavior
245*/
246
247/*!
248 \fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::const_iterator QStringTokenizer<Haystack, Needle>::begin() const
249
250 Returns a const \l{STL-style iterators}{STL-style iterator}
251 pointing to the first token in the list.
252
253 \sa end(), cbegin()
254*/
255
256/*!
257 \fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::const_iterator QStringTokenizer<Haystack, Needle>::cbegin() const
258
259 Same as begin().
260
261 \sa cend(), begin()
262*/
263
264/*!
265 \fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::sentinel QStringTokenizer<Haystack, Needle>::end() const
266
267 Returns a const \l{STL-style iterators}{STL-style sentinel}
268 pointing to the imaginary token after the last token in the list.
269
270 \sa begin(), cend()
271*/
272
273/*!
274 \fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::sentinel QStringTokenizer<Haystack, Needle>::cend() const
275
276 Same as end().
277
278 \sa cbegin(), end()
279*/
280
281/*!
282 \fn template <typename Haystack, typename Needle> template<typename Container> Container QStringTokenizer<Haystack, Needle>::toContainer(Container &&c) const &
283
284 Converts the lazy sequence into a (typically) random-access container of
285 type \c Container.
286
287 This function is only available if \c Container has a \c value_type
288 matching this tokenizer's value_type.
289
290 If you pass in a named container (an lvalue) for \a c, then that container
291 is filled, and a reference to it is returned. If you pass in a temporary
292 container (an rvalue, incl. the default argument), then that container is
293 filled, and returned by value.
294
295 \code
296 // assuming tok's value_type is QStringView, then...
297 auto tok = QStringTokenizer{~~~};
298 // ... rac1 is a QList:
299 auto rac1 = tok.toContainer();
300 // ... rac2 is std::pmr::vector<QStringView>:
301 auto rac2 = tok.toContainer<std::pmr::vector<QStringView>>();
302 auto rac3 = QVarLengthArray<QStringView, 12>{};
303 // appends the token sequence produced by tok to rac3
304 // and returns a reference to rac3 (which we ignore here):
305 tok.toContainer(rac3);
306 \endcode
307
308 This gives you maximum flexibility in how you want the sequence to
309 be stored.
310*/
311
312/*!
313 \fn template <typename Haystack, typename Needle> template<typename Container> Container QStringTokenizer<Haystack, Needle>::toContainer(Container &&c) const &&
314 \overload
315
316 Converts the lazy sequence into a (typically) random-access container of
317 type \c Container.
318
319 In addition to the constraints on the lvalue-this overload, this
320 rvalue-this overload is only available when this QStringTokenizer
321 does not store the haystack internally, as this could create a
322 container full of dangling references:
323
324 \code
325 auto tokens = QStringTokenizer{widget.text(), u','}.toContainer();
326 // ERROR: cannot call toContainer() on rvalue
327 // 'tokens' references the data of the copy of widget.text()
328 // stored inside the QStringTokenizer, which has since been deleted
329 \endcode
330
331 To fix, store the QStringTokenizer in a named variable:
332
333 \code
334 auto tokenizer = QStringTokenizer{widget.text(), u','};
335 auto tokens = tokenizer.toContainer();
336 // OK: the copy of widget.text() stored in 'tokenizer' keeps the data
337 // referenced by 'tokens' alive.
338 \endcode
339
340 You can force this function into existence by passing a view instead:
341
342 \code
343 func(QStringTokenizer{QStringView{widget.text()}, u','}.toContainer());
344 // OK: compiler keeps widget.text() around until after func() has executed
345 \endcode
346
347 If you pass in a named container (an lvalue) for \a c, then that container
348 is filled, and a reference to it is returned. If you pass in a temporary
349 container (an rvalue, incl. the default argument), then that container is
350 filled, and returned by value.
351*/
352
353/*!
354 \fn template <typename Haystack, typename Needle, typename...Flags> auto qTokenize(Haystack &&haystack, Needle &&needle, Flags...flags)
355 \relates QStringTokenizer
356 \since 6.0
357
358 Factory function for a QStringTokenizer that splits the string \a haystack
359 into substrings wherever \a needle occurs, and allows iteration
360 over those strings as they are found. If \a needle does not match
361 anywhere in \a haystack, a single element containing \a haystack
362 is produced.
363
364 Pass values from Qt::CaseSensitivity and Qt::SplitBehavior enumerators
365 as \a flags to modify the behavior of the tokenizer.
366
367 You can use this function if your compiler doesn't, yet, support C++17 Class
368 Template Argument Deduction (CTAD). We recommend direct use of QStringTokenizer
369 with CTAD instead.
370*/
371
372QT_END_NAMESPACE
373