1/****************************************************************************
2**
3** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
4** Contact: http://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39#ifndef QSTRINGTOKENIZER_H
40#define QSTRINGTOKENIZER_H
41
42#include <QtCore/qnamespace.h>
43#include <QtCore/qcontainerfwd.h>
44
45QT_BEGIN_NAMESPACE
46
// Forward declaration, so that the ViewForImpl and PinForImpl
// specialisations further down can refer to QStringBuilder.
template <typename, typename> class QStringBuilder;

// When Q_STRINGTOKENIZER_USE_SENTINEL is defined, QStringTokenizerBase
// declares a dedicated `sentinel` class as the type returned by end();
// otherwise `sentinel` is an alias for `iterator`.
// NOTE: the literal `|| 1` makes this condition true unconditionally, so the
// macro is always defined, regardless of Q_QDOC or __cpp_range_based_for.
#if defined(Q_QDOC) || 1 || (defined(__cpp_range_based_for) && __cpp_range_based_for >= 201603)
# define Q_STRINGTOKENIZER_USE_SENTINEL
#endif
52
53class QStringTokenizerBaseBase
54{
55protected:
56 ~QStringTokenizerBaseBase() = default;
57 constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
58 : m_sb{sb}, m_cs{cs} {}
59
60 struct tokenizer_state {
61 qsizetype start, end, extra;
62 friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept
63 { return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; }
64 friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept
65 { return !operator==(lhs, rhs); }
66 };
67
68 Qt::SplitBehavior m_sb;
69 Qt::CaseSensitivity m_cs;
70};
71
72template <typename Haystack, typename Needle>
73class QStringTokenizerBase : protected QStringTokenizerBaseBase
74{
75 struct next_result {
76 Haystack value;
77 bool ok;
78 tokenizer_state state;
79 };
80 inline next_result next(tokenizer_state state) const noexcept;
81 inline next_result toFront() const noexcept { return next({}); }
82public:
83 constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
84 : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {}
85
86 class iterator;
87 friend class iterator;
88#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
89 class sentinel {
90 friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; }
91 friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; }
92 };
93#else
94 using sentinel = iterator;
95#endif
96 class iterator {
97 const QStringTokenizerBase *tokenizer;
98 next_result current;
99 friend class QStringTokenizerBase;
100 explicit iterator(const QStringTokenizerBase &t) noexcept
101 : tokenizer{&t}, current{t.toFront()} {}
102 public:
103 using difference_type = qsizetype;
104 using value_type = Haystack;
105 using pointer = const value_type*;
106 using reference = const value_type&;
107 using iterator_category = std::forward_iterator_tag;
108
109 iterator() noexcept = default;
110
111 // violates std::forward_iterator (returns a reference into the iterator)
112 [[nodiscard]] constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), &current.value; }
113 [[nodiscard]] constexpr const Haystack& operator*() const { return *operator->(); }
114
115 iterator& operator++() { advance(); return *this; }
116 iterator operator++(int) { auto tmp = *this; advance(); return tmp; }
117
118 friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept
119 { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); }
120 friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept
121 { return !operator==(lhs, rhs); }
122#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
123 friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept
124 { return !lhs.current.ok; }
125 friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept
126 { return !operator==(lhs, sentinel{}); }
127 friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept
128 { return !rhs.current.ok; }
129 friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept
130 { return !operator==(sentinel{}, rhs); }
131#endif
132 private:
133 void advance() {
134 Q_ASSERT(current.ok);
135 current = tokenizer->next(current.state);
136 }
137 };
138 using const_iterator = iterator;
139
140 using size_type = std::size_t;
141 using difference_type = typename iterator::difference_type;
142 using value_type = typename iterator::value_type;
143 using pointer = typename iterator::pointer;
144 using const_pointer = pointer;
145 using reference = typename iterator::reference;
146 using const_reference = reference;
147
148 [[nodiscard]] iterator begin() const noexcept { return iterator{*this}; }
149 [[nodiscard]] iterator cbegin() const noexcept { return begin(); }
150 template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
151 [[nodiscard]] constexpr sentinel end() const noexcept { return {}; }
152 template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
153 [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; }
154
155private:
156 Haystack m_haystack;
157 Needle m_needle;
158};
159
160QT_BEGIN_INCLUDE_NAMESPACE
161#include <QtCore/qstringview.h>
162QT_END_INCLUDE_NAMESPACE
163
164namespace QtPrivate {
165namespace Tok {
166
167 constexpr qsizetype size(QChar) noexcept { return 1; }
168 template <typename String>
169 constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); }
170
171 template <typename String> struct ViewForImpl {};
172 template <> struct ViewForImpl<QStringView> { using type = QStringView; };
173 template <> struct ViewForImpl<QLatin1String> { using type = QLatin1String; };
174 template <> struct ViewForImpl<QChar> { using type = QChar; };
175 template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {};
176 template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {};
177 template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {};
178 template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {};
179 template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {};
180 template <typename LHS, typename RHS>
181 struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {};
182 template <typename Char, typename...Args>
183 struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {};
184#ifdef __cpp_lib_string_view
185 template <typename Char, typename...Args>
186 struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {};
187#endif
188
189 // This metafunction maps a StringLike to a View (currently, QChar,
190 // QStringView, QLatin1String). This is what QStringTokenizerBase
191 // operates on. QStringTokenizer adds pinning to keep rvalues alive
192 // for the duration of the algorithm.
193 template <typename String>
194 using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type;
195
196 // Pinning:
197 // rvalues of owning string types need to be moved into QStringTokenizer
198 // to keep them alive for the lifetime of the tokenizer. For lvalues, we
199 // assume the user takes care of that.
200
201 // default: don't pin anything (characters are pinned implicitly)
202 template <typename String>
203 struct PinForImpl { using type = ViewFor<String>; };
204
205 // rvalue QString -> QString
206 template <>
207 struct PinForImpl<QString> { using type = QString; };
208
209 // rvalue std::basic_string -> basic_string
210 template <typename Char, typename...Args>
211 struct PinForImpl<std::basic_string<Char, Args...>>
212 { using type = std::basic_string<Char, Args...>; };
213
214 // rvalue QStringBuilder -> pin as the nested ConvertTo type
215 template <typename LHS, typename RHS>
216 struct PinForImpl<QStringBuilder<LHS, RHS>>
217 : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {};
218
219 template <typename StringLike>
220 using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type;
221
222 template <typename T> struct is_owning_string_type : std::false_type {};
223 template <> struct is_owning_string_type<QString> : std::true_type {};
224 template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {};
225
226 // unpinned
227 template <typename T, bool pinned = is_owning_string_type<T>::value>
228 struct Pinning
229 {
230 // this is the storage for non-pinned types - no storage
231 constexpr Pinning(const T&) noexcept {}
232 // Since we don't store something, the view() method needs to be
233 // given something it can return.
234 constexpr T view(T t) const noexcept { return t; }
235 };
236
237 // pinned
238 template <typename T>
239 struct Pinning<T, true>
240 {
241 T m_string;
242 // specialisation for owning string types (QString, std::u16string):
243 // stores the string:
244 constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {}
245 // ... and thus view() uses that instead of the argument passed in:
246 constexpr QStringView view(const T&) const noexcept { return m_string; }
247 };
248
249 // NeedlePinning and HaystackPinning are there to distinguish them as
250 // base classes of QStringTokenizer. We use inheritance to reap the
251 // empty base class optimization.
252 template <typename T>
253 struct NeedlePinning : Pinning<T>
254 {
255 using Pinning<T>::Pinning;
256 template <typename Arg>
257 constexpr auto needleView(Arg &&a) noexcept
258 -> decltype(this->view(std::forward<Arg>(a)))
259 { return this->view(std::forward<Arg>(a)); }
260 };
261
262 template <typename T>
263 struct HaystackPinning : Pinning<T>
264 {
265 using Pinning<T>::Pinning;
266 template <typename Arg>
267 constexpr auto haystackView(Arg &&a) noexcept
268 -> decltype(this->view(std::forward<Arg>(a)))
269 { return this->view(std::forward<Arg>(a)); }
270 };
271
272 // The Base of a QStringTokenizer is QStringTokenizerBase for the views
273 // corresponding to the Haystack and Needle template arguments
274 //
275 // ie. QStringTokenizer<QString, QString>
276 // : QStringTokenizerBase<QStringView, QStringView> (+ pinning)
277 template <typename Haystack, typename Needle>
278 using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>;
279} // namespace Tok
280} // namespace QtPrivate
281
282template <typename Haystack, typename Needle>
283class QStringTokenizer
284 : private QtPrivate::Tok::HaystackPinning<Haystack>,
285 private QtPrivate::Tok::NeedlePinning<Needle>,
286 public QtPrivate::Tok::TokenizerBase<Haystack, Needle>
287{
288 using HPin = QtPrivate::Tok::HaystackPinning<Haystack>;
289 using NPin = QtPrivate::Tok::NeedlePinning<Needle>;
290 using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>;
291 template <typename Container, typename HPin>
292 struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {};
293 template <typename Container>
294 using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type;
295 template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))>
296 using if_compatible_container = typename std::enable_if<
297 std::is_same<
298 typename Base::value_type,
299 typename std::iterator_traits<Iterator>::value_type
300 >::value,
301 bool
302 >::type;
303public:
304 using value_type = typename Base::value_type;
305
306 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
307 Qt::CaseSensitivity cs,
308 Qt::SplitBehavior sb = Qt::KeepEmptyParts)
309 noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
310 // here, we present the haystack to Pinning<>, for optional storing.
311 // If it did store, haystack is moved-from and mustn't be touched
312 // any longer, which is why view() for these Pinning<>s ignores the
313 // argument.
314 : HPin{std::forward<Haystack>(haystack)},
315 NPin{std::forward<Needle>(needle)},
316 // If Pinning<> didn't store, we pass the haystack (ditto needle)
317 // to view() again, so it can be copied from there.
318 Base{this->haystackView(haystack),
319 this->needleView(needle), sb, cs}
320 {}
321 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
322 Qt::SplitBehavior sb = Qt::KeepEmptyParts,
323 Qt::CaseSensitivity cs = Qt::CaseSensitive)
324 noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
325 : HPin{std::forward<Haystack>(haystack)},
326 NPin{std::forward<Needle>(needle)},
327 Base{this->haystackView(haystack),
328 this->needleView(needle), sb, cs}
329 {}
330
331#ifdef Q_QDOC
332 template<typename Container>
333#else
334 template<typename Container = QList<value_type>, if_compatible_container<Container> = true>
335#endif
336 Container toContainer(Container &&c = {}) const &
337 {
338 for (auto e : *this)
339 c.emplace_back(e);
340 return std::move(c);
341 }
342
343#ifdef Q_QDOC
344 template<typename Container>
345#else
346 template<typename Container = QList<value_type>, if_compatible_container<Container> = true,
347 if_haystack_not_pinned<Container> = true>
348#endif
349 Container toContainer(Container &&c = {}) const &&
350 {
351 for (auto e : *this)
352 c.emplace_back(e);
353 return std::move(c);
354 }
355};
356
357namespace QtPrivate {
358namespace Tok {
359// This meta function just calculated the template arguments for the
360// QStringTokenizer (not -Base), based on the actual arguments passed
361// to qTokenize() (or the ctor, with CTAD). It basically detects rvalue
362// QString and std::basic_string and otherwise decays the arguments to
363// the respective view type.
364//
365// #define works around a C++ restriction: [temp.deduct.guide]/3 seems
366// to ask for the simple-template-id following the `->` of a deduction
367// guide to be identical to the class name for which we guide deduction.
368// In particular, Clang rejects a template alias there, while GCC accepts
369// it.
370#define Q_TOK_RESULT \
371 QStringTokenizer< \
372 QtPrivate::Tok::PinFor<Haystack>, \
373 QtPrivate::Tok::PinFor<Needle> \
374 > \
375 /*end*/
376template <typename Haystack, typename Needle>
377using TokenizerResult = Q_TOK_RESULT;
378template <typename Haystack, typename Needle>
379using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>;
380}
381}
382
383#ifdef __cpp_deduction_guides
384// these tell the compiler how to determine the QStringTokenizer
385// template arguments based on the constructor arguments (CTAD):
386template <typename Haystack, typename Needle>
387QStringTokenizer(Haystack&&, Needle&&)
388 -> Q_TOK_RESULT;
389template <typename Haystack, typename Needle>
390QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior)
391 -> Q_TOK_RESULT;
392template <typename Haystack, typename Needle>
393QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity)
394 -> Q_TOK_RESULT;
395template <typename Haystack, typename Needle>
396QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity)
397 -> Q_TOK_RESULT;
398template <typename Haystack, typename Needle>
399QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior)
400 -> Q_TOK_RESULT;
401#endif
402
403#undef Q_TOK_RESULT
404
405template <typename Haystack, typename Needle, typename...Flags>
406[[nodiscard]] constexpr auto
407qTokenize(Haystack &&h, Needle &&n, Flags...flags)
408 noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value)
409 -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
410 std::forward<Needle>(n), flags...})
411{ return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
412 std::forward<Needle>(n),
413 flags...}; }
414
415template <typename Haystack, typename Needle>
416auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result
417{
418 while (true) {
419 if (state.end < 0) {
420 // already at end:
421 return {{}, false, state};
422 }
423 state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs);
424 Haystack result;
425 if (state.end >= 0) {
426 // token separator found => return intermediate element:
427 result = m_haystack.sliced(state.start, state.end - state.start);
428 const auto ns = QtPrivate::Tok::size(m_needle);
429 state.start = state.end + ns;
430 state.extra = (ns == 0 ? 1 : 0);
431 } else {
432 // token separator not found => return final element:
433 result = m_haystack.sliced(state.start);
434 }
435 if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty())
436 continue;
437 return {result, true, state};
438 }
439}
440
441QT_END_NAMESPACE
442
443#endif /* QSTRINGTOKENIZER_H */
444