1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> |
4 | ** Contact: http://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | #ifndef QSTRINGTOKENIZER_H |
40 | #define QSTRINGTOKENIZER_H |
41 | |
42 | #include <QtCore/qnamespace.h> |
43 | #include <QtCore/qcontainerfwd.h> |
44 | |
45 | QT_BEGIN_NAMESPACE |
46 | |
47 | template <typename, typename> class QStringBuilder; |
48 | |
49 | #if defined(Q_QDOC) || 1 || (defined(__cpp_range_based_for) && __cpp_range_based_for >= 201603) |
50 | # define Q_STRINGTOKENIZER_USE_SENTINEL |
51 | #endif |
52 | |
53 | class QStringTokenizerBaseBase |
54 | { |
55 | protected: |
56 | ~QStringTokenizerBaseBase() = default; |
57 | constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept |
58 | : m_sb{sb}, m_cs{cs} {} |
59 | |
60 | struct tokenizer_state { |
61 | qsizetype start, end, ; |
62 | friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept |
63 | { return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; } |
64 | friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept |
65 | { return !operator==(lhs, rhs); } |
66 | }; |
67 | |
68 | Qt::SplitBehavior m_sb; |
69 | Qt::CaseSensitivity m_cs; |
70 | }; |
71 | |
72 | template <typename Haystack, typename Needle> |
73 | class QStringTokenizerBase : protected QStringTokenizerBaseBase |
74 | { |
75 | struct next_result { |
76 | Haystack value; |
77 | bool ok; |
78 | tokenizer_state state; |
79 | }; |
80 | inline next_result next(tokenizer_state state) const noexcept; |
81 | inline next_result toFront() const noexcept { return next({}); } |
82 | public: |
83 | constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept |
84 | : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {} |
85 | |
86 | class iterator; |
87 | friend class iterator; |
88 | #ifdef Q_STRINGTOKENIZER_USE_SENTINEL |
89 | class sentinel { |
90 | friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; } |
91 | friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; } |
92 | }; |
93 | #else |
94 | using sentinel = iterator; |
95 | #endif |
96 | class iterator { |
97 | const QStringTokenizerBase *tokenizer; |
98 | next_result current; |
99 | friend class QStringTokenizerBase; |
100 | explicit iterator(const QStringTokenizerBase &t) noexcept |
101 | : tokenizer{&t}, current{t.toFront()} {} |
102 | public: |
103 | using difference_type = qsizetype; |
104 | using value_type = Haystack; |
105 | using pointer = const value_type*; |
106 | using reference = const value_type&; |
107 | using iterator_category = std::forward_iterator_tag; |
108 | |
109 | iterator() noexcept = default; |
110 | |
111 | // violates std::forward_iterator (returns a reference into the iterator) |
112 | [[nodiscard]] constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), ¤t.value; } |
113 | [[nodiscard]] constexpr const Haystack& operator*() const { return *operator->(); } |
114 | |
115 | iterator& operator++() { advance(); return *this; } |
116 | iterator operator++(int) { auto tmp = *this; advance(); return tmp; } |
117 | |
118 | friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept |
119 | { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); } |
120 | friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept |
121 | { return !operator==(lhs, rhs); } |
122 | #ifdef Q_STRINGTOKENIZER_USE_SENTINEL |
123 | friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept |
124 | { return !lhs.current.ok; } |
125 | friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept |
126 | { return !operator==(lhs, sentinel{}); } |
127 | friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept |
128 | { return !rhs.current.ok; } |
129 | friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept |
130 | { return !operator==(sentinel{}, rhs); } |
131 | #endif |
132 | private: |
133 | void advance() { |
134 | Q_ASSERT(current.ok); |
135 | current = tokenizer->next(current.state); |
136 | } |
137 | }; |
138 | using const_iterator = iterator; |
139 | |
140 | using size_type = std::size_t; |
141 | using difference_type = typename iterator::difference_type; |
142 | using value_type = typename iterator::value_type; |
143 | using pointer = typename iterator::pointer; |
144 | using const_pointer = pointer; |
145 | using reference = typename iterator::reference; |
146 | using const_reference = reference; |
147 | |
148 | [[nodiscard]] iterator begin() const noexcept { return iterator{*this}; } |
149 | [[nodiscard]] iterator cbegin() const noexcept { return begin(); } |
150 | template <bool = std::is_same<iterator, sentinel>::value> // ODR protection |
151 | [[nodiscard]] constexpr sentinel end() const noexcept { return {}; } |
152 | template <bool = std::is_same<iterator, sentinel>::value> // ODR protection |
153 | [[nodiscard]] constexpr sentinel cend() const noexcept { return {}; } |
154 | |
155 | private: |
156 | Haystack m_haystack; |
157 | Needle m_needle; |
158 | }; |
159 | |
160 | QT_BEGIN_INCLUDE_NAMESPACE |
161 | #include <QtCore/qstringview.h> |
162 | QT_END_INCLUDE_NAMESPACE |
163 | |
164 | namespace QtPrivate { |
165 | namespace Tok { |
166 | |
167 | constexpr qsizetype size(QChar) noexcept { return 1; } |
168 | template <typename String> |
169 | constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); } |
170 | |
171 | template <typename String> struct ViewForImpl {}; |
172 | template <> struct ViewForImpl<QStringView> { using type = QStringView; }; |
173 | template <> struct ViewForImpl<QLatin1String> { using type = QLatin1String; }; |
174 | template <> struct ViewForImpl<QChar> { using type = QChar; }; |
175 | template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {}; |
176 | template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {}; |
177 | template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {}; |
178 | template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {}; |
179 | template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {}; |
180 | template <typename LHS, typename RHS> |
181 | struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {}; |
182 | template <typename Char, typename...Args> |
183 | struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {}; |
184 | #ifdef __cpp_lib_string_view |
185 | template <typename Char, typename...Args> |
186 | struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {}; |
187 | #endif |
188 | |
189 | // This metafunction maps a StringLike to a View (currently, QChar, |
190 | // QStringView, QLatin1String). This is what QStringTokenizerBase |
191 | // operates on. QStringTokenizer adds pinning to keep rvalues alive |
192 | // for the duration of the algorithm. |
193 | template <typename String> |
194 | using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type; |
195 | |
196 | // Pinning: |
197 | // rvalues of owning string types need to be moved into QStringTokenizer |
198 | // to keep them alive for the lifetime of the tokenizer. For lvalues, we |
199 | // assume the user takes care of that. |
200 | |
201 | // default: don't pin anything (characters are pinned implicitly) |
202 | template <typename String> |
203 | struct PinForImpl { using type = ViewFor<String>; }; |
204 | |
205 | // rvalue QString -> QString |
206 | template <> |
207 | struct PinForImpl<QString> { using type = QString; }; |
208 | |
209 | // rvalue std::basic_string -> basic_string |
210 | template <typename Char, typename...Args> |
211 | struct PinForImpl<std::basic_string<Char, Args...>> |
212 | { using type = std::basic_string<Char, Args...>; }; |
213 | |
214 | // rvalue QStringBuilder -> pin as the nested ConvertTo type |
215 | template <typename LHS, typename RHS> |
216 | struct PinForImpl<QStringBuilder<LHS, RHS>> |
217 | : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {}; |
218 | |
219 | template <typename StringLike> |
220 | using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type; |
221 | |
222 | template <typename T> struct is_owning_string_type : std::false_type {}; |
223 | template <> struct is_owning_string_type<QString> : std::true_type {}; |
224 | template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {}; |
225 | |
226 | // unpinned |
227 | template <typename T, bool pinned = is_owning_string_type<T>::value> |
228 | struct Pinning |
229 | { |
230 | // this is the storage for non-pinned types - no storage |
231 | constexpr Pinning(const T&) noexcept {} |
232 | // Since we don't store something, the view() method needs to be |
233 | // given something it can return. |
234 | constexpr T view(T t) const noexcept { return t; } |
235 | }; |
236 | |
237 | // pinned |
238 | template <typename T> |
239 | struct Pinning<T, true> |
240 | { |
241 | T m_string; |
242 | // specialisation for owning string types (QString, std::u16string): |
243 | // stores the string: |
244 | constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {} |
245 | // ... and thus view() uses that instead of the argument passed in: |
246 | constexpr QStringView view(const T&) const noexcept { return m_string; } |
247 | }; |
248 | |
249 | // NeedlePinning and HaystackPinning are there to distinguish them as |
250 | // base classes of QStringTokenizer. We use inheritance to reap the |
251 | // empty base class optimization. |
252 | template <typename T> |
253 | struct NeedlePinning : Pinning<T> |
254 | { |
255 | using Pinning<T>::Pinning; |
256 | template <typename Arg> |
257 | constexpr auto needleView(Arg &&a) noexcept |
258 | -> decltype(this->view(std::forward<Arg>(a))) |
259 | { return this->view(std::forward<Arg>(a)); } |
260 | }; |
261 | |
262 | template <typename T> |
263 | struct HaystackPinning : Pinning<T> |
264 | { |
265 | using Pinning<T>::Pinning; |
266 | template <typename Arg> |
267 | constexpr auto haystackView(Arg &&a) noexcept |
268 | -> decltype(this->view(std::forward<Arg>(a))) |
269 | { return this->view(std::forward<Arg>(a)); } |
270 | }; |
271 | |
272 | // The Base of a QStringTokenizer is QStringTokenizerBase for the views |
273 | // corresponding to the Haystack and Needle template arguments |
274 | // |
275 | // ie. QStringTokenizer<QString, QString> |
276 | // : QStringTokenizerBase<QStringView, QStringView> (+ pinning) |
277 | template <typename Haystack, typename Needle> |
278 | using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>; |
279 | } // namespace Tok |
280 | } // namespace QtPrivate |
281 | |
282 | template <typename Haystack, typename Needle> |
283 | class QStringTokenizer |
284 | : private QtPrivate::Tok::HaystackPinning<Haystack>, |
285 | private QtPrivate::Tok::NeedlePinning<Needle>, |
286 | public QtPrivate::Tok::TokenizerBase<Haystack, Needle> |
287 | { |
288 | using HPin = QtPrivate::Tok::HaystackPinning<Haystack>; |
289 | using NPin = QtPrivate::Tok::NeedlePinning<Needle>; |
290 | using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>; |
291 | template <typename Container, typename HPin> |
292 | struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {}; |
293 | template <typename Container> |
294 | using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type; |
295 | template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))> |
296 | using if_compatible_container = typename std::enable_if< |
297 | std::is_same< |
298 | typename Base::value_type, |
299 | typename std::iterator_traits<Iterator>::value_type |
300 | >::value, |
301 | bool |
302 | >::type; |
303 | public: |
304 | using value_type = typename Base::value_type; |
305 | |
306 | constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, |
307 | Qt::CaseSensitivity cs, |
308 | Qt::SplitBehavior sb = Qt::KeepEmptyParts) |
309 | noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) |
310 | // here, we present the haystack to Pinning<>, for optional storing. |
311 | // If it did store, haystack is moved-from and mustn't be touched |
312 | // any longer, which is why view() for these Pinning<>s ignores the |
313 | // argument. |
314 | : HPin{std::forward<Haystack>(haystack)}, |
315 | NPin{std::forward<Needle>(needle)}, |
316 | // If Pinning<> didn't store, we pass the haystack (ditto needle) |
317 | // to view() again, so it can be copied from there. |
318 | Base{this->haystackView(haystack), |
319 | this->needleView(needle), sb, cs} |
320 | {} |
321 | constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, |
322 | Qt::SplitBehavior sb = Qt::KeepEmptyParts, |
323 | Qt::CaseSensitivity cs = Qt::CaseSensitive) |
324 | noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) |
325 | : HPin{std::forward<Haystack>(haystack)}, |
326 | NPin{std::forward<Needle>(needle)}, |
327 | Base{this->haystackView(haystack), |
328 | this->needleView(needle), sb, cs} |
329 | {} |
330 | |
331 | #ifdef Q_QDOC |
332 | template<typename Container> |
333 | #else |
334 | template<typename Container = QList<value_type>, if_compatible_container<Container> = true> |
335 | #endif |
336 | Container toContainer(Container &&c = {}) const & |
337 | { |
338 | for (auto e : *this) |
339 | c.emplace_back(e); |
340 | return std::move(c); |
341 | } |
342 | |
343 | #ifdef Q_QDOC |
344 | template<typename Container> |
345 | #else |
346 | template<typename Container = QList<value_type>, if_compatible_container<Container> = true, |
347 | if_haystack_not_pinned<Container> = true> |
348 | #endif |
349 | Container toContainer(Container &&c = {}) const && |
350 | { |
351 | for (auto e : *this) |
352 | c.emplace_back(e); |
353 | return std::move(c); |
354 | } |
355 | }; |
356 | |
357 | namespace QtPrivate { |
358 | namespace Tok { |
359 | // This meta function just calculated the template arguments for the |
360 | // QStringTokenizer (not -Base), based on the actual arguments passed |
361 | // to qTokenize() (or the ctor, with CTAD). It basically detects rvalue |
362 | // QString and std::basic_string and otherwise decays the arguments to |
363 | // the respective view type. |
364 | // |
365 | // #define works around a C++ restriction: [temp.deduct.guide]/3 seems |
366 | // to ask for the simple-template-id following the `->` of a deduction |
367 | // guide to be identical to the class name for which we guide deduction. |
368 | // In particular, Clang rejects a template alias there, while GCC accepts |
369 | // it. |
370 | #define Q_TOK_RESULT \ |
371 | QStringTokenizer< \ |
372 | QtPrivate::Tok::PinFor<Haystack>, \ |
373 | QtPrivate::Tok::PinFor<Needle> \ |
374 | > \ |
375 | /*end*/ |
376 | template <typename Haystack, typename Needle> |
377 | using TokenizerResult = Q_TOK_RESULT; |
378 | template <typename Haystack, typename Needle> |
379 | using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>; |
380 | } |
381 | } |
382 | |
383 | #ifdef __cpp_deduction_guides |
384 | // these tell the compiler how to determine the QStringTokenizer |
385 | // template arguments based on the constructor arguments (CTAD): |
386 | template <typename Haystack, typename Needle> |
387 | QStringTokenizer(Haystack&&, Needle&&) |
388 | -> Q_TOK_RESULT; |
389 | template <typename Haystack, typename Needle> |
390 | QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior) |
391 | -> Q_TOK_RESULT; |
392 | template <typename Haystack, typename Needle> |
393 | QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity) |
394 | -> Q_TOK_RESULT; |
395 | template <typename Haystack, typename Needle> |
396 | QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity) |
397 | -> Q_TOK_RESULT; |
398 | template <typename Haystack, typename Needle> |
399 | QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior) |
400 | -> Q_TOK_RESULT; |
401 | #endif |
402 | |
403 | #undef Q_TOK_RESULT |
404 | |
405 | template <typename Haystack, typename Needle, typename...Flags> |
406 | [[nodiscard]] constexpr auto |
407 | qTokenize(Haystack &&h, Needle &&n, Flags...flags) |
408 | noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value) |
409 | -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), |
410 | std::forward<Needle>(n), flags...}) |
411 | { return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), |
412 | std::forward<Needle>(n), |
413 | flags...}; } |
414 | |
415 | template <typename Haystack, typename Needle> |
416 | auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result |
417 | { |
418 | while (true) { |
419 | if (state.end < 0) { |
420 | // already at end: |
421 | return {{}, false, state}; |
422 | } |
423 | state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs); |
424 | Haystack result; |
425 | if (state.end >= 0) { |
426 | // token separator found => return intermediate element: |
427 | result = m_haystack.sliced(state.start, state.end - state.start); |
428 | const auto ns = QtPrivate::Tok::size(m_needle); |
429 | state.start = state.end + ns; |
430 | state.extra = (ns == 0 ? 1 : 0); |
431 | } else { |
432 | // token separator not found => return final element: |
433 | result = m_haystack.sliced(state.start); |
434 | } |
435 | if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty()) |
436 | continue; |
437 | return {result, true, state}; |
438 | } |
439 | } |
440 | |
441 | QT_END_NAMESPACE |
442 | |
443 | #endif /* QSTRINGTOKENIZER_H */ |
444 | |