1// Copyright 2017 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15
16// This file declares INTERNAL parts of the Split API that are inline/templated
17// or otherwise need to be available at compile time. The main abstractions
18// defined in here are
19//
20// - ConvertibleToStringView
21// - SplitIterator<>
22// - Splitter<>
23//
24// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25// absl/strings/str_split.h.
26//
27// IWYU pragma: private, include "absl/strings/str_split.h"
28
29#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31
32#include <array>
33#include <initializer_list>
34#include <iterator>
35#include <map>
36#include <type_traits>
37#include <utility>
38#include <vector>
39
40#include "absl/base/macros.h"
41#include "absl/base/port.h"
42#include "absl/meta/type_traits.h"
43#include "absl/strings/string_view.h"
44
45#ifdef _GLIBCXX_DEBUG
46#include "absl/strings/internal/stl_type_traits.h"
47#endif // _GLIBCXX_DEBUG
48
49namespace absl {
50namespace strings_internal {
51
52// This class is implicitly constructible from everything that absl::string_view
53// is implicitly constructible from. If it's constructed from a temporary
54// string, the data is moved into a data member so its lifetime matches that of
55// the ConvertibleToStringView instance.
56class ConvertibleToStringView {
57 public:
58 ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
59 : value_(s) {}
60 ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
61 ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
62 : value_(s) {}
63 ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
64 : value_(s) {}
65
66 // Matches rvalue strings and moves their data to a member.
67ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit)
68 : copy_(std::move(s)), value_(copy_) {}
69
70 ConvertibleToStringView(const ConvertibleToStringView& other)
71 : copy_(other.copy_),
72 value_(other.IsSelfReferential() ? copy_ : other.value_) {}
73
74 ConvertibleToStringView(ConvertibleToStringView&& other) {
75 StealMembers(std::move(other));
76 }
77
78 ConvertibleToStringView& operator=(ConvertibleToStringView other) {
79 StealMembers(std::move(other));
80 return *this;
81 }
82
83 absl::string_view value() const { return value_; }
84
85 private:
86 // Returns true if ctsp's value refers to its internal copy_ member.
87 bool IsSelfReferential() const { return value_.data() == copy_.data(); }
88
89 void StealMembers(ConvertibleToStringView&& other) {
90 if (other.IsSelfReferential()) {
91 copy_ = std::move(other.copy_);
92 value_ = copy_;
93 other.value_ = other.copy_;
94 } else {
95 value_ = other.value_;
96 }
97 }
98
99 // Holds the data moved from temporary std::string arguments. Declared first
100 // so that 'value' can refer to 'copy_'.
101 std::string copy_;
102 absl::string_view value_;
103};
104
105// An iterator that enumerates the parts of a string from a Splitter. The text
106// to be split, the Delimiter, and the Predicate are all taken from the given
107// Splitter object. Iterators may only be compared if they refer to the same
108// Splitter instance.
109//
110// This class is NOT part of the public splitting API.
111template <typename Splitter>
112class SplitIterator {
113 public:
114 using iterator_category = std::input_iterator_tag;
115 using value_type = absl::string_view;
116 using difference_type = ptrdiff_t;
117 using pointer = const value_type*;
118 using reference = const value_type&;
119
120 enum State { kInitState, kLastState, kEndState };
121 SplitIterator(State state, const Splitter* splitter)
122 : pos_(0),
123 state_(state),
124 splitter_(splitter),
125 delimiter_(splitter->delimiter()),
126 predicate_(splitter->predicate()) {
127 // Hack to maintain backward compatibility. This one block makes it so an
128 // empty absl::string_view whose .data() happens to be nullptr behaves
129 // *differently* from an otherwise empty absl::string_view whose .data() is
130 // not nullptr. This is an undesirable difference in general, but this
131 // behavior is maintained to avoid breaking existing code that happens to
132 // depend on this old behavior/bug. Perhaps it will be fixed one day. The
133 // difference in behavior is as follows:
134 // Split(absl::string_view(""), '-'); // {""}
135 // Split(absl::string_view(), '-'); // {}
136 if (splitter_->text().data() == nullptr) {
137 state_ = kEndState;
138 pos_ = splitter_->text().size();
139 return;
140 }
141
142 if (state_ == kEndState) {
143 pos_ = splitter_->text().size();
144 } else {
145 ++(*this);
146 }
147 }
148
149 bool at_end() const { return state_ == kEndState; }
150
151 reference operator*() const { return curr_; }
152 pointer operator->() const { return &curr_; }
153
154 SplitIterator& operator++() {
155 do {
156 if (state_ == kLastState) {
157 state_ = kEndState;
158 return *this;
159 }
160 const absl::string_view text = splitter_->text();
161 const absl::string_view d = delimiter_.Find(text, pos_);
162 if (d.data() == text.data() + text.size()) state_ = kLastState;
163 curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
164 pos_ += curr_.size() + d.size();
165 } while (!predicate_(curr_));
166 return *this;
167 }
168
169 SplitIterator operator++(int) {
170 SplitIterator old(*this);
171 ++(*this);
172 return old;
173 }
174
175 friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
176 return a.state_ == b.state_ && a.pos_ == b.pos_;
177 }
178
179 friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
180 return !(a == b);
181 }
182
183 private:
184 size_t pos_;
185 State state_;
186 absl::string_view curr_;
187 const Splitter* splitter_;
188 typename Splitter::DelimiterType delimiter_;
189 typename Splitter::PredicateType predicate_;
190};
191
192// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
193template <typename T, typename = void>
194struct HasMappedType : std::false_type {};
195template <typename T>
196struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
197 : std::true_type {};
198
199// HasValueType<T>::value is true iff there exists a type T::value_type.
200template <typename T, typename = void>
201struct HasValueType : std::false_type {};
202template <typename T>
203struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
204};
205
206// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
207template <typename T, typename = void>
208struct HasConstIterator : std::false_type {};
209template <typename T>
210struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
211 : std::true_type {};
212
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
// details below in Splitter<> where this is used.
//
// The answer comes from overload resolution on a T*: a pointer to some
// std::initializer_list<U> selects the templated overload (yes), and any other
// pointer falls through to the variadic catch-all (no). The dispatch functions
// are only ever named inside decltype, so they are declared but never defined.
template <typename Element>
std::true_type IsInitializerListDispatch(std::initializer_list<Element>*);
std::false_type IsInitializerListDispatch(...);  // default: No

template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(std::declval<T*>())) {};
221
// SplitterIsConvertibleTo<C>::value is true iff the conditions described below
// hold for type 'C'.
//
// Conversion is restricted to container-like types (detected by the presence
// of value_type and const_iterator member types) whose elements can be
// constructed from an absl::string_view. Conversion to an
// std::initializer_list (which also has a const_iterator) is explicitly
// disabled. Otherwise, code compiled in C++11 will get an error due to
// ambiguous conversion paths (in C++11 std::vector<T>::operator= is overloaded
// to take either a std::vector<T> or an std::initializer_list<T>).
231
232template <typename C, bool has_value_type, bool has_mapped_type>
233struct SplitterIsConvertibleToImpl : std::false_type {};
234
235template <typename C>
236struct SplitterIsConvertibleToImpl<C, true, false>
237 : std::is_constructible<typename C::value_type, absl::string_view> {};
238
239template <typename C>
240struct SplitterIsConvertibleToImpl<C, true, true>
241 : absl::conjunction<
242 std::is_constructible<typename C::key_type, absl::string_view>,
243 std::is_constructible<typename C::mapped_type, absl::string_view>> {};
244
245template <typename C>
246struct SplitterIsConvertibleTo
247 : SplitterIsConvertibleToImpl<
248 C,
249#ifdef _GLIBCXX_DEBUG
250 !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
251#endif // _GLIBCXX_DEBUG
252 !IsInitializerList<
253 typename std::remove_reference<C>::type>::value &&
254 HasValueType<C>::value && HasConstIterator<C>::value,
255 HasMappedType<C>::value> {
256};
257
258// This class implements the range that is returned by absl::StrSplit(). This
259// class has templated conversion operators that allow it to be implicitly
260// converted to a variety of types that the caller may have specified on the
261// left-hand side of an assignment.
262//
263// The main interface for interacting with this class is through its implicit
264// conversion operators. However, this class may also be used like a container
265// in that it has .begin() and .end() member functions. It may also be used
266// within a range-for loop.
267//
268// Output containers can be collections of any type that is constructible from
269// an absl::string_view.
270//
271// An Predicate functor may be supplied. This predicate will be used to filter
272// the split strings: only strings for which the predicate returns true will be
273// kept. A Predicate object is any unary functor that takes an absl::string_view
274// and returns bool.
275template <typename Delimiter, typename Predicate>
276class Splitter {
277 public:
278 using DelimiterType = Delimiter;
279 using PredicateType = Predicate;
280 using const_iterator = strings_internal::SplitIterator<Splitter>;
281 using value_type = typename std::iterator_traits<const_iterator>::value_type;
282
283 Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
284 : text_(std::move(input_text)),
285 delimiter_(std::move(d)),
286 predicate_(std::move(p)) {}
287
288 absl::string_view text() const { return text_.value(); }
289 const Delimiter& delimiter() const { return delimiter_; }
290 const Predicate& predicate() const { return predicate_; }
291
292 // Range functions that iterate the split substrings as absl::string_view
293 // objects. These methods enable a Splitter to be used in a range-based for
294 // loop.
295 const_iterator begin() const { return {const_iterator::kInitState, this}; }
296 const_iterator end() const { return {const_iterator::kEndState, this}; }
297
298 // An implicit conversion operator that is restricted to only those containers
299 // that the splitter is convertible to.
300 template <typename Container,
301 typename = typename std::enable_if<
302 SplitterIsConvertibleTo<Container>::value>::type>
303 operator Container() const { // NOLINT(runtime/explicit)
304 return ConvertToContainer<Container, typename Container::value_type,
305 HasMappedType<Container>::value>()(*this);
306 }
307
308 // Returns a pair with its .first and .second members set to the first two
309 // strings returned by the begin() iterator. Either/both of .first and .second
310 // will be constructed with empty strings if the iterator doesn't have a
311 // corresponding value.
312 template <typename First, typename Second>
313 operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
314 absl::string_view first, second;
315 auto it = begin();
316 if (it != end()) {
317 first = *it;
318 if (++it != end()) {
319 second = *it;
320 }
321 }
322 return {First(first), Second(second)};
323 }
324
325 private:
326 // ConvertToContainer is a functor converting a Splitter to the requested
327 // Container of ValueType. It is specialized below to optimize splitting to
328 // certain combinations of Container and ValueType.
329 //
330 // This base template handles the generic case of storing the split results in
331 // the requested non-map-like container and converting the split substrings to
332 // the requested type.
333 template <typename Container, typename ValueType, bool is_map = false>
334 struct ConvertToContainer {
335 Container operator()(const Splitter& splitter) const {
336 Container c;
337 auto it = std::inserter(c, c.end());
338 for (const auto sp : splitter) {
339 *it++ = ValueType(sp);
340 }
341 return c;
342 }
343 };
344
345 // Partial specialization for a std::vector<absl::string_view>.
346 //
347 // Optimized for the common case of splitting to a
348 // std::vector<absl::string_view>. In this case we first split the results to
349 // a small array of absl::string_view on the stack, to reduce reallocations.
350 template <typename A>
351 struct ConvertToContainer<std::vector<absl::string_view, A>,
352 absl::string_view, false> {
353 std::vector<absl::string_view, A> operator()(
354 const Splitter& splitter) const {
355 struct raw_view {
356 const char* data;
357 size_t size;
358 operator absl::string_view() const { // NOLINT(runtime/explicit)
359 return {data, size};
360 }
361 };
362 std::vector<absl::string_view, A> v;
363 std::array<raw_view, 16> ar;
364 for (auto it = splitter.begin(); !it.at_end();) {
365 size_t index = 0;
366 do {
367 ar[index].data = it->data();
368 ar[index].size = it->size();
369 ++it;
370 } while (++index != ar.size() && !it.at_end());
371 v.insert(v.end(), ar.begin(), ar.begin() + index);
372 }
373 return v;
374 }
375 };
376
377 // Partial specialization for a std::vector<std::string>.
378 //
379 // Optimized for the common case of splitting to a std::vector<std::string>.
380 // In this case we first split the results to a std::vector<absl::string_view>
381 // so the returned std::vector<std::string> can have space reserved to avoid
382 // std::string moves.
383 template <typename A>
384 struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
385 std::vector<std::string, A> operator()(const Splitter& splitter) const {
386 const std::vector<absl::string_view> v = splitter;
387 return std::vector<std::string, A>(v.begin(), v.end());
388 }
389 };
390
391 // Partial specialization for containers of pairs (e.g., maps).
392 //
393 // The algorithm is to insert a new pair into the map for each even-numbered
394 // item, with the even-numbered item as the key with a default-constructed
395 // value. Each odd-numbered item will then be assigned to the last pair's
396 // value.
397 template <typename Container, typename First, typename Second>
398 struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
399 Container operator()(const Splitter& splitter) const {
400 Container m;
401 typename Container::iterator it;
402 bool insert = true;
403 for (const auto sp : splitter) {
404 if (insert) {
405 it = Inserter<Container>::Insert(&m, First(sp), Second());
406 } else {
407 it->second = Second(sp);
408 }
409 insert = !insert;
410 }
411 return m;
412 }
413
414 // Inserts the key and value into the given map, returning an iterator to
415 // the inserted item. Specialized for std::map and std::multimap to use
416 // emplace() and adapt emplace()'s return value.
417 template <typename Map>
418 struct Inserter {
419 using M = Map;
420 template <typename... Args>
421 static typename M::iterator Insert(M* m, Args&&... args) {
422 return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
423 }
424 };
425
426 template <typename... Ts>
427 struct Inserter<std::map<Ts...>> {
428 using M = std::map<Ts...>;
429 template <typename... Args>
430 static typename M::iterator Insert(M* m, Args&&... args) {
431 return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
432 }
433 };
434
435 template <typename... Ts>
436 struct Inserter<std::multimap<Ts...>> {
437 using M = std::multimap<Ts...>;
438 template <typename... Args>
439 static typename M::iterator Insert(M* m, Args&&... args) {
440 return m->emplace(std::make_pair(std::forward<Args>(args)...));
441 }
442 };
443 };
444
445 ConvertibleToStringView text_;
446 Delimiter delimiter_;
447 Predicate predicate_;
448};
449
450} // namespace strings_internal
451} // namespace absl
452
453#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
454