1/*
2 * Copyright 2014-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18#define FOLLY_GEN_STRING_H_
19
20#include <folly/Range.h>
21#include <folly/gen/Base.h>
22#include <folly/io/IOBuf.h>
23
24namespace folly {
25namespace gen {
26
// Forward declarations of the implementation types named by the factory
// functions in this header. They are only named here (as default template
// arguments and return types), so they may remain incomplete until a factory
// is actually instantiated. Presumably they are defined in
// folly/gen/String-inl.h, which is included at the end of this header.
namespace detail {

// Operator type returned by resplit().
class StringResplitter;

// Source type returned by split() and lines().
template <class Delimiter>
class SplitStringSource;

// Functor wrapped in detail::Map by eachToTuple() / eachToPair().
template <class TargetContainer, class Delimiter, class... Targets>
class SplitTo;

// Type returned by the value-returning unsplit() overloads.
template <class Delimiter, class Output>
class Unsplit;

// Type returned by the unsplit() overloads that take an output buffer.
template <class Delimiter, class OutputBuffer>
class UnsplitBuffer;

} // namespace detail
43
44/**
45 * Split the output from a generator into StringPiece "lines" delimited by
46 * the given delimiter. Delimters are NOT included in the output.
47 *
48 * resplit() behaves as if the input strings were concatenated into one long
49 * string and then split.
50 *
51 * Equivalently, you can use StreamSplitter outside of a folly::gen setting.
52 */
53// make this a template so we don't require StringResplitter to be complete
54// until use
55template <class S = detail::StringResplitter>
56S resplit(char delimiter, bool keepDelimiter = false) {
57 return S(delimiter, keepDelimiter);
58}
59
60template <class S = detail::SplitStringSource<char>>
61S split(const StringPiece source, char delimiter) {
62 return S(source, delimiter);
63}
64
65template <class S = detail::SplitStringSource<StringPiece>>
66S split(StringPiece source, StringPiece delimiter) {
67 return S(source, delimiter);
68}
69
/**
 * Tag type standing for "any end-of-line terminator": "\r", "\n", or "\r\n".
 * It carries no state; lines() passes it as the delimiter when building its
 * split source.
 */
class MixedNewlines {
};
74
75/**
76 * Split by EOL ("\r", "\n", or "\r\n").
77 * @see split().
78 */
79template <class S = detail::SplitStringSource<MixedNewlines>>
80S lines(StringPiece source) {
81 return S(source, MixedNewlines{});
82}
83
84/*
85 * Joins a sequence of tokens into a string, with the chosen delimiter.
86 *
87 * E.G.
88 * fbstring result = split("a,b,c", ",") | unsplit(",");
89 * assert(result == "a,b,c");
90 *
91 * std::string result = split("a,b,c", ",") | unsplit<std::string>(" ");
92 * assert(result == "a b c");
93 */
94
95// NOTE: The template arguments are reversed to allow the user to cleanly
96// specify the output type while still inferring the type of the delimiter.
97template <
98 class Output = folly::fbstring,
99 class Delimiter,
100 class Unsplit = detail::Unsplit<Delimiter, Output>>
101Unsplit unsplit(const Delimiter& delimiter) {
102 return Unsplit(delimiter);
103}
104
105template <
106 class Output = folly::fbstring,
107 class Unsplit = detail::Unsplit<fbstring, Output>>
108Unsplit unsplit(const char* delimiter) {
109 return Unsplit(delimiter);
110}
111
112/*
113 * Joins a sequence of tokens into a string, appending them to the output
114 * buffer. If the output buffer is empty, an initial delimiter will not be
115 * inserted at the start.
116 *
117 * E.G.
118 * std::string buffer;
119 * split("a,b,c", ",") | unsplit(",", &buffer);
120 * assert(buffer == "a,b,c");
121 *
122 * std::string anotherBuffer("initial");
123 * split("a,b,c", ",") | unsplit(",", &anotherbuffer);
124 * assert(anotherBuffer == "initial,a,b,c");
125 */
126template <
127 class Delimiter,
128 class OutputBuffer,
129 class UnsplitBuffer = detail::UnsplitBuffer<Delimiter, OutputBuffer>>
130UnsplitBuffer unsplit(Delimiter delimiter, OutputBuffer* outputBuffer) {
131 return UnsplitBuffer(delimiter, outputBuffer);
132}
133
134template <
135 class OutputBuffer,
136 class UnsplitBuffer = detail::UnsplitBuffer<fbstring, OutputBuffer>>
137UnsplitBuffer unsplit(const char* delimiter, OutputBuffer* outputBuffer) {
138 return UnsplitBuffer(delimiter, outputBuffer);
139}
140
141template <class... Targets>
142detail::Map<detail::SplitTo<std::tuple<Targets...>, char, Targets...>>
143eachToTuple(char delim) {
144 return detail::Map<detail::SplitTo<std::tuple<Targets...>, char, Targets...>>(
145 detail::SplitTo<std::tuple<Targets...>, char, Targets...>(delim));
146}
147
148template <class... Targets>
149detail::Map<detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>>
150eachToTuple(StringPiece delim) {
151 return detail::Map<
152 detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>>(
153 detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>(delim));
154}
155
156template <class First, class Second>
157detail::Map<detail::SplitTo<std::pair<First, Second>, char, First, Second>>
158eachToPair(char delim) {
159 return detail::Map<
160 detail::SplitTo<std::pair<First, Second>, char, First, Second>>(
161 detail::SplitTo<std::pair<First, Second>, char, First, Second>(delim));
162}
163
164template <class First, class Second>
165detail::Map<detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>>
166eachToPair(StringPiece delim) {
167 return detail::Map<
168 detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>>(
169 detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>(
170 to<fbstring>(delim)));
171}
172
173/**
174 * Outputs exactly the same bytes as the input stream, in different chunks.
175 * A chunk boundary occurs after each delimiter, or, if maxLength is
176 * non-zero, after maxLength bytes, whichever comes first. Your callback
177 * can return false to stop consuming the stream at any time.
178 *
179 * The splitter buffers the last incomplete chunk, so you must call flush()
180 * to consume the piece of the stream after the final delimiter. This piece
181 * may be empty. After a flush(), the splitter can be re-used for a new
182 * stream.
183 *
184 * operator() and flush() return false iff your callback returns false. The
185 * internal buffer is not flushed, so reusing such a splitter will have
186 * indeterminate results. Same goes if your callback throws. Feel free to
187 * fix these corner cases if needed.
188 *
189 * Tips:
190 * - Create via streamSplitter() to take advantage of template deduction.
191 * - If your callback needs an end-of-stream signal, test for "no
192 * trailing delimiter **and** shorter than maxLength".
193 * - You can fine-tune the initial capacity of the internal IOBuf.
194 */
195template <class Callback>
196class StreamSplitter {
197 public:
198 StreamSplitter(
199 char delimiter,
200 Callback&& pieceCb,
201 uint64_t maxLength = 0,
202 uint64_t initialCapacity = 0)
203 : buffer_(IOBuf::CREATE, initialCapacity),
204 delimiter_(delimiter),
205 maxLength_(maxLength),
206 pieceCb_(std::move(pieceCb)) {}
207
208 /**
209 * Consume any incomplete last line (may be empty). Do this before
210 * destroying the StreamSplitter, or you will fail to consume part of the
211 * input.
212 *
213 * After flush() you may proceed to consume the next stream via ().
214 *
215 * Returns false if the callback wants no more data, true otherwise.
216 * A return value of false means that this splitter must no longer be used.
217 */
218 bool flush();
219
220 /**
221 * Consume another piece of the input stream.
222 *
223 * Returns false only if your callback refuses to consume more data by
224 * returning false (true otherwise). A return value of false means that
225 * this splitter must no longer be used.
226 */
227 bool operator()(StringPiece in);
228
229 private:
230 // Holds the current "incomplete" chunk so that chunks can span calls to ()
231 IOBuf buffer_;
232 char delimiter_;
233 uint64_t maxLength_; // The callback never gets more chars than this
234 Callback pieceCb_;
235};
236
237template <class Callback> // Helper to enable template deduction
238StreamSplitter<Callback>
239streamSplitter(char delimiter, Callback&& pieceCb, uint64_t capacity = 0) {
240 return StreamSplitter<Callback>(delimiter, std::move(pieceCb), capacity);
241}
242
243} // namespace gen
244} // namespace folly
245
246#include <folly/gen/String-inl.h>
247