1 | /* |
2 | * Copyright 2014-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | #define FOLLY_GEN_STRING_H_ |
19 | |
20 | #include <folly/Range.h> |
21 | #include <folly/gen/Base.h> |
22 | #include <folly/io/IOBuf.h> |
23 | |
24 | namespace folly { |
25 | namespace gen { |
26 | |
// Forward declarations of the implementation types named by the factory
// functions in this header. Their definitions are not part of this file.
namespace detail {

// Default source type returned by split() and lines().
template <class Delimiter>
class SplitStringSource;

// Wrapped in detail::Map by eachToTuple() and eachToPair().
template <class TargetContainer, class Delimiter, class... Targets>
class SplitTo;

// Default operator type returned by resplit().
class StringResplitter;

// Default type returned by the unsplit() overloads that produce an Output.
template <class Delimiter, class Output>
class Unsplit;

// Default type returned by the unsplit() overloads that take an output
// buffer pointer.
template <class Delimiter, class OutputBuffer>
class UnsplitBuffer;

} // namespace detail
43 | |
44 | /** |
45 | * Split the output from a generator into StringPiece "lines" delimited by |
46 | * the given delimiter. Delimters are NOT included in the output. |
47 | * |
48 | * resplit() behaves as if the input strings were concatenated into one long |
49 | * string and then split. |
50 | * |
51 | * Equivalently, you can use StreamSplitter outside of a folly::gen setting. |
52 | */ |
53 | // make this a template so we don't require StringResplitter to be complete |
54 | // until use |
55 | template <class S = detail::StringResplitter> |
56 | S resplit(char delimiter, bool keepDelimiter = false) { |
57 | return S(delimiter, keepDelimiter); |
58 | } |
59 | |
60 | template <class S = detail::SplitStringSource<char>> |
61 | S split(const StringPiece source, char delimiter) { |
62 | return S(source, delimiter); |
63 | } |
64 | |
65 | template <class S = detail::SplitStringSource<StringPiece>> |
66 | S split(StringPiece source, StringPiece delimiter) { |
67 | return S(source, delimiter); |
68 | } |
69 | |
70 | /** |
71 | * EOL terms ("\r", "\n", or "\r\n"). |
72 | */ |
73 | class MixedNewlines {}; |
74 | |
75 | /** |
76 | * Split by EOL ("\r", "\n", or "\r\n"). |
77 | * @see split(). |
78 | */ |
79 | template <class S = detail::SplitStringSource<MixedNewlines>> |
80 | S lines(StringPiece source) { |
81 | return S(source, MixedNewlines{}); |
82 | } |
83 | |
84 | /* |
85 | * Joins a sequence of tokens into a string, with the chosen delimiter. |
86 | * |
87 | * E.G. |
88 | * fbstring result = split("a,b,c", ",") | unsplit(","); |
89 | * assert(result == "a,b,c"); |
90 | * |
91 | * std::string result = split("a,b,c", ",") | unsplit<std::string>(" "); |
92 | * assert(result == "a b c"); |
93 | */ |
94 | |
95 | // NOTE: The template arguments are reversed to allow the user to cleanly |
96 | // specify the output type while still inferring the type of the delimiter. |
97 | template < |
98 | class Output = folly::fbstring, |
99 | class Delimiter, |
100 | class Unsplit = detail::Unsplit<Delimiter, Output>> |
101 | Unsplit unsplit(const Delimiter& delimiter) { |
102 | return Unsplit(delimiter); |
103 | } |
104 | |
105 | template < |
106 | class Output = folly::fbstring, |
107 | class Unsplit = detail::Unsplit<fbstring, Output>> |
108 | Unsplit unsplit(const char* delimiter) { |
109 | return Unsplit(delimiter); |
110 | } |
111 | |
112 | /* |
113 | * Joins a sequence of tokens into a string, appending them to the output |
114 | * buffer. If the output buffer is empty, an initial delimiter will not be |
115 | * inserted at the start. |
116 | * |
117 | * E.G. |
118 | * std::string buffer; |
119 | * split("a,b,c", ",") | unsplit(",", &buffer); |
120 | * assert(buffer == "a,b,c"); |
121 | * |
122 | * std::string anotherBuffer("initial"); |
123 | * split("a,b,c", ",") | unsplit(",", &anotherbuffer); |
124 | * assert(anotherBuffer == "initial,a,b,c"); |
125 | */ |
126 | template < |
127 | class Delimiter, |
128 | class OutputBuffer, |
129 | class UnsplitBuffer = detail::UnsplitBuffer<Delimiter, OutputBuffer>> |
130 | UnsplitBuffer unsplit(Delimiter delimiter, OutputBuffer* outputBuffer) { |
131 | return UnsplitBuffer(delimiter, outputBuffer); |
132 | } |
133 | |
134 | template < |
135 | class OutputBuffer, |
136 | class UnsplitBuffer = detail::UnsplitBuffer<fbstring, OutputBuffer>> |
137 | UnsplitBuffer unsplit(const char* delimiter, OutputBuffer* outputBuffer) { |
138 | return UnsplitBuffer(delimiter, outputBuffer); |
139 | } |
140 | |
141 | template <class... Targets> |
142 | detail::Map<detail::SplitTo<std::tuple<Targets...>, char, Targets...>> |
143 | eachToTuple(char delim) { |
144 | return detail::Map<detail::SplitTo<std::tuple<Targets...>, char, Targets...>>( |
145 | detail::SplitTo<std::tuple<Targets...>, char, Targets...>(delim)); |
146 | } |
147 | |
148 | template <class... Targets> |
149 | detail::Map<detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>> |
150 | eachToTuple(StringPiece delim) { |
151 | return detail::Map< |
152 | detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>>( |
153 | detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>(delim)); |
154 | } |
155 | |
156 | template <class First, class Second> |
157 | detail::Map<detail::SplitTo<std::pair<First, Second>, char, First, Second>> |
158 | eachToPair(char delim) { |
159 | return detail::Map< |
160 | detail::SplitTo<std::pair<First, Second>, char, First, Second>>( |
161 | detail::SplitTo<std::pair<First, Second>, char, First, Second>(delim)); |
162 | } |
163 | |
164 | template <class First, class Second> |
165 | detail::Map<detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>> |
166 | eachToPair(StringPiece delim) { |
167 | return detail::Map< |
168 | detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>>( |
169 | detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>( |
170 | to<fbstring>(delim))); |
171 | } |
172 | |
173 | /** |
174 | * Outputs exactly the same bytes as the input stream, in different chunks. |
175 | * A chunk boundary occurs after each delimiter, or, if maxLength is |
176 | * non-zero, after maxLength bytes, whichever comes first. Your callback |
177 | * can return false to stop consuming the stream at any time. |
178 | * |
179 | * The splitter buffers the last incomplete chunk, so you must call flush() |
180 | * to consume the piece of the stream after the final delimiter. This piece |
181 | * may be empty. After a flush(), the splitter can be re-used for a new |
182 | * stream. |
183 | * |
184 | * operator() and flush() return false iff your callback returns false. The |
185 | * internal buffer is not flushed, so reusing such a splitter will have |
186 | * indeterminate results. Same goes if your callback throws. Feel free to |
187 | * fix these corner cases if needed. |
188 | * |
189 | * Tips: |
190 | * - Create via streamSplitter() to take advantage of template deduction. |
191 | * - If your callback needs an end-of-stream signal, test for "no |
192 | * trailing delimiter **and** shorter than maxLength". |
193 | * - You can fine-tune the initial capacity of the internal IOBuf. |
194 | */ |
195 | template <class Callback> |
196 | class StreamSplitter { |
197 | public: |
198 | StreamSplitter( |
199 | char delimiter, |
200 | Callback&& pieceCb, |
201 | uint64_t maxLength = 0, |
202 | uint64_t initialCapacity = 0) |
203 | : buffer_(IOBuf::CREATE, initialCapacity), |
204 | delimiter_(delimiter), |
205 | maxLength_(maxLength), |
206 | pieceCb_(std::move(pieceCb)) {} |
207 | |
208 | /** |
209 | * Consume any incomplete last line (may be empty). Do this before |
210 | * destroying the StreamSplitter, or you will fail to consume part of the |
211 | * input. |
212 | * |
213 | * After flush() you may proceed to consume the next stream via (). |
214 | * |
215 | * Returns false if the callback wants no more data, true otherwise. |
216 | * A return value of false means that this splitter must no longer be used. |
217 | */ |
218 | bool flush(); |
219 | |
220 | /** |
221 | * Consume another piece of the input stream. |
222 | * |
223 | * Returns false only if your callback refuses to consume more data by |
224 | * returning false (true otherwise). A return value of false means that |
225 | * this splitter must no longer be used. |
226 | */ |
227 | bool operator()(StringPiece in); |
228 | |
229 | private: |
230 | // Holds the current "incomplete" chunk so that chunks can span calls to () |
231 | IOBuf buffer_; |
232 | char delimiter_; |
233 | uint64_t maxLength_; // The callback never gets more chars than this |
234 | Callback pieceCb_; |
235 | }; |
236 | |
237 | template <class Callback> // Helper to enable template deduction |
238 | StreamSplitter<Callback> |
239 | streamSplitter(char delimiter, Callback&& pieceCb, uint64_t capacity = 0) { |
240 | return StreamSplitter<Callback>(delimiter, std::move(pieceCb), capacity); |
241 | } |
242 | |
243 | } // namespace gen |
244 | } // namespace folly |
245 | |
246 | #include <folly/gen/String-inl.h> |
247 | |