1 | #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ |
2 | #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ |
3 | |
4 | #include <limits.h> |
5 | #include <stddef.h> |
6 | #include <stdlib.h> |
7 | |
8 | #include <cassert> |
9 | #include <initializer_list> |
10 | #include <iosfwd> |
11 | #include <iterator> |
12 | #include <memory> |
13 | #include <vector> |
14 | |
15 | #include "absl/strings/internal/str_format/checker.h" |
16 | #include "absl/strings/internal/str_format/extension.h" |
17 | |
18 | namespace absl { |
19 | namespace str_format_internal { |
20 | |
21 | // The analyzed properties of a single specified conversion. |
22 | struct UnboundConversion { |
23 | UnboundConversion() |
24 | : flags() /* This is required to zero all the fields of flags. */ { |
25 | flags.basic = true; |
26 | } |
27 | |
28 | class InputValue { |
29 | public: |
30 | void set_value(int value) { |
31 | assert(value >= 0); |
32 | value_ = value; |
33 | } |
34 | int value() const { return value_; } |
35 | |
36 | // Marks the value as "from arg". aka the '*' format. |
37 | // Requires `value >= 1`. |
38 | // When set, is_from_arg() return true and get_from_arg() returns the |
39 | // original value. |
40 | // `value()`'s return value is unspecfied in this state. |
41 | void set_from_arg(int value) { |
42 | assert(value > 0); |
43 | value_ = -value - 1; |
44 | } |
45 | bool is_from_arg() const { return value_ < -1; } |
46 | int get_from_arg() const { |
47 | assert(is_from_arg()); |
48 | return -value_ - 1; |
49 | } |
50 | |
51 | private: |
52 | int value_ = -1; |
53 | }; |
54 | |
55 | // No need to initialize. It will always be set in the parser. |
56 | int arg_position; |
57 | |
58 | InputValue width; |
59 | InputValue precision; |
60 | |
61 | Flags flags; |
62 | LengthMod length_mod; |
63 | ConversionChar conv; |
64 | }; |
65 | |
66 | // Consume conversion spec prefix (not including '%') of [p, end) if valid. |
67 | // Examples of valid specs would be e.g.: "s", "d", "-12.6f". |
68 | // If valid, it returns the first character following the conversion spec, |
69 | // and the spec part is broken down and returned in 'conv'. |
70 | // If invalid, returns nullptr. |
71 | const char* ConsumeUnboundConversion(const char* p, const char* end, |
72 | UnboundConversion* conv, int* next_arg); |
73 | |
74 | // Helper tag class for the table below. |
75 | // It allows fast `char -> ConversionChar/LengthMod` checking and conversions. |
76 | class ConvTag { |
77 | public: |
78 | constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT |
79 | // We invert the length modifiers to make them negative so that we can easily |
80 | // test for them. |
81 | constexpr ConvTag(LengthMod::Id id) : tag_(~id) {} // NOLINT |
82 | // Everything else is -128, which is negative to make is_conv() simpler. |
83 | constexpr ConvTag() : tag_(-128) {} |
84 | |
85 | bool is_conv() const { return tag_ >= 0; } |
86 | bool is_length() const { return tag_ < 0 && tag_ != -128; } |
87 | ConversionChar as_conv() const { |
88 | assert(is_conv()); |
89 | return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_)); |
90 | } |
91 | LengthMod as_length() const { |
92 | assert(is_length()); |
93 | return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_)); |
94 | } |
95 | |
96 | private: |
97 | std::int8_t tag_; |
98 | }; |
99 | |
100 | extern const ConvTag kTags[256]; |
101 | // Keep a single table for all the conversion chars and length modifiers. |
102 | inline ConvTag GetTagForChar(char c) { |
103 | return kTags[static_cast<unsigned char>(c)]; |
104 | } |
105 | |
106 | // Parse the format string provided in 'src' and pass the identified items into |
107 | // 'consumer'. |
108 | // Text runs will be passed by calling |
109 | // Consumer::Append(string_view); |
110 | // ConversionItems will be passed by calling |
111 | // Consumer::ConvertOne(UnboundConversion, string_view); |
112 | // In the case of ConvertOne, the string_view that is passed is the |
113 | // portion of the format string corresponding to the conversion, not including |
114 | // the leading %. On success, it returns true. On failure, it stops and returns |
115 | // false. |
116 | template <typename Consumer> |
117 | bool ParseFormatString(string_view src, Consumer consumer) { |
118 | int next_arg = 0; |
119 | const char* p = src.data(); |
120 | const char* const end = p + src.size(); |
121 | while (p != end) { |
122 | const char* percent = static_cast<const char*>(memchr(p, '%', end - p)); |
123 | if (!percent) { |
124 | // We found the last substring. |
125 | return consumer.Append(string_view(p, end - p)); |
126 | } |
127 | // We found a percent, so push the text run then process the percent. |
128 | if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) { |
129 | return false; |
130 | } |
131 | if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false; |
132 | |
133 | auto tag = GetTagForChar(percent[1]); |
134 | if (tag.is_conv()) { |
135 | if (ABSL_PREDICT_FALSE(next_arg < 0)) { |
136 | // This indicates an error in the format std::string. |
137 | // The only way to get `next_arg < 0` here is to have a positional |
138 | // argument first which sets next_arg to -1 and then a non-positional |
139 | // argument. |
140 | return false; |
141 | } |
142 | p = percent + 2; |
143 | |
144 | // Keep this case separate from the one below. |
145 | // ConvertOne is more efficient when the compiler can see that the `basic` |
146 | // flag is set. |
147 | UnboundConversion conv; |
148 | conv.conv = tag.as_conv(); |
149 | conv.arg_position = ++next_arg; |
150 | if (ABSL_PREDICT_FALSE( |
151 | !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) { |
152 | return false; |
153 | } |
154 | } else if (percent[1] != '%') { |
155 | UnboundConversion conv; |
156 | p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); |
157 | if (ABSL_PREDICT_FALSE(p == nullptr)) return false; |
158 | if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( |
159 | conv, string_view(percent + 1, p - (percent + 1))))) { |
160 | return false; |
161 | } |
162 | } else { |
163 | if (ABSL_PREDICT_FALSE(!consumer.Append("%" ))) return false; |
164 | p = percent + 2; |
165 | continue; |
166 | } |
167 | } |
168 | return true; |
169 | } |
170 | |
171 | // Always returns true, or fails to compile in a constexpr context if s does not |
172 | // point to a constexpr char array. |
173 | constexpr bool EnsureConstexpr(string_view s) { |
174 | return s.empty() || s[0] == s[0]; |
175 | } |
176 | |
177 | class ParsedFormatBase { |
178 | public: |
179 | explicit ParsedFormatBase(string_view format, bool allow_ignored, |
180 | std::initializer_list<Conv> convs); |
181 | |
182 | ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } |
183 | |
184 | ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); } |
185 | |
186 | ParsedFormatBase& operator=(const ParsedFormatBase& other) { |
187 | if (this == &other) return *this; |
188 | has_error_ = other.has_error_; |
189 | items_ = other.items_; |
190 | size_t text_size = items_.empty() ? 0 : items_.back().text_end; |
191 | data_.reset(new char[text_size]); |
192 | memcpy(data_.get(), other.data_.get(), text_size); |
193 | return *this; |
194 | } |
195 | |
196 | ParsedFormatBase& operator=(ParsedFormatBase&& other) { |
197 | if (this == &other) return *this; |
198 | has_error_ = other.has_error_; |
199 | data_ = std::move(other.data_); |
200 | items_ = std::move(other.items_); |
201 | // Reset the vector to make sure the invariants hold. |
202 | other.items_.clear(); |
203 | return *this; |
204 | } |
205 | |
206 | template <typename Consumer> |
207 | bool ProcessFormat(Consumer consumer) const { |
208 | const char* const base = data_.get(); |
209 | string_view text(base, 0); |
210 | for (const auto& item : items_) { |
211 | const char* const end = text.data() + text.size(); |
212 | text = string_view(end, (base + item.text_end) - end); |
213 | if (item.is_conversion) { |
214 | if (!consumer.ConvertOne(item.conv, text)) return false; |
215 | } else { |
216 | if (!consumer.Append(text)) return false; |
217 | } |
218 | } |
219 | return !has_error_; |
220 | } |
221 | |
222 | bool has_error() const { return has_error_; } |
223 | |
224 | private: |
225 | // Returns whether the conversions match and if !allow_ignored it verifies |
226 | // that all conversions are used by the format. |
227 | bool MatchesConversions(bool allow_ignored, |
228 | std::initializer_list<Conv> convs) const; |
229 | |
230 | struct ParsedFormatConsumer; |
231 | |
232 | struct ConversionItem { |
233 | bool is_conversion; |
234 | // Points to the past-the-end location of this element in the data_ array. |
235 | size_t text_end; |
236 | UnboundConversion conv; |
237 | }; |
238 | |
239 | bool has_error_; |
240 | std::unique_ptr<char[]> data_; |
241 | std::vector<ConversionItem> items_; |
242 | }; |
243 | |
244 | |
245 | // A value type representing a preparsed format. These can be created, copied |
246 | // around, and reused to speed up formatting loops. |
247 | // The user must specify through the template arguments the conversion |
248 | // characters used in the format. This will be checked at compile time. |
249 | // |
250 | // This class uses Conv enum values to specify each argument. |
251 | // This allows for more flexibility as you can specify multiple possible |
252 | // conversion characters for each argument. |
253 | // ParsedFormat<char...> is a simplified alias for when the user only |
254 | // needs to specify a single conversion character for each argument. |
255 | // |
256 | // Example: |
257 | // // Extended format supports multiple characters per argument: |
258 | // using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>; |
259 | // MyFormat GetFormat(bool use_hex) { |
260 | // if (use_hex) return MyFormat("foo %x bar"); |
261 | // return MyFormat("foo %d bar"); |
262 | // } |
263 | // // 'format' can be used with any value that supports 'd' and 'x', |
264 | // // like `int`. |
265 | // auto format = GetFormat(use_hex); |
266 | // value = StringF(format, i); |
267 | // |
268 | // This class also supports runtime format checking with the ::New() and |
269 | // ::NewAllowIgnored() factory functions. |
270 | // This is the only API that allows the user to pass a runtime specified format |
271 | // string. These factory functions will return NULL if the format does not match |
272 | // the conversions requested by the user. |
273 | template <str_format_internal::Conv... C> |
274 | class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { |
275 | public: |
276 | explicit ExtendedParsedFormat(string_view format) |
277 | #if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER |
278 | __attribute__(( |
279 | enable_if(str_format_internal::EnsureConstexpr(format), |
280 | "Format std::string is not constexpr." ), |
281 | enable_if(str_format_internal::ValidFormatImpl<C...>(format), |
282 | "Format specified does not match the template arguments." ))) |
283 | #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER |
284 | : ExtendedParsedFormat(format, false) { |
285 | } |
286 | |
287 | // ExtendedParsedFormat factory function. |
288 | // The user still has to specify the conversion characters, but they will not |
289 | // be checked at compile time. Instead, it will be checked at runtime. |
290 | // This delays the checking to runtime, but allows the user to pass |
291 | // dynamically sourced formats. |
292 | // It returns NULL if the format does not match the conversion characters. |
293 | // The user is responsible for checking the return value before using it. |
294 | // |
295 | // The 'New' variant will check that all the specified arguments are being |
296 | // consumed by the format and return NULL if any argument is being ignored. |
297 | // The 'NewAllowIgnored' variant will not verify this and will allow formats |
298 | // that ignore arguments. |
299 | static std::unique_ptr<ExtendedParsedFormat> New(string_view format) { |
300 | return New(format, false); |
301 | } |
302 | static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored( |
303 | string_view format) { |
304 | return New(format, true); |
305 | } |
306 | |
307 | private: |
308 | static std::unique_ptr<ExtendedParsedFormat> New(string_view format, |
309 | bool allow_ignored) { |
310 | std::unique_ptr<ExtendedParsedFormat> conv( |
311 | new ExtendedParsedFormat(format, allow_ignored)); |
312 | if (conv->has_error()) return nullptr; |
313 | return conv; |
314 | } |
315 | |
316 | ExtendedParsedFormat(string_view s, bool allow_ignored) |
317 | : ParsedFormatBase(s, allow_ignored, {C...}) {} |
318 | }; |
319 | } // namespace str_format_internal |
320 | } // namespace absl |
321 | |
322 | #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ |
323 | |