1#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
2#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
3
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>

#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/extension.h"
17
18namespace absl {
19namespace str_format_internal {
20
21// The analyzed properties of a single specified conversion.
22struct UnboundConversion {
23 UnboundConversion()
24 : flags() /* This is required to zero all the fields of flags. */ {
25 flags.basic = true;
26 }
27
28 class InputValue {
29 public:
30 void set_value(int value) {
31 assert(value >= 0);
32 value_ = value;
33 }
34 int value() const { return value_; }
35
36 // Marks the value as "from arg". aka the '*' format.
37 // Requires `value >= 1`.
38 // When set, is_from_arg() return true and get_from_arg() returns the
39 // original value.
40 // `value()`'s return value is unspecfied in this state.
41 void set_from_arg(int value) {
42 assert(value > 0);
43 value_ = -value - 1;
44 }
45 bool is_from_arg() const { return value_ < -1; }
46 int get_from_arg() const {
47 assert(is_from_arg());
48 return -value_ - 1;
49 }
50
51 private:
52 int value_ = -1;
53 };
54
55 // No need to initialize. It will always be set in the parser.
56 int arg_position;
57
58 InputValue width;
59 InputValue precision;
60
61 Flags flags;
62 LengthMod length_mod;
63 ConversionChar conv;
64};
65
66// Consume conversion spec prefix (not including '%') of [p, end) if valid.
67// Examples of valid specs would be e.g.: "s", "d", "-12.6f".
68// If valid, it returns the first character following the conversion spec,
69// and the spec part is broken down and returned in 'conv'.
70// If invalid, returns nullptr.
71const char* ConsumeUnboundConversion(const char* p, const char* end,
72 UnboundConversion* conv, int* next_arg);
73
74// Helper tag class for the table below.
75// It allows fast `char -> ConversionChar/LengthMod` checking and conversions.
76class ConvTag {
77 public:
78 constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT
79 // We invert the length modifiers to make them negative so that we can easily
80 // test for them.
81 constexpr ConvTag(LengthMod::Id id) : tag_(~id) {} // NOLINT
82 // Everything else is -128, which is negative to make is_conv() simpler.
83 constexpr ConvTag() : tag_(-128) {}
84
85 bool is_conv() const { return tag_ >= 0; }
86 bool is_length() const { return tag_ < 0 && tag_ != -128; }
87 ConversionChar as_conv() const {
88 assert(is_conv());
89 return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_));
90 }
91 LengthMod as_length() const {
92 assert(is_length());
93 return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_));
94 }
95
96 private:
97 std::int8_t tag_;
98};
99
100extern const ConvTag kTags[256];
101// Keep a single table for all the conversion chars and length modifiers.
102inline ConvTag GetTagForChar(char c) {
103 return kTags[static_cast<unsigned char>(c)];
104}
105
106// Parse the format string provided in 'src' and pass the identified items into
107// 'consumer'.
108// Text runs will be passed by calling
109// Consumer::Append(string_view);
110// ConversionItems will be passed by calling
111// Consumer::ConvertOne(UnboundConversion, string_view);
112// In the case of ConvertOne, the string_view that is passed is the
113// portion of the format string corresponding to the conversion, not including
114// the leading %. On success, it returns true. On failure, it stops and returns
115// false.
116template <typename Consumer>
117bool ParseFormatString(string_view src, Consumer consumer) {
118 int next_arg = 0;
119 const char* p = src.data();
120 const char* const end = p + src.size();
121 while (p != end) {
122 const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
123 if (!percent) {
124 // We found the last substring.
125 return consumer.Append(string_view(p, end - p));
126 }
127 // We found a percent, so push the text run then process the percent.
128 if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
129 return false;
130 }
131 if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
132
133 auto tag = GetTagForChar(percent[1]);
134 if (tag.is_conv()) {
135 if (ABSL_PREDICT_FALSE(next_arg < 0)) {
136 // This indicates an error in the format std::string.
137 // The only way to get `next_arg < 0` here is to have a positional
138 // argument first which sets next_arg to -1 and then a non-positional
139 // argument.
140 return false;
141 }
142 p = percent + 2;
143
144 // Keep this case separate from the one below.
145 // ConvertOne is more efficient when the compiler can see that the `basic`
146 // flag is set.
147 UnboundConversion conv;
148 conv.conv = tag.as_conv();
149 conv.arg_position = ++next_arg;
150 if (ABSL_PREDICT_FALSE(
151 !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
152 return false;
153 }
154 } else if (percent[1] != '%') {
155 UnboundConversion conv;
156 p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
157 if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
158 if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
159 conv, string_view(percent + 1, p - (percent + 1))))) {
160 return false;
161 }
162 } else {
163 if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
164 p = percent + 2;
165 continue;
166 }
167 }
168 return true;
169}
170
// Always returns true. When evaluated in a constexpr context it fails to
// compile if `s` does not point to a constexpr char array, because reading
// `s[0]` is then not a constant expression.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : s[0] == s[0];
}
176
177class ParsedFormatBase {
178 public:
179 explicit ParsedFormatBase(string_view format, bool allow_ignored,
180 std::initializer_list<Conv> convs);
181
182 ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
183
184 ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
185
186 ParsedFormatBase& operator=(const ParsedFormatBase& other) {
187 if (this == &other) return *this;
188 has_error_ = other.has_error_;
189 items_ = other.items_;
190 size_t text_size = items_.empty() ? 0 : items_.back().text_end;
191 data_.reset(new char[text_size]);
192 memcpy(data_.get(), other.data_.get(), text_size);
193 return *this;
194 }
195
196 ParsedFormatBase& operator=(ParsedFormatBase&& other) {
197 if (this == &other) return *this;
198 has_error_ = other.has_error_;
199 data_ = std::move(other.data_);
200 items_ = std::move(other.items_);
201 // Reset the vector to make sure the invariants hold.
202 other.items_.clear();
203 return *this;
204 }
205
206 template <typename Consumer>
207 bool ProcessFormat(Consumer consumer) const {
208 const char* const base = data_.get();
209 string_view text(base, 0);
210 for (const auto& item : items_) {
211 const char* const end = text.data() + text.size();
212 text = string_view(end, (base + item.text_end) - end);
213 if (item.is_conversion) {
214 if (!consumer.ConvertOne(item.conv, text)) return false;
215 } else {
216 if (!consumer.Append(text)) return false;
217 }
218 }
219 return !has_error_;
220 }
221
222 bool has_error() const { return has_error_; }
223
224 private:
225 // Returns whether the conversions match and if !allow_ignored it verifies
226 // that all conversions are used by the format.
227 bool MatchesConversions(bool allow_ignored,
228 std::initializer_list<Conv> convs) const;
229
230 struct ParsedFormatConsumer;
231
232 struct ConversionItem {
233 bool is_conversion;
234 // Points to the past-the-end location of this element in the data_ array.
235 size_t text_end;
236 UnboundConversion conv;
237 };
238
239 bool has_error_;
240 std::unique_ptr<char[]> data_;
241 std::vector<ConversionItem> items_;
242};
243
244
245// A value type representing a preparsed format. These can be created, copied
246// around, and reused to speed up formatting loops.
247// The user must specify through the template arguments the conversion
248// characters used in the format. This will be checked at compile time.
249//
250// This class uses Conv enum values to specify each argument.
251// This allows for more flexibility as you can specify multiple possible
252// conversion characters for each argument.
253// ParsedFormat<char...> is a simplified alias for when the user only
254// needs to specify a single conversion character for each argument.
255//
256// Example:
257// // Extended format supports multiple characters per argument:
258// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
259// MyFormat GetFormat(bool use_hex) {
260// if (use_hex) return MyFormat("foo %x bar");
261// return MyFormat("foo %d bar");
262// }
263// // 'format' can be used with any value that supports 'd' and 'x',
264// // like `int`.
265// auto format = GetFormat(use_hex);
266// value = StringF(format, i);
267//
268// This class also supports runtime format checking with the ::New() and
269// ::NewAllowIgnored() factory functions.
270// This is the only API that allows the user to pass a runtime specified format
271// string. These factory functions will return NULL if the format does not match
272// the conversions requested by the user.
273template <str_format_internal::Conv... C>
274class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
275 public:
276 explicit ExtendedParsedFormat(string_view format)
277#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
278 __attribute__((
279 enable_if(str_format_internal::EnsureConstexpr(format),
280 "Format std::string is not constexpr."),
281 enable_if(str_format_internal::ValidFormatImpl<C...>(format),
282 "Format specified does not match the template arguments.")))
283#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
284 : ExtendedParsedFormat(format, false) {
285 }
286
287 // ExtendedParsedFormat factory function.
288 // The user still has to specify the conversion characters, but they will not
289 // be checked at compile time. Instead, it will be checked at runtime.
290 // This delays the checking to runtime, but allows the user to pass
291 // dynamically sourced formats.
292 // It returns NULL if the format does not match the conversion characters.
293 // The user is responsible for checking the return value before using it.
294 //
295 // The 'New' variant will check that all the specified arguments are being
296 // consumed by the format and return NULL if any argument is being ignored.
297 // The 'NewAllowIgnored' variant will not verify this and will allow formats
298 // that ignore arguments.
299 static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
300 return New(format, false);
301 }
302 static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
303 string_view format) {
304 return New(format, true);
305 }
306
307 private:
308 static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
309 bool allow_ignored) {
310 std::unique_ptr<ExtendedParsedFormat> conv(
311 new ExtendedParsedFormat(format, allow_ignored));
312 if (conv->has_error()) return nullptr;
313 return conv;
314 }
315
316 ExtendedParsedFormat(string_view s, bool allow_ignored)
317 : ParsedFormatBase(s, allow_ignored, {C...}) {}
318};
319} // namespace str_format_internal
320} // namespace absl
321
322#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
323