1#include "absl/strings/internal/str_format/parser.h"
2
3#include <assert.h>
4#include <string.h>
5#include <wchar.h>
6#include <cctype>
7#include <cstdint>
8
9#include <algorithm>
10#include <initializer_list>
11#include <limits>
12#include <ostream>
13#include <string>
14#include <unordered_set>
15
16namespace absl {
17namespace str_format_internal {
18
19using CC = ConversionChar::Id;
20using LM = LengthMod::Id;
21ABSL_CONST_INIT const ConvTag kTags[256] = {
22 {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
23 {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
24 {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
25 {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
26 {}, {}, {}, {}, {}, {}, {}, {}, // 20-27
27 {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f
28 {}, {}, {}, {}, {}, {}, {}, {}, // 30-37
29 {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f
30 {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG
31 {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
32 {}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW
33 CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
34 {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
35 LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
36 CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
37 CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
38 {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
39 {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
40 {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
41 {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
42 {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
43 {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
44 {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
45 {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
46 {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
47 {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
48 {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
49 {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
50 {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
51 {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
52 {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
53 {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
54};
55
56namespace {
57
58bool CheckFastPathSetting(const UnboundConversion& conv) {
59 bool should_be_basic = !conv.flags.left && //
60 !conv.flags.show_pos && //
61 !conv.flags.sign_col && //
62 !conv.flags.alt && //
63 !conv.flags.zero && //
64 (conv.width.value() == -1) &&
65 (conv.precision.value() == -1);
66 if (should_be_basic != conv.flags.basic) {
67 fprintf(stderr,
68 "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
69 "width=%d precision=%d\n",
70 conv.flags.basic, conv.flags.left, conv.flags.show_pos,
71 conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
72 conv.width.value(), conv.precision.value());
73 }
74 return should_be_basic == conv.flags.basic;
75}
76
77template <bool is_positional>
78const char *ConsumeConversion(const char *pos, const char *const end,
79 UnboundConversion *conv, int *next_arg) {
80 const char* const original_pos = pos;
81 char c;
82 // Read the next char into `c` and update `pos`. Returns false if there are
83 // no more chars to read.
84#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
85 do { \
86 if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
87 c = *pos++; \
88 } while (0)
89
90 const auto parse_digits = [&] {
91 int digits = c - '0';
92 // We do not want to overflow `digits` so we consume at most digits10
93 // digits. If there are more digits the parsing will fail later on when the
94 // digit doesn't match the expected characters.
95 int num_digits = std::numeric_limits<int>::digits10;
96 for (;;) {
97 if (ABSL_PREDICT_FALSE(pos == end)) break;
98 c = *pos++;
99 if (!std::isdigit(c)) break;
100 --num_digits;
101 if (ABSL_PREDICT_FALSE(!num_digits)) break;
102 digits = 10 * digits + c - '0';
103 }
104 return digits;
105 };
106
107 if (is_positional) {
108 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
109 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
110 conv->arg_position = parse_digits();
111 assert(conv->arg_position > 0);
112 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
113 }
114
115 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
116
117 // We should start with the basic flag on.
118 assert(conv->flags.basic);
119
120 // Any non alpha character makes this conversion not basic.
121 // This includes flags (-+ #0), width (1-9, *) or precision (.).
122 // All conversion characters and length modifiers are alpha characters.
123 if (c < 'A') {
124 conv->flags.basic = false;
125
126 for (; c <= '0';) {
127 // FIXME: We might be able to speed this up reusing the lookup table from
128 // above. It might require changing Flags to be a plain integer where we
129 // can |= a value.
130 switch (c) {
131 case '-':
132 conv->flags.left = true;
133 break;
134 case '+':
135 conv->flags.show_pos = true;
136 break;
137 case ' ':
138 conv->flags.sign_col = true;
139 break;
140 case '#':
141 conv->flags.alt = true;
142 break;
143 case '0':
144 conv->flags.zero = true;
145 break;
146 default:
147 goto flags_done;
148 }
149 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
150 }
151flags_done:
152
153 if (c <= '9') {
154 if (c >= '0') {
155 int maybe_width = parse_digits();
156 if (!is_positional && c == '$') {
157 if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
158 // Positional conversion.
159 *next_arg = -1;
160 conv->flags = Flags();
161 conv->flags.basic = true;
162 return ConsumeConversion<true>(original_pos, end, conv, next_arg);
163 }
164 conv->width.set_value(maybe_width);
165 } else if (c == '*') {
166 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
167 if (is_positional) {
168 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
169 conv->width.set_from_arg(parse_digits());
170 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
171 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
172 } else {
173 conv->width.set_from_arg(++*next_arg);
174 }
175 }
176 }
177
178 if (c == '.') {
179 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
180 if (std::isdigit(c)) {
181 conv->precision.set_value(parse_digits());
182 } else if (c == '*') {
183 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
184 if (is_positional) {
185 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
186 conv->precision.set_from_arg(parse_digits());
187 if (c != '$') return nullptr;
188 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
189 } else {
190 conv->precision.set_from_arg(++*next_arg);
191 }
192 } else {
193 conv->precision.set_value(0);
194 }
195 }
196 }
197
198 auto tag = GetTagForChar(c);
199
200 if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
201 if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
202
203 // It is a length modifier.
204 using str_format_internal::LengthMod;
205 LengthMod length_mod = tag.as_length();
206 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
207 if (c == 'h' && length_mod.id() == LengthMod::h) {
208 conv->length_mod = LengthMod::FromId(LengthMod::hh);
209 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
210 } else if (c == 'l' && length_mod.id() == LengthMod::l) {
211 conv->length_mod = LengthMod::FromId(LengthMod::ll);
212 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
213 } else {
214 conv->length_mod = length_mod;
215 }
216 tag = GetTagForChar(c);
217 if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
218 }
219
220 assert(CheckFastPathSetting(*conv));
221 (void)(&CheckFastPathSetting);
222
223 conv->conv = tag.as_conv();
224 if (!is_positional) conv->arg_position = ++*next_arg;
225 return pos;
226}
227
228} // namespace
229
230const char *ConsumeUnboundConversion(const char *p, const char *end,
231 UnboundConversion *conv, int *next_arg) {
232 if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
233 return ConsumeConversion<false>(p, end, conv, next_arg);
234}
235
236struct ParsedFormatBase::ParsedFormatConsumer {
237 explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
238 : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
239
240 bool Append(string_view s) {
241 if (s.empty()) return true;
242
243 size_t text_end = AppendText(s);
244
245 if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
246 // Let's extend the existing text run.
247 parsed->items_.back().text_end = text_end;
248 } else {
249 // Let's make a new text run.
250 parsed->items_.push_back({false, text_end, {}});
251 }
252 return true;
253 }
254
255 bool ConvertOne(const UnboundConversion &conv, string_view s) {
256 size_t text_end = AppendText(s);
257 parsed->items_.push_back({true, text_end, conv});
258 return true;
259 }
260
261 size_t AppendText(string_view s) {
262 memcpy(data_pos, s.data(), s.size());
263 data_pos += s.size();
264 return static_cast<size_t>(data_pos - parsed->data_.get());
265 }
266
267 ParsedFormatBase *parsed;
268 char* data_pos;
269};
270
271ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored,
272 std::initializer_list<Conv> convs)
273 : data_(format.empty() ? nullptr : new char[format.size()]) {
274 has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
275 !MatchesConversions(allow_ignored, convs);
276}
277
278bool ParsedFormatBase::MatchesConversions(
279 bool allow_ignored, std::initializer_list<Conv> convs) const {
280 std::unordered_set<int> used;
281 auto add_if_valid_conv = [&](int pos, char c) {
282 if (static_cast<size_t>(pos) > convs.size() ||
283 !Contains(convs.begin()[pos - 1], c))
284 return false;
285 used.insert(pos);
286 return true;
287 };
288 for (const ConversionItem &item : items_) {
289 if (!item.is_conversion) continue;
290 auto &conv = item.conv;
291 if (conv.precision.is_from_arg() &&
292 !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
293 return false;
294 if (conv.width.is_from_arg() &&
295 !add_if_valid_conv(conv.width.get_from_arg(), '*'))
296 return false;
297 if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false;
298 }
299 return used.size() == convs.size() || allow_ignored;
300}
301
302} // namespace str_format_internal
303} // namespace absl
304