| 1 | #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ |
| 2 | #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ |
| 3 | |
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>

#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/extension.h"
| 17 | |
| 18 | namespace absl { |
| 19 | namespace str_format_internal { |
| 20 | |
| 21 | // The analyzed properties of a single specified conversion. |
| 22 | struct UnboundConversion { |
| 23 | UnboundConversion() |
| 24 | : flags() /* This is required to zero all the fields of flags. */ { |
| 25 | flags.basic = true; |
| 26 | } |
| 27 | |
| 28 | class InputValue { |
| 29 | public: |
| 30 | void set_value(int value) { |
| 31 | assert(value >= 0); |
| 32 | value_ = value; |
| 33 | } |
| 34 | int value() const { return value_; } |
| 35 | |
| 36 | // Marks the value as "from arg". aka the '*' format. |
| 37 | // Requires `value >= 1`. |
| 38 | // When set, is_from_arg() return true and get_from_arg() returns the |
| 39 | // original value. |
| 40 | // `value()`'s return value is unspecfied in this state. |
| 41 | void set_from_arg(int value) { |
| 42 | assert(value > 0); |
| 43 | value_ = -value - 1; |
| 44 | } |
| 45 | bool is_from_arg() const { return value_ < -1; } |
| 46 | int get_from_arg() const { |
| 47 | assert(is_from_arg()); |
| 48 | return -value_ - 1; |
| 49 | } |
| 50 | |
| 51 | private: |
| 52 | int value_ = -1; |
| 53 | }; |
| 54 | |
| 55 | // No need to initialize. It will always be set in the parser. |
| 56 | int arg_position; |
| 57 | |
| 58 | InputValue width; |
| 59 | InputValue precision; |
| 60 | |
| 61 | Flags flags; |
| 62 | LengthMod length_mod; |
| 63 | ConversionChar conv; |
| 64 | }; |
| 65 | |
// Consumes a conversion spec (not including the leading '%') from the front
// of [p, end), if a valid one is present.
// Examples of valid specs: "s", "d", "-12.6f".
//
// On success, returns a pointer to the first character following the
// conversion spec, and the broken-down spec is stored in `*conv`.
// On failure, returns nullptr.
//
// NOTE(review): `next_arg` looks like the sequential-argument counter that
// ParseFormatString threads through every call (it treats a negative value as
// "a positional argument was seen"); confirm against the definition, which is
// not part of this header.
const char* ConsumeUnboundConversion(const char* p, const char* end,
                                     UnboundConversion* conv, int* next_arg);
| 73 | |
| 74 | // Helper tag class for the table below. |
| 75 | // It allows fast `char -> ConversionChar/LengthMod` checking and conversions. |
| 76 | class ConvTag { |
| 77 | public: |
| 78 | constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT |
| 79 | // We invert the length modifiers to make them negative so that we can easily |
| 80 | // test for them. |
| 81 | constexpr ConvTag(LengthMod::Id id) : tag_(~id) {} // NOLINT |
| 82 | // Everything else is -128, which is negative to make is_conv() simpler. |
| 83 | constexpr ConvTag() : tag_(-128) {} |
| 84 | |
| 85 | bool is_conv() const { return tag_ >= 0; } |
| 86 | bool is_length() const { return tag_ < 0 && tag_ != -128; } |
| 87 | ConversionChar as_conv() const { |
| 88 | assert(is_conv()); |
| 89 | return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_)); |
| 90 | } |
| 91 | LengthMod as_length() const { |
| 92 | assert(is_length()); |
| 93 | return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_)); |
| 94 | } |
| 95 | |
| 96 | private: |
| 97 | std::int8_t tag_; |
| 98 | }; |
| 99 | |
| 100 | extern const ConvTag kTags[256]; |
| 101 | // Keep a single table for all the conversion chars and length modifiers. |
| 102 | inline ConvTag GetTagForChar(char c) { |
| 103 | return kTags[static_cast<unsigned char>(c)]; |
| 104 | } |
| 105 | |
| 106 | // Parse the format string provided in 'src' and pass the identified items into |
| 107 | // 'consumer'. |
| 108 | // Text runs will be passed by calling |
| 109 | // Consumer::Append(string_view); |
| 110 | // ConversionItems will be passed by calling |
| 111 | // Consumer::ConvertOne(UnboundConversion, string_view); |
| 112 | // In the case of ConvertOne, the string_view that is passed is the |
| 113 | // portion of the format string corresponding to the conversion, not including |
| 114 | // the leading %. On success, it returns true. On failure, it stops and returns |
| 115 | // false. |
| 116 | template <typename Consumer> |
| 117 | bool ParseFormatString(string_view src, Consumer consumer) { |
| 118 | int next_arg = 0; |
| 119 | const char* p = src.data(); |
| 120 | const char* const end = p + src.size(); |
| 121 | while (p != end) { |
| 122 | const char* percent = static_cast<const char*>(memchr(p, '%', end - p)); |
| 123 | if (!percent) { |
| 124 | // We found the last substring. |
| 125 | return consumer.Append(string_view(p, end - p)); |
| 126 | } |
| 127 | // We found a percent, so push the text run then process the percent. |
| 128 | if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) { |
| 129 | return false; |
| 130 | } |
| 131 | if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false; |
| 132 | |
| 133 | auto tag = GetTagForChar(percent[1]); |
| 134 | if (tag.is_conv()) { |
| 135 | if (ABSL_PREDICT_FALSE(next_arg < 0)) { |
| 136 | // This indicates an error in the format std::string. |
| 137 | // The only way to get `next_arg < 0` here is to have a positional |
| 138 | // argument first which sets next_arg to -1 and then a non-positional |
| 139 | // argument. |
| 140 | return false; |
| 141 | } |
| 142 | p = percent + 2; |
| 143 | |
| 144 | // Keep this case separate from the one below. |
| 145 | // ConvertOne is more efficient when the compiler can see that the `basic` |
| 146 | // flag is set. |
| 147 | UnboundConversion conv; |
| 148 | conv.conv = tag.as_conv(); |
| 149 | conv.arg_position = ++next_arg; |
| 150 | if (ABSL_PREDICT_FALSE( |
| 151 | !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) { |
| 152 | return false; |
| 153 | } |
| 154 | } else if (percent[1] != '%') { |
| 155 | UnboundConversion conv; |
| 156 | p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); |
| 157 | if (ABSL_PREDICT_FALSE(p == nullptr)) return false; |
| 158 | if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( |
| 159 | conv, string_view(percent + 1, p - (percent + 1))))) { |
| 160 | return false; |
| 161 | } |
| 162 | } else { |
| 163 | if (ABSL_PREDICT_FALSE(!consumer.Append("%" ))) return false; |
| 164 | p = percent + 2; |
| 165 | continue; |
| 166 | } |
| 167 | } |
| 168 | return true; |
| 169 | } |
| 170 | |
// Evaluates to true for every input. Its only purpose is to read `s` inside a
// constant expression: when used in a constexpr context, compilation fails
// unless s points to a constexpr char array.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : (s[0] == s[0]);
}
| 176 | |
| 177 | class ParsedFormatBase { |
| 178 | public: |
| 179 | explicit ParsedFormatBase(string_view format, bool allow_ignored, |
| 180 | std::initializer_list<Conv> convs); |
| 181 | |
| 182 | ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } |
| 183 | |
| 184 | ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); } |
| 185 | |
| 186 | ParsedFormatBase& operator=(const ParsedFormatBase& other) { |
| 187 | if (this == &other) return *this; |
| 188 | has_error_ = other.has_error_; |
| 189 | items_ = other.items_; |
| 190 | size_t text_size = items_.empty() ? 0 : items_.back().text_end; |
| 191 | data_.reset(new char[text_size]); |
| 192 | memcpy(data_.get(), other.data_.get(), text_size); |
| 193 | return *this; |
| 194 | } |
| 195 | |
| 196 | ParsedFormatBase& operator=(ParsedFormatBase&& other) { |
| 197 | if (this == &other) return *this; |
| 198 | has_error_ = other.has_error_; |
| 199 | data_ = std::move(other.data_); |
| 200 | items_ = std::move(other.items_); |
| 201 | // Reset the vector to make sure the invariants hold. |
| 202 | other.items_.clear(); |
| 203 | return *this; |
| 204 | } |
| 205 | |
| 206 | template <typename Consumer> |
| 207 | bool ProcessFormat(Consumer consumer) const { |
| 208 | const char* const base = data_.get(); |
| 209 | string_view text(base, 0); |
| 210 | for (const auto& item : items_) { |
| 211 | const char* const end = text.data() + text.size(); |
| 212 | text = string_view(end, (base + item.text_end) - end); |
| 213 | if (item.is_conversion) { |
| 214 | if (!consumer.ConvertOne(item.conv, text)) return false; |
| 215 | } else { |
| 216 | if (!consumer.Append(text)) return false; |
| 217 | } |
| 218 | } |
| 219 | return !has_error_; |
| 220 | } |
| 221 | |
| 222 | bool has_error() const { return has_error_; } |
| 223 | |
| 224 | private: |
| 225 | // Returns whether the conversions match and if !allow_ignored it verifies |
| 226 | // that all conversions are used by the format. |
| 227 | bool MatchesConversions(bool allow_ignored, |
| 228 | std::initializer_list<Conv> convs) const; |
| 229 | |
| 230 | struct ParsedFormatConsumer; |
| 231 | |
| 232 | struct ConversionItem { |
| 233 | bool is_conversion; |
| 234 | // Points to the past-the-end location of this element in the data_ array. |
| 235 | size_t text_end; |
| 236 | UnboundConversion conv; |
| 237 | }; |
| 238 | |
| 239 | bool has_error_; |
| 240 | std::unique_ptr<char[]> data_; |
| 241 | std::vector<ConversionItem> items_; |
| 242 | }; |
| 243 | |
| 244 | |
| 245 | // A value type representing a preparsed format. These can be created, copied |
| 246 | // around, and reused to speed up formatting loops. |
| 247 | // The user must specify through the template arguments the conversion |
| 248 | // characters used in the format. This will be checked at compile time. |
| 249 | // |
| 250 | // This class uses Conv enum values to specify each argument. |
| 251 | // This allows for more flexibility as you can specify multiple possible |
| 252 | // conversion characters for each argument. |
| 253 | // ParsedFormat<char...> is a simplified alias for when the user only |
| 254 | // needs to specify a single conversion character for each argument. |
| 255 | // |
| 256 | // Example: |
| 257 | // // Extended format supports multiple characters per argument: |
| 258 | // using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>; |
| 259 | // MyFormat GetFormat(bool use_hex) { |
| 260 | // if (use_hex) return MyFormat("foo %x bar"); |
| 261 | // return MyFormat("foo %d bar"); |
| 262 | // } |
| 263 | // // 'format' can be used with any value that supports 'd' and 'x', |
| 264 | // // like `int`. |
| 265 | // auto format = GetFormat(use_hex); |
| 266 | // value = StringF(format, i); |
| 267 | // |
| 268 | // This class also supports runtime format checking with the ::New() and |
| 269 | // ::NewAllowIgnored() factory functions. |
| 270 | // This is the only API that allows the user to pass a runtime specified format |
| 271 | // string. These factory functions will return NULL if the format does not match |
| 272 | // the conversions requested by the user. |
| 273 | template <str_format_internal::Conv... C> |
| 274 | class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { |
| 275 | public: |
| 276 | explicit ExtendedParsedFormat(string_view format) |
| 277 | #if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER |
| 278 | __attribute__(( |
| 279 | enable_if(str_format_internal::EnsureConstexpr(format), |
| 280 | "Format std::string is not constexpr." ), |
| 281 | enable_if(str_format_internal::ValidFormatImpl<C...>(format), |
| 282 | "Format specified does not match the template arguments." ))) |
| 283 | #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER |
| 284 | : ExtendedParsedFormat(format, false) { |
| 285 | } |
| 286 | |
| 287 | // ExtendedParsedFormat factory function. |
| 288 | // The user still has to specify the conversion characters, but they will not |
| 289 | // be checked at compile time. Instead, it will be checked at runtime. |
| 290 | // This delays the checking to runtime, but allows the user to pass |
| 291 | // dynamically sourced formats. |
| 292 | // It returns NULL if the format does not match the conversion characters. |
| 293 | // The user is responsible for checking the return value before using it. |
| 294 | // |
| 295 | // The 'New' variant will check that all the specified arguments are being |
| 296 | // consumed by the format and return NULL if any argument is being ignored. |
| 297 | // The 'NewAllowIgnored' variant will not verify this and will allow formats |
| 298 | // that ignore arguments. |
| 299 | static std::unique_ptr<ExtendedParsedFormat> New(string_view format) { |
| 300 | return New(format, false); |
| 301 | } |
| 302 | static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored( |
| 303 | string_view format) { |
| 304 | return New(format, true); |
| 305 | } |
| 306 | |
| 307 | private: |
| 308 | static std::unique_ptr<ExtendedParsedFormat> New(string_view format, |
| 309 | bool allow_ignored) { |
| 310 | std::unique_ptr<ExtendedParsedFormat> conv( |
| 311 | new ExtendedParsedFormat(format, allow_ignored)); |
| 312 | if (conv->has_error()) return nullptr; |
| 313 | return conv; |
| 314 | } |
| 315 | |
| 316 | ExtendedParsedFormat(string_view s, bool allow_ignored) |
| 317 | : ParsedFormatBase(s, allow_ignored, {C...}) {} |
| 318 | }; |
| 319 | } // namespace str_format_internal |
| 320 | } // namespace absl |
| 321 | |
| 322 | #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ |
| 323 | |